Commit 944af01c authored by Yeldar Toktasynov

Last working version with gpuVScpu comparison

parent 5539340f
import time
# Loop forever: wait 20 seconds, then print a message, and repeat.
while True:
    time.sleep(20)
    print("Another 20 seconds of your life")
......@@ -25,7 +25,7 @@ producers:
postgres:
username: postgres
password: postgres
host: 10.30.13.116
host: 10.30.10.151
port: 5432
db_name: clusters
table_name: ru_and_kz_cluster
from arase import Arase
......@@ -12,19 +12,23 @@ xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.
ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)
while(True):
print("HUITAA")
time.sleep(20)
# print(faiss.getNumDevices())
res = faiss.StandardGpuResources() # use a single GPU
# build a flat (CPU) index
# index_flat = faiss.IndexFlatL2(d)
# # make it into a gpu index
# gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
index_flat = faiss.IndexFlatL2(d)
# make it into a gpu index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
# gpu_index_flat.add(xb) # add vectors to the index
# print(gpu_index_flat.ntotal)
gpu_index_flat.add(xb) # add vectors to the index
print(gpu_index_flat.ntotal)
# k = 4 # we want to see 4 nearest neighbors
# D, I = gpu_index_flat.search(xq, k) # actual search
# print(I[:5]) # neighbors of the 5 first queries
# print(I[-5:]) # neighbors of the 5 last queries
k = 4 # we want to see 4 nearest neighbors
D, I = gpu_index_flat.search(xq, k) # actual search
print(I[:5]) # neighbors of the 5 first queries
print(I[-5:]) # neighbors of the 5 last queries
......@@ -2,6 +2,8 @@ import pickle as pkl
import numpy as np
import faiss
import math
import timeit
class Faiss_cluster:
......@@ -17,7 +19,7 @@ class Faiss_cluster:
dimension = 300
ncentroids = 1
verbose = False
kmeans = faiss.Kmeans(dimension, ncentroids, verbose=verbose)
kmeans = faiss.Kmeans(dimension, ncentroids, verbose=verbose, gpu=True)
kmeans.train(np.ascontiguousarray(vectors))
return kmeans.centroids[0]
......@@ -33,16 +35,43 @@ class Faiss_cluster:
vectors = vectors.astype('float32')
dimension = 300
quantiser = faiss.IndexFlatL2(dimension)
start = timeit.default_timer()
index = faiss.IndexIVFFlat(quantiser, dimension, faiss.METRIC_L2)
stop = timeit.default_timer()
print('Time1: ', stop - start)
start = timeit.default_timer()
res = faiss.StandardGpuResources() # use a single GPU
stop = timeit.default_timer()
print('Time2: ', stop - start)
start = timeit.default_timer()
# make it into a gpu index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
stop = timeit.default_timer()
print('Time3: ', stop - start)
start = timeit.default_timer()
gpu_index_flat.train(np.ascontiguousarray(vectors))
stop = timeit.default_timer()
print('Time4: ', stop - start)
start = timeit.default_timer()
gpu_index_flat.add(np.ascontiguousarray(vectors))
stop = timeit.default_timer()
print('Time5: ', stop - start)
k = 1
start = timeit.default_timer()
D, I = gpu_index_flat.search(np.ascontiguousarray(vector_post), k)
stop = timeit.default_timer()
print('Time6: ', stop - start)
proba_sqrt = float(D[0][0] * 10)
if proba_sqrt < 2.5:
return I[0][0]
......
import pickle as pkl
import numpy as np
import faiss
import math
class Faiss_cluster:
    """Clustering helpers built on faiss: centroid update and nearest-vector lookup."""

    @classmethod
    def compute_centroid(cls, cluster_vector, vector_post):
        """
        Create the centroid vector of a cluster together with a new post.

        The two inputs are stacked into one float32 array and a faiss k-means
        with a single centroid is trained on them.

        :param cluster_vector: current vector of the cluster
            (presumably 1-D with 300 components -- TODO confirm against callers)
        :param vector_post: vector of the new post (same shape assumption)
        :return: ``kmeans.centroids[0]``, the single trained centroid
        """
        vectors = np.asarray([cluster_vector, vector_post], dtype=np.float32)
        dimension = 300
        ncentroids = 1
        verbose = False
        kmeans = faiss.Kmeans(dimension, ncentroids, verbose=verbose)
        kmeans.train(np.ascontiguousarray(vectors))
        return kmeans.centroids[0]

    @classmethod
    def faiss_search_similarity(cls, vectors, vector_post):
        """
        Index ``vectors`` with faiss and look up the one nearest to ``vector_post``.

        :param vectors: numpy array of candidate vectors; converted to float32
            (presumably shape (n, 300) -- TODO confirm against callers)
        :param vector_post: query handed to ``index.search``
            (presumably a float32 array of shape (1, 300) -- TODO confirm)
        :return: position of the nearest vector inside ``vectors`` when the
            returned distance times 10 is below 2.5, otherwise ``False``.
            ``False`` is also returned when ``vectors`` is empty.

        NOTE: a match at position 0 is falsy, so callers have to compare the
        result with ``is False`` instead of relying on truthiness.
        """
        vectors = vectors.astype('float32')
        if vectors.size == 0:
            # Nothing to index, so there cannot be a similar vector.
            return False

        dimension = 300
        # The third positional argument of IndexIVFFlat is the number of
        # inverted lists. The previous code passed faiss.METRIC_L2 in that
        # slot, which only behaved because the constant's value is 1.
        nlist = 1
        quantiser = faiss.IndexFlatL2(dimension)
        index = faiss.IndexIVFFlat(quantiser, dimension, nlist, faiss.METRIC_L2)
        index.train(np.ascontiguousarray(vectors))
        index.add(np.ascontiguousarray(vectors))

        k = 1  # only the single nearest neighbour is needed
        D, I = index.search(np.ascontiguousarray(vector_post), k)
        proba_sqrt = float(D[0][0] * 10)
        if proba_sqrt < 2.5:
            return I[0][0]
        else:
            return False
df_prod.pkl
This source diff could not be displayed because it is too large. You can view the blob instead.
RUN apt-get update
RUN apt-get -y install python3.6
RUN apt-get -y install python3-pip
FROM plippe/faiss-docker:1.4.0-gpu
faiss_gpu==1.7.2
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment