Commit 944af01c authored by Yeldar Toktasynov

Last working version with gpuVScpu comparison

parent 5539340f
...@@ -177,4 +177,4 @@ def service(body, message): ...@@ -177,4 +177,4 @@ def service(body, message):
yield output_document yield output_document
if __name__ == '__main__': if __name__ == '__main__':
app.run() app.run()
\ No newline at end of file
import time
# Never-ending heartbeat: sleep 20 seconds, print a line, repeat.
# NOTE(review): presumably exists only to keep the process/container alive
# for debugging -- confirm before shipping.
while True:
    time.sleep(20)
    print("Another 20 seconds of your life")
...@@ -25,7 +25,7 @@ producers: ...@@ -25,7 +25,7 @@ producers:
postgres: postgres:
username: postgres username: postgres
password: postgres password: postgres
host: 10.30.13.116 host: 10.30.10.151
port: 5432 port: 5432
db_name: clusters db_name: clusters
table_name: ru_and_kz_cluster table_name: ru_and_kz_cluster
from arase import Arase
...@@ -12,19 +12,23 @@ xq = np.random.random((nq, d)).astype('float32') ...@@ -12,19 +12,23 @@ xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000. xq[:, 0] += np.arange(nq) / 1000.
ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)
while(True): while(True):
print("HUITAA") print("HUITAA")
time.sleep(20) # print(faiss.getNumDevices())
res = faiss.StandardGpuResources() # use a single GPU res = faiss.StandardGpuResources() # use a single GPU
# build a flat (CPU) index # build a flat (CPU) index
# index_flat = faiss.IndexFlatL2(d) index_flat = faiss.IndexFlatL2(d)
# # make it into a gpu index # make it into a gpu index
# gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat) gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
# gpu_index_flat.add(xb) # add vectors to the index gpu_index_flat.add(xb) # add vectors to the index
# print(gpu_index_flat.ntotal) print(gpu_index_flat.ntotal)
# k = 4 # we want to see 4 nearest neighbors k = 4 # we want to see 4 nearest neighbors
# D, I = gpu_index_flat.search(xq, k) # actual search D, I = gpu_index_flat.search(xq, k) # actual search
# print(I[:5]) # neighbors of the 5 first queries print(I[:5]) # neighbors of the 5 first queries
# print(I[-5:]) # neighbors of the 5 last queries print(I[-5:]) # neighbors of the 5 last queries
...@@ -2,6 +2,8 @@ import pickle as pkl ...@@ -2,6 +2,8 @@ import pickle as pkl
import numpy as np import numpy as np
import faiss import faiss
import math import math
import timeit
class Faiss_cluster: class Faiss_cluster:
...@@ -17,7 +19,7 @@ class Faiss_cluster: ...@@ -17,7 +19,7 @@ class Faiss_cluster:
dimension = 300 dimension = 300
ncentroids = 1 ncentroids = 1
verbose = False verbose = False
kmeans = faiss.Kmeans(dimension, ncentroids, verbose=verbose) kmeans = faiss.Kmeans(dimension, ncentroids, verbose=verbose, gpu=True)
kmeans.train(np.ascontiguousarray(vectors)) kmeans.train(np.ascontiguousarray(vectors))
return kmeans.centroids[0] return kmeans.centroids[0]
...@@ -33,18 +35,45 @@ class Faiss_cluster: ...@@ -33,18 +35,45 @@ class Faiss_cluster:
vectors = vectors.astype('float32') vectors = vectors.astype('float32')
dimension = 300 dimension = 300
quantiser = faiss.IndexFlatL2(dimension) quantiser = faiss.IndexFlatL2(dimension)
start = timeit.default_timer()
index = faiss.IndexIVFFlat(quantiser, dimension, faiss.METRIC_L2) index = faiss.IndexIVFFlat(quantiser, dimension, faiss.METRIC_L2)
stop = timeit.default_timer()
print('Time1: ', stop - start)
start = timeit.default_timer()
res = faiss.StandardGpuResources() # use a single GPU res = faiss.StandardGpuResources() # use a single GPU
stop = timeit.default_timer()
print('Time2: ', stop - start)
start = timeit.default_timer()
# make it into a gpu index # make it into a gpu index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index) gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
stop = timeit.default_timer()
print('Time3: ', stop - start)
start = timeit.default_timer()
gpu_index_flat.train(np.ascontiguousarray(vectors)) gpu_index_flat.train(np.ascontiguousarray(vectors))
stop = timeit.default_timer()
print('Time4: ', stop - start)
start = timeit.default_timer()
gpu_index_flat.add(np.ascontiguousarray(vectors)) gpu_index_flat.add(np.ascontiguousarray(vectors))
stop = timeit.default_timer()
print('Time5: ', stop - start)
k = 1 k = 1
start = timeit.default_timer()
D, I = gpu_index_flat.search(np.ascontiguousarray(vector_post), k) D, I = gpu_index_flat.search(np.ascontiguousarray(vector_post), k)
stop = timeit.default_timer()
print('Time6: ', stop - start)
proba_sqrt = float(D[0][0] * 10) proba_sqrt = float(D[0][0] * 10)
if proba_sqrt < 2.5: if proba_sqrt < 2.5:
return I[0][0] return I[0][0]
else: else:
return False return False
\ No newline at end of file
import pickle as pkl
import numpy as np
import faiss
import math
class Faiss_cluster:
    """Centroid maintenance and nearest-cluster lookup for embedding vectors."""

    # Number of inverted lists used by the IVF index. The original code
    # passed ``faiss.METRIC_L2`` (numerically 1) in the ``nlist`` position,
    # so the effective value has always been 1; it is now spelled out.
    _NLIST = 1
    # A match is accepted when ``distance * _DISTANCE_SCALE`` is below
    # ``_MATCH_THRESHOLD``.
    _DISTANCE_SCALE = 10
    _MATCH_THRESHOLD = 2.5

    @classmethod
    def compute_centroid(cls, cluster_vector, vector_post):
        """Return the centroid of a cluster vector and a new post vector.

        The previous implementation trained a one-centroid k-means on the
        two vectors; with a single centroid that converges to the
        arithmetic mean of the inputs, which is computed directly here.

        Args:
            cluster_vector: current centroid of the cluster (1-D sequence).
            vector_post: vector of the new post, same length as
                ``cluster_vector``.

        Returns:
            1-D ``float32`` array holding the element-wise mean.
        """
        vectors = np.asarray([cluster_vector, vector_post], dtype=np.float32)
        return vectors.mean(axis=0)

    @classmethod
    def faiss_search_similarity(cls, vectors, vector_post):
        """Find the stored vector closest to ``vector_post`` using faiss.

        Args:
            vectors: 2-D array-like of candidate vectors, one per row.
            vector_post: query vector, either 1-D or shaped ``(1, dim)``.

        Returns:
            The row index (into ``vectors``) of the nearest neighbour when
            its scaled distance is below the threshold, otherwise ``False``.
            ``False`` is also returned when ``vectors`` has no rows.

            NOTE: row index ``0`` is a valid result and is falsy, so callers
            must test the result with ``is False`` rather than truthiness.
        """
        vectors = np.ascontiguousarray(vectors, dtype=np.float32)
        if vectors.shape[0] == 0:
            # Nothing to compare against; training an empty index would fail.
            return False

        # faiss expects a contiguous float32 matrix of shape (n_queries, dim).
        query = np.ascontiguousarray(
            np.atleast_2d(vector_post), dtype=np.float32
        )

        dimension = vectors.shape[1]
        quantiser = faiss.IndexFlatL2(dimension)
        index = faiss.IndexIVFFlat(
            quantiser, dimension, cls._NLIST, faiss.METRIC_L2
        )
        index.train(vectors)
        index.add(vectors)

        distances, indices = index.search(query, 1)
        scaled_distance = float(distances[0][0] * cls._DISTANCE_SCALE)
        if scaled_distance < cls._MATCH_THRESHOLD:
            return indices[0][0]
        return False
df_prod.pkl
This source diff could not be displayed because it is too large. You can view the blob instead.
RUN apt-get update
RUN apt-get -y install python3.6
RUN apt-get -y install python3-pip
FROM plippe/faiss-docker:1.4.0-gpu
faiss_gpu==1.7.2
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment