Running on GPUs
Lucas Hosseini edited this page Feb 21, 2018
·
8 revisions
Faiss can leverage your NVIDIA GPUs almost seamlessly.
In Python, first declare a GPU resource:
res = faiss.StandardGpuResources() # use a single GPU
Then build a (CPU) index the usual way:
index_flat = faiss.IndexFlatL2(d) # build a flat (CPU) index
Finally, build a GPU index from the GPU resource and the CPU index:
# make it a GPU index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
The obtained GPU index can be used the same way as a CPU index:
# Use the GPU index through the same add()/search() calls as a CPU index.
gpu_index_flat.add(xb) # add vectors to the index
print(gpu_index_flat.ntotal) # the index's ntotal attribute after the add
k = 4 # we want to see 4 nearest neighbors
# search() hands back two results, unpacked as D and I; only I is printed below
D, I = gpu_index_flat.search(xq, k) # actual search
print(I[:5]) # neighbors of the first 5 queries
print(I[-5:]) # neighbors of the last 5 queries
In C++, first declare a GPU resource:
faiss::gpu::StandardGpuResources res; // use a single GPU
Then, build a GPU index from the GPU resource:
faiss::gpu::GpuIndexFlatL2 index_flat(&res, d); // build a flat GPU index
The obtained GPU index can be used the same way as a CPU index:
// Exercise the GPU index: report its state, add the database vectors, then
// run a k-nearest-neighbor search and print the first and last result rows.
// Requires <cstdio> and <vector> alongside the Faiss headers.
printf("is_trained = %s\n", index_flat.is_trained ? "true" : "false");
index_flat.add(nb, xb); // add vectors to the index
// ntotal is a faiss::idx_t (64-bit), so widen it explicitly instead of
// assuming it has the same width as `long` on every platform.
printf("ntotal = %lld\n", static_cast<long long>(index_flat.ntotal));
int k = 4;
{ // search xq
    // Result buffers, k entries per query. std::vector releases them even if
    // search() throws, and faiss::idx_t is the label type search() writes.
    std::vector<faiss::idx_t> I(static_cast<size_t>(k) * nq);
    std::vector<float> D(static_cast<size_t>(k) * nq);
    index_flat.search(nq, xq, k, D.data(), I.data());
    // Clamp both windows so that fewer than 5 queries never reads out of range.
    const int head_end = nq < 5 ? nq : 5;
    const int tail_begin = nq > 5 ? nq - 5 : 0;
    // print results
    printf("I (5 first results)=\n");
    for (int i = 0; i < head_end; i++) {
        for (int j = 0; j < k; j++)
            printf("%5lld ", static_cast<long long>(I[i * k + j]));
        printf("\n");
    }
    printf("I (5 last results)=\n");
    for (int i = tail_begin; i < nq; i++) {
        for (int j = 0; j < k; j++)
            printf("%5lld ", static_cast<long long>(I[i * k + j]));
        printf("\n");
    }
}
# Multi-GPU variant: the index is built with index_cpu_to_all_gpus.
# ngpus is only printed; it is not passed to the call that builds the index.
ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)
cpu_index = faiss.IndexFlatL2(d) # start from a flat (CPU) index
gpu_index = faiss.index_cpu_to_all_gpus( # build the index
cpu_index
)
gpu_index.add(xb) # add vectors to the index
print(gpu_index.ntotal) # the index's ntotal attribute after the add
k = 4 # we want to see 4 nearest neighbors
# search() hands back two results, unpacked as D and I; only I is printed below
D, I = gpu_index.search(xq, k) # actual search
print(I[:5]) # neighbors of the first 5 queries
print(I[-5:]) # neighbors of the last 5 queries
int ngpus = faiss::gpu::getNumDevices();
printf("Number of GPUs: %d\n", ngpus);
std::vector<faiss::gpu::GpuResources*> res;
std::vector<int> devs;
for(int i = 0; i < ngpus; i++) {
res.push_back(new faiss::gpu::StandardGpuResources);
devs.push_back(i);
}
faiss::IndexFlatL2 cpu_index(d);
faiss::Index *gpu_index =
faiss::gpu::index_cpu_to_gpu_multiple(
res,
devs,
&cpu_index
);
printf("is_trained = %s\n", gpu_index->is_trained ? "true" : "false");
gpu_index->add(nb, xb); // vectors to the index
printf("ntotal = %ld\n", gpu_index->ntotal);
int k = 4;
{ // search xq
long *I = new long[k * nq];
float *D = new float[k * nq];
gpu_index->search(nq, xq, k, D, I);
// print results
printf("I (5 first results)=\n");
for(int i = 0; i < 5; i++) {
for(int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
printf("I (5 last results)=\n");
for(int i = nq - 5; i < nq; i++) {
for(int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
delete [] I;
delete [] D;
}
delete gpu_index;
for(int i = 0; i < ngpus; i++) {
delete res[i];
}
Faiss building blocks: clustering, PCA, quantization
Index IO, cloning and hyper parameter tuning
Threads and asynchronous calls
Inverted list objects and scanners
Indexes that do not fit in RAM
Brute force search without an index
Fast accumulation of PQ and AQ codes (FastScan)
Setting search parameters for one query
Binary hashing index benchmark