Commit 395d2ce6 authored by huchen

init the faiss for rocm

parent 5ded39f5
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/extra_distances.h>
#include <faiss/utils/utils.h>
#include <cstring>
#include <limits>
namespace faiss {
IndexFlat::IndexFlat(idx_t d, MetricType metric)
: IndexFlatCodes(sizeof(float) * d, d, metric) {}
void IndexFlat::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
// we see the distances and labels as heaps
if (metric_type == METRIC_INNER_PRODUCT) {
float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
knn_inner_product(x, get_xb(), d, n, ntotal, &res);
} else if (metric_type == METRIC_L2) {
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
knn_L2sqr(x, get_xb(), d, n, ntotal, &res);
} else {
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
knn_extra_metrics(
x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res);
}
}
void IndexFlat::range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const {
switch (metric_type) {
case METRIC_INNER_PRODUCT:
range_search_inner_product(
x, get_xb(), d, n, ntotal, radius, result);
break;
case METRIC_L2:
range_search_L2sqr(x, get_xb(), d, n, ntotal, radius, result);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
void IndexFlat::compute_distance_subset(
idx_t n,
const float* x,
idx_t k,
float* distances,
const idx_t* labels) const {
switch (metric_type) {
case METRIC_INNER_PRODUCT:
fvec_inner_products_by_idx(distances, x, get_xb(), labels, d, n, k);
break;
case METRIC_L2:
fvec_L2sqr_by_idx(distances, x, get_xb(), labels, d, n, k);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
namespace {
struct FlatL2Dis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float* q;
const float* b;
size_t ndis;
float operator()(idx_t i) override {
ndis++;
return fvec_L2sqr(q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_L2sqr(b + j * d, b + i * d, d);
}
explicit FlatL2Dis(const IndexFlat& storage, const float* q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.get_xb()),
ndis(0) {}
void set_query(const float* x) override {
q = x;
}
};
struct FlatIPDis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float* q;
const float* b;
size_t ndis;
float operator()(idx_t i) override {
ndis++;
return fvec_inner_product(q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_inner_product(b + j * d, b + i * d, d);
}
explicit FlatIPDis(const IndexFlat& storage, const float* q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.get_xb()),
ndis(0) {}
void set_query(const float* x) override {
q = x;
}
};
} // namespace
DistanceComputer* IndexFlat::get_distance_computer() const {
if (metric_type == METRIC_L2) {
return new FlatL2Dis(*this);
} else if (metric_type == METRIC_INNER_PRODUCT) {
return new FlatIPDis(*this);
} else {
return get_extra_distance_computer(
d, metric_type, metric_arg, ntotal, get_xb());
}
}
void IndexFlat::reconstruct(idx_t key, float* recons) const {
memcpy(recons, &(codes[key * code_size]), code_size);
}
void IndexFlat::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
if (n > 0) {
memcpy(bytes, x, sizeof(float) * d * n);
}
}
void IndexFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
if (n > 0) {
memcpy(x, bytes, sizeof(float) * d * n);
}
}
/***************************************************
* IndexFlat1D
***************************************************/
IndexFlat1D::IndexFlat1D(bool continuous_update)
: IndexFlatL2(1), continuous_update(continuous_update) {}
/// if not continuous_update, call this between the last add and
/// the first search
void IndexFlat1D::update_permutation() {
perm.resize(ntotal);
if (ntotal < 1000000) {
fvec_argsort(ntotal, get_xb(), (size_t*)perm.data());
} else {
fvec_argsort_parallel(ntotal, get_xb(), (size_t*)perm.data());
}
}
void IndexFlat1D::add(idx_t n, const float* x) {
IndexFlatL2::add(n, x);
if (continuous_update)
update_permutation();
}
void IndexFlat1D::reset() {
IndexFlatL2::reset();
perm.clear();
}
void IndexFlat1D::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
FAISS_THROW_IF_NOT_MSG(
perm.size() == ntotal, "Call update_permutation before search");
const float* xb = get_xb();
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
float q = x[i]; // query
float* D = distances + i * k;
idx_t* I = labels + i * k;
// binary search
idx_t i0 = 0, i1 = ntotal;
idx_t wp = 0;
if (xb[perm[i0]] > q) {
i1 = 0;
goto finish_right;
}
if (xb[perm[i1 - 1]] <= q) {
i0 = i1 - 1;
goto finish_left;
}
while (i0 + 1 < i1) {
idx_t imed = (i0 + i1) / 2;
if (xb[perm[imed]] <= q)
i0 = imed;
else
i1 = imed;
}
// query is between xb[perm[i0]] and xb[perm[i1]]
// expand to nearest neighs
while (wp < k) {
float xleft = xb[perm[i0]];
float xright = xb[perm[i1]];
if (q - xleft < xright - q) {
D[wp] = q - xleft;
I[wp] = perm[i0];
i0--;
wp++;
if (i0 < 0) {
goto finish_right;
}
} else {
D[wp] = xright - q;
I[wp] = perm[i1];
i1++;
wp++;
if (i1 >= ntotal) {
goto finish_left;
}
}
}
goto done;
finish_right:
// grow to the right from i1
while (wp < k) {
if (i1 < ntotal) {
D[wp] = xb[perm[i1]] - q;
I[wp] = perm[i1];
i1++;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
goto done;
finish_left:
// grow to the left from i0
while (wp < k) {
if (i0 >= 0) {
D[wp] = q - xb[perm[i0]];
I[wp] = perm[i0];
i0--;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
done:;
}
}
} // namespace faiss
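// --- usage sketch (not part of the library sources) -------------------------
// A minimal sketch of IndexFlat1D as implemented above: with
// continuous_update == false the sorted permutation must be rebuilt explicitly
// between the last add() and the first search(). Note that the distances it
// returns are absolute differences (L1), not squared L2. The values below are
// illustrative only.
#include <vector>
#include <faiss/IndexFlat.h>

void index_flat_1d_sketch() {
    faiss::IndexFlat1D index(/*continuous_update=*/false);
    std::vector<float> xb = {0.5f, -1.0f, 3.25f, 2.0f};
    index.add(xb.size(), xb.data());  // 1-D "vectors": one float each, ids 0..3
    index.update_permutation();       // required before searching in this mode

    float q = 1.9f;
    int k = 2;
    std::vector<float> D(k);
    std::vector<faiss::Index::idx_t> I(k);
    index.search(1, &q, k, D.data(), I.data());
    // expected result: id 3 (|1.9 - 2.0| = 0.1) then id 2 (|1.9 - 3.25| = 1.35)
}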
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_FLAT_H
#define INDEX_FLAT_H
#include <vector>
#include <faiss/IndexFlatCodes.h>
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search */
struct IndexFlat : IndexFlatCodes {
explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
void reconstruct(idx_t key, float* recons) const override;
/** compute distance with a subset of vectors
*
* @param x query vectors, size n * d
* @param labels indices of the vectors that should be compared
* for each query vector, size n * k
* @param distances
* corresponding output distances, size n * k
*/
void compute_distance_subset(
idx_t n,
const float* x,
idx_t k,
float* distances,
const idx_t* labels) const;
// get pointer to the floating point data
float* get_xb() {
return (float*)codes.data();
}
const float* get_xb() const {
return (const float*)codes.data();
}
IndexFlat() {}
DistanceComputer* get_distance_computer() const override;
/* The standalone codec interface (just memcopies in this case) */
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
};
struct IndexFlatIP : IndexFlat {
explicit IndexFlatIP(idx_t d) : IndexFlat(d, METRIC_INNER_PRODUCT) {}
IndexFlatIP() {}
};
struct IndexFlatL2 : IndexFlat {
explicit IndexFlatL2(idx_t d) : IndexFlat(d, METRIC_L2) {}
IndexFlatL2() {}
};
/// optimized version for 1D "vectors".
struct IndexFlat1D : IndexFlatL2 {
bool continuous_update; ///< is the permutation updated continuously?
std::vector<idx_t> perm; ///< sorted database indices
explicit IndexFlat1D(bool continuous_update = true);
/// if not continuous_update, call this between the last add and
/// the first search
void update_permutation();
void add(idx_t n, const float* x) override;
void reset() override;
/// Warn: the distances returned are L1 not L2
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
} // namespace faiss
#endif
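// --- usage sketch (not part of the library sources) -------------------------
// A minimal example of the exhaustive-search API declared above. IndexFlatL2
// needs no training; add() stores the raw floats and search() does brute-force
// k-nearest-neighbor search. Sizes and data are placeholders.
#include <vector>
#include <faiss/IndexFlat.h>

void index_flat_sketch() {
    int d = 64;                              // vector dimension
    faiss::IndexFlatL2 index(d);             // exhaustive squared-L2 search
    std::vector<float> xb(1000 * d, 0.0f);   // database vectors (placeholder data)
    index.add(1000, xb.data());              // stored verbatim via IndexFlatCodes::add

    int k = 4;
    std::vector<float> D(k);                       // output distances, size n * k
    std::vector<faiss::Index::idx_t> I(k);         // output labels, size n * k
    index.search(1, xb.data(), k, D.data(), I.data());
}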
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexFlatCodes.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <cstring>
namespace faiss {
IndexFlatCodes::IndexFlatCodes(size_t code_size, idx_t d, MetricType metric)
: Index(d, metric), code_size(code_size) {}
IndexFlatCodes::IndexFlatCodes() : code_size(0) {}
void IndexFlatCodes::add(idx_t n, const float* x) {
FAISS_THROW_IF_NOT(is_trained);
codes.resize((ntotal + n) * code_size);
sa_encode(n, x, &codes[ntotal * code_size]);
ntotal += n;
}
void IndexFlatCodes::reset() {
codes.clear();
ntotal = 0;
}
size_t IndexFlatCodes::sa_code_size() const {
return code_size;
}
size_t IndexFlatCodes::remove_ids(const IDSelector& sel) {
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
memmove(&codes[code_size * j],
&codes[code_size * i],
code_size);
}
j++;
}
}
size_t nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
codes.resize(ntotal * code_size);
}
return nremove;
}
void IndexFlatCodes::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
sa_decode(ni, codes.data() + i0 * code_size, recons);
}
void IndexFlatCodes::reconstruct(idx_t key, float* recons) const {
reconstruct_n(key, 1, recons);
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/Index.h>
#include <vector>
namespace faiss {
/** Index that encodes all vectors as fixed-size codes (size code_size). Storage
* is in the codes vector */
struct IndexFlatCodes : Index {
size_t code_size;
/// encoded dataset, size ntotal * code_size
std::vector<uint8_t> codes;
IndexFlatCodes();
IndexFlatCodes(size_t code_size, idx_t d, MetricType metric = METRIC_L2);
/// default add uses sa_encode
void add(idx_t n, const float* x) override;
void reset() override;
/// reconstruction using the codec interface
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
size_t sa_code_size() const override;
/** remove some ids. NB that because of the structure of the
 * index, the semantics of this operation are
 * different from the usual ones: the new ids are shifted */
size_t remove_ids(const IDSelector& sel) override;
};
} // namespace faiss
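// --- usage sketch (not part of the library sources) -------------------------
// A small sketch of the remove_ids() semantics documented above: vectors after
// the removed range are compacted, so their ids are shifted down. It uses
// IDSelectorRange from AuxIndexStructures.h and IndexFlatL2 as a concrete
// IndexFlatCodes subclass; the data is placeholder.
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>

void remove_ids_sketch() {
    int d = 8;
    faiss::IndexFlatL2 index(d);
    std::vector<float> xb(10 * d, 1.0f);
    index.add(10, xb.data());                 // vectors get ids 0..9

    faiss::IDSelectorRange sel(2, 5);         // selects ids 2, 3, 4
    size_t nremoved = index.remove_ids(sel);  // returns 3; remaining ids become 0..6
    // after the call, the vector that had id 5 is addressed as id 2, and so on
    (void)nremoved;
}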
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexHNSW.h>
#include <omp.h>
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <unordered_set>
#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef __SSE__
#endif
#include <faiss/Index2Layer.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
extern "C" {
/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
int sgemm_(
const char* transa,
const char* transb,
FINTEGER* m,
FINTEGER* n,
FINTEGER* k,
const float* alpha,
const float* a,
FINTEGER* lda,
const float* b,
FINTEGER* ldb,
float* beta,
float* c,
FINTEGER* ldc);
}
namespace faiss {
using idx_t = Index::idx_t;
using MinimaxHeap = HNSW::MinimaxHeap;
using storage_idx_t = HNSW::storage_idx_t;
using NodeDistFarther = HNSW::NodeDistFarther;
HNSWStats hnsw_stats;
/**************************************************************
* add / search blocks of descriptors
**************************************************************/
namespace {
/* Wrap the distance computer into one that negates the
distances. This makes supporting INNER_PRODUCT search easier */
struct NegativeDistanceComputer : DistanceComputer {
/// owned by this
DistanceComputer* basedis;
explicit NegativeDistanceComputer(DistanceComputer* basedis)
: basedis(basedis) {}
void set_query(const float* x) override {
basedis->set_query(x);
}
/// compute distance of vector i to current query
float operator()(idx_t i) override {
return -(*basedis)(i);
}
/// compute distance between two stored vectors
float symmetric_dis(idx_t i, idx_t j) override {
return -basedis->symmetric_dis(i, j);
}
virtual ~NegativeDistanceComputer() {
delete basedis;
}
};
DistanceComputer* storage_distance_computer(const Index* storage) {
if (storage->metric_type == METRIC_INNER_PRODUCT) {
return new NegativeDistanceComputer(storage->get_distance_computer());
} else {
return storage->get_distance_computer();
}
}
void hnsw_add_vertices(
IndexHNSW& index_hnsw,
size_t n0,
size_t n,
const float* x,
bool verbose,
bool preset_levels = false) {
size_t d = index_hnsw.d;
HNSW& hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n;
double t0 = getmillisecs();
if (verbose) {
printf("hnsw_add_vertices: adding %zd elements on top of %zd "
"(preset_levels=%d)\n",
n,
n0,
int(preset_levels));
}
if (n == 0) {
return;
}
int max_level = hnsw.prepare_level_tab(n, preset_levels);
if (verbose) {
printf(" max_level = %d\n", max_level);
}
std::vector<omp_lock_t> locks(ntotal);
for (int i = 0; i < ntotal; i++)
omp_init_lock(&locks[i]);
// add vectors from highest to lowest level
std::vector<int> hist;
std::vector<int> order(n);
{ // make buckets with vectors of the same level
// build histogram
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
while (pt_level >= hist.size())
hist.push_back(0);
hist[pt_level]++;
}
// accumulate
std::vector<int> offsets(hist.size() + 1, 0);
for (int i = 0; i < hist.size() - 1; i++) {
offsets[i + 1] = offsets[i] + hist[i];
}
// bucket sort
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
order[offsets[pt_level]++] = pt_id;
}
}
idx_t check_period = InterruptCallback::get_period_hint(
max_level * index_hnsw.d * hnsw.efConstruction);
{ // perform add
RandomGenerator rng2(789);
int i1 = n;
for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
int i0 = i1 - hist[pt_level];
if (verbose) {
printf("Adding %d elements at level %d\n", i1 - i0, pt_level);
}
// random permutation to get rid of dataset order bias
for (int j = i0; j < i1; j++)
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
bool interrupt = false;
#pragma omp parallel if (i1 > i0 + 100)
{
VisitedTable vt(ntotal);
DistanceComputer* dis =
storage_distance_computer(index_hnsw.storage);
ScopeDeleter1<DistanceComputer> del(dis);
int prev_display =
verbose && omp_get_thread_num() == 0 ? 0 : -1;
size_t counter = 0;
#pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) {
storage_idx_t pt_id = order[i];
dis->set_query(x + (pt_id - n0) * d);
// cannot break
if (interrupt) {
continue;
}
hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout);
}
if (counter % check_period == 0) {
if (InterruptCallback::is_interrupted()) {
interrupt = true;
}
}
counter++;
}
}
if (interrupt) {
FAISS_THROW_MSG("computation interrupted");
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
}
for (int i = 0; i < ntotal; i++) {
omp_destroy_lock(&locks[i]);
}
}
} // namespace
/**************************************************************
* IndexHNSW implementation
**************************************************************/
IndexHNSW::IndexHNSW(int d, int M, MetricType metric)
: Index(d, metric),
hnsw(M),
own_fields(false),
storage(nullptr),
reconstruct_from_neighbors(nullptr) {}
IndexHNSW::IndexHNSW(Index* storage, int M)
: Index(storage->d, storage->metric_type),
hnsw(M),
own_fields(false),
storage(storage),
reconstruct_from_neighbors(nullptr) {}
IndexHNSW::~IndexHNSW() {
if (own_fields) {
delete storage;
}
}
void IndexHNSW::train(idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(
storage,
"Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly");
// hnsw structure does not require training
storage->train(n, x);
is_trained = true;
}
void IndexHNSW::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
FAISS_THROW_IF_NOT_MSG(
storage,
"Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly");
size_t n1 = 0, n2 = 0, n3 = 0, ndis = 0, nreorder = 0;
idx_t check_period = InterruptCallback::get_period_hint(
hnsw.max_level * d * hnsw.efSearch);
for (idx_t i0 = 0; i0 < n; i0 += check_period) {
idx_t i1 = std::min(i0 + check_period, n);
#pragma omp parallel
{
VisitedTable vt(ntotal);
DistanceComputer* dis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(dis);
#pragma omp for reduction(+ : n1, n2, n3, ndis, nreorder)
for (idx_t i = i0; i < i1; i++) {
idx_t* idxi = labels + i * k;
float* simi = distances + i * k;
dis->set_query(x + i * d);
maxheap_heapify(k, simi, idxi);
HNSWStats stats = hnsw.search(*dis, k, idxi, simi, vt);
n1 += stats.n1;
n2 += stats.n2;
n3 += stats.n3;
ndis += stats.ndis;
nreorder += stats.nreorder;
maxheap_reorder(k, simi, idxi);
if (reconstruct_from_neighbors &&
reconstruct_from_neighbors->k_reorder != 0) {
int k_reorder = reconstruct_from_neighbors->k_reorder;
if (k_reorder == -1 || k_reorder > k)
k_reorder = k;
nreorder += reconstruct_from_neighbors->compute_distances(
k_reorder, idxi, x + i * d, simi);
// sort top k_reorder
maxheap_heapify(
k_reorder, simi, idxi, simi, idxi, k_reorder);
maxheap_reorder(k_reorder, simi, idxi);
}
}
}
InterruptCallback::check();
}
if (metric_type == METRIC_INNER_PRODUCT) {
// we need to revert the negated distances
for (size_t i = 0; i < k * n; i++) {
distances[i] = -distances[i];
}
}
hnsw_stats.combine({n1, n2, n3, ndis, nreorder});
}
void IndexHNSW::add(idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(
storage,
"Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly");
FAISS_THROW_IF_NOT(is_trained);
int n0 = ntotal;
storage->add(n, x);
ntotal = storage->ntotal;
hnsw_add_vertices(*this, n0, n, x, verbose, hnsw.levels.size() == ntotal);
}
void IndexHNSW::reset() {
hnsw.reset();
storage->reset();
ntotal = 0;
}
void IndexHNSW::reconstruct(idx_t key, float* recons) const {
storage->reconstruct(key, recons);
}
void IndexHNSW::shrink_level_0_neighbors(int new_size) {
#pragma omp parallel
{
DistanceComputer* dis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(dis);
#pragma omp for
for (idx_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
std::priority_queue<NodeDistFarther> initial_list;
for (size_t j = begin; j < end; j++) {
int v1 = hnsw.neighbors[j];
if (v1 < 0)
break;
initial_list.emplace(dis->symmetric_dis(i, v1), v1);
// initial_list.emplace(qdis(v1), v1);
}
std::vector<NodeDistFarther> shrunk_list;
HNSW::shrink_neighbor_list(
*dis, initial_list, shrunk_list, new_size);
for (size_t j = begin; j < end; j++) {
if (j - begin < shrunk_list.size())
hnsw.neighbors[j] = shrunk_list[j - begin].id;
else
hnsw.neighbors[j] = -1;
}
}
}
}
void IndexHNSW::search_level_0(
idx_t n,
const float* x,
idx_t k,
const storage_idx_t* nearest,
const float* nearest_d,
float* distances,
idx_t* labels,
int nprobe,
int search_type) const {
FAISS_THROW_IF_NOT(k > 0);
FAISS_THROW_IF_NOT(nprobe > 0);
storage_idx_t ntotal = hnsw.levels.size();
size_t n1 = 0, n2 = 0, n3 = 0, ndis = 0, nreorder = 0;
#pragma omp parallel
{
DistanceComputer* qdis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(qdis);
VisitedTable vt(ntotal);
#pragma omp for reduction(+ : n1, n2, n3, ndis, nreorder)
for (idx_t i = 0; i < n; i++) {
idx_t* idxi = labels + i * k;
float* simi = distances + i * k;
qdis->set_query(x + i * d);
maxheap_heapify(k, simi, idxi);
if (search_type == 1) {
int nres = 0;
for (int j = 0; j < nprobe; j++) {
storage_idx_t cj = nearest[i * nprobe + j];
if (cj < 0)
break;
if (vt.get(cj))
continue;
int candidates_size = std::max(hnsw.efSearch, int(k));
MinimaxHeap candidates(candidates_size);
candidates.push(cj, nearest_d[i * nprobe + j]);
HNSWStats search_stats;
nres = hnsw.search_from_candidates(
*qdis,
k,
idxi,
simi,
candidates,
vt,
search_stats,
0,
nres);
n1 += search_stats.n1;
n2 += search_stats.n2;
n3 += search_stats.n3;
ndis += search_stats.ndis;
nreorder += search_stats.nreorder;
}
} else if (search_type == 2) {
int candidates_size = std::max(hnsw.efSearch, int(k));
candidates_size = std::max(candidates_size, nprobe);
MinimaxHeap candidates(candidates_size);
for (int j = 0; j < nprobe; j++) {
storage_idx_t cj = nearest[i * nprobe + j];
if (cj < 0)
break;
candidates.push(cj, nearest_d[i * nprobe + j]);
}
HNSWStats search_stats;
hnsw.search_from_candidates(
*qdis, k, idxi, simi, candidates, vt, search_stats, 0);
n1 += search_stats.n1;
n2 += search_stats.n2;
n3 += search_stats.n3;
ndis += search_stats.ndis;
nreorder += search_stats.nreorder;
}
vt.advance();
maxheap_reorder(k, simi, idxi);
}
}
hnsw_stats.combine({n1, n2, n3, ndis, nreorder});
}
void IndexHNSW::init_level_0_from_knngraph(
int k,
const float* D,
const idx_t* I) {
int dest_size = hnsw.nb_neighbors(0);
#pragma omp parallel for
for (idx_t i = 0; i < ntotal; i++) {
DistanceComputer* qdis = storage_distance_computer(storage);
std::vector<float> vec(d);
storage->reconstruct(i, vec.data());
qdis->set_query(vec.data());
std::priority_queue<NodeDistFarther> initial_list;
for (size_t j = 0; j < k; j++) {
int v1 = I[i * k + j];
if (v1 == i)
continue;
if (v1 < 0)
break;
initial_list.emplace(D[i * k + j], v1);
}
std::vector<NodeDistFarther> shrunk_list;
HNSW::shrink_neighbor_list(*qdis, initial_list, shrunk_list, dest_size);
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
if (j - begin < shrunk_list.size())
hnsw.neighbors[j] = shrunk_list[j - begin].id;
else
hnsw.neighbors[j] = -1;
}
}
}
void IndexHNSW::init_level_0_from_entry_points(
int n,
const storage_idx_t* points,
const storage_idx_t* nearests) {
std::vector<omp_lock_t> locks(ntotal);
for (int i = 0; i < ntotal; i++)
omp_init_lock(&locks[i]);
#pragma omp parallel
{
VisitedTable vt(ntotal);
DistanceComputer* dis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(dis);
std::vector<float> vec(storage->d);
#pragma omp for schedule(dynamic)
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = points[i];
storage_idx_t nearest = nearests[i];
storage->reconstruct(pt_id, vec.data());
dis->set_query(vec.data());
hnsw.add_links_starting_from(
*dis, pt_id, nearest, (*dis)(nearest), 0, locks.data(), vt);
if (verbose && i % 10000 == 0) {
printf(" %d / %d\r", i, n);
fflush(stdout);
}
}
}
if (verbose) {
printf("\n");
}
for (int i = 0; i < ntotal; i++)
omp_destroy_lock(&locks[i]);
}
void IndexHNSW::reorder_links() {
int M = hnsw.nb_neighbors(0);
#pragma omp parallel
{
std::vector<float> distances(M);
std::vector<size_t> order(M);
std::vector<storage_idx_t> tmp(M);
DistanceComputer* dis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(dis);
#pragma omp for
for (storage_idx_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
storage_idx_t nj = hnsw.neighbors[j];
if (nj < 0) {
end = j;
break;
}
distances[j - begin] = dis->symmetric_dis(i, nj);
tmp[j - begin] = nj;
}
fvec_argsort(end - begin, distances.data(), order.data());
for (size_t j = begin; j < end; j++) {
hnsw.neighbors[j] = tmp[order[j - begin]];
}
}
}
}
void IndexHNSW::link_singletons() {
printf("search for singletons\n");
std::vector<bool> seen(ntotal);
for (size_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
storage_idx_t ni = hnsw.neighbors[j];
if (ni >= 0)
seen[ni] = true;
}
}
int n_sing = 0, n_sing_l1 = 0;
std::vector<storage_idx_t> singletons;
for (storage_idx_t i = 0; i < ntotal; i++) {
if (!seen[i]) {
singletons.push_back(i);
n_sing++;
if (hnsw.levels[i] > 1)
n_sing_l1++;
}
}
printf(" Found %d / %" PRId64 " singletons (%d appear in a level above)\n",
n_sing,
ntotal,
n_sing_l1);
std::vector<float> recons(singletons.size() * d);
for (int i = 0; i < singletons.size(); i++) {
FAISS_ASSERT(!"not implemented");
}
}
/**************************************************************
* ReconstructFromNeighbors implementation
**************************************************************/
ReconstructFromNeighbors::ReconstructFromNeighbors(
const IndexHNSW& index,
size_t k,
size_t nsq)
: index(index), k(k), nsq(nsq) {
M = index.hnsw.nb_neighbors(0);
FAISS_ASSERT(k <= 256);
code_size = k == 1 ? 0 : nsq;
ntotal = 0;
d = index.d;
FAISS_ASSERT(d % nsq == 0);
dsub = d / nsq;
k_reorder = -1;
}
void ReconstructFromNeighbors::reconstruct(
storage_idx_t i,
float* x,
float* tmp) const {
const HNSW& hnsw = index.hnsw;
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
if (k == 1 || nsq == 1) {
const float* beta;
if (k == 1) {
beta = codebook.data();
} else {
int idx = codes[i];
beta = codebook.data() + idx * (M + 1);
}
float w0 = beta[0]; // weight of image itself
index.storage->reconstruct(i, tmp);
for (int l = 0; l < d; l++)
x[l] = w0 * tmp[l];
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0)
ji = i;
float w = beta[j - begin + 1];
index.storage->reconstruct(ji, tmp);
for (int l = 0; l < d; l++)
x[l] += w * tmp[l];
}
} else if (nsq == 2) {
int idx0 = codes[2 * i];
int idx1 = codes[2 * i + 1];
const float* beta0 = codebook.data() + idx0 * (M + 1);
const float* beta1 = codebook.data() + (idx1 + k) * (M + 1);
index.storage->reconstruct(i, tmp);
float w0;
w0 = beta0[0];
for (int l = 0; l < dsub; l++)
x[l] = w0 * tmp[l];
w0 = beta1[0];
for (int l = dsub; l < d; l++)
x[l] = w0 * tmp[l];
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0)
ji = i;
index.storage->reconstruct(ji, tmp);
float w;
w = beta0[j - begin + 1];
for (int l = 0; l < dsub; l++)
x[l] += w * tmp[l];
w = beta1[j - begin + 1];
for (int l = dsub; l < d; l++)
x[l] += w * tmp[l];
}
} else {
std::vector<const float*> betas(nsq);
{
const float* b = codebook.data();
const uint8_t* c = &codes[i * code_size];
for (int sq = 0; sq < nsq; sq++) {
betas[sq] = b + (*c++) * (M + 1);
b += (M + 1) * k;
}
}
index.storage->reconstruct(i, tmp);
{
int d0 = 0;
for (int sq = 0; sq < nsq; sq++) {
float w = *(betas[sq]++);
int d1 = d0 + dsub;
for (int l = d0; l < d1; l++) {
x[l] = w * tmp[l];
}
d0 = d1;
}
}
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0)
ji = i;
index.storage->reconstruct(ji, tmp);
int d0 = 0;
for (int sq = 0; sq < nsq; sq++) {
float w = *(betas[sq]++);
int d1 = d0 + dsub;
for (int l = d0; l < d1; l++) {
x[l] += w * tmp[l];
}
d0 = d1;
}
}
}
}
void ReconstructFromNeighbors::reconstruct_n(
storage_idx_t n0,
storage_idx_t ni,
float* x) const {
#pragma omp parallel
{
std::vector<float> tmp(index.d);
#pragma omp for
for (storage_idx_t i = 0; i < ni; i++) {
reconstruct(n0 + i, x + i * index.d, tmp.data());
}
}
}
size_t ReconstructFromNeighbors::compute_distances(
size_t n,
const idx_t* shortlist,
const float* query,
float* distances) const {
std::vector<float> tmp(2 * index.d);
size_t ncomp = 0;
for (int i = 0; i < n; i++) {
if (shortlist[i] < 0)
break;
reconstruct(shortlist[i], tmp.data(), tmp.data() + index.d);
distances[i] = fvec_L2sqr(query, tmp.data(), index.d);
ncomp++;
}
return ncomp;
}
void ReconstructFromNeighbors::get_neighbor_table(storage_idx_t i, float* tmp1)
const {
const HNSW& hnsw = index.hnsw;
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
size_t d = index.d;
index.storage->reconstruct(i, tmp1);
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0)
ji = i;
index.storage->reconstruct(ji, tmp1 + (j - begin + 1) * d);
}
}
/// called by add_codes
void ReconstructFromNeighbors::estimate_code(
const float* x,
storage_idx_t i,
uint8_t* code) const {
// fill in tmp table with the neighbor values
float* tmp1 = new float[d * (M + 1) + (d * k)];
float* tmp2 = tmp1 + d * (M + 1);
ScopeDeleter<float> del(tmp1);
// collect coordinates of base
get_neighbor_table(i, tmp1);
for (size_t sq = 0; sq < nsq; sq++) {
int d0 = sq * dsub;
{
FINTEGER ki = k, di = d, m1 = M + 1;
FINTEGER dsubi = dsub;
float zero = 0, one = 1;
sgemm_("N",
"N",
&dsubi,
&ki,
&m1,
&one,
tmp1 + d0,
&di,
codebook.data() + sq * (m1 * k),
&m1,
&zero,
tmp2,
&dsubi);
}
float min = HUGE_VAL;
int argmin = -1;
for (size_t j = 0; j < k; j++) {
float dis = fvec_L2sqr(x + d0, tmp2 + j * dsub, dsub);
if (dis < min) {
min = dis;
argmin = j;
}
}
code[sq] = argmin;
}
}
void ReconstructFromNeighbors::add_codes(size_t n, const float* x) {
if (k == 1) { // nothing to encode
ntotal += n;
return;
}
codes.resize(codes.size() + code_size * n);
#pragma omp parallel for
for (int i = 0; i < n; i++) {
estimate_code(
x + i * index.d,
ntotal + i,
codes.data() + (ntotal + i) * code_size);
}
ntotal += n;
FAISS_ASSERT(codes.size() == ntotal * code_size);
}
/**************************************************************
* IndexHNSWFlat implementation
**************************************************************/
IndexHNSWFlat::IndexHNSWFlat() {
is_trained = true;
}
IndexHNSWFlat::IndexHNSWFlat(int d, int M, MetricType metric)
: IndexHNSW(new IndexFlat(d, metric), M) {
own_fields = true;
is_trained = true;
}
/**************************************************************
* IndexHNSWPQ implementation
**************************************************************/
IndexHNSWPQ::IndexHNSWPQ() {}
IndexHNSWPQ::IndexHNSWPQ(int d, int pq_m, int M)
: IndexHNSW(new IndexPQ(d, pq_m, 8), M) {
own_fields = true;
is_trained = false;
}
void IndexHNSWPQ::train(idx_t n, const float* x) {
IndexHNSW::train(n, x);
(dynamic_cast<IndexPQ*>(storage))->pq.compute_sdc_table();
}
/**************************************************************
* IndexHNSWSQ implementation
**************************************************************/
IndexHNSWSQ::IndexHNSWSQ(
int d,
ScalarQuantizer::QuantizerType qtype,
int M,
MetricType metric)
: IndexHNSW(new IndexScalarQuantizer(d, qtype, metric), M) {
is_trained = false;
own_fields = true;
}
IndexHNSWSQ::IndexHNSWSQ() {}
/**************************************************************
* IndexHNSW2Level implementation
**************************************************************/
IndexHNSW2Level::IndexHNSW2Level(
Index* quantizer,
size_t nlist,
int m_pq,
int M)
: IndexHNSW(new Index2Layer(quantizer, nlist, m_pq), M) {
own_fields = true;
is_trained = false;
}
IndexHNSW2Level::IndexHNSW2Level() {}
namespace {
// same as search_from_candidates but uses the visited table as follows:
// visno -> is in result list
// visno + 1 -> in result list + in candidates
int search_from_candidates_2(
const HNSW& hnsw,
DistanceComputer& qdis,
int k,
idx_t* I,
float* D,
MinimaxHeap& candidates,
VisitedTable& vt,
HNSWStats& stats,
int level,
int nres_in = 0) {
int nres = nres_in;
int ndis = 0;
for (int i = 0; i < candidates.size(); i++) {
idx_t v1 = candidates.ids[i];
FAISS_ASSERT(v1 >= 0);
vt.visited[v1] = vt.visno + 1;
}
int nstep = 0;
while (candidates.size() > 0) {
float d0 = 0;
int v0 = candidates.pop_min(&d0);
size_t begin, end;
hnsw.neighbor_range(v0, level, &begin, &end);
for (size_t j = begin; j < end; j++) {
int v1 = hnsw.neighbors[j];
if (v1 < 0)
break;
if (vt.visited[v1] == vt.visno + 1) {
// nothing to do
} else {
ndis++;
float d = qdis(v1);
candidates.push(v1, d);
// never seen before --> add to heap
if (vt.visited[v1] < vt.visno) {
if (nres < k) {
faiss::maxheap_push(++nres, D, I, d, v1);
} else if (d < D[0]) {
faiss::maxheap_replace_top(nres, D, I, d, v1);
}
}
vt.visited[v1] = vt.visno + 1;
}
}
nstep++;
if (nstep > hnsw.efSearch) {
break;
}
}
stats.n1++;
if (candidates.size() == 0)
stats.n2++;
return nres;
}
} // namespace
void IndexHNSW2Level::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
if (dynamic_cast<const Index2Layer*>(storage)) {
IndexHNSW::search(n, x, k, distances, labels);
} else { // "mixed" search
size_t n1 = 0, n2 = 0, n3 = 0, ndis = 0, nreorder = 0;
const IndexIVFPQ* index_ivfpq =
dynamic_cast<const IndexIVFPQ*>(storage);
int nprobe = index_ivfpq->nprobe;
std::unique_ptr<idx_t[]> coarse_assign(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
index_ivfpq->quantizer->search(
n, x, nprobe, coarse_dis.get(), coarse_assign.get());
index_ivfpq->search_preassigned(
n,
x,
k,
coarse_assign.get(),
coarse_dis.get(),
distances,
labels,
false);
#pragma omp parallel
{
VisitedTable vt(ntotal);
DistanceComputer* dis = storage_distance_computer(storage);
ScopeDeleter1<DistanceComputer> del(dis);
int candidates_size = hnsw.upper_beam;
MinimaxHeap candidates(candidates_size);
#pragma omp for reduction(+ : n1, n2, n3, ndis, nreorder)
for (idx_t i = 0; i < n; i++) {
idx_t* idxi = labels + i * k;
float* simi = distances + i * k;
dis->set_query(x + i * d);
// mark all inverted list elements as visited
for (int j = 0; j < nprobe; j++) {
idx_t key = coarse_assign[j + i * nprobe];
if (key < 0)
break;
size_t list_length = index_ivfpq->get_list_size(key);
const idx_t* ids = index_ivfpq->invlists->get_ids(key);
for (int jj = 0; jj < list_length; jj++) {
vt.set(ids[jj]);
}
}
candidates.clear();
// copy the upper_beam elements to candidates list
int search_policy = 2;
if (search_policy == 1) {
for (int j = 0; j < hnsw.upper_beam && j < k; j++) {
if (idxi[j] < 0)
break;
candidates.push(idxi[j], simi[j]);
// search_from_candidates adds them back
idxi[j] = -1;
simi[j] = HUGE_VAL;
}
// reorder from sorted to heap
maxheap_heapify(k, simi, idxi, simi, idxi, k);
HNSWStats search_stats;
hnsw.search_from_candidates(
*dis,
k,
idxi,
simi,
candidates,
vt,
search_stats,
0,
k);
n1 += search_stats.n1;
n2 += search_stats.n2;
n3 += search_stats.n3;
ndis += search_stats.ndis;
nreorder += search_stats.nreorder;
vt.advance();
} else if (search_policy == 2) {
for (int j = 0; j < hnsw.upper_beam && j < k; j++) {
if (idxi[j] < 0)
break;
candidates.push(idxi[j], simi[j]);
}
// reorder from sorted to heap
maxheap_heapify(k, simi, idxi, simi, idxi, k);
HNSWStats search_stats;
search_from_candidates_2(
hnsw,
*dis,
k,
idxi,
simi,
candidates,
vt,
search_stats,
0,
k);
n1 += search_stats.n1;
n2 += search_stats.n2;
n3 += search_stats.n3;
ndis += search_stats.ndis;
nreorder += search_stats.nreorder;
vt.advance();
vt.advance();
}
maxheap_reorder(k, simi, idxi);
}
}
hnsw_stats.combine({n1, n2, n3, ndis, nreorder});
}
}
void IndexHNSW2Level::flip_to_ivf() {
Index2Layer* storage2l = dynamic_cast<Index2Layer*>(storage);
FAISS_THROW_IF_NOT(storage2l);
IndexIVFPQ* index_ivfpq = new IndexIVFPQ(
storage2l->q1.quantizer,
d,
storage2l->q1.nlist,
storage2l->pq.M,
8);
index_ivfpq->pq = storage2l->pq;
index_ivfpq->is_trained = storage2l->is_trained;
index_ivfpq->precompute_table();
index_ivfpq->own_fields = storage2l->q1.own_fields;
storage2l->transfer_to_IVFPQ(*index_ivfpq);
index_ivfpq->make_direct_map(true);
storage = index_ivfpq;
delete storage2l;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/impl/HNSW.h>
#include <faiss/utils/utils.h>
namespace faiss {
struct IndexHNSW;
struct ReconstructFromNeighbors {
typedef Index::idx_t idx_t;
typedef HNSW::storage_idx_t storage_idx_t;
const IndexHNSW& index;
size_t M; // number of neighbors
size_t k; // number of codebook entries
size_t nsq; // number of subvectors
size_t code_size;
int k_reorder; // nb to reorder. -1 = all
std::vector<float> codebook; // size nsq * k * (M + 1)
std::vector<uint8_t> codes; // size ntotal * code_size
size_t ntotal;
size_t d, dsub; // derived values
explicit ReconstructFromNeighbors(
const IndexHNSW& index,
size_t k = 256,
size_t nsq = 1);
/// codes must be added in the correct order and the IndexHNSW
/// must be populated and sorted
void add_codes(size_t n, const float* x);
size_t compute_distances(
size_t n,
const idx_t* shortlist,
const float* query,
float* distances) const;
/// called by add_codes
void estimate_code(const float* x, storage_idx_t i, uint8_t* code) const;
/// called by compute_distances
void reconstruct(storage_idx_t i, float* x, float* tmp) const;
void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float* x) const;
/// get the (M + 1)-by-d table of neighbor coordinates for vector i
void get_neighbor_table(storage_idx_t i, float* out) const;
};
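// Size check for the fields above (illustrative numbers): with d = 128,
// nsq = 4 subvectors and k = 256 codebook entries, dsub = d / nsq = 32,
// code_size = nsq = 4 bytes per stored vector, and the codebook holds
// nsq * k * (M + 1) floats, i.e. 4 * 256 * 33 = 33792 for M = 32 neighbors.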
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
struct IndexHNSW : Index {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
Index* storage;
ReconstructFromNeighbors* reconstruct_from_neighbors;
explicit IndexHNSW(int d = 0, int M = 32, MetricType metric = METRIC_L2);
explicit IndexHNSW(Index* storage, int M = 32);
~IndexHNSW() override;
void add(idx_t n, const float* x) override;
/// Trains the storage if needed
void train(idx_t n, const float* x) override;
/// entry point for search
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset() override;
void shrink_level_0_neighbors(int size);
/** Perform search only on level 0, given the starting points for
* each vertex.
*
* @param search_type 1:perform one search per nprobe, 2: enqueue
* all entry points
*/
void search_level_0(
idx_t n,
const float* x,
idx_t k,
const storage_idx_t* nearest,
const float* nearest_d,
float* distances,
idx_t* labels,
int nprobe = 1,
int search_type = 1) const;
/// alternative graph building
void init_level_0_from_knngraph(int k, const float* D, const idx_t* I);
/// alternative graph building
void init_level_0_from_entry_points(
int npt,
const storage_idx_t* points,
const storage_idx_t* nearests);
// reorder links from nearest to farthest
void reorder_links();
void link_singletons();
};
/** Flat index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWFlat : IndexHNSW {
IndexHNSWFlat();
IndexHNSWFlat(int d, int M, MetricType metric = METRIC_L2);
};
/** PQ index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWPQ : IndexHNSW {
IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M);
void train(idx_t n, const float* x) override;
};
/** SQ index topped with an HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWSQ : IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(
int d,
ScalarQuantizer::QuantizerType qtype,
int M,
MetricType metric = METRIC_L2);
};
/** 2-level code structure with fast random access
*/
struct IndexHNSW2Level : IndexHNSW {
IndexHNSW2Level();
IndexHNSW2Level(Index* quantizer, size_t nlist, int m_pq, int M);
void flip_to_ivf();
/// entry point for search
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
} // namespace faiss
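// --- usage sketch (not part of the library sources) -------------------------
// A minimal sketch of the graph indexes declared above. IndexHNSWFlat owns a
// flat storage index, so no training is required; efConstruction and efSearch
// on the embedded HNSW structure are the usual build-time/query-time
// speed-accuracy knobs. Sizes and parameter values here are illustrative only.
#include <vector>
#include <faiss/IndexHNSW.h>

void index_hnsw_sketch() {
    int d = 128, M = 32;
    faiss::IndexHNSWFlat index(d, M);
    index.hnsw.efConstruction = 80;   // beam width while building the graph
    index.hnsw.efSearch = 64;         // beam width at query time

    std::vector<float> xb(10000 * d, 0.0f);   // placeholder database
    index.add(10000, xb.data());              // builds the link structure level by level

    int k = 10;
    std::vector<float> D(k);
    std::vector<faiss::Index::idx_t> I(k);
    index.search(1, xb.data(), k, D.data(), I.data());
}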
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVF.h>
#include <omp.h>
#include <mutex>
#include <algorithm>
#include <cinttypes>
#include <cstdio>
#include <memory>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* Level1Quantizer implementation
******************************************/
Level1Quantizer::Level1Quantizer(Index* quantizer, size_t nlist)
: quantizer(quantizer),
nlist(nlist),
quantizer_trains_alone(0),
own_fields(false),
clustering_index(nullptr) {
// here we set a low # iterations because this is typically used
// for large clusterings (nb this is not used for the MultiIndex,
// for which quantizer_trains_alone = true)
cp.niter = 10;
}
Level1Quantizer::Level1Quantizer()
: quantizer(nullptr),
nlist(0),
quantizer_trains_alone(0),
own_fields(false),
clustering_index(nullptr) {}
Level1Quantizer::~Level1Quantizer() {
if (own_fields)
delete quantizer;
}
void Level1Quantizer::train_q1(
size_t n,
const float* x,
bool verbose,
MetricType metric_type) {
size_t d = quantizer->d;
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose)
printf("IVF quantizer does not need training.\n");
} else if (quantizer_trains_alone == 1) {
if (verbose)
printf("IVF quantizer trains alone...\n");
quantizer->train(n, x);
quantizer->verbose = verbose;
FAISS_THROW_IF_NOT_MSG(
quantizer->ntotal == nlist,
"nlist not consistent with quantizer size");
} else if (quantizer_trains_alone == 0) {
if (verbose)
printf("Training level-1 quantizer on %zd vectors in %zdD\n", n, d);
Clustering clus(d, nlist, cp);
quantizer->reset();
if (clustering_index) {
clus.train(n, x, *clustering_index);
quantizer->add(nlist, clus.centroids.data());
} else {
clus.train(n, x, *quantizer);
}
quantizer->is_trained = true;
} else if (quantizer_trains_alone == 2) {
if (verbose) {
printf("Training L2 quantizer on %zd vectors in %zdD%s\n",
n,
d,
clustering_index ? "(user provided index)" : "");
}
// also accept spherical centroids because in that case
// L2 and IP are equivalent
FAISS_THROW_IF_NOT(
metric_type == METRIC_L2 ||
(metric_type == METRIC_INNER_PRODUCT && cp.spherical));
Clustering clus(d, nlist, cp);
if (!clustering_index) {
IndexFlatL2 assigner(d);
clus.train(n, x, assigner);
} else {
clus.train(n, x, *clustering_index);
}
if (verbose) {
printf("Adding centroids to quantizer\n");
}
if (!quantizer->is_trained) {
if (verbose) {
printf("But training it first on centroids table...\n");
}
quantizer->train(nlist, clus.centroids.data());
}
quantizer->add(nlist, clus.centroids.data());
}
}
size_t Level1Quantizer::coarse_code_size() const {
size_t nl = nlist - 1;
size_t nbyte = 0;
while (nl > 0) {
nbyte++;
nl >>= 8;
}
return nbyte;
}
void Level1Quantizer::encode_listno(Index::idx_t list_no, uint8_t* code) const {
// little endian
size_t nl = nlist - 1;
while (nl > 0) {
*code++ = list_no & 0xff;
list_no >>= 8;
nl >>= 8;
}
}
Index::idx_t Level1Quantizer::decode_listno(const uint8_t* code) const {
size_t nl = nlist - 1;
int64_t list_no = 0;
int nbit = 0;
while (nl > 0) {
list_no |= int64_t(*code++) << nbit;
nbit += 8;
nl >>= 8;
}
FAISS_THROW_IF_NOT(list_no >= 0 && list_no < nlist);
return list_no;
}
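// Worked example of the little-endian list-number coding above (illustrative
// values): with nlist = 5000, nlist - 1 = 4999 = 0x1387 needs two bytes, so
// coarse_code_size() == 2. encode_listno(260) then writes bytes {0x04, 0x01}
// since 260 = 0x0104, and decode_listno() reassembles 0x04 | (0x01 << 8) == 260.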
/*****************************************
* IndexIVF implementation
******************************************/
IndexIVF::IndexIVF(
Index* quantizer,
size_t d,
size_t nlist,
size_t code_size,
MetricType metric)
: Index(d, metric),
Level1Quantizer(quantizer, nlist),
invlists(new ArrayInvertedLists(nlist, code_size)),
own_invlists(true),
code_size(code_size),
nprobe(1),
max_codes(0),
parallel_mode(0) {
FAISS_THROW_IF_NOT(d == quantizer->d);
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
// Spherical by default if the metric is inner_product
if (metric_type == METRIC_INNER_PRODUCT) {
cp.spherical = true;
}
}
IndexIVF::IndexIVF()
: invlists(nullptr),
own_invlists(false),
code_size(0),
nprobe(1),
max_codes(0),
parallel_mode(0) {}
void IndexIVF::add(idx_t n, const float* x) {
add_with_ids(n, x, nullptr);
}
void IndexIVF::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
std::unique_ptr<idx_t[]> coarse_idx(new idx_t[n]);
quantizer->assign(n, x, coarse_idx.get());
add_core(n, x, xids, coarse_idx.get());
}
void IndexIVF::add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids) {
size_t coarse_size = coarse_code_size();
DirectMapAdd dm_adder(direct_map, n, xids);
for (idx_t i = 0; i < n; i++) {
const uint8_t* code = codes + (code_size + coarse_size) * i;
idx_t list_no = decode_listno(code);
idx_t id = xids ? xids[i] : ntotal + i;
size_t ofs = invlists->add_entry(list_no, id, code + coarse_size);
dm_adder.add(i, list_no, ofs);
}
ntotal += n;
}
void IndexIVF::add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* coarse_idx) {
// do some blocking to avoid excessive allocs
idx_t bs = 65536;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(n, i0 + bs);
if (verbose) {
printf(" IndexIVF::add_with_ids %" PRId64 ":%" PRId64 "\n",
i0,
i1);
}
add_core(
i1 - i0,
x + i0 * d,
xids ? xids + i0 : nullptr,
coarse_idx + i0);
}
return;
}
FAISS_THROW_IF_NOT(coarse_idx);
FAISS_THROW_IF_NOT(is_trained);
direct_map.check_can_add(xids);
size_t nadd = 0, nminus1 = 0;
for (size_t i = 0; i < n; i++) {
if (coarse_idx[i] < 0)
nminus1++;
}
std::unique_ptr<uint8_t[]> flat_codes(new uint8_t[n * code_size]);
encode_vectors(n, x, coarse_idx, flat_codes.get());
DirectMapAdd dm_adder(direct_map, n, xids);
#pragma omp parallel reduction(+ : nadd)
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
idx_t list_no = coarse_idx[i];
if (list_no >= 0 && list_no % nt == rank) {
idx_t id = xids ? xids[i] : ntotal + i;
size_t ofs = invlists->add_entry(
list_no, id, flat_codes.get() + i * code_size);
dm_adder.add(i, list_no, ofs);
nadd++;
} else if (rank == 0 && list_no == -1) {
dm_adder.add(i, -1, 0);
}
}
}
if (verbose) {
printf(" added %zd / %" PRId64 " vectors (%zd -1s)\n",
nadd,
n,
nminus1);
}
ntotal += n;
}
void IndexIVF::make_direct_map(bool b) {
if (b) {
direct_map.set_type(DirectMap::Array, invlists, ntotal);
} else {
direct_map.set_type(DirectMap::NoMap, invlists, ntotal);
}
}
void IndexIVF::set_direct_map_type(DirectMap::Type type) {
direct_map.set_type(type, invlists, ntotal);
}
/** It is a sad fact of software that a conceptually simple function like this
* becomes very complex when you factor in several ways of parallelizing +
* interrupt/error handling + collecting stats + min/max collection. The
* codepath that is used 95% of time is the one for parallel_mode = 0 */
void IndexIVF::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
const size_t nprobe = std::min(nlist, this->nprobe);
FAISS_THROW_IF_NOT(nprobe > 0);
// search function for a subset of queries
auto sub_search_func = [this, k, nprobe](
idx_t n,
const float* x,
float* distances,
idx_t* labels,
IndexIVFStats* ivf_stats) {
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
double t0 = getmillisecs();
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
double t1 = getmillisecs();
invlists->prefetch_lists(idx.get(), n * nprobe);
search_preassigned(
n,
x,
k,
idx.get(),
coarse_dis.get(),
distances,
labels,
false,
nullptr,
ivf_stats);
double t2 = getmillisecs();
ivf_stats->quantization_time += t1 - t0;
ivf_stats->search_time += t2 - t0;
};
if ((parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT) == 0) {
int nt = std::min(omp_get_max_threads(), int(n));
std::vector<IndexIVFStats> stats(nt);
std::mutex exception_mutex;
std::string exception_string;
#pragma omp parallel for if (nt > 1)
for (idx_t slice = 0; slice < nt; slice++) {
IndexIVFStats local_stats;
idx_t i0 = n * slice / nt;
idx_t i1 = n * (slice + 1) / nt;
if (i1 > i0) {
try {
sub_search_func(
i1 - i0,
x + i0 * d,
distances + i0 * k,
labels + i0 * k,
&stats[slice]);
} catch (const std::exception& e) {
std::lock_guard<std::mutex> lock(exception_mutex);
exception_string = e.what();
}
}
}
if (!exception_string.empty()) {
FAISS_THROW_MSG(exception_string.c_str());
}
// collect stats
for (idx_t slice = 0; slice < nt; slice++) {
indexIVF_stats.add(stats[slice]);
}
} else {
// handle parallelization at level below (or don't run in parallel at
// all)
sub_search_func(n, x, distances, labels, &indexIVF_stats);
}
}
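// Usage sketch for the search path above (not part of the library sources): a
// concrete subclass such as IndexIVFFlat supplies encode_vectors() and an
// InvertedListScanner, and callers typically only adjust the query-time knobs,
// e.g.
//
//     faiss::IndexIVFFlat index(&quantizer, d, nlist);  // quantizer: e.g. an IndexFlatL2
//     index.train(n, xb);            // trains the coarse quantizer (Level1Quantizer)
//     index.add(n, xb);              // assigns each vector to an inverted list
//     index.nprobe = 16;             // number of inverted lists visited per query
//     index.search(nq, xq, k, D, I);
//
// parallel_mode selects how the OpenMP work is split (see the comment before
// IndexIVF::search); the default value 0 parallelizes over queries.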
void IndexIVF::search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* keys,
const float* coarse_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params,
IndexIVFStats* ivf_stats) const {
FAISS_THROW_IF_NOT(k > 0);
idx_t nprobe = params ? params->nprobe : this->nprobe;
nprobe = std::min((idx_t)nlist, nprobe);
FAISS_THROW_IF_NOT(nprobe > 0);
idx_t max_codes = params ? params->max_codes : this->max_codes;
size_t nlistv = 0, ndis = 0, nheap = 0;
using HeapForIP = CMin<float, idx_t>;
using HeapForL2 = CMax<float, idx_t>;
bool interrupt = false;
std::mutex exception_mutex;
std::string exception_string;
int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
bool do_heap_init = !(this->parallel_mode & PARALLEL_MODE_NO_HEAP_INIT);
bool do_parallel = omp_get_max_threads() >= 2 &&
(pmode == 0 ? false
: pmode == 3 ? n > 1
: pmode == 1 ? nprobe > 1
: nprobe * n > 1);
#pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis, nheap)
{
InvertedListScanner* scanner = get_InvertedListScanner(store_pairs);
ScopeDeleter1<InvertedListScanner> del(scanner);
/*****************************************************
* Depending on parallel_mode, there are two possible ways
* to organize the search. Here we define local functions
* that are in common between the two
******************************************************/
// initialize + reorder a result heap
auto init_result = [&](float* simi, idx_t* idxi) {
if (!do_heap_init)
return;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_heapify<HeapForIP>(k, simi, idxi);
} else {
heap_heapify<HeapForL2>(k, simi, idxi);
}
};
auto add_local_results = [&](const float* local_dis,
const idx_t* local_idx,
float* simi,
idx_t* idxi) {
if (metric_type == METRIC_INNER_PRODUCT) {
heap_addn<HeapForIP>(k, simi, idxi, local_dis, local_idx, k);
} else {
heap_addn<HeapForL2>(k, simi, idxi, local_dis, local_idx, k);
}
};
auto reorder_result = [&](float* simi, idx_t* idxi) {
if (!do_heap_init)
return;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_reorder<HeapForIP>(k, simi, idxi);
} else {
heap_reorder<HeapForL2>(k, simi, idxi);
}
};
// single list scan using the current scanner (with query
// set properly) and storing results in simi and idxi
auto scan_one_list = [&](idx_t key,
float coarse_dis_i,
float* simi,
idx_t* idxi) {
if (key < 0) {
// not enough centroids for multiprobe
return (size_t)0;
}
FAISS_THROW_IF_NOT_FMT(
key < (idx_t)nlist,
"Invalid key=%" PRId64 " nlist=%zd\n",
key,
nlist);
size_t list_size = invlists->list_size(key);
// don't waste time on empty lists
if (list_size == 0) {
return (size_t)0;
}
scanner->set_list(key, coarse_dis_i);
nlistv++;
try {
InvertedLists::ScopedCodes scodes(invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids;
const Index::idx_t* ids = nullptr;
if (!store_pairs) {
sids.reset(new InvertedLists::ScopedIds(invlists, key));
ids = sids->get();
}
nheap += scanner->scan_codes(
list_size, scodes.get(), ids, simi, idxi, k);
} catch (const std::exception& e) {
std::lock_guard<std::mutex> lock(exception_mutex);
exception_string =
demangle_cpp_symbol(typeid(e).name()) + " " + e.what();
interrupt = true;
return size_t(0);
}
return list_size;
};
/****************************************************
* Actual loops, depending on parallel_mode
****************************************************/
if (pmode == 0 || pmode == 3) {
#pragma omp for
for (idx_t i = 0; i < n; i++) {
if (interrupt) {
continue;
}
// loop over queries
scanner->set_query(x + i * d);
float* simi = distances + i * k;
idx_t* idxi = labels + i * k;
init_result(simi, idxi);
idx_t nscan = 0;
// loop over probes
for (size_t ik = 0; ik < nprobe; ik++) {
nscan += scan_one_list(
keys[i * nprobe + ik],
coarse_dis[i * nprobe + ik],
simi,
idxi);
if (max_codes && nscan >= max_codes) {
break;
}
}
ndis += nscan;
reorder_result(simi, idxi);
if (InterruptCallback::is_interrupted()) {
interrupt = true;
}
} // parallel for
} else if (pmode == 1) {
std::vector<idx_t> local_idx(k);
std::vector<float> local_dis(k);
for (size_t i = 0; i < n; i++) {
scanner->set_query(x + i * d);
init_result(local_dis.data(), local_idx.data());
#pragma omp for schedule(dynamic)
for (idx_t ik = 0; ik < nprobe; ik++) {
ndis += scan_one_list(
keys[i * nprobe + ik],
coarse_dis[i * nprobe + ik],
local_dis.data(),
local_idx.data());
// can't do the test on max_codes
}
// merge thread-local results
float* simi = distances + i * k;
idx_t* idxi = labels + i * k;
#pragma omp single
init_result(simi, idxi);
#pragma omp barrier
#pragma omp critical
{
add_local_results(
local_dis.data(), local_idx.data(), simi, idxi);
}
#pragma omp barrier
#pragma omp single
reorder_result(simi, idxi);
}
} else if (pmode == 2) {
std::vector<idx_t> local_idx(k);
std::vector<float> local_dis(k);
#pragma omp single
for (int64_t i = 0; i < n; i++) {
init_result(distances + i * k, labels + i * k);
}
#pragma omp for schedule(dynamic)
for (int64_t ij = 0; ij < n * nprobe; ij++) {
size_t i = ij / nprobe;
size_t j = ij % nprobe;
scanner->set_query(x + i * d);
init_result(local_dis.data(), local_idx.data());
ndis += scan_one_list(
keys[ij],
coarse_dis[ij],
local_dis.data(),
local_idx.data());
#pragma omp critical
{
add_local_results(
local_dis.data(),
local_idx.data(),
distances + i * k,
labels + i * k);
}
}
#pragma omp single
for (int64_t i = 0; i < n; i++) {
reorder_result(distances + i * k, labels + i * k);
}
} else {
FAISS_THROW_FMT("parallel_mode %d not supported\n", pmode);
}
} // parallel section
if (interrupt) {
if (!exception_string.empty()) {
FAISS_THROW_FMT(
"search interrupted with: %s", exception_string.c_str());
} else {
FAISS_THROW_MSG("computation interrupted");
}
}
if (ivf_stats) {
ivf_stats->nq += n;
ivf_stats->nlist += nlistv;
ivf_stats->ndis += ndis;
ivf_stats->nheap_updates += nheap;
}
}
void IndexIVF::range_search(
idx_t nx,
const float* x,
float radius,
RangeSearchResult* result) const {
const size_t nprobe = std::min(nlist, this->nprobe);
std::unique_ptr<idx_t[]> keys(new idx_t[nx * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[nx * nprobe]);
double t0 = getmillisecs();
quantizer->search(nx, x, nprobe, coarse_dis.get(), keys.get());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists(keys.get(), nx * nprobe);
range_search_preassigned(
nx,
x,
radius,
keys.get(),
coarse_dis.get(),
result,
false,
nullptr,
&indexIVF_stats);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexIVF::range_search_preassigned(
idx_t nx,
const float* x,
float radius,
const idx_t* keys,
const float* coarse_dis,
RangeSearchResult* result,
bool store_pairs,
const IVFSearchParameters* params,
IndexIVFStats* stats) const {
idx_t nprobe = params ? params->nprobe : this->nprobe;
nprobe = std::min((idx_t)nlist, nprobe);
idx_t max_codes = params ? params->max_codes : this->max_codes;
size_t nlistv = 0, ndis = 0;
bool interrupt = false;
std::mutex exception_mutex;
std::string exception_string;
std::vector<RangeSearchPartialResult*> all_pres(omp_get_max_threads());
int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
// don't start parallel section if single query
bool do_parallel = omp_get_max_threads() >= 2 &&
(pmode == 3 ? false
: pmode == 0 ? nx > 1
: pmode == 1 ? nprobe > 1
: nprobe * nx > 1);
#pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis)
{
RangeSearchPartialResult pres(result);
std::unique_ptr<InvertedListScanner> scanner(
get_InvertedListScanner(store_pairs));
FAISS_THROW_IF_NOT(scanner.get());
all_pres[omp_get_thread_num()] = &pres;
// prepare the list scanning function
auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult& qres) {
idx_t key = keys[i * nprobe + ik]; /* select the list */
if (key < 0)
return;
FAISS_THROW_IF_NOT_FMT(
key < (idx_t)nlist,
"Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n",
key,
ik,
nlist);
const size_t list_size = invlists->list_size(key);
if (list_size == 0)
return;
try {
InvertedLists::ScopedCodes scodes(invlists, key);
InvertedLists::ScopedIds ids(invlists, key);
scanner->set_list(key, coarse_dis[i * nprobe + ik]);
nlistv++;
ndis += list_size;
scanner->scan_codes_range(
list_size, scodes.get(), ids.get(), radius, qres);
} catch (const std::exception& e) {
std::lock_guard<std::mutex> lock(exception_mutex);
exception_string =
demangle_cpp_symbol(typeid(e).name()) + " " + e.what();
interrupt = true;
}
};
if (parallel_mode == 0) {
#pragma omp for
for (idx_t i = 0; i < nx; i++) {
scanner->set_query(x + i * d);
RangeQueryResult& qres = pres.new_result(i);
for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func(i, ik, qres);
}
}
} else if (parallel_mode == 1) {
for (size_t i = 0; i < nx; i++) {
scanner->set_query(x + i * d);
RangeQueryResult& qres = pres.new_result(i);
#pragma omp for schedule(dynamic)
for (int64_t ik = 0; ik < nprobe; ik++) {
scan_list_func(i, ik, qres);
}
}
} else if (parallel_mode == 2) {
std::vector<RangeQueryResult*> all_qres(nx);
RangeQueryResult* qres = nullptr;
#pragma omp for schedule(dynamic)
for (idx_t iik = 0; iik < nx * (idx_t)nprobe; iik++) {
idx_t i = iik / (idx_t)nprobe;
idx_t ik = iik % (idx_t)nprobe;
if (qres == nullptr || qres->qno != i) {
FAISS_ASSERT(!qres || i > qres->qno);
qres = &pres.new_result(i);
scanner->set_query(x + i * d);
}
scan_list_func(i, ik, *qres);
}
} else {
FAISS_THROW_FMT("parallel_mode %d not supported\n", parallel_mode);
}
if (parallel_mode == 0) {
pres.finalize();
} else {
#pragma omp barrier
#pragma omp single
RangeSearchPartialResult::merge(all_pres, false);
#pragma omp barrier
}
}
if (interrupt) {
if (!exception_string.empty()) {
FAISS_THROW_FMT(
"search interrupted with: %s", exception_string.c_str());
} else {
FAISS_THROW_MSG("computation interrupted");
}
}
if (stats) {
stats->nq += nx;
stats->nlist += nlistv;
stats->ndis += ndis;
}
}
InvertedListScanner* IndexIVF::get_InvertedListScanner(
bool /*store_pairs*/) const {
return nullptr;
}
void IndexIVF::reconstruct(idx_t key, float* recons) const {
idx_t lo = direct_map.get(key);
reconstruct_from_offset(lo_listno(lo), lo_offset(lo), recons);
}
void IndexIVF::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size(list_no);
ScopedIds idlist(invlists, list_no);
for (idx_t offset = 0; offset < list_size; offset++) {
idx_t id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
float* reconstructed = recons + (id - i0) * d;
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
/* standalone codec interface */
size_t IndexIVF::sa_code_size() const {
size_t coarse_size = coarse_code_size();
return code_size + coarse_size;
}
void IndexIVF::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
FAISS_THROW_IF_NOT(is_trained);
std::unique_ptr<int64_t[]> idx(new int64_t[n]);
quantizer->assign(n, x, idx.get());
encode_vectors(n, x, idx.get(), bytes, true);
}
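/* Illustrative sketch of the standalone codec path above, using IndexIVFFlat
 * as a concrete subclass; the variable names (d, nlist, nt, xt, n, x,
 * x_decoded) are assumptions of the example, not part of the API:
 *
 *   faiss::IndexFlatL2 coarse(d);
 *   faiss::IndexIVFFlat index(&coarse, d, nlist);
 *   index.train(nt, xt);
 *   std::vector<uint8_t> codes(n * index.sa_code_size());
 *   index.sa_encode(n, x, codes.data());   // coarse id + per-vector code
 *   index.sa_decode(n, codes.data(), x_decoded);
 */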
void IndexIVF::search_and_reconstruct(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
float* recons) const {
FAISS_THROW_IF_NOT(k > 0);
const size_t nprobe = std::min(nlist, this->nprobe);
FAISS_THROW_IF_NOT(nprobe > 0);
idx_t* idx = new idx_t[n * nprobe];
ScopeDeleter<idx_t> del(idx);
float* coarse_dis = new float[n * nprobe];
ScopeDeleter<float> del2(coarse_dis);
quantizer->search(n, x, nprobe, coarse_dis, idx);
invlists->prefetch_lists(idx, n * nprobe);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned(
n,
x,
k,
idx,
coarse_dis,
distances,
labels,
true /* store_pairs */);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// Fill with NaNs
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = lo_listno(key);
int offset = lo_offset(key);
// Update label to the actual id
labels[ij] = invlists->get_single_id(list_no, offset);
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
}
void IndexIVF::reconstruct_from_offset(
int64_t /*list_no*/,
int64_t /*offset*/,
float* /*recons*/) const {
FAISS_THROW_MSG("reconstruct_from_offset not implemented");
}
void IndexIVF::reset() {
direct_map.clear();
invlists->reset();
ntotal = 0;
}
size_t IndexIVF::remove_ids(const IDSelector& sel) {
size_t nremove = direct_map.remove_ids(sel, invlists);
ntotal -= nremove;
return nremove;
}
void IndexIVF::update_vectors(int n, const idx_t* new_ids, const float* x) {
if (direct_map.type == DirectMap::Hashtable) {
// just remove then add
IDSelectorArray sel(n, new_ids);
size_t nremove = remove_ids(sel);
FAISS_THROW_IF_NOT_MSG(
nremove == n, "did not find all entries to remove");
add_with_ids(n, x, new_ids);
return;
}
FAISS_THROW_IF_NOT(direct_map.type == DirectMap::Array);
// here it is trickier because we don't want to introduce holes
// in the continuous range of ids
FAISS_THROW_IF_NOT(is_trained);
std::vector<idx_t> assign(n);
quantizer->assign(n, x, assign.data());
std::vector<uint8_t> flat_codes(n * code_size);
encode_vectors(n, x, assign.data(), flat_codes.data());
direct_map.update_codes(
invlists, n, new_ids, assign.data(), flat_codes.data());
}
void IndexIVF::train(idx_t n, const float* x) {
if (verbose)
printf("Training level-1 quantizer\n");
train_q1(n, x, verbose, metric_type);
if (verbose)
printf("Training IVF residual\n");
train_residual(n, x);
is_trained = true;
}
void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
if (verbose)
printf("IndexIVF: no residual training\n");
// does nothing by default
}
void IndexIVF::check_compatible_for_merge(const IndexIVF& other) const {
// minimal sanity checks
FAISS_THROW_IF_NOT(other.d == d);
FAISS_THROW_IF_NOT(other.nlist == nlist);
FAISS_THROW_IF_NOT(other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG(
typeid(*this) == typeid(other),
"can only merge indexes of the same type");
FAISS_THROW_IF_NOT_MSG(
this->direct_map.no() && other.direct_map.no(),
"merge direct_map not implemented");
}
void IndexIVF::merge_from(IndexIVF& other, idx_t add_id) {
check_compatible_for_merge(other);
invlists->merge_from(other.invlists, add_id);
ntotal += other.ntotal;
other.ntotal = 0;
}
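/* Illustrative use of merge_from: assuming both indexes have the same type,
 * the same nlist/code_size and no direct map, shifting the moved ids by
 * ntotal keeps sequentially assigned ids unique:
 *
 *   index1.merge_from(index2, index1.ntotal);
 *   // index2 is empty afterwards; index1.ntotal now covers both sets
 */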
void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
if (own_invlists) {
delete invlists;
invlists = nullptr;
}
// FAISS_THROW_IF_NOT (ntotal == 0);
if (il) {
FAISS_THROW_IF_NOT(il->nlist == nlist);
FAISS_THROW_IF_NOT(
il->code_size == code_size ||
il->code_size == InvertedLists::INVALID_CODE_SIZE);
}
invlists = il;
own_invlists = own;
}
void IndexIVF::copy_subset_to(
IndexIVF& other,
int subset_type,
idx_t a1,
idx_t a2) const {
FAISS_THROW_IF_NOT(nlist == other.nlist);
FAISS_THROW_IF_NOT(code_size == other.code_size);
FAISS_THROW_IF_NOT(other.direct_map.no());
FAISS_THROW_IF_NOT_FMT(
subset_type == 0 || subset_type == 1 || subset_type == 2,
"subset type %d not implemented",
subset_type);
size_t accu_n = 0;
size_t accu_a1 = 0;
size_t accu_a2 = 0;
InvertedLists* oivf = other.invlists;
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t n = invlists->list_size(list_no);
ScopedIds ids_in(invlists, list_no);
if (subset_type == 0) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (a1 <= id && id < a2) {
oivf->add_entry(
list_no,
invlists->get_single_id(list_no, i),
ScopedCodes(invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 1) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (id % a1 == a2) {
oivf->add_entry(
list_no,
invlists->get_single_id(list_no, i),
ScopedCodes(invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 2) {
// see what is allocated to a1 and to a2
size_t next_accu_n = accu_n + n;
size_t next_accu_a1 = next_accu_n * a1 / ntotal;
size_t i1 = next_accu_a1 - accu_a1;
size_t next_accu_a2 = next_accu_n * a2 / ntotal;
size_t i2 = next_accu_a2 - accu_a2;
for (idx_t i = i1; i < i2; i++) {
oivf->add_entry(
list_no,
invlists->get_single_id(list_no, i),
ScopedCodes(invlists, list_no, i).get());
}
other.ntotal += i2 - i1;
accu_a1 = next_accu_a1;
accu_a2 = next_accu_a2;
}
accu_n += n;
}
FAISS_ASSERT(accu_n == ntotal);
}
IndexIVF::~IndexIVF() {
if (own_invlists) {
delete invlists;
}
}
/*************************************************************************
* IndexIVFStats
*************************************************************************/
void IndexIVFStats::reset() {
memset((void*)this, 0, sizeof(*this));
}
void IndexIVFStats::add(const IndexIVFStats& other) {
nq += other.nq;
nlist += other.nlist;
ndis += other.ndis;
nheap_updates += other.nheap_updates;
quantization_time += other.quantization_time;
search_time += other.search_time;
}
IndexIVFStats indexIVF_stats;
/*************************************************************************
* InvertedListScanner
*************************************************************************/
size_t InvertedListScanner::scan_codes(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float* simi,
idx_t* idxi,
size_t k) const {
size_t nup = 0;
if (!keep_max) {
for (size_t j = 0; j < list_size; j++) {
float dis = distance_to_code(codes);
if (dis < simi[0]) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
maxheap_replace_top(k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
} else {
for (size_t j = 0; j < list_size; j++) {
float dis = distance_to_code(codes);
if (dis > simi[0]) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
minheap_replace_top(k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
}
return nup;
}
void InvertedListScanner::scan_codes_range(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float radius,
RangeQueryResult& res) const {
for (size_t j = 0; j < list_size; j++) {
float dis = distance_to_code(codes);
bool keep = !keep_max
? dis < radius
: dis > radius; // TODO templatize to remove this test
if (keep) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
res.add(dis, id);
}
codes += code_size;
}
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
#define FAISS_INDEX_IVF_H
#include <stdint.h>
#include <unordered_map>
#include <vector>
#include <faiss/Clustering.h>
#include <faiss/Index.h>
#include <faiss/impl/platform_macros.h>
#include <faiss/invlists/DirectMap.h>
#include <faiss/invlists/InvertedLists.h>
#include <faiss/utils/Heap.h>
namespace faiss {
/** Encapsulates a quantizer object for the IndexIVF
*
* The class isolates the fields that are independent of the storage
* of the lists (especially training)
*/
struct Level1Quantizer {
Index* quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer (false by default)
ClusteringParameters cp; ///< to override default clustering params
Index* clustering_index; ///< to override index used during clustering
/// Trains the level-1 quantizer; sub-quantizer training happens separately in train_residual
void train_q1(
size_t n,
const float* x,
bool verbose,
MetricType metric_type);
/// compute the number of bytes required to store list ids
size_t coarse_code_size() const;
void encode_listno(Index::idx_t list_no, uint8_t* code) const;
Index::idx_t decode_listno(const uint8_t* code) const;
Level1Quantizer(Index* quantizer, size_t nlist);
Level1Quantizer();
~Level1Quantizer();
};
struct IVFSearchParameters {
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
IVFSearchParameters() : nprobe(1), max_codes(0) {}
virtual ~IVFSearchParameters() {}
};
struct InvertedListScanner;
struct IndexIVFStats;
/** Index based on an inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is stored.
*
* The inverted list object is required only after training. If none is
* set externally, an ArrayInvertedLists is used automatically.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*/
struct IndexIVF : Index, Level1Quantizer {
/// Access to the actual data
InvertedLists* invlists;
bool own_invlists;
size_t code_size; ///< code size per vector in bytes
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Parallel mode determines how queries are parallelized with OpenMP
*
* 0 (default): split over queries
* 1: parallelize over inverted lists
* 2: parallelize over both
* 3: split over queries with a finer granularity
*
* PARALLEL_MODE_NO_HEAP_INIT: binary OR with one of the modes above to
* prevent the heaps from being initialized and finalized
*/
int parallel_mode;
const int PARALLEL_MODE_NO_HEAP_INIT = 1024;
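/* Illustrative setting: parallelize over inverted lists and skip the heap
 * init/finalize, e.g. when search_preassigned() is called on result buffers
 * that the caller has already initialized:
 *
 *   index.parallel_mode = 1 | index.PARALLEL_MODE_NO_HEAP_INIT;
 */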
/** optional map that maps back ids to invlist entries. This
* enables reconstruct() */
DirectMap direct_map;
/** The Inverted file takes a quantizer (an Index) on input,
* which implements the function mapping a vector to a list
* identifier.
*/
IndexIVF(
Index* quantizer,
size_t d,
size_t nlist,
size_t code_size,
MetricType metric = METRIC_L2);
void reset() override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train(idx_t n, const float* x) override;
/// Calls add_with_ids with NULL ids
void add(idx_t n, const float* x) override;
/// default implementation that calls encode_vectors
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/** Implementation of vector addition where the vector assignments are
* predefined. The default implementation hands over the code extraction to
* encode_vectors.
*
* @param precomputed_idx quantization indices for the input vectors
* (size n)
*/
virtual void add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx);
/** Encodes a set of vectors as they would appear in the inverted lists
*
* @param list_nos inverted list ids as returned by the
* quantizer (size n). -1s are ignored.
* @param codes output codes, size n * code_size
* @param include_listno
* include the list ids in the code (in this case add
* the bytes needed to store a list id, i.e. ceil(log2(nlist) / 8),
* to the code size)
*/
virtual void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listno = false) const = 0;
/** Add vectors that are computed with the standalone codec
*
* @param codes codes to add size n * sa_code_size()
* @param xids corresponding ids, size n
*/
void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
/// Sub-classes that encode the residuals can train their encoders here
/// does nothing by default
virtual void train_residual(idx_t n, const float* x);
/** search a set of vectors, that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. The default implementation uses InvertedListScanners
* to do the search.
*
* @param n nb of vectors to query
* @param x query vectors, size n * d
* @param assign coarse quantization indices, size n * nprobe
* @param centroid_dis
* distances to coarse centroids, size n * nprobe
* @param distances
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store (inv list index, inv list offset) in the
* upper/lower 32 bits of the result instead of ids
* (used for reranking).
* @param params used to override the object's search parameters
* @param stats search stats to be updated (can be null)
*/
virtual void search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params = nullptr,
IndexIVFStats* stats = nullptr) const;
/** assign the vectors, then call search_preassigned() */
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
void range_search_preassigned(
idx_t nx,
const float* x,
float radius,
const idx_t* keys,
const float* coarse_dis,
RangeSearchResult* result,
bool store_pairs = false,
const IVFSearchParameters* params = nullptr,
IndexIVFStats* stats = nullptr) const;
/** Get a scanner for this index (store_pairs means ignore labels)
*
* The default search implementation uses this to compute the distances
*/
virtual InvertedListScanner* get_InvertedListScanner(
bool store_pairs = false) const;
/** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2
*/
void reconstruct(idx_t key, float* recons) const override;
/** Update a subset of vectors.
*
* The index must have a direct_map
*
* @param nv nb of vectors to update
* @param idx vector indices to update, size nv
* @param v vectors of new values, size nv*d
*/
virtual void update_vectors(int nv, const idx_t* idx, const float* v);
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d)
*/
void search_and_reconstruct(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
float* recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset(
int64_t list_no,
int64_t offset,
float* recons) const;
/// Dataset manipulation functions
size_t remove_ids(const IDSelector& sel) override;
/** check that the two indexes are compatible (ie, they are
* trained in the same way and have the same
* parameters). Otherwise throw. */
void check_compatible_for_merge(const IndexIVF& other) const;
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal) */
virtual void merge_from(IndexIVF& other, idx_t add_id);
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
* if subset_type == 2: copies inverted lists such that a1
* elements are left before and a2 elements are after
*/
virtual void copy_subset_to(
IndexIVF& other,
int subset_type,
idx_t a1,
idx_t a2) const;
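/* Illustrative use of copy_subset_to: split an index into two halves by id
 * range, assuming other1/other2 are empty indexes built with the same
 * quantizer and parameters and ids were assigned sequentially in [0, ntotal):
 *
 *   index.copy_subset_to(other1, 0, 0, index.ntotal / 2);
 *   index.copy_subset_to(other2, 0, index.ntotal / 2, index.ntotal);
 */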
~IndexIVF() override;
size_t get_list_size(size_t list_no) const {
return invlists->list_size(list_no);
}
/** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
void make_direct_map(bool new_maintain_direct_map = true);
void set_direct_map_type(DirectMap::Type type);
/// replace the inverted lists, old one is deallocated if own_invlists
void replace_invlists(InvertedLists* il, bool own = false);
/* The standalone codec interface (except sa_decode that is specific) */
size_t sa_code_size() const override;
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
IndexIVF();
};
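/* Minimal usage sketch for a concrete IndexIVF subclass (IndexIVFFlat is
 * declared in IndexIVFFlat.h); nt, xt, nb, xb, nq, xq, k, D and I are
 * assumptions of the example:
 *
 *   faiss::IndexFlatL2 coarse(d);
 *   faiss::IndexIVFFlat index(&coarse, d, nlist);
 *   index.train(nt, xt);            // trains the level-1 quantizer
 *   index.add(nb, xb);              // assigns and stores the vectors
 *   index.nprobe = 8;               // visit 8 inverted lists per query
 *   index.search(nq, xq, k, D, I);  // D: nq*k floats, I: nq*k idx_t
 */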
struct RangeQueryResult;
/** Object that handles a query. The inverted lists to scan are
* provided externally. The object has a lot of state, but
* distance_to_code and scan_codes can be called in multiple
* threads */
struct InvertedListScanner {
using idx_t = Index::idx_t;
idx_t list_no = -1; ///< remember current list
bool keep_max = false; ///< keep maximum instead of minimum
/// store positions in invlists rather than labels
bool store_pairs = false;
/// used in default implementation of scan_codes
size_t code_size = 0;
/// from now on we handle this query.
virtual void set_query(const float* query_vector) = 0;
/// following codes come from this inverted list
virtual void set_list(idx_t list_no, float coarse_dis) = 0;
/// compute a single query-to-code distance
virtual float distance_to_code(const uint8_t* code) const = 0;
/** scan a set of codes, compute distances to current query and
* update heap of results if necessary. Default implementation
* calls distance_to_code.
*
* @param n number of codes to scan
* @param codes codes to scan (n * code_size)
* @param ids corresponding ids (ignored if store_pairs)
* @param distances heap distances (size k)
* @param labels heap labels (size k)
* @param k heap size
* @return number of heap updates performed
*/
virtual size_t scan_codes(
size_t n,
const uint8_t* codes,
const idx_t* ids,
float* distances,
idx_t* labels,
size_t k) const;
/** scan a set of codes, compute distances to current query and
* update results if distances are below radius
*
* (default implementation fails) */
virtual void scan_codes_range(
size_t n,
const uint8_t* codes,
const idx_t* ids,
float radius,
RangeQueryResult& result) const;
virtual ~InvertedListScanner() {}
};
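/* Illustrative scanning pattern, mirroring what search_preassigned() does
 * internally; the buffers and the heap initialization of distances/labels
 * (size k) are assumptions of the example:
 *
 *   std::unique_ptr<faiss::InvertedListScanner> scanner(
 *           index.get_InvertedListScanner());
 *   scanner->set_query(x);                  // query vector, size d
 *   scanner->set_list(list_no, coarse_dis); // list selected by the quantizer
 *   scanner->scan_codes(list_size, codes, ids, distances, labels, k);
 */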
struct IndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distances computed
size_t nheap_updates; // nb of times the heap was updated
double quantization_time; // time spent quantizing vectors (in ms)
double search_time; // time spent searching lists (in ms)
IndexIVFStats() {
reset();
}
void reset();
void add(const IndexIVFStats& other);
};
// global var that collects them all
FAISS_API extern IndexIVFStats indexIVF_stats;
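/* Illustrative use of the global counters, e.g. to check how many distances a
 * batch of queries computed (nq, xq, k, D, I are assumptions of the example):
 *
 *   faiss::indexIVF_stats.reset();
 *   index.search(nq, xq, k, D, I);
 *   printf("ndis=%zu nlist=%zu\n",
 *          faiss::indexIVF_stats.ndis, faiss::indexIVF_stats.nlist);
 */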
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexIVFAdditiveQuantizer.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ResidualQuantizer.h>
#include <faiss/impl/ResultHandler.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/extra_distances.h>
#include <faiss/utils/utils.h>
namespace faiss {
/**************************************************************************************
* IndexIVFAdditiveQuantizer
**************************************************************************************/
IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
AdditiveQuantizer* aq,
Index* quantizer,
size_t d,
size_t nlist,
MetricType metric)
: IndexIVF(quantizer, d, nlist, 0, metric), aq(aq) {
by_residual = true;
}
IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq)
: IndexIVF(), aq(aq) {}
void IndexIVFAdditiveQuantizer::train_residual(idx_t n, const float* x) {
const float* x_in = x;
size_t max_train_points = 1024 * ((size_t)1 << aq->nbits[0]);
x = fvecs_maybe_subsample(
d, (size_t*)&n, max_train_points, x, verbose, 1234);
ScopeDeleter1<float> del_x(x_in == x ? nullptr : x);
if (by_residual) {
std::vector<Index::idx_t> idx(n);
quantizer->assign(n, x, idx.data());
std::vector<float> residuals(n * d);
quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
aq->train(n, residuals.data());
} else {
aq->train(n, x);
}
}
void IndexIVFAdditiveQuantizer::encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos) const {
FAISS_THROW_IF_NOT(is_trained);
// first encode then possibly add listnos
if (by_residual) {
// subtract centroids
std::vector<float> residuals(n * d);
#pragma omp parallel if (n > 10000)
for (idx_t i = 0; i < n; i++) {
quantizer->compute_residual(
x + i * d,
residuals.data() + i * d,
list_nos[i] >= 0 ? list_nos[i] : 0);
}
aq->compute_codes(residuals.data(), codes, n);
} else {
aq->compute_codes(x, codes, n);
}
if (include_listnos) {
// write back from the end, where there is enough space
size_t coarse_size = coarse_code_size();
for (idx_t i = n - 1; i >= 0; i--) {
uint8_t* code = codes + i * (code_size + coarse_size);
memmove(code + coarse_size, codes + i * code_size, code_size);
encode_listno(list_nos[i], code);
}
}
}
IndexIVFAdditiveQuantizer::~IndexIVFAdditiveQuantizer() {}
/*********************************************
* AQInvertedListScanner
*********************************************/
namespace {
using Search_type_t = AdditiveQuantizer::Search_type_t;
struct AQInvertedListScanner : InvertedListScanner {
const IndexIVFAdditiveQuantizer& ia;
const AdditiveQuantizer& aq;
std::vector<float> tmp;
AQInvertedListScanner(const IndexIVFAdditiveQuantizer& ia, bool store_pairs)
: ia(ia), aq(*ia.aq) {
this->store_pairs = store_pairs;
this->code_size = ia.code_size;
keep_max = ia.metric_type == METRIC_INNER_PRODUCT;
tmp.resize(ia.d);
}
const float* q0;
/// from now on we handle this query.
void set_query(const float* query_vector) override {
q0 = query_vector;
}
const float* q;
/// following codes come from this inverted list
void set_list(idx_t list_no, float coarse_dis) override {
if (ia.metric_type == METRIC_L2 && ia.by_residual) {
ia.quantizer->compute_residual(q0, tmp.data(), list_no);
q = tmp.data();
} else {
q = q0;
}
}
~AQInvertedListScanner() {}
};
template <bool is_IP>
struct AQInvertedListScannerDecompress : AQInvertedListScanner {
AQInvertedListScannerDecompress(
const IndexIVFAdditiveQuantizer& ia,
bool store_pairs)
: AQInvertedListScanner(ia, store_pairs) {}
float coarse_dis = 0;
/// following codes come from this inverted list
void set_list(idx_t list_no, float coarse_dis) override {
AQInvertedListScanner::set_list(list_no, coarse_dis);
if (ia.by_residual) {
this->coarse_dis = coarse_dis;
}
}
/// compute a single query-to-code distance
float distance_to_code(const uint8_t* code) const final {
std::vector<float> b(aq.d);
aq.decode(code, b.data(), 1);
FAISS_ASSERT(q);
FAISS_ASSERT(b.data());
return is_IP ? coarse_dis + fvec_inner_product(q, b.data(), aq.d)
: fvec_L2sqr(q, b.data(), aq.d);
}
~AQInvertedListScannerDecompress() override {}
};
template <bool is_IP, Search_type_t search_type>
struct AQInvertedListScannerLUT : AQInvertedListScanner {
std::vector<float> LUT, tmp;
float distance_bias;
AQInvertedListScannerLUT(
const IndexIVFAdditiveQuantizer& ia,
bool store_pairs)
: AQInvertedListScanner(ia, store_pairs) {
LUT.resize(aq.total_codebook_size);
tmp.resize(ia.d);
distance_bias = 0;
}
/// from now on we handle this query.
void set_query(const float* query_vector) override {
AQInvertedListScanner::set_query(query_vector);
if (!is_IP && !ia.by_residual) {
distance_bias = fvec_norm_L2sqr(query_vector, ia.d);
}
}
/// following codes come from this inverted list
void set_list(idx_t list_no, float coarse_dis) override {
AQInvertedListScanner::set_list(list_no, coarse_dis);
// TODO find a way to provide the nprobes together to do a matmul
// + precompute tables
aq.compute_LUT(1, q, LUT.data());
if (ia.by_residual) {
distance_bias = coarse_dis;
}
}
/// compute a single query-to-code distance
float distance_to_code(const uint8_t* code) const final {
return distance_bias +
aq.compute_1_distance_LUT<is_IP, search_type>(code, LUT.data());
}
~AQInvertedListScannerLUT() override {}
};
} // anonymous namespace
InvertedListScanner* IndexIVFAdditiveQuantizer::get_InvertedListScanner(
bool store_pairs) const {
if (metric_type == METRIC_INNER_PRODUCT) {
if (aq->search_type == AdditiveQuantizer::ST_decompress) {
return new AQInvertedListScannerDecompress<true>(
*this, store_pairs);
} else {
return new AQInvertedListScannerLUT<
true,
AdditiveQuantizer::ST_LUT_nonorm>(*this, store_pairs);
}
} else {
switch (aq->search_type) {
case AdditiveQuantizer::ST_decompress:
return new AQInvertedListScannerDecompress<false>(
*this, store_pairs);
#define A(st) \
case AdditiveQuantizer::st: \
return new AQInvertedListScannerLUT<false, AdditiveQuantizer::st>( \
*this, store_pairs);
A(ST_LUT_nonorm)
// A(ST_norm_from_LUT)
A(ST_norm_float)
A(ST_norm_qint8)
A(ST_norm_qint4)
A(ST_norm_cqint8)
A(ST_norm_cqint4)
#undef A
default:
FAISS_THROW_FMT(
"search type %d not supported", aq->search_type);
}
}
}
/**************************************************************************************
* IndexIVFResidualQuantizer
**************************************************************************************/
IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
const std::vector<size_t>& nbits,
MetricType metric,
Search_type_t search_type)
: IndexIVFAdditiveQuantizer(&rq, quantizer, d, nlist, metric),
rq(d, nbits, search_type) {
code_size = invlists->code_size = rq.code_size;
}
IndexIVFResidualQuantizer::IndexIVFResidualQuantizer()
: IndexIVFAdditiveQuantizer(&rq) {}
IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
size_t M, /* number of subquantizers */
size_t nbits, /* number of bit per subvector index */
MetricType metric,
Search_type_t search_type)
: IndexIVFResidualQuantizer(
quantizer,
d,
nlist,
std::vector<size_t>(M, nbits),
metric,
search_type) {}
IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() {}
/**************************************************************************************
* IndexIVFLocalSearchQuantizer
**************************************************************************************/
IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
size_t M, /* number of subquantizers */
size_t nbits, /* number of bit per subvector index */
MetricType metric,
Search_type_t search_type)
: IndexIVFAdditiveQuantizer(&lsq, quantizer, d, nlist, metric),
lsq(d, M, nbits, search_type) {
code_size = invlists->code_size = lsq.code_size;
}
IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
: IndexIVFAdditiveQuantizer(&lsq) {}
IndexIVFLocalSearchQuantizer::~IndexIVFLocalSearchQuantizer() {}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#ifndef FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
#define FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
#include <faiss/impl/AdditiveQuantizer.h>
#include <cstdint>
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/impl/LocalSearchQuantizer.h>
#include <faiss/impl/ResidualQuantizer.h>
#include <faiss/impl/platform_macros.h>
namespace faiss {
/// Abstract class for IVF additive quantizers.
/// The search functions are in common.
struct IndexIVFAdditiveQuantizer : IndexIVF {
// the quantizer
AdditiveQuantizer* aq;
bool by_residual = true;
int use_precomputed_table = 0; // for future use
using Search_type_t = AdditiveQuantizer::Search_type_t;
IndexIVFAdditiveQuantizer(
AdditiveQuantizer* aq,
Index* quantizer,
size_t d,
size_t nlist,
MetricType metric = METRIC_L2);
explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
void train_residual(idx_t n, const float* x) override;
void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos = false) const override;
InvertedListScanner* get_InvertedListScanner(
bool store_pairs) const override;
~IndexIVFAdditiveQuantizer() override;
};
/** IndexIVF based on a residual quantizer. Stored vectors are
* approximated by residual quantization codes.
*/
struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
/// The residual quantizer used to encode the vectors
ResidualQuantizer rq;
/** Constructor.
*
* @param d dimensionality of the input vectors
* @param M number of subquantizers
* @param nbits number of bit per subvector index
*/
IndexIVFResidualQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
const std::vector<size_t>& nbits,
MetricType metric = METRIC_L2,
Search_type_t search_type = AdditiveQuantizer::ST_decompress);
IndexIVFResidualQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
size_t M, /* number of subquantizers */
size_t nbits, /* number of bit per subvector index */
MetricType metric = METRIC_L2,
Search_type_t search_type = AdditiveQuantizer::ST_decompress);
IndexIVFResidualQuantizer();
virtual ~IndexIVFResidualQuantizer();
};
/** IndexIVF based on a local search quantizer (LSQ). Stored vectors are
* approximated by local search quantization codes.
*/
struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
/// The LSQ quantizer used to encode the vectors
LocalSearchQuantizer lsq;
/** Constructor.
*
* @param d dimensionality of the input vectors
* @param M number of subquantizers
* @param nbits number of bit per subvector index
*/
IndexIVFLocalSearchQuantizer(
Index* quantizer,
size_t d,
size_t nlist,
size_t M, /* number of subquantizers */
size_t nbits, /* number of bit per subvector index */
MetricType metric = METRIC_L2,
Search_type_t search_type = AdditiveQuantizer::ST_decompress);
IndexIVFLocalSearchQuantizer();
virtual ~IndexIVFLocalSearchQuantizer();
};
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFFlat.h>
#include <omp.h>
#include <cinttypes>
#include <cstdio>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/utils.h>
namespace faiss {
/*****************************************
* IndexIVFFlat implementation
******************************************/
IndexIVFFlat::IndexIVFFlat(
Index* quantizer,
size_t d,
size_t nlist,
MetricType metric)
: IndexIVF(quantizer, d, nlist, sizeof(float) * d, metric) {
code_size = sizeof(float) * d;
}
void IndexIVFFlat::add_core(
idx_t n,
const float* x,
const int64_t* xids,
const int64_t* coarse_idx)
{
FAISS_THROW_IF_NOT(is_trained);
FAISS_THROW_IF_NOT(coarse_idx);
assert(invlists);
direct_map.check_can_add(xids);
int64_t n_add = 0;
DirectMapAdd dm_adder(direct_map, n, xids);
#pragma omp parallel reduction(+ : n_add)
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
idx_t list_no = coarse_idx[i];
if (list_no >= 0 && list_no % nt == rank) {
idx_t id = xids ? xids[i] : ntotal + i;
const float* xi = x + i * d;
size_t offset =
invlists->add_entry(list_no, id, (const uint8_t*)xi);
dm_adder.add(i, list_no, offset);
n_add++;
} else if (rank == 0 && list_no == -1) {
dm_adder.add(i, -1, 0);
}
}
}
if (verbose) {
printf("IndexIVFFlat::add_core: added %" PRId64 " / %" PRId64
" vectors\n",
n_add,
n);
}
ntotal += n;
}
void IndexIVFFlat::encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos) const {
if (!include_listnos) {
memcpy(codes, x, code_size * n);
} else {
size_t coarse_size = coarse_code_size();
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos[i];
uint8_t* code = codes + i * (code_size + coarse_size);
const float* xi = x + i * d;
if (list_no >= 0) {
encode_listno(list_no, code);
memcpy(code + coarse_size, xi, code_size);
} else {
memset(code, 0, code_size + coarse_size);
}
}
}
}
void IndexIVFFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
size_t coarse_size = coarse_code_size();
for (size_t i = 0; i < n; i++) {
const uint8_t* code = bytes + i * (code_size + coarse_size);
float* xi = x + i * d;
memcpy(xi, code + coarse_size, code_size);
}
}
namespace {
template <MetricType metric, class C>
struct IVFFlatScanner : InvertedListScanner {
size_t d;
IVFFlatScanner(size_t d, bool store_pairs) : d(d) {
this->store_pairs = store_pairs;
}
const float* xi;
void set_query(const float* query) override {
this->xi = query;
}
void set_list(idx_t list_no, float /* coarse_dis */) override {
this->list_no = list_no;
}
float distance_to_code(const uint8_t* code) const override {
const float* yj = (float*)code;
float dis = metric == METRIC_INNER_PRODUCT
? fvec_inner_product(xi, yj, d)
: fvec_L2sqr(xi, yj, d);
return dis;
}
size_t scan_codes(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float* simi,
idx_t* idxi,
size_t k) const override {
const float* list_vecs = (const float*)codes;
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
const float* yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT
? fvec_inner_product(xi, yj, d)
: fvec_L2sqr(xi, yj, d);
if (C::cmp(simi[0], dis)) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
heap_replace_top<C>(k, simi, idxi, dis, id);
nup++;
}
}
return nup;
}
void scan_codes_range(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float radius,
RangeQueryResult& res) const override {
const float* list_vecs = (const float*)codes;
for (size_t j = 0; j < list_size; j++) {
const float* yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT
? fvec_inner_product(xi, yj, d)
: fvec_L2sqr(xi, yj, d);
if (C::cmp(radius, dis)) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
res.add(dis, id);
}
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFFlat::get_InvertedListScanner(
bool store_pairs) const {
if (metric_type == METRIC_INNER_PRODUCT) {
return new IVFFlatScanner<METRIC_INNER_PRODUCT, CMin<float, int64_t>>(
d, store_pairs);
} else if (metric_type == METRIC_L2) {
return new IVFFlatScanner<METRIC_L2, CMax<float, int64_t>>(
d, store_pairs);
} else {
FAISS_THROW_MSG("metric type not supported");
}
return nullptr;
}
void IndexIVFFlat::reconstruct_from_offset(
int64_t list_no,
int64_t offset,
float* recons) const {
memcpy(recons, invlists->get_single_code(list_no, offset), code_size);
}
/*****************************************
* IndexIVFFlatDedup implementation
******************************************/
IndexIVFFlatDedup::IndexIVFFlatDedup(
Index* quantizer,
size_t d,
size_t nlist_,
MetricType metric_type)
: IndexIVFFlat(quantizer, d, nlist_, metric_type) {}
void IndexIVFFlatDedup::train(idx_t n, const float* x) {
std::unordered_map<uint64_t, idx_t> map;
std::unique_ptr<float[]> x2(new float[n * d]);
int64_t n2 = 0;
for (int64_t i = 0; i < n; i++) {
uint64_t hash = hash_bytes((uint8_t*)(x + i * d), code_size);
if (map.count(hash) &&
!memcmp(x2.get() + map[hash] * d, x + i * d, code_size)) {
// is duplicate, skip
} else {
map[hash] = n2;
memcpy(x2.get() + n2 * d, x + i * d, code_size);
n2++;
}
}
if (verbose) {
printf("IndexIVFFlatDedup::train: train on %" PRId64
" points after dedup "
"(was %" PRId64 " points)\n",
n2,
n);
}
IndexIVFFlat::train(n2, x2.get());
}
void IndexIVFFlatDedup::add_with_ids(
idx_t na,
const float* x,
const idx_t* xids) {
FAISS_THROW_IF_NOT(is_trained);
assert(invlists);
FAISS_THROW_IF_NOT_MSG(
direct_map.no(), "IVFFlatDedup not implemented with direct_map");
std::unique_ptr<int64_t[]> idx(new int64_t[na]);
quantizer->assign(na, x, idx.get());
int64_t n_add = 0, n_dup = 0;
#pragma omp parallel reduction(+ : n_add, n_dup)
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < na; i++) {
int64_t list_no = idx[i];
if (list_no < 0 || list_no % nt != rank) {
continue;
}
idx_t id = xids ? xids[i] : ntotal + i;
const float* xi = x + i * d;
// search if there is already an entry with that id
InvertedLists::ScopedCodes codes(invlists, list_no);
int64_t n = invlists->list_size(list_no);
int64_t offset = -1;
for (int64_t o = 0; o < n; o++) {
if (!memcmp(codes.get() + o * code_size, xi, code_size)) {
offset = o;
break;
}
}
if (offset == -1) { // not found
invlists->add_entry(list_no, id, (const uint8_t*)xi);
} else {
// mark equivalence
idx_t id2 = invlists->get_single_id(list_no, offset);
std::pair<idx_t, idx_t> pair(id2, id);
#pragma omp critical
// executed by one thread at a time
instances.insert(pair);
n_dup++;
}
n_add++;
}
}
if (verbose) {
printf("IndexIVFFlat::add_with_ids: added %" PRId64 " / %" PRId64
" vectors"
" (out of which %" PRId64 " are duplicates)\n",
n_add,
na,
n_dup);
}
ntotal += n_add;
}
void IndexIVFFlatDedup::search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params,
IndexIVFStats* stats) const {
FAISS_THROW_IF_NOT_MSG(
!store_pairs, "store_pairs not supported in IVFDedup");
IndexIVFFlat::search_preassigned(
n, x, k, assign, centroid_dis, distances, labels, false, params);
std::vector<idx_t> labels2(k);
std::vector<float> dis2(k);
for (int64_t i = 0; i < n; i++) {
idx_t* labels1 = labels + i * k;
float* dis1 = distances + i * k;
int64_t j = 0;
for (; j < k; j++) {
if (instances.find(labels1[j]) != instances.end()) {
// a duplicate: special handling
break;
}
}
if (j < k) {
// there are duplicates, special handling
int64_t j0 = j;
int64_t rp = j;
while (j < k) {
auto range = instances.equal_range(labels1[rp]);
float dis = dis1[rp];
labels2[j] = labels1[rp];
dis2[j] = dis;
j++;
for (auto it = range.first; j < k && it != range.second; ++it) {
labels2[j] = it->second;
dis2[j] = dis;
j++;
}
rp++;
}
memcpy(labels1 + j0,
labels2.data() + j0,
sizeof(labels1[0]) * (k - j0));
memcpy(dis1 + j0, dis2.data() + j0, sizeof(dis2[0]) * (k - j0));
}
}
}
size_t IndexIVFFlatDedup::remove_ids(const IDSelector& sel) {
std::unordered_map<idx_t, idx_t> replace;
std::vector<std::pair<idx_t, idx_t>> toadd;
for (auto it = instances.begin(); it != instances.end();) {
if (sel.is_member(it->first)) {
// then we erase this entry
if (!sel.is_member(it->second)) {
// if the second is not erased
if (replace.count(it->first) == 0) {
replace[it->first] = it->second;
} else { // remember we should add an element
std::pair<idx_t, idx_t> new_entry(
replace[it->first], it->second);
toadd.push_back(new_entry);
}
}
it = instances.erase(it);
} else {
if (sel.is_member(it->second)) {
it = instances.erase(it);
} else {
++it;
}
}
}
instances.insert(toadd.begin(), toadd.end());
// mostly copied from IndexIVF.cpp
FAISS_THROW_IF_NOT_MSG(
direct_map.no(), "direct map remove not implemented");
std::vector<int64_t> toremove(nlist);
#pragma omp parallel for
for (int64_t i = 0; i < nlist; i++) {
int64_t l0 = invlists->list_size(i), l = l0, j = 0;
InvertedLists::ScopedIds idsi(invlists, i);
while (j < l) {
if (sel.is_member(idsi[j])) {
if (replace.count(idsi[j]) == 0) {
l--;
invlists->update_entry(
i,
j,
invlists->get_single_id(i, l),
InvertedLists::ScopedCodes(invlists, i, l).get());
} else {
invlists->update_entry(
i,
j,
replace[idsi[j]],
InvertedLists::ScopedCodes(invlists, i, j).get());
j++;
}
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel on ondisk because of possible shrinks
int64_t nremove = 0;
for (int64_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexIVFFlatDedup::range_search(
idx_t,
const float*,
float,
RangeSearchResult*) const {
FAISS_THROW_MSG("not implemented");
}
void IndexIVFFlatDedup::update_vectors(int, const idx_t*, const float*) {
FAISS_THROW_MSG("not implemented");
}
void IndexIVFFlatDedup::reconstruct_from_offset(int64_t, int64_t, float*)
const {
FAISS_THROW_MSG("not implemented");
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_FLAT_H
#define FAISS_INDEX_IVF_FLAT_H
#include <stdint.h>
#include <unordered_map>
#include <faiss/IndexIVF.h>
namespace faiss {
/** Inverted file with stored vectors. Here the inverted file
* pre-selects the vectors to be searched, but they are not otherwise
* encoded, the code array just contains the raw float entries.
*/
struct IndexIVFFlat : IndexIVF {
IndexIVFFlat(
Index* quantizer,
size_t d,
size_t nlist_,
MetricType = METRIC_L2);
void add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx) override;
void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos = false) const override;
InvertedListScanner* get_InvertedListScanner(
bool store_pairs) const override;
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
IndexIVFFlat() {}
};
struct IndexIVFFlatDedup : IndexIVFFlat {
/** Maps ids stored in the index to the ids of vectors that are
* the same. When a vector is unique, it does not appear in the
* instances map */
std::unordered_multimap<idx_t, idx_t> instances;
IndexIVFFlatDedup(
Index* quantizer,
size_t d,
size_t nlist_,
MetricType = METRIC_L2);
/// also dedups the training set
void train(idx_t n, const float* x) override;
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params = nullptr,
IndexIVFStats* stats = nullptr) const override;
size_t remove_ids(const IDSelector& sel) override;
/// not implemented
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
/// not implemented
void update_vectors(int nv, const idx_t* idx, const float* v) override;
/// not implemented
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;
IndexIVFFlatDedup() {}
};
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFPQ.h>
#include <stdint.h>
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <algorithm>
#include <faiss/utils/Heap.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/utils.h>
#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
/*****************************************
* IndexIVFPQ implementation
******************************************/
IndexIVFPQ::IndexIVFPQ(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
MetricType metric)
: IndexIVF(quantizer, d, nlist, 0, metric), pq(d, M, nbits_per_idx) {
FAISS_THROW_IF_NOT(nbits_per_idx <= 8);
code_size = pq.code_size;
invlists->code_size = code_size;
is_trained = false;
by_residual = true;
use_precomputed_table = 0;
scan_table_threshold = 0;
polysemous_training = nullptr;
do_polysemous_training = false;
polysemous_ht = 0;
}
/****************************************************************
* training */
void IndexIVFPQ::train_residual(idx_t n, const float* x) {
train_residual_o(n, x, nullptr);
}
void IndexIVFPQ::train_residual_o(idx_t n, const float* x, float* residuals_2) {
const float* x_in = x;
x = fvecs_maybe_subsample(
d,
(size_t*)&n,
pq.cp.max_points_per_centroid * pq.ksub,
x,
verbose,
pq.cp.seed);
ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
const float* trainset;
ScopeDeleter<float> del_residuals;
if (by_residual) {
if (verbose)
printf("computing residuals\n");
idx_t* assign = new idx_t[n]; // assignment to coarse centroids
ScopeDeleter<idx_t> del(assign);
quantizer->assign(n, x, assign);
float* residuals = new float[n * d];
del_residuals.set(residuals);
for (idx_t i = 0; i < n; i++)
quantizer->compute_residual(
x + i * d, residuals + i * d, assign[i]);
trainset = residuals;
} else {
trainset = x;
}
if (verbose)
printf("training %zdx%zd product quantizer on %" PRId64
" vectors in %dD\n",
pq.M,
pq.ksub,
n,
d);
pq.verbose = verbose;
pq.train(n, trainset);
if (do_polysemous_training) {
if (verbose)
printf("doing polysemous training for PQ\n");
PolysemousTraining default_pt;
PolysemousTraining* pt = polysemous_training;
if (!pt)
pt = &default_pt;
pt->optimize_pq_for_hamming(pq, n, trainset);
}
// prepare second-level residuals for refine PQ
if (residuals_2) {
uint8_t* train_codes = new uint8_t[pq.code_size * n];
ScopeDeleter<uint8_t> del(train_codes);
pq.compute_codes(trainset, train_codes, n);
for (idx_t i = 0; i < n; i++) {
const float* xx = trainset + i * d;
float* res = residuals_2 + i * d;
pq.decode(train_codes + i * pq.code_size, res);
for (int j = 0; j < d; j++)
res[j] = xx[j] - res[j];
}
}
if (by_residual) {
precompute_table();
}
}
/****************************************************************
* IVFPQ as codec */
/* produce a binary signature based on the residual vector */
void IndexIVFPQ::encode(idx_t key, const float* x, uint8_t* code) const {
if (by_residual) {
std::vector<float> residual_vec(d);
quantizer->compute_residual(x, residual_vec.data(), key);
pq.compute_code(residual_vec.data(), code);
} else
pq.compute_code(x, code);
}
void IndexIVFPQ::encode_multiple(
size_t n,
idx_t* keys,
const float* x,
uint8_t* xcodes,
bool compute_keys) const {
if (compute_keys)
quantizer->assign(n, x, keys);
encode_vectors(n, x, keys, xcodes);
}
void IndexIVFPQ::decode_multiple(
size_t n,
const idx_t* keys,
const uint8_t* xcodes,
float* x) const {
pq.decode(xcodes, x, n);
if (by_residual) {
std::vector<float> centroid(d);
for (size_t i = 0; i < n; i++) {
quantizer->reconstruct(keys[i], centroid.data());
float* xi = x + i * d;
for (size_t j = 0; j < d; j++) {
xi[j] += centroid[j];
}
}
}
}
/****************************************************************
* add */
void IndexIVFPQ::add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* coarse_idx) {
add_core_o(n, x, xids, nullptr, coarse_idx);
}
static float* compute_residuals(
const Index* quantizer,
Index::idx_t n,
const float* x,
const Index::idx_t* list_nos) {
size_t d = quantizer->d;
float* residuals = new float[n * d];
// TODO: parallelize?
for (size_t i = 0; i < n; i++) {
if (list_nos[i] < 0)
memset(residuals + i * d, 0, sizeof(*residuals) * d);
else
quantizer->compute_residual(
x + i * d, residuals + i * d, list_nos[i]);
}
return residuals;
}
void IndexIVFPQ::encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos) const {
if (by_residual) {
float* to_encode = compute_residuals(quantizer, n, x, list_nos);
ScopeDeleter<float> del(to_encode);
pq.compute_codes(to_encode, codes, n);
} else {
pq.compute_codes(x, codes, n);
}
if (include_listnos) {
size_t coarse_size = coarse_code_size();
for (idx_t i = n - 1; i >= 0; i--) {
uint8_t* code = codes + i * (coarse_size + code_size);
memmove(code + coarse_size, codes + i * code_size, code_size);
encode_listno(list_nos[i], code);
}
}
}
void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
size_t coarse_size = coarse_code_size();
#pragma omp parallel
{
std::vector<float> residual(d);
#pragma omp for
for (idx_t i = 0; i < n; i++) {
const uint8_t* code = codes + i * (code_size + coarse_size);
int64_t list_no = decode_listno(code);
float* xi = x + i * d;
pq.decode(code + coarse_size, xi);
if (by_residual) {
quantizer->reconstruct(list_no, residual.data());
for (size_t j = 0; j < d; j++) {
xi[j] += residual[j];
}
}
}
}
}
void IndexIVFPQ::add_core_o(
idx_t n,
const float* x,
const idx_t* xids,
float* residuals_2,
const idx_t* precomputed_idx) {
idx_t bs = 32768;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(i0 + bs, n);
if (verbose) {
printf("IndexIVFPQ::add_core_o: adding %" PRId64 ":%" PRId64
" / %" PRId64 "\n",
i0,
i1,
n);
}
add_core_o(
i1 - i0,
x + i0 * d,
xids ? xids + i0 : nullptr,
residuals_2 ? residuals_2 + i0 * d : nullptr,
precomputed_idx ? precomputed_idx + i0 : nullptr);
}
return;
}
InterruptCallback::check();
direct_map.check_can_add(xids);
FAISS_THROW_IF_NOT(is_trained);
double t0 = getmillisecs();
const idx_t* idx;
ScopeDeleter<idx_t> del_idx;
if (precomputed_idx) {
idx = precomputed_idx;
} else {
idx_t* idx0 = new idx_t[n];
del_idx.set(idx0);
quantizer->assign(n, x, idx0);
idx = idx0;
}
double t1 = getmillisecs();
uint8_t* xcodes = new uint8_t[n * code_size];
ScopeDeleter<uint8_t> del_xcodes(xcodes);
const float* to_encode = nullptr;
ScopeDeleter<float> del_to_encode;
if (by_residual) {
to_encode = compute_residuals(quantizer, n, x, idx);
del_to_encode.set(to_encode);
} else {
to_encode = x;
}
pq.compute_codes(to_encode, xcodes, n);
double t2 = getmillisecs();
// TODO: parallelize?
size_t n_ignore = 0;
for (size_t i = 0; i < n; i++) {
idx_t key = idx[i];
idx_t id = xids ? xids[i] : ntotal + i;
if (key < 0) {
direct_map.add_single_id(id, -1, 0);
n_ignore++;
if (residuals_2)
memset(residuals_2, 0, sizeof(*residuals_2) * d);
continue;
}
uint8_t* code = xcodes + i * code_size;
size_t offset = invlists->add_entry(key, id, code);
if (residuals_2) {
float* res2 = residuals_2 + i * d;
const float* xi = to_encode + i * d;
pq.decode(code, res2);
for (int j = 0; j < d; j++)
res2[j] = xi[j] - res2[j];
}
direct_map.add_single_id(id, key, offset);
}
double t3 = getmillisecs();
if (verbose) {
char comment[100] = {0};
if (n_ignore > 0)
snprintf(comment, 100, "(%zd vectors ignored)", n_ignore);
printf(" add_core times: %.3f %.3f %.3f %s\n",
t1 - t0,
t2 - t1,
t3 - t2,
comment);
}
ntotal += n;
}
void IndexIVFPQ::reconstruct_from_offset(
int64_t list_no,
int64_t offset,
float* recons) const {
const uint8_t* code = invlists->get_single_code(list_no, offset);
if (by_residual) {
std::vector<float> centroid(d);
quantizer->reconstruct(list_no, centroid.data());
pq.decode(code, recons);
for (int i = 0; i < d; ++i) {
recons[i] += centroid[i];
}
} else {
pq.decode(code, recons);
}
}
/// 2G by default, accommodates tables up to PQ32 w/ 65536 centroids
size_t precomputed_table_max_bytes = ((size_t)1) << 31;
/** Precomputed tables for residuals
*
* During IVFPQ search with by_residual, we compute
*
* d = || x - y_C - y_R ||^2
*
* where x is the query vector, y_C the coarse centroid, y_R the
* refined PQ centroid. The expression can be decomposed as:
*
* d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
* --------------- --------------------------- -------
* term 1 term 2 term 3
*
* When using multiprobe, we use the following decomposition:
* - term 1 is the distance to the coarse centroid, that is computed
* during the 1st stage search.
* - term 2 can be precomputed, as it does not involve x. However,
* because of the PQ, it needs nlist * M * ksub storage. This is why
* use_precomputed_table is off by default
* - term 3 is the classical non-residual distance table.
*
* Since y_R is defined by a product quantizer, it is split across
* subvectors and stored separately for each subvector. If the coarse
* quantizer is a MultiIndexQuantizer then the table can be stored
* more compactly.
*
* At search time, the tables for term 2 and term 3 are added up. This
* is faster when the length of the lists is > ksub * M.
*/
void initialize_IVFPQ_precomputed_table(
int& use_precomputed_table,
const Index* quantizer,
const ProductQuantizer& pq,
AlignedTable<float>& precomputed_table,
bool verbose) {
size_t nlist = quantizer->ntotal;
size_t d = quantizer->d;
FAISS_THROW_IF_NOT(d == pq.d);
if (use_precomputed_table == -1) {
precomputed_table.resize(0);
return;
}
if (use_precomputed_table == 0) { // then choose the type of table
if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
if (verbose) {
printf("IndexIVFPQ::precompute_table: precomputed "
"tables not needed for inner product quantizers\n");
}
precomputed_table.resize(0);
return;
}
const MultiIndexQuantizer* miq =
dynamic_cast<const MultiIndexQuantizer*>(quantizer);
if (miq && pq.M % miq->pq.M == 0)
use_precomputed_table = 2;
else {
size_t table_size = pq.M * pq.ksub * nlist * sizeof(float);
if (table_size > precomputed_table_max_bytes) {
if (verbose) {
printf("IndexIVFPQ::precompute_table: not precomputing table, "
"it would be too big: %zd bytes (max %zd)\n",
table_size,
precomputed_table_max_bytes);
use_precomputed_table = 0;
}
return;
}
use_precomputed_table = 1;
}
} // otherwise assume user has set appropriate flag on input
if (verbose) {
printf("precomputing IVFPQ tables type %d\n", use_precomputed_table);
}
// squared norms of the PQ centroids
std::vector<float> r_norms(pq.M * pq.ksub, NAN);
for (int m = 0; m < pq.M; m++)
for (int j = 0; j < pq.ksub; j++)
r_norms[m * pq.ksub + j] =
fvec_norm_L2sqr(pq.get_centroids(m, j), pq.dsub);
if (use_precomputed_table == 1) {
precomputed_table.resize(nlist * pq.M * pq.ksub);
std::vector<float> centroid(d);
for (size_t i = 0; i < nlist; i++) {
quantizer->reconstruct(i, centroid.data());
float* tab = &precomputed_table[i * pq.M * pq.ksub];
pq.compute_inner_prod_table(centroid.data(), tab);
fvec_madd(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
}
} else if (use_precomputed_table == 2) {
const MultiIndexQuantizer* miq =
dynamic_cast<const MultiIndexQuantizer*>(quantizer);
FAISS_THROW_IF_NOT(miq);
const ProductQuantizer& cpq = miq->pq;
FAISS_THROW_IF_NOT(pq.M % cpq.M == 0);
precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
// reorder PQ centroid table
std::vector<float> centroids(d * cpq.ksub, NAN);
for (int m = 0; m < cpq.M; m++) {
for (size_t i = 0; i < cpq.ksub; i++) {
memcpy(centroids.data() + i * d + m * cpq.dsub,
cpq.get_centroids(m, i),
sizeof(*centroids.data()) * cpq.dsub);
}
}
pq.compute_inner_prod_tables(
cpq.ksub, centroids.data(), precomputed_table.data());
for (size_t i = 0; i < cpq.ksub; i++) {
float* tab = &precomputed_table[i * pq.M * pq.ksub];
fvec_madd(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
}
}
}
void IndexIVFPQ::precompute_table() {
initialize_IVFPQ_precomputed_table(
use_precomputed_table, quantizer, pq, precomputed_table, verbose);
}
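// A minimal usage sketch (not part of the library; names and sizes are
// illustrative): forcing the generic precomputed tables before adding and
// searching. precompute_table() fills precomputed_table with
// || y_R ||^2 + 2 * (y_C|y_R) (term 2 above) for every list and PQ centroid.
//
//   faiss::IndexFlatL2 coarse(d);
//   faiss::IndexIVFPQ index(&coarse, d, nlist, /*M*/ 8, /*nbits*/ 8);
//   index.train(nt, xt);
//   index.use_precomputed_table = 1; // request the generic table layout
//   index.precompute_table();        // resizes and fills precomputed_table
//   index.add(nb, xb);
//   index.nprobe = 16;
//   index.search(nq, xq, k, D, I);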
namespace {
using idx_t = Index::idx_t;
#define TIC t0 = get_cycles()
#define TOC get_cycles() - t0
/** QueryTables manages the various ways of searching an
* IndexIVFPQ. The code contains a lot of branches, depending on:
* - metric_type: are we computing L2 or Inner product similarity?
* - by_residual: do we encode raw vectors or residuals?
* - use_precomputed_table: are x_R|x_C tables precomputed?
* - polysemous_ht: are we filtering with polysemous codes?
*/
struct QueryTables {
/*****************************************************
* General data from the IVFPQ
*****************************************************/
const IndexIVFPQ& ivfpq;
const IVFSearchParameters* params;
// copied from IndexIVFPQ for easier access
int d;
const ProductQuantizer& pq;
MetricType metric_type;
bool by_residual;
int use_precomputed_table;
int polysemous_ht;
// pre-allocated data buffers
float *sim_table, *sim_table_2;
float *residual_vec, *decoded_vec;
// single data buffer
std::vector<float> mem;
// for table pointers
std::vector<const float*> sim_table_ptrs;
explicit QueryTables(
const IndexIVFPQ& ivfpq,
const IVFSearchParameters* params)
: ivfpq(ivfpq),
d(ivfpq.d),
pq(ivfpq.pq),
metric_type(ivfpq.metric_type),
by_residual(ivfpq.by_residual),
use_precomputed_table(ivfpq.use_precomputed_table) {
mem.resize(pq.ksub * pq.M * 2 + d * 2);
sim_table = mem.data();
sim_table_2 = sim_table + pq.ksub * pq.M;
residual_vec = sim_table_2 + pq.ksub * pq.M;
decoded_vec = residual_vec + d;
// for polysemous
polysemous_ht = ivfpq.polysemous_ht;
if (auto ivfpq_params =
dynamic_cast<const IVFPQSearchParameters*>(params)) {
polysemous_ht = ivfpq_params->polysemous_ht;
}
if (polysemous_ht != 0) {
q_code.resize(pq.code_size);
}
init_list_cycles = 0;
sim_table_ptrs.resize(pq.M);
}
/*****************************************************
* What we do when query is known
*****************************************************/
// field specific to query
const float* qi;
// query-specific initialization
void init_query(const float* qi) {
this->qi = qi;
if (metric_type == METRIC_INNER_PRODUCT)
init_query_IP();
else
init_query_L2();
if (!by_residual && polysemous_ht != 0)
pq.compute_code(qi, q_code.data());
}
void init_query_IP() {
// precompute some tables specific to the query qi
pq.compute_inner_prod_table(qi, sim_table);
}
void init_query_L2() {
if (!by_residual) {
pq.compute_distance_table(qi, sim_table);
} else if (use_precomputed_table) {
pq.compute_inner_prod_table(qi, sim_table_2);
}
}
/*****************************************************
* When inverted list is known: prepare computations
*****************************************************/
// fields specific to list
Index::idx_t key;
float coarse_dis;
std::vector<uint8_t> q_code;
uint64_t init_list_cycles;
/// once we know the query and the centroid, we can prepare the
/// sim_table that will be used for accumulation
/// and dis0, the initial value
float precompute_list_tables() {
float dis0 = 0;
uint64_t t0;
TIC;
if (by_residual) {
if (metric_type == METRIC_INNER_PRODUCT)
dis0 = precompute_list_tables_IP();
else
dis0 = precompute_list_tables_L2();
}
init_list_cycles += TOC;
return dis0;
}
float precompute_list_table_pointers() {
float dis0 = 0;
uint64_t t0;
TIC;
if (by_residual) {
if (metric_type == METRIC_INNER_PRODUCT)
FAISS_THROW_MSG("not implemented");
else
dis0 = precompute_list_table_pointers_L2();
}
init_list_cycles += TOC;
return dis0;
}
/*****************************************************
* compute tables for inner prod
*****************************************************/
float precompute_list_tables_IP() {
// prepare the sim_table that will be used for accumulation
// and dis0, the initial value
ivfpq.quantizer->reconstruct(key, decoded_vec);
// decoded_vec = centroid
float dis0 = fvec_inner_product(qi, decoded_vec, d);
if (polysemous_ht) {
for (int i = 0; i < d; i++) {
residual_vec[i] = qi[i] - decoded_vec[i];
}
pq.compute_code(residual_vec, q_code.data());
}
return dis0;
}
/*****************************************************
* compute tables for L2 distance
*****************************************************/
float precompute_list_tables_L2() {
float dis0 = 0;
if (use_precomputed_table == 0 || use_precomputed_table == -1) {
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
pq.compute_distance_table(residual_vec, sim_table);
if (polysemous_ht != 0) {
pq.compute_code(residual_vec, q_code.data());
}
} else if (use_precomputed_table == 1) {
dis0 = coarse_dis;
fvec_madd(
pq.M * pq.ksub,
ivfpq.precomputed_table.data() + key * pq.ksub * pq.M,
-2.0,
sim_table_2,
sim_table);
if (polysemous_ht != 0) {
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
pq.compute_code(residual_vec, q_code.data());
}
} else if (use_precomputed_table == 2) {
dis0 = coarse_dis;
const MultiIndexQuantizer* miq =
dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
FAISS_THROW_IF_NOT(miq);
const ProductQuantizer& cpq = miq->pq;
int Mf = pq.M / cpq.M;
const float* qtab = sim_table_2; // query-specific table
float* ltab = sim_table; // (output) list-specific table
long k = key;
for (int cm = 0; cm < cpq.M; cm++) {
// compute PQ index
int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
k >>= cpq.nbits;
// get corresponding table
const float* pc = ivfpq.precomputed_table.data() +
(ki * pq.M + cm * Mf) * pq.ksub;
if (polysemous_ht == 0) {
// sum up with query-specific table
fvec_madd(Mf * pq.ksub, pc, -2.0, qtab, ltab);
ltab += Mf * pq.ksub;
qtab += Mf * pq.ksub;
} else {
for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
q_code[m] = fvec_madd_and_argmin(
pq.ksub, pc, -2, qtab, ltab);
pc += pq.ksub;
ltab += pq.ksub;
qtab += pq.ksub;
}
}
}
}
return dis0;
}
float precompute_list_table_pointers_L2() {
float dis0 = 0;
if (use_precomputed_table == 1) {
dis0 = coarse_dis;
const float* s =
ivfpq.precomputed_table.data() + key * pq.ksub * pq.M;
for (int m = 0; m < pq.M; m++) {
sim_table_ptrs[m] = s;
s += pq.ksub;
}
} else if (use_precomputed_table == 2) {
dis0 = coarse_dis;
const MultiIndexQuantizer* miq =
dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
FAISS_THROW_IF_NOT(miq);
const ProductQuantizer& cpq = miq->pq;
int Mf = pq.M / cpq.M;
long k = key;
int m0 = 0;
for (int cm = 0; cm < cpq.M; cm++) {
int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
k >>= cpq.nbits;
const float* pc = ivfpq.precomputed_table.data() +
(ki * pq.M + cm * Mf) * pq.ksub;
for (int m = m0; m < m0 + Mf; m++) {
sim_table_ptrs[m] = pc;
pc += pq.ksub;
}
m0 += Mf;
}
} else {
FAISS_THROW_MSG("need precomputed tables");
}
if (polysemous_ht) {
FAISS_THROW_MSG("not implemented");
// Not clear that it makes sense to implement this,
// because it costs M * ksub, which is what we wanted to
// avoid with the table pointers.
}
return dis0;
}
};
template <class C>
struct KnnSearchResults {
idx_t key;
const idx_t* ids;
// heap params
size_t k;
float* heap_sim;
idx_t* heap_ids;
size_t nup;
inline void add(idx_t j, float dis) {
if (C::cmp(heap_sim[0], dis)) {
idx_t id = ids ? ids[j] : lo_build(key, j);
heap_replace_top<C>(k, heap_sim, heap_ids, dis, id);
nup++;
}
}
};
template <class C>
struct RangeSearchResults {
idx_t key;
const idx_t* ids;
// wrapped result structure
float radius;
RangeQueryResult& rres;
inline void add(idx_t j, float dis) {
if (C::cmp(radius, dis)) {
idx_t id = ids ? ids[j] : lo_build(key, j);
rres.add(dis, id);
}
}
};
/*****************************************************
* Scanning the codes.
* The scanning functions call their favorite precompute_*
* function to precompute the tables they need.
*****************************************************/
template <typename IDType, MetricType METRIC_TYPE, class PQDecoder>
struct IVFPQScannerT : QueryTables {
const uint8_t* list_codes;
const IDType* list_ids;
size_t list_size;
IVFPQScannerT(const IndexIVFPQ& ivfpq, const IVFSearchParameters* params)
: QueryTables(ivfpq, params) {
assert(METRIC_TYPE == metric_type);
}
float dis0;
void init_list(idx_t list_no, float coarse_dis, int mode) {
this->key = list_no;
this->coarse_dis = coarse_dis;
if (mode == 2) {
dis0 = precompute_list_tables();
} else if (mode == 1) {
dis0 = precompute_list_table_pointers();
}
}
/*****************************************************
* Scanning the codes: simple PQ scan.
*****************************************************/
/// version of the scan where we use precomputed tables
template <class SearchResultType>
void scan_list_with_table(
size_t ncode,
const uint8_t* codes,
SearchResultType& res) const {
for (size_t j = 0; j < ncode; j++) {
PQDecoder decoder(codes, pq.nbits);
codes += pq.code_size;
float dis = dis0;
const float* tab = sim_table;
for (size_t m = 0; m < pq.M; m++) {
dis += tab[decoder.decode()];
tab += pq.ksub;
}
res.add(j, dis);
}
}
/// tables are not precomputed, but pointers are provided to the
/// relevant X_c|x_r tables
template <class SearchResultType>
void scan_list_with_pointer(
size_t ncode,
const uint8_t* codes,
SearchResultType& res) const {
for (size_t j = 0; j < ncode; j++) {
PQDecoder decoder(codes, pq.nbits);
codes += pq.code_size;
float dis = dis0;
const float* tab = sim_table_2;
for (size_t m = 0; m < pq.M; m++) {
int ci = decoder.decode();
dis += sim_table_ptrs[m][ci] - 2 * tab[ci];
tab += pq.ksub;
}
res.add(j, dis);
}
}
/// nothing is precomputed: access residuals on-the-fly
template <class SearchResultType>
void scan_on_the_fly_dist(
size_t ncode,
const uint8_t* codes,
SearchResultType& res) const {
const float* dvec;
float dis0 = 0;
if (by_residual) {
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
ivfpq.quantizer->reconstruct(key, residual_vec);
dis0 = fvec_inner_product(residual_vec, qi, d);
} else {
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
}
dvec = residual_vec;
} else {
dvec = qi;
dis0 = 0;
}
for (size_t j = 0; j < ncode; j++) {
pq.decode(codes, decoded_vec);
codes += pq.code_size;
float dis;
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
dis = dis0 + fvec_inner_product(decoded_vec, qi, d);
} else {
dis = fvec_L2sqr(decoded_vec, dvec, d);
}
res.add(j, dis);
}
}
/*****************************************************
* Scanning codes with polysemous filtering
*****************************************************/
template <class HammingComputer, class SearchResultType>
void scan_list_polysemous_hc(
size_t ncode,
const uint8_t* codes,
SearchResultType& res) const {
int ht = ivfpq.polysemous_ht;
size_t n_hamming_pass = 0, nup = 0;
int code_size = pq.code_size;
HammingComputer hc(q_code.data(), code_size);
for (size_t j = 0; j < ncode; j++) {
const uint8_t* b_code = codes;
int hd = hc.hamming(b_code);
if (hd < ht) {
n_hamming_pass++;
PQDecoder decoder(codes, pq.nbits);
float dis = dis0;
const float* tab = sim_table;
for (size_t m = 0; m < pq.M; m++) {
dis += tab[decoder.decode()];
tab += pq.ksub;
}
res.add(j, dis);
}
codes += code_size;
}
#pragma omp critical
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
}
template <class SearchResultType>
void scan_list_polysemous(
size_t ncode,
const uint8_t* codes,
SearchResultType& res) const {
switch (pq.code_size) {
#define HANDLE_CODE_SIZE(cs) \
case cs: \
scan_list_polysemous_hc<HammingComputer##cs, SearchResultType>( \
ncode, codes, res); \
break
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
scan_list_polysemous_hc<
HammingComputerDefault,
SearchResultType>(ncode, codes, res);
break;
}
}
};
/* We put as many parameters as possible in template. Hopefully the
* gain in runtime is worth the code bloat. C is the comparator < or
* >, it is directly related to METRIC_TYPE. precompute_mode is how
* much we precompute (2 = precompute distance tables, 1 = precompute
* pointers to distances, 0 = compute distances one by one).
* Currently only 2 is supported */
template <MetricType METRIC_TYPE, class C, class PQDecoder>
struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
InvertedListScanner {
int precompute_mode;
IVFPQScanner(const IndexIVFPQ& ivfpq, bool store_pairs, int precompute_mode)
: IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(
ivfpq,
nullptr),
precompute_mode(precompute_mode) {
this->store_pairs = store_pairs;
}
void set_query(const float* query) override {
this->init_query(query);
}
void set_list(idx_t list_no, float coarse_dis) override {
this->list_no = list_no;
this->init_list(list_no, coarse_dis, precompute_mode);
}
float distance_to_code(const uint8_t* code) const override {
assert(precompute_mode == 2);
float dis = this->dis0;
const float* tab = this->sim_table;
PQDecoder decoder(code, this->pq.nbits);
for (size_t m = 0; m < this->pq.M; m++) {
dis += tab[decoder.decode()];
tab += this->pq.ksub;
}
return dis;
}
size_t scan_codes(
size_t ncode,
const uint8_t* codes,
const idx_t* ids,
float* heap_sim,
idx_t* heap_ids,
size_t k) const override {
KnnSearchResults<C> res = {
/* key */ this->key,
/* ids */ this->store_pairs ? nullptr : ids,
/* k */ k,
/* heap_sim */ heap_sim,
/* heap_ids */ heap_ids,
/* nup */ 0};
if (this->polysemous_ht > 0) {
assert(precompute_mode == 2);
this->scan_list_polysemous(ncode, codes, res);
} else if (precompute_mode == 2) {
this->scan_list_with_table(ncode, codes, res);
} else if (precompute_mode == 1) {
this->scan_list_with_pointer(ncode, codes, res);
} else if (precompute_mode == 0) {
this->scan_on_the_fly_dist(ncode, codes, res);
} else {
FAISS_THROW_MSG("bad precomp mode");
}
return res.nup;
}
void scan_codes_range(
size_t ncode,
const uint8_t* codes,
const idx_t* ids,
float radius,
RangeQueryResult& rres) const override {
RangeSearchResults<C> res = {
/* key */ this->key,
/* ids */ this->store_pairs ? nullptr : ids,
/* radius */ radius,
/* rres */ rres};
if (this->polysemous_ht > 0) {
assert(precompute_mode == 2);
this->scan_list_polysemous(ncode, codes, res);
} else if (precompute_mode == 2) {
this->scan_list_with_table(ncode, codes, res);
} else if (precompute_mode == 1) {
this->scan_list_with_pointer(ncode, codes, res);
} else if (precompute_mode == 0) {
this->scan_on_the_fly_dist(ncode, codes, res);
} else {
FAISS_THROW_MSG("bad precomp mode");
}
}
};
template <class PQDecoder>
InvertedListScanner* get_InvertedListScanner1(
const IndexIVFPQ& index,
bool store_pairs) {
if (index.metric_type == METRIC_INNER_PRODUCT) {
return new IVFPQScanner<
METRIC_INNER_PRODUCT,
CMin<float, idx_t>,
PQDecoder>(index, store_pairs, 2);
} else if (index.metric_type == METRIC_L2) {
return new IVFPQScanner<METRIC_L2, CMax<float, idx_t>, PQDecoder>(
index, store_pairs, 2);
}
return nullptr;
}
} // anonymous namespace
InvertedListScanner* IndexIVFPQ::get_InvertedListScanner(
bool store_pairs) const {
if (pq.nbits == 8) {
return get_InvertedListScanner1<PQDecoder8>(*this, store_pairs);
} else if (pq.nbits == 16) {
return get_InvertedListScanner1<PQDecoder16>(*this, store_pairs);
} else {
return get_InvertedListScanner1<PQDecoderGeneric>(*this, store_pairs);
}
return nullptr;
}
IndexIVFPQStats indexIVFPQ_stats;
void IndexIVFPQStats::reset() {
memset(this, 0, sizeof(*this));
}
IndexIVFPQ::IndexIVFPQ() {
// initialize some runtime values
use_precomputed_table = 0;
scan_table_threshold = 0;
do_polysemous_training = false;
polysemous_ht = 0;
polysemous_training = nullptr;
}
struct CodeCmp {
const uint8_t* tab;
size_t code_size;
bool operator()(int a, int b) const {
return cmp(a, b) > 0;
}
int cmp(int a, int b) const {
return memcmp(tab + a * code_size, tab + b * code_size, code_size);
}
};
size_t IndexIVFPQ::find_duplicates(idx_t* dup_ids, size_t* lims) const {
size_t ngroup = 0;
lims[0] = 0;
for (size_t list_no = 0; list_no < nlist; list_no++) {
size_t n = invlists->list_size(list_no);
std::vector<int> ord(n);
for (int i = 0; i < n; i++)
ord[i] = i;
InvertedLists::ScopedCodes codes(invlists, list_no);
CodeCmp cs = {codes.get(), code_size};
std::sort(ord.begin(), ord.end(), cs);
InvertedLists::ScopedIds list_ids(invlists, list_no);
int prev = -1; // all elements from prev to i-1 are equal
for (int i = 0; i < n; i++) {
if (prev >= 0 && cs.cmp(ord[prev], ord[i]) == 0) {
// same as previous => remember
if (prev + 1 == i) { // start new group
ngroup++;
lims[ngroup] = lims[ngroup - 1];
dup_ids[lims[ngroup]++] = list_ids[ord[prev]];
}
dup_ids[lims[ngroup]++] = list_ids[ord[i]];
} else { // not same as previous.
prev = i;
}
}
}
return ngroup;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFPQ_H
#define FAISS_INDEX_IVFPQ_H
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/IndexPQ.h>
#include <faiss/impl/platform_macros.h>
#include <faiss/utils/AlignedTable.h>
namespace faiss {
struct IVFPQSearchParameters : IVFSearchParameters {
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
IVFPQSearchParameters() : scan_table_threshold(0), polysemous_ht(0) {}
~IVFPQSearchParameters() {}
};
FAISS_API extern size_t precomputed_table_max_bytes;
/** Inverted file with Product Quantizer encoding. Each residual
* vector is encoded as a product quantizer code.
*/
struct IndexIVFPQ : IndexIVF {
bool by_residual; ///< Encode residual or plain vector?
ProductQuantizer pq; ///< produces the codes
bool do_polysemous_training; ///< reorder PQ centroids after training?
PolysemousTraining* polysemous_training; ///< if NULL, use default
// search-time parameters
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
/** Precompute table that speeds up query preprocessing at some
* memory cost (used only for by_residual with L2 metric)
*/
int use_precomputed_table;
/// if use_precomputed_table
/// size nlist * pq.M * pq.ksub
AlignedTable<float> precomputed_table;
IndexIVFPQ(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
MetricType metric = METRIC_L2);
void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos = false) const override;
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
void add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx) override;
/// same as add_core, also:
/// - output 2nd level residuals if residuals_2 != NULL
/// - accepts precomputed_idx = nullptr
void add_core_o(
idx_t n,
const float* x,
const idx_t* xids,
float* residuals_2,
const idx_t* precomputed_idx = nullptr);
/// trains the product quantizer
void train_residual(idx_t n, const float* x) override;
/// same as train_residual, also output 2nd level residuals
void train_residual_o(idx_t n, const float* x, float* residuals_2);
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;
/** Find exact duplicates in the dataset.
*
* the duplicates are returned in pre-allocated arrays (see the
* max sizes).
*
* @param lims limits between groups of duplicates
* (max size ntotal / 2 + 1)
* @param ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
* duplicates (max size ntotal)
* @return n number of groups found
*/
size_t find_duplicates(idx_t* ids, size_t* lims) const;
// map a vector to a binary code knowing the index
void encode(idx_t key, const float* x, uint8_t* code) const;
/** Encode multiple vectors
*
* @param n nb vectors to encode
* @param keys posting list ids for those vectors (size n)
* @param x vectors (size n * d)
* @param codes output codes (size n * code_size)
* @param compute_keys if false, assume keys are precomputed,
* otherwise compute them
*/
void encode_multiple(
size_t n,
idx_t* keys,
const float* x,
uint8_t* codes,
bool compute_keys = false) const;
/// inverse of encode_multiple
void decode_multiple(
size_t n,
const idx_t* keys,
const uint8_t* xcodes,
float* x) const;
InvertedListScanner* get_InvertedListScanner(
bool store_pairs) const override;
/// build precomputed table
void precompute_table();
IndexIVFPQ();
};
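// A hedged usage sketch for find_duplicates (illustrative only; variable
// names are assumptions): the caller pre-allocates both output arrays to
// their documented maximum sizes, then walks the groups delimited by lims.
//
//   std::vector<faiss::Index::idx_t> dup_ids(index.ntotal);
//   std::vector<size_t> lims(index.ntotal / 2 + 1);
//   size_t ngroup = index.find_duplicates(dup_ids.data(), lims.data());
//   for (size_t g = 0; g < ngroup; g++) {
//       // dup_ids[lims[g]] .. dup_ids[lims[g + 1] - 1] share the same code
//   }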
/** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
*
* @param use_precomputed_table (I/O)
*        =-1: force disable
*        =0: decide heuristically (default: use tables only if they are
*            smaller than precomputed_table_max_bytes), and set
*            use_precomputed_table on output
*        =1: tables that work for all quantizers (size 256 * nlist * M)
*        =2: specific version for MultiIndexQuantizer (much more compact)
* @param precomputed_table precomputed table to initialize
*/
void initialize_IVFPQ_precomputed_table(
int& use_precomputed_table,
const Index* quantizer,
const ProductQuantizer& pq,
AlignedTable<float>& precomputed_table,
bool verbose);
/// statistics are robust to internal threading, but not if
/// IndexIVFPQ::search_preassigned is called by multiple threads
struct IndexIVFPQStats {
size_t nrefine; ///< nb of refines (IVFPQR)
size_t n_hamming_pass;
///< nb of passed Hamming distance tests (for polysemous)
// timings measured with the CPU RTC on all threads
size_t search_cycles;
size_t refine_cycles; ///< only for IVFPQR
IndexIVFPQStats() {
reset();
}
void reset();
};
// global var that collects them all
FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexIVFPQFastScan.h>
#include <cassert>
#include <cinttypes>
#include <cstdio>
#include <omp.h>
#include <memory>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/simdlib.h>
#include <faiss/utils/utils.h>
#include <faiss/invlists/BlockInvertedLists.h>
#include <faiss/impl/pq4_fast_scan.h>
#include <faiss/impl/simd_result_handlers.h>
#include <faiss/utils/quantize_lut.h>
namespace faiss {
using namespace simd_result_handlers;
inline size_t roundup(size_t a, size_t b) {
return (a + b - 1) / b * b;
}
IndexIVFPQFastScan::IndexIVFPQFastScan(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
MetricType metric,
int bbs)
: IndexIVF(quantizer, d, nlist, 0, metric),
pq(d, M, nbits_per_idx),
bbs(bbs) {
FAISS_THROW_IF_NOT(nbits_per_idx == 4);
M2 = roundup(pq.M, 2);
by_residual = false; // set to false by default because it's much faster
is_trained = false;
code_size = pq.code_size;
replace_invlists(new BlockInvertedLists(nlist, bbs, bbs * M2 / 2), true);
}
IndexIVFPQFastScan::IndexIVFPQFastScan() {
by_residual = false;
bbs = 0;
M2 = 0;
}
IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
: IndexIVF(
orig.quantizer,
orig.d,
orig.nlist,
orig.pq.code_size,
orig.metric_type),
pq(orig.pq),
bbs(bbs) {
FAISS_THROW_IF_NOT(orig.pq.nbits == 4);
by_residual = orig.by_residual;
ntotal = orig.ntotal;
is_trained = orig.is_trained;
nprobe = orig.nprobe;
size_t M = pq.M;
M2 = roundup(M, 2);
replace_invlists(
new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2), true);
precomputed_table.resize(orig.precomputed_table.size());
if (precomputed_table.nbytes() > 0) {
memcpy(precomputed_table.get(),
orig.precomputed_table.data(),
precomputed_table.nbytes());
}
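// repack the flat PQ codes of each original inverted list into the
// bbs-sized blocks expected by the fast-scan kernels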
for (size_t i = 0; i < nlist; i++) {
size_t nb = orig.invlists->list_size(i);
size_t nb2 = roundup(nb, bbs);
AlignedTable<uint8_t> tmp(nb2 * M2 / 2);
pq4_pack_codes(
InvertedLists::ScopedCodes(orig.invlists, i).get(),
nb,
M,
nb2,
bbs,
M2,
tmp.get());
invlists->add_entries(
i,
nb,
InvertedLists::ScopedIds(orig.invlists, i).get(),
tmp.get());
}
orig_invlists = orig.invlists;
}
/*********************************************************
* Training
*********************************************************/
void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
const float* x = fvecs_maybe_subsample(
d,
(size_t*)&n,
pq.cp.max_points_per_centroid * pq.ksub,
x_in,
verbose,
pq.cp.seed);
std::unique_ptr<float[]> del_x;
if (x != x_in) {
del_x.reset((float*)x);
}
const float* trainset;
AlignedTable<float> residuals;
if (by_residual) {
if (verbose)
printf("computing residuals\n");
std::vector<idx_t> assign(n);
quantizer->assign(n, x, assign.data());
residuals.resize(n * d);
for (idx_t i = 0; i < n; i++) {
quantizer->compute_residual(
x + i * d, residuals.data() + i * d, assign[i]);
}
trainset = residuals.data();
} else {
trainset = x;
}
if (verbose) {
printf("training %zdx%zd product quantizer on "
"%" PRId64 " vectors in %dD\n",
pq.M,
pq.ksub,
n,
d);
}
pq.verbose = verbose;
pq.train(n, trainset);
if (by_residual && metric_type == METRIC_L2) {
precompute_table();
}
}
void IndexIVFPQFastScan::precompute_table() {
initialize_IVFPQ_precomputed_table(
use_precomputed_table, quantizer, pq, precomputed_table, verbose);
}
/*********************************************************
* Code management functions
*********************************************************/
void IndexIVFPQFastScan::encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos) const {
if (by_residual) {
AlignedTable<float> residuals(n * d);
for (size_t i = 0; i < n; i++) {
if (list_nos[i] < 0) {
memset(residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
} else {
quantizer->compute_residual(
x + i * d, residuals.data() + i * d, list_nos[i]);
}
}
pq.compute_codes(residuals.data(), codes, n);
} else {
pq.compute_codes(x, codes, n);
}
if (include_listnos) {
size_t coarse_size = coarse_code_size();
for (idx_t i = n - 1; i >= 0; i--) {
uint8_t* code = codes + i * (coarse_size + code_size);
memmove(code + coarse_size, codes + i * code_size, code_size);
encode_listno(list_nos[i], code);
}
}
}
void IndexIVFPQFastScan::add_with_ids(
idx_t n,
const float* x,
const idx_t* xids) {
// copied from IndexIVF::add_with_ids --->
// do some blocking to avoid excessive allocs
idx_t bs = 65536;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(n, i0 + bs);
if (verbose) {
printf(" IndexIVFPQFastScan::add_with_ids %zd: %zd",
size_t(i0),
size_t(i1));
}
add_with_ids(i1 - i0, x + i0 * d, xids ? xids + i0 : nullptr);
}
return;
}
InterruptCallback::check();
AlignedTable<uint8_t> codes(n * code_size);
FAISS_THROW_IF_NOT(is_trained);
direct_map.check_can_add(xids);
std::unique_ptr<idx_t[]> idx(new idx_t[n]);
quantizer->assign(n, x, idx.get());
size_t nadd = 0, nminus1 = 0;
for (size_t i = 0; i < n; i++) {
if (idx[i] < 0)
nminus1++;
}
AlignedTable<uint8_t> flat_codes(n * code_size);
encode_vectors(n, x, idx.get(), flat_codes.get());
DirectMapAdd dm_adder(direct_map, n, xids);
// <---
BlockInvertedLists* bil = dynamic_cast<BlockInvertedLists*>(invlists);
FAISS_THROW_IF_NOT_MSG(bil, "only block inverted lists supported");
// prepare batches
std::vector<idx_t> order(n);
for (idx_t i = 0; i < n; i++) {
order[i] = i;
}
// TODO should not need stable
std::stable_sort(order.begin(), order.end(), [&idx](idx_t a, idx_t b) {
return idx[a] < idx[b];
});
// TODO parallelize
idx_t i0 = 0;
while (i0 < n) {
idx_t list_no = idx[order[i0]];
idx_t i1 = i0 + 1;
while (i1 < n && idx[order[i1]] == list_no) {
i1++;
}
if (list_no == -1) {
i0 = i1;
continue;
}
// make linear array
AlignedTable<uint8_t> list_codes((i1 - i0) * code_size);
size_t list_size = bil->list_size(list_no);
bil->resize(list_no, list_size + i1 - i0);
for (idx_t i = i0; i < i1; i++) {
size_t ofs = list_size + i - i0;
idx_t id = xids ? xids[order[i]] : ntotal + order[i];
dm_adder.add(order[i], list_no, ofs);
bil->ids[list_no][ofs] = id;
memcpy(list_codes.data() + (i - i0) * code_size,
flat_codes.data() + order[i] * code_size,
code_size);
nadd++;
}
pq4_pack_codes_range(
list_codes.data(),
pq.M,
list_size,
list_size + i1 - i0,
bbs,
M2,
bil->codes[list_no].data());
i0 = i1;
}
ntotal += n;
}
/*********************************************************
* search
*********************************************************/
namespace {
// from impl/ProductQuantizer.cpp
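// for each code: accumulate the LUT entries of its M sub-indices (plus dis0)
// and update the per-query heap when the candidate improves on the current
// worst element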
template <class C, typename dis_t>
void pq_estimators_from_tables_generic(
const ProductQuantizer& pq,
size_t nbits,
const uint8_t* codes,
size_t ncodes,
const dis_t* dis_table,
const int64_t* ids,
float dis0,
size_t k,
typename C::T* heap_dis,
int64_t* heap_ids) {
using accu_t = typename C::T;
const size_t M = pq.M;
const size_t ksub = pq.ksub;
for (size_t j = 0; j < ncodes; ++j) {
PQDecoderGeneric decoder(codes + j * pq.code_size, nbits);
accu_t dis = dis0;
const dis_t* dt = dis_table;
for (size_t m = 0; m < M; m++) {
uint64_t c = decoder.decode();
dis += dt[c];
dt += ksub;
}
if (C::cmp(heap_dis[0], dis)) {
heap_pop<C>(k, heap_dis, heap_ids);
heap_push<C>(k, heap_dis, heap_ids, dis, ids[j]);
}
}
}
using idx_t = Index::idx_t;
using namespace quantize_lut;
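// computes c := a + bf * b over aligned buffers, 8 floats at a time with
// simd8float32 (n must be a multiple of 8)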
void fvec_madd_avx(
size_t n,
const float* a,
float bf,
const float* b,
float* c) {
assert(is_aligned_pointer(a));
assert(is_aligned_pointer(b));
assert(is_aligned_pointer(c));
assert(n % 8 == 0);
simd8float32 bf8(bf);
n /= 8;
for (size_t i = 0; i < n; i++) {
simd8float32 ai(a);
simd8float32 bi(b);
simd8float32 ci = fmadd(bf8, bi, ai);
ci.store(c);
c += 8;
a += 8;
b += 8;
}
}
} // anonymous namespace
/*********************************************************
* Look-Up Table functions
*********************************************************/
void IndexIVFPQFastScan::compute_LUT(
size_t n,
const float* x,
const idx_t* coarse_ids,
const float* coarse_dis,
AlignedTable<float>& dis_tables,
AlignedTable<float>& biases) const {
const IndexIVFPQFastScan& ivfpq = *this;
size_t dim12 = pq.ksub * pq.M;
size_t d = pq.d;
size_t nprobe = ivfpq.nprobe;
if (ivfpq.by_residual) {
if (ivfpq.metric_type == METRIC_L2) {
dis_tables.resize(n * nprobe * dim12);
if (ivfpq.use_precomputed_table == 1) {
biases.resize(n * nprobe);
memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
AlignedTable<float> ip_table(n * dim12);
pq.compute_inner_prod_tables(n, x, ip_table.get());
#pragma omp parallel for if (n * nprobe > 8000)
for (idx_t ij = 0; ij < n * nprobe; ij++) {
idx_t i = ij / nprobe;
float* tab = dis_tables.get() + ij * dim12;
idx_t cij = coarse_ids[ij];
if (cij >= 0) {
fvec_madd_avx(
dim12,
precomputed_table.get() + cij * dim12,
-2,
ip_table.get() + i * dim12,
tab);
} else {
// fill with NaNs so that they are ignored during
// LUT quantization
memset(tab, -1, sizeof(float) * dim12);
}
}
} else {
std::unique_ptr<float[]> xrel(new float[n * nprobe * d]);
biases.resize(n * nprobe);
memset(biases.get(), 0, sizeof(float) * n * nprobe);
#pragma omp parallel for if (n * nprobe > 8000)
for (idx_t ij = 0; ij < n * nprobe; ij++) {
idx_t i = ij / nprobe;
float* xij = &xrel[ij * d];
idx_t cij = coarse_ids[ij];
if (cij >= 0) {
ivfpq.quantizer->compute_residual(x + i * d, xij, cij);
} else {
// will fill with NaNs
memset(xij, -1, sizeof(float) * d);
}
}
pq.compute_distance_tables(
n * nprobe, xrel.get(), dis_tables.get());
}
} else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
dis_tables.resize(n * dim12);
pq.compute_inner_prod_tables(n, x, dis_tables.get());
// compute_inner_prod_tables(pq, n, x, dis_tables.get());
biases.resize(n * nprobe);
memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
} else {
FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
}
} else {
dis_tables.resize(n * dim12);
if (ivfpq.metric_type == METRIC_L2) {
pq.compute_distance_tables(n, x, dis_tables.get());
} else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
pq.compute_inner_prod_tables(n, x, dis_tables.get());
} else {
FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
}
}
}
void IndexIVFPQFastScan::compute_LUT_uint8(
size_t n,
const float* x,
const idx_t* coarse_ids,
const float* coarse_dis,
AlignedTable<uint8_t>& dis_tables,
AlignedTable<uint16_t>& biases,
float* normalizers) const {
const IndexIVFPQFastScan& ivfpq = *this;
AlignedTable<float> dis_tables_float;
AlignedTable<float> biases_float;
uint64_t t0 = get_cy();
compute_LUT(n, x, coarse_ids, coarse_dis, dis_tables_float, biases_float);
IVFFastScan_stats.t_compute_distance_tables += get_cy() - t0;
bool lut_is_3d = ivfpq.by_residual && ivfpq.metric_type == METRIC_L2;
size_t dim123 = pq.ksub * pq.M;
size_t dim123_2 = pq.ksub * M2;
if (lut_is_3d) {
dim123 *= nprobe;
dim123_2 *= nprobe;
}
dis_tables.resize(n * dim123_2);
if (biases_float.get()) {
biases.resize(n * nprobe);
}
uint64_t t1 = get_cy();
#pragma omp parallel for if (n > 100)
for (int64_t i = 0; i < n; i++) {
const float* t_in = dis_tables_float.get() + i * dim123;
const float* b_in = nullptr;
uint8_t* t_out = dis_tables.get() + i * dim123_2;
uint16_t* b_out = nullptr;
if (biases_float.get()) {
b_in = biases_float.get() + i * nprobe;
b_out = biases.get() + i * nprobe;
}
quantize_LUT_and_bias(
nprobe,
pq.M,
pq.ksub,
lut_is_3d,
t_in,
b_in,
t_out,
M2,
b_out,
normalizers + 2 * i,
normalizers + 2 * i + 1);
}
IVFFastScan_stats.t_round += get_cy() - t1;
}
/*********************************************************
* Search functions
*********************************************************/
template <bool is_max>
void IndexIVFPQFastScan::search_dispatch_implem(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
using Cfloat = typename std::conditional<
is_max,
CMax<float, int64_t>,
CMin<float, int64_t>>::type;
using C = typename std::conditional<
is_max,
CMax<uint16_t, int64_t>,
CMin<uint16_t, int64_t>>::type;
if (n == 0) {
return;
}
// actual implementation used
int impl = implem;
if (impl == 0) {
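// auto-select: the qbs-based kernels (12/13) expect codes packed with the
// default block size bbs == 32, otherwise fall back to 10/11; a large k
// switches from heap to reservoir result collection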
if (bbs == 32) {
impl = 12;
} else {
impl = 10;
}
if (k > 20) {
impl++;
}
}
if (impl == 1) {
search_implem_1<Cfloat>(n, x, k, distances, labels);
} else if (impl == 2) {
search_implem_2<C>(n, x, k, distances, labels);
} else if (impl >= 10 && impl <= 13) {
size_t ndis = 0, nlist_visited = 0;
if (n < 2) {
if (impl == 12 || impl == 13) {
search_implem_12<C>(
n,
x,
k,
distances,
labels,
impl,
&ndis,
&nlist_visited);
} else {
search_implem_10<C>(
n,
x,
k,
distances,
labels,
impl,
&ndis,
&nlist_visited);
}
} else {
// explicitly slice over threads
int nslice;
if (n <= omp_get_max_threads()) {
nslice = n;
} else if (by_residual && metric_type == METRIC_L2) {
// make sure the LUT tables don't get too big
size_t lut_size_per_query = pq.M * pq.ksub * nprobe *
(sizeof(float) + sizeof(uint8_t));
size_t max_lut_size = precomputed_table_max_bytes;
// how many queries we can handle within mem budget
size_t nq_ok =
std::max(max_lut_size / lut_size_per_query, size_t(1));
nslice =
roundup(std::max(size_t(n / nq_ok), size_t(1)),
omp_get_max_threads());
} else {
// LUTs unlikely to be a limiting factor
nslice = omp_get_max_threads();
}
#pragma omp parallel for reduction(+ : ndis, nlist_visited)
for (int slice = 0; slice < nslice; slice++) {
idx_t i0 = n * slice / nslice;
idx_t i1 = n * (slice + 1) / nslice;
float* dis_i = distances + i0 * k;
idx_t* lab_i = labels + i0 * k;
if (impl == 12 || impl == 13) {
search_implem_12<C>(
i1 - i0,
x + i0 * d,
k,
dis_i,
lab_i,
impl,
&ndis,
&nlist_visited);
} else {
search_implem_10<C>(
i1 - i0,
x + i0 * d,
k,
dis_i,
lab_i,
impl,
&ndis,
&nlist_visited);
}
}
}
indexIVF_stats.nq += n;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nlist += nlist_visited;
} else {
FAISS_THROW_FMT("implem %d does not exist", implem);
}
}
void IndexIVFPQFastScan::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
if (metric_type == METRIC_L2) {
search_dispatch_implem<true>(n, x, k, distances, labels);
} else {
search_dispatch_implem<false>(n, x, k, distances, labels);
}
}
template <class C>
void IndexIVFPQFastScan::search_implem_1(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(orig_invlists);
std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
size_t dim12 = pq.ksub * pq.M;
AlignedTable<float> dis_tables;
AlignedTable<float> biases;
compute_LUT(n, x, coarse_ids.get(), coarse_dis.get(), dis_tables, biases);
bool single_LUT = !(by_residual && metric_type == METRIC_L2);
size_t ndis = 0, nlist_visited = 0;
#pragma omp parallel for reduction(+ : ndis, nlist_visited)
for (idx_t i = 0; i < n; i++) {
int64_t* heap_ids = labels + i * k;
float* heap_dis = distances + i * k;
heap_heapify<C>(k, heap_dis, heap_ids);
float* LUT = nullptr;
if (single_LUT) {
LUT = dis_tables.get() + i * dim12;
}
for (idx_t j = 0; j < nprobe; j++) {
if (!single_LUT) {
LUT = dis_tables.get() + (i * nprobe + j) * dim12;
}
idx_t list_no = coarse_ids[i * nprobe + j];
if (list_no < 0)
continue;
size_t ls = orig_invlists->list_size(list_no);
if (ls == 0)
continue;
InvertedLists::ScopedCodes codes(orig_invlists, list_no);
InvertedLists::ScopedIds ids(orig_invlists, list_no);
float bias = biases.get() ? biases[i * nprobe + j] : 0;
pq_estimators_from_tables_generic<C>(
pq,
pq.nbits,
codes.get(),
ls,
LUT,
ids.get(),
bias,
k,
heap_dis,
heap_ids);
nlist_visited++;
ndis++;
}
heap_reorder<C>(k, heap_dis, heap_ids);
}
indexIVF_stats.nq += n;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nlist += nlist_visited;
}
template <class C>
void IndexIVFPQFastScan::search_implem_2(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(orig_invlists);
std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
size_t dim12 = pq.ksub * M2;
AlignedTable<uint8_t> dis_tables;
AlignedTable<uint16_t> biases;
std::unique_ptr<float[]> normalizers(new float[2 * n]);
compute_LUT_uint8(
n,
x,
coarse_ids.get(),
coarse_dis.get(),
dis_tables,
biases,
normalizers.get());
bool single_LUT = !(by_residual && metric_type == METRIC_L2);
size_t ndis = 0, nlist_visited = 0;
#pragma omp parallel for reduction(+ : ndis, nlist_visited)
for (idx_t i = 0; i < n; i++) {
std::vector<uint16_t> tmp_dis(k);
int64_t* heap_ids = labels + i * k;
uint16_t* heap_dis = tmp_dis.data();
heap_heapify<C>(k, heap_dis, heap_ids);
const uint8_t* LUT = nullptr;
if (single_LUT) {
LUT = dis_tables.get() + i * dim12;
}
for (idx_t j = 0; j < nprobe; j++) {
if (!single_LUT) {
LUT = dis_tables.get() + (i * nprobe + j) * dim12;
}
idx_t list_no = coarse_ids[i * nprobe + j];
if (list_no < 0)
continue;
size_t ls = orig_invlists->list_size(list_no);
if (ls == 0)
continue;
InvertedLists::ScopedCodes codes(orig_invlists, list_no);
InvertedLists::ScopedIds ids(orig_invlists, list_no);
uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
pq_estimators_from_tables_generic<C>(
pq,
pq.nbits,
codes.get(),
ls,
LUT,
ids.get(),
bias,
k,
heap_dis,
heap_ids);
nlist_visited++;
ndis += ls;
}
heap_reorder<C>(k, heap_dis, heap_ids);
// convert distances to float
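// normalizers[2*i] is the quantization scale a and normalizers[2*i+1]
// the bias b written by compute_LUT_uint8; the float distance is
// recovered as b + uint16_dis / a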
{
float one_a = 1 / normalizers[2 * i], b = normalizers[2 * i + 1];
if (skip & 16) {
one_a = 1;
b = 0;
}
float* heap_dis_float = distances + i * k;
for (int j = 0; j < k; j++) {
heap_dis_float[j] = b + heap_dis[j] * one_a;
}
}
}
indexIVF_stats.nq += n;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nlist += nlist_visited;
}
template <class C>
void IndexIVFPQFastScan::search_implem_10(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
int impl,
size_t* ndis_out,
size_t* nlist_out) const {
memset(distances, -1, sizeof(float) * k * n);
memset(labels, -1, sizeof(idx_t) * k * n);
using HeapHC = HeapHandler<C, true>;
using ReservoirHC = ReservoirHandler<C, true>;
using SingleResultHC = SingleResultHandler<C, true>;
std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
uint64_t times[10];
memset(times, 0, sizeof(times));
int ti = 0;
#define TIC times[ti++] = get_cy()
TIC;
quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
TIC;
size_t dim12 = pq.ksub * M2;
AlignedTable<uint8_t> dis_tables;
AlignedTable<uint16_t> biases;
std::unique_ptr<float[]> normalizers(new float[2 * n]);
compute_LUT_uint8(
n,
x,
coarse_ids.get(),
coarse_dis.get(),
dis_tables,
biases,
normalizers.get());
TIC;
bool single_LUT = !(by_residual && metric_type == METRIC_L2);
TIC;
size_t ndis = 0, nlist_visited = 0;
{
AlignedTable<uint16_t> tmp_distances(k);
for (idx_t i = 0; i < n; i++) {
const uint8_t* LUT = nullptr;
int qmap1[1] = {0};
std::unique_ptr<SIMDResultHandler<C, true>> handler;
if (k == 1) {
handler.reset(new SingleResultHC(1, 0));
} else if (impl == 10) {
handler.reset(new HeapHC(
1, tmp_distances.get(), labels + i * k, k, 0));
} else if (impl == 11) {
handler.reset(new ReservoirHC(1, 0, k, 2 * k));
} else {
FAISS_THROW_MSG("invalid");
}
handler->q_map = qmap1;
if (single_LUT) {
LUT = dis_tables.get() + i * dim12;
}
for (idx_t j = 0; j < nprobe; j++) {
size_t ij = i * nprobe + j;
if (!single_LUT) {
LUT = dis_tables.get() + ij * dim12;
}
if (biases.get()) {
handler->dbias = biases.get() + ij;
}
idx_t list_no = coarse_ids[ij];
if (list_no < 0)
continue;
size_t ls = invlists->list_size(list_no);
if (ls == 0)
continue;
InvertedLists::ScopedCodes codes(invlists, list_no);
InvertedLists::ScopedIds ids(invlists, list_no);
handler->ntotal = ls;
handler->id_map = ids.get();
#define DISPATCH(classHC) \
if (dynamic_cast<classHC*>(handler.get())) { \
auto* res = static_cast<classHC*>(handler.get()); \
pq4_accumulate_loop( \
1, roundup(ls, bbs), bbs, M2, codes.get(), LUT, *res); \
}
DISPATCH(HeapHC)
else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
#undef DISPATCH
nlist_visited++;
ndis++;
}
handler->to_flat_arrays(
distances + i * k,
labels + i * k,
skip & 16 ? nullptr : normalizers.get() + i * 2);
}
}
*ndis_out = ndis;
*nlist_out = nlist;
}
template <class C>
void IndexIVFPQFastScan::search_implem_12(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
int impl,
size_t* ndis_out,
size_t* nlist_out) const {
if (n == 0) { // does not work well with reservoir
return;
}
FAISS_THROW_IF_NOT(bbs == 32);
std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
uint64_t times[10];
memset(times, 0, sizeof(times));
int ti = 0;
#define TIC times[ti++] = get_cy()
TIC;
quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
TIC;
size_t dim12 = pq.ksub * M2;
AlignedTable<uint8_t> dis_tables;
AlignedTable<uint16_t> biases;
std::unique_ptr<float[]> normalizers(new float[2 * n]);
compute_LUT_uint8(
n,
x,
coarse_ids.get(),
coarse_dis.get(),
dis_tables,
biases,
normalizers.get());
TIC;
struct QC {
int qno; // sequence number of the query
int list_no; // list to visit
int rank; // this is the rank'th result of the coarse quantizer
};
bool single_LUT = !(by_residual && metric_type == METRIC_L2);
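// collect the (query, probe) pairs and sort them by inverted list, so that
// each list is scanned once for all the queries that probe it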
std::vector<QC> qcs;
{
int ij = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < nprobe; j++) {
if (coarse_ids[ij] >= 0) {
qcs.push_back(QC{i, int(coarse_ids[ij]), int(j)});
}
ij++;
}
}
std::sort(qcs.begin(), qcs.end(), [](const QC& a, const QC& b) {
return a.list_no < b.list_no;
});
}
TIC;
// prepare the result handlers
std::unique_ptr<SIMDResultHandler<C, true>> handler;
AlignedTable<uint16_t> tmp_distances;
using HeapHC = HeapHandler<C, true>;
using ReservoirHC = ReservoirHandler<C, true>;
using SingleResultHC = SingleResultHandler<C, true>;
if (k == 1) {
handler.reset(new SingleResultHC(n, 0));
} else if (impl == 12) {
tmp_distances.resize(n * k);
handler.reset(new HeapHC(n, tmp_distances.get(), labels, k, 0));
} else if (impl == 13) {
handler.reset(new ReservoirHC(n, 0, k, 2 * k));
}
int qbs2 = this->qbs2 ? this->qbs2 : 11;
std::vector<uint16_t> tmp_bias;
if (biases.get()) {
tmp_bias.resize(qbs2);
handler->dbias = tmp_bias.data();
}
TIC;
size_t ndis = 0;
size_t i0 = 0;
uint64_t t_copy_pack = 0, t_scan = 0;
while (i0 < qcs.size()) {
uint64_t tt0 = get_cy();
// find all queries that access this inverted list
int list_no = qcs[i0].list_no;
size_t i1 = i0 + 1;
while (i1 < qcs.size() && i1 < i0 + qbs2) {
if (qcs[i1].list_no != list_no) {
break;
}
i1++;
}
size_t list_size = invlists->list_size(list_no);
if (list_size == 0) {
i0 = i1;
continue;
}
// re-organize LUTs and biases into the right order
int nc = i1 - i0;
std::vector<int> q_map(nc), lut_entries(nc);
AlignedTable<uint8_t> LUT(nc * dim12);
memset(LUT.get(), -1, nc * dim12);
int qbs = pq4_preferred_qbs(nc);
for (size_t i = i0; i < i1; i++) {
const QC& qc = qcs[i];
q_map[i - i0] = qc.qno;
int ij = qc.qno * nprobe + qc.rank;
lut_entries[i - i0] = single_LUT ? qc.qno : ij;
if (biases.get()) {
tmp_bias[i - i0] = biases[ij];
}
}
pq4_pack_LUT_qbs_q_map(
qbs, M2, dis_tables.get(), lut_entries.data(), LUT.get());
// access the inverted list
ndis += (i1 - i0) * list_size;
InvertedLists::ScopedCodes codes(invlists, list_no);
InvertedLists::ScopedIds ids(invlists, list_no);
// prepare the handler
handler->ntotal = list_size;
handler->q_map = q_map.data();
handler->id_map = ids.get();
uint64_t tt1 = get_cy();
#define DISPATCH(classHC) \
if (dynamic_cast<classHC*>(handler.get())) { \
auto* res = static_cast<classHC*>(handler.get()); \
pq4_accumulate_loop_qbs( \
qbs, list_size, M2, codes.get(), LUT.get(), *res); \
}
DISPATCH(HeapHC)
else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
// prepare for next loop
i0 = i1;
uint64_t tt2 = get_cy();
t_copy_pack += tt1 - tt0;
t_scan += tt2 - tt1;
}
TIC;
// labels is in-place for HeapHC
handler->to_flat_arrays(
distances, labels, skip & 16 ? nullptr : normalizers.get());
TIC;
// these stats are not thread-safe
for (int i = 1; i < ti; i++) {
IVFFastScan_stats.times[i] += times[i] - times[i - 1];
}
IVFFastScan_stats.t_copy_pack += t_copy_pack;
IVFFastScan_stats.t_scan += t_scan;
if (auto* rh = dynamic_cast<ReservoirHC*>(handler.get())) {
for (int i = 0; i < 4; i++) {
IVFFastScan_stats.reservoir_times[i] += rh->times[i];
}
}
*ndis_out = ndis;
*nlist_out = nlist;
}
IVFFastScanStats IVFFastScan_stats;
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <memory>
#include <faiss/IndexIVFPQ.h>
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/utils/AlignedTable.h>
namespace faiss {
/** Fast scan version of IVFPQ. Works for 4-bit PQ for now.
*
* The codes in the inverted lists are not stored sequentially but
* grouped in blocks of size bbs. This makes it possible to very quickly
* compute distances with SIMD instructions.
*
* Implementations (implem):
* 0: auto-select implementation (default)
* 1: orig's search, re-implemented
* 2: orig's search, re-ordered by invlist
* 10: optimized int16 search, collect results in heap, no qbs
* 11: idem, collect results in reservoir
* 12: optimized int16 search, collect results in heap, uses qbs
* 13: idem, collect results in reservoir
*/
struct IndexIVFPQFastScan : IndexIVF {
bool by_residual; ///< Encode residual or plain vector?
ProductQuantizer pq; ///< produces the codes
// size of the kernel
int bbs; // set at build time
// M rounded up to a multiple of 2
size_t M2;
/// precomputed tables management
int use_precomputed_table = 0;
/// if use_precomputed_table, size (nlist, pq.M, pq.ksub)
AlignedTable<float> precomputed_table;
// search-time implementation
int implem = 0;
// skip some parts of the computation (for timing)
int skip = 0;
// batching factors at search time (0 = default)
int qbs = 0;
size_t qbs2 = 0;
IndexIVFPQFastScan(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
MetricType metric = METRIC_L2,
int bbs = 32);
IndexIVFPQFastScan();
// built from an IndexIVFPQ
explicit IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs = 32);
/// orig's inverted lists (for debugging)
InvertedLists* orig_invlists = nullptr;
void train_residual(idx_t n, const float* x) override;
/// build precomputed table, possibly updating use_precomputed_table
void precompute_table();
/// same as the regular IVFPQ encoder. The codes are not reorganized by
/// blocks at that point
void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listno = false) const override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
// prepare look-up tables
void compute_LUT(
size_t n,
const float* x,
const idx_t* coarse_ids,
const float* coarse_dis,
AlignedTable<float>& dis_tables,
AlignedTable<float>& biases) const;
void compute_LUT_uint8(
size_t n,
const float* x,
const idx_t* coarse_ids,
const float* coarse_dis,
AlignedTable<uint8_t>& dis_tables,
AlignedTable<uint16_t>& biases,
float* normalizers) const;
// internal search funcs
template <bool is_max>
void search_dispatch_implem(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const;
template <class C>
void search_implem_1(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const;
template <class C>
void search_implem_2(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const;
// implem 10 and 12 are not multithreaded internally, so
// export search stats
template <class C>
void search_implem_10(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
int impl,
size_t* ndis_out,
size_t* nlist_out) const;
template <class C>
void search_implem_12(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
int impl,
size_t* ndis_out,
size_t* nlist_out) const;
};
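// A hedged usage sketch (illustrative; the dataset names, nlist and M are
// assumptions): the index requires 4-bit PQ codes and can also be converted
// from an existing IndexIVFPQ whose pq.nbits == 4.
//
//   faiss::IndexFlatL2 coarse(d);
//   faiss::IndexIVFPQFastScan index(&coarse, d, nlist, /*M*/ d / 2, /*nbits*/ 4);
//   index.train(nt, xt);
//   index.add(nb, xb);
//   index.nprobe = 16;
//   index.implem = 0; // let search_dispatch_implem pick implem 10-13
//   index.search(nq, xq, k, D, I);
//
//   // or, converting an existing 4-bit IndexIVFPQ `ivfpq`:
//   // faiss::IndexIVFPQFastScan fast(ivfpq, /*bbs*/ 32);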
struct IVFFastScanStats {
uint64_t times[10];
uint64_t t_compute_distance_tables, t_round;
uint64_t t_copy_pack, t_scan, t_to_flat;
uint64_t reservoir_times[4];
double Mcy_at(int i) {
return times[i] / (1000 * 1000.0);
}
double Mcy_reservoir_at(int i) {
return reservoir_times[i] / (1000 * 1000.0);
}
IVFFastScanStats() {
reset();
}
void reset() {
memset(this, 0, sizeof(*this));
}
};
FAISS_API extern IVFFastScanStats IVFFastScan_stats;
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFPQR.h>
#include <cinttypes>
#include <faiss/utils/Heap.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/*****************************************
* IndexIVFPQR implementation
******************************************/
IndexIVFPQR::IndexIVFPQR(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
size_t M_refine,
size_t nbits_per_idx_refine)
: IndexIVFPQ(quantizer, d, nlist, M, nbits_per_idx),
refine_pq(d, M_refine, nbits_per_idx_refine),
k_factor(4) {
by_residual = true;
}
IndexIVFPQR::IndexIVFPQR() : k_factor(1) {
by_residual = true;
}
void IndexIVFPQR::reset() {
IndexIVFPQ::reset();
refine_codes.clear();
}
void IndexIVFPQR::train_residual(idx_t n, const float* x) {
float* residual_2 = new float[n * d];
ScopeDeleter<float> del(residual_2);
train_residual_o(n, x, residual_2);
if (verbose)
printf("training %zdx%zd 2nd level PQ quantizer on %" PRId64
" %dD-vectors\n",
refine_pq.M,
refine_pq.ksub,
n,
d);
refine_pq.cp.max_points_per_centroid = 1000;
refine_pq.cp.verbose = verbose;
refine_pq.train(n, residual_2);
}
void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
add_core(n, x, xids, nullptr);
}
void IndexIVFPQR::add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx) {
float* residual_2 = new float[n * d];
ScopeDeleter<float> del(residual_2);
idx_t n0 = ntotal;
add_core_o(n, x, xids, residual_2, precomputed_idx);
refine_codes.resize(ntotal * refine_pq.code_size);
refine_pq.compute_codes(
residual_2, &refine_codes[n0 * refine_pq.code_size], n);
}
#define TIC t0 = get_cycles()
#define TOC get_cycles() - t0
void IndexIVFPQR::search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* idx,
const float* L1_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params,
IndexIVFStats* stats) const {
uint64_t t0;
TIC;
size_t k_coarse = long(k * k_factor);
idx_t* coarse_labels = new idx_t[k_coarse * n];
ScopeDeleter<idx_t> del1(coarse_labels);
{ // query with quantizer levels 1 and 2.
float* coarse_distances = new float[k_coarse * n];
ScopeDeleter<float> del(coarse_distances);
IndexIVFPQ::search_preassigned(
n,
x,
k_coarse,
idx,
L1_dis,
coarse_distances,
coarse_labels,
true,
params);
}
indexIVFPQ_stats.search_cycles += TOC;
TIC;
// 3rd level refinement
size_t n_refine = 0;
#pragma omp parallel reduction(+ : n_refine)
{
// tmp buffers
float* residual_1 = new float[2 * d];
ScopeDeleter<float> del(residual_1);
float* residual_2 = residual_1 + d;
#pragma omp for
for (idx_t i = 0; i < n; i++) {
const float* xq = x + i * d;
const idx_t* shortlist = coarse_labels + k_coarse * i;
float* heap_sim = distances + k * i;
idx_t* heap_ids = labels + k * i;
maxheap_heapify(k, heap_sim, heap_ids);
for (int j = 0; j < k_coarse; j++) {
idx_t sl = shortlist[j];
if (sl == -1)
continue;
int list_no = lo_listno(sl);
int ofs = lo_offset(sl);
assert(list_no >= 0 && list_no < nlist);
assert(ofs >= 0 && ofs < invlists->list_size(list_no));
// 1st level residual
quantizer->compute_residual(xq, residual_1, list_no);
// 2nd level residual
const uint8_t* l2code = invlists->get_single_code(list_no, ofs);
pq.decode(l2code, residual_2);
for (int l = 0; l < d; l++)
residual_2[l] = residual_1[l] - residual_2[l];
// 3rd level residual's approximation
idx_t id = invlists->get_single_id(list_no, ofs);
assert(0 <= id && id < ntotal);
refine_pq.decode(
&refine_codes[id * refine_pq.code_size], residual_1);
float dis = fvec_L2sqr(residual_1, residual_2, d);
if (dis < heap_sim[0]) {
idx_t id_or_pair = store_pairs ? sl : id;
maxheap_replace_top(k, heap_sim, heap_ids, dis, id_or_pair);
}
n_refine++;
}
maxheap_reorder(k, heap_sim, heap_ids);
}
}
indexIVFPQ_stats.nrefine += n_refine;
indexIVFPQ_stats.refine_cycles += TOC;
}
void IndexIVFPQR::reconstruct_from_offset(
int64_t list_no,
int64_t offset,
float* recons) const {
IndexIVFPQ::reconstruct_from_offset(list_no, offset, recons);
idx_t id = invlists->get_single_id(list_no, offset);
assert(0 <= id && id < ntotal);
std::vector<float> r3(d);
refine_pq.decode(&refine_codes[id * refine_pq.code_size], r3.data());
for (int i = 0; i < d; ++i) {
recons[i] += r3[i];
}
}
void IndexIVFPQR::merge_from(IndexIVF& other_in, idx_t add_id) {
IndexIVFPQR* other = dynamic_cast<IndexIVFPQR*>(&other_in);
FAISS_THROW_IF_NOT(other);
IndexIVF::merge_from(other_in, add_id);
refine_codes.insert(
refine_codes.end(),
other->refine_codes.begin(),
other->refine_codes.end());
other->refine_codes.clear();
}
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG("not implemented");
return 0;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexIVFPQ.h>
namespace faiss {
/** Index with an additional level of PQ refinement */
struct IndexIVFPQR : IndexIVFPQ {
ProductQuantizer refine_pq; ///< 3rd level quantizer
std::vector<uint8_t> refine_codes; ///< corresponding codes
/// factor between k requested in search and the k requested from the IVFPQ
float k_factor;
IndexIVFPQR(
Index* quantizer,
size_t d,
size_t nlist,
size_t M,
size_t nbits_per_idx,
size_t M_refine,
size_t nbits_per_idx_refine);
void reset() override;
size_t remove_ids(const IDSelector& sel) override;
/// trains the two product quantizers
void train_residual(idx_t n, const float* x) override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/// same as add_with_ids, but optionally use the precomputed list ids
void add_core(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx) override;
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;
void merge_from(IndexIVF& other, idx_t add_id) override;
void search_preassigned(
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params = nullptr,
IndexIVFStats* stats = nullptr) const override;
IndexIVFPQR();
};
} // namespace faiss
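The declaration above follows the standard faiss Index API, so an IndexIVFPQR is built like any other IVF index; k_factor only changes how many candidates the IVFPQ stage returns before re-ranking. The sketch below is illustrative only: example_ivfpqr is a hypothetical helper, the parameter values (1024 lists, 16 + 16 code bytes per vector, nprobe, k_factor) are arbitrary, and it assumes the header above is installed as <faiss/IndexIVFPQR.h> as in upstream faiss.
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQR.h>

void example_ivfpqr(size_t d, size_t nt, const float* xt,
                    size_t nb, const float* xb,
                    size_t nq, const float* xq) {
    // d must be a multiple of the number of PQ sub-quantizers (16 here)
    faiss::IndexFlatL2 coarse_quantizer(d);
    // 1024 lists, 16 sub-quantizers of 8 bits, plus 16 refinement bytes
    faiss::IndexIVFPQR index(&coarse_quantizer, d, 1024, 16, 8, 16, 8);
    index.train(nt, xt);          // trains the coarse quantizer and both PQs
    index.add(nb, xb);
    index.nprobe = 32;            // inverted lists visited per query
    index.k_factor = 4;           // re-rank 4*k IVFPQ candidates
    const faiss::Index::idx_t k = 10;
    std::vector<float> distances(nq * k);
    std::vector<faiss::Index::idx_t> labels(nq * k);
    index.search(nq, xq, k, distances.data(), labels.data());
}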
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFSpectralHash.h>
#include <stdint.h>
#include <algorithm>
#include <memory>
#include <faiss/IndexLSH.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/VectorTransform.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
namespace faiss {
IndexIVFSpectralHash::IndexIVFSpectralHash(
Index* quantizer,
size_t d,
size_t nlist,
int nbit,
float period)
: IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
nbit(nbit),
period(period),
threshold_type(Thresh_global) {
RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
rr->init(1234);
vt = rr;
own_fields = true;
is_trained = false;
}
IndexIVFSpectralHash::IndexIVFSpectralHash()
: IndexIVF(),
vt(nullptr),
own_fields(false),
nbit(0),
period(0),
threshold_type(Thresh_global) {}
IndexIVFSpectralHash::~IndexIVFSpectralHash() {
if (own_fields) {
delete vt;
}
}
namespace {
float median(size_t n, float* x) {
std::sort(x, x + n);
if (n % 2 == 1) {
return x[n / 2];
} else {
return (x[n / 2 - 1] + x[n / 2]) / 2;
}
}
} // namespace
void IndexIVFSpectralHash::train_residual(idx_t n, const float* x) {
if (!vt->is_trained) {
vt->train(n, x);
}
if (threshold_type == Thresh_global) {
// nothing to do
return;
} else if (
threshold_type == Thresh_centroid ||
threshold_type == Thresh_centroid_half) {
// convert all centroids with vt
std::vector<float> centroids(nlist * d);
quantizer->reconstruct_n(0, nlist, centroids.data());
trained.resize(nlist * nbit);
vt->apply_noalloc(nlist, centroids.data(), trained.data());
if (threshold_type == Thresh_centroid_half) {
for (size_t i = 0; i < nlist * nbit; i++) {
trained[i] -= 0.25 * period;
}
}
return;
}
// otherwise train medians
// assign
std::unique_ptr<idx_t[]> idx(new idx_t[n]);
quantizer->assign(n, x, idx.get());
std::vector<size_t> sizes(nlist + 1);
for (size_t i = 0; i < n; i++) {
FAISS_THROW_IF_NOT(idx[i] >= 0);
sizes[idx[i]]++;
}
size_t ofs = 0;
for (int j = 0; j < nlist; j++) {
size_t o0 = ofs;
ofs += sizes[j];
sizes[j] = o0;
}
// transform
std::unique_ptr<float[]> xt(vt->apply(n, x));
// transpose + reorder
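// xo stores, for each inverted list and each bit j, the j-th transformed
// coordinate of all training vectors assigned to that list as one
// contiguous span, so each per-(list, bit) median below can be computed
// in place on a contiguous range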
std::unique_ptr<float[]> xo(new float[n * nbit]);
for (size_t i = 0; i < n; i++) {
size_t idest = sizes[idx[i]]++;
for (size_t j = 0; j < nbit; j++) {
xo[idest + n * j] = xt[i * nbit + j];
}
}
trained.resize(nlist * nbit);
// compute medians
#pragma omp parallel for
for (int i = 0; i < nlist; i++) {
size_t i0 = i == 0 ? 0 : sizes[i - 1];
size_t i1 = sizes[i];
for (int j = 0; j < nbit; j++) {
float* xoi = xo.get() + i0 + n * j;
if (i0 == i1) { // nothing to train
trained[i * nbit + j] = 0.0;
} else if (i1 == i0 + 1) {
trained[i * nbit + j] = xoi[0];
} else {
trained[i * nbit + j] = median(i1 - i0, xoi);
}
}
}
}
namespace {
void binarize_with_freq(
size_t nbit,
float freq,
const float* x,
const float* c,
uint8_t* codes) {
memset(codes, 0, (nbit + 7) / 8);
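// freq = 2 / period, so floor((x - c) * freq) increases by 1 every
// period/2: successive half-period intervals of each centered coordinate
// alternate between bit 0 and bit 1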
for (size_t i = 0; i < nbit; i++) {
float xf = (x[i] - c[i]);
int64_t xi = int64_t(floor(xf * freq));
int64_t bit = xi & 1;
codes[i >> 3] |= bit << (i & 7);
}
}
} // namespace
void IndexIVFSpectralHash::encode_vectors(
idx_t n,
const float* x_in,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos) const {
FAISS_THROW_IF_NOT(is_trained);
float freq = 2.0 / period;
size_t coarse_size = include_listnos ? coarse_code_size() : 0;
// transform with vt
std::unique_ptr<float[]> x(vt->apply(n, x_in));
std::vector<float> zero(nbit);
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
int64_t list_no = list_nos[i];
uint8_t* code = codes + i * (code_size + coarse_size);
if (list_no >= 0) {
if (coarse_size) {
encode_listno(list_no, code);
}
const float* c;
if (threshold_type == Thresh_global) {
c = zero.data();
} else {
c = trained.data() + list_no * nbit;
}
binarize_with_freq(
nbit, freq, x.get() + i * nbit, c, code + coarse_size);
} else {
memset(code, 0, code_size + coarse_size);
}
}
}
namespace {
template <class HammingComputer>
struct IVFScanner : InvertedListScanner {
// copied from index structure
const IndexIVFSpectralHash* index;
size_t nbit;
float period, freq;
std::vector<float> q;
std::vector<float> zero;
std::vector<uint8_t> qcode;
HammingComputer hc;
using idx_t = Index::idx_t;
IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
: index(index),
nbit(index->nbit),
period(index->period),
freq(2.0 / index->period),
q(nbit),
zero(nbit),
qcode(index->code_size),
hc(qcode.data(), index->code_size) {
this->store_pairs = store_pairs;
this->code_size = index->code_size;
}
void set_query(const float* query) override {
FAISS_THROW_IF_NOT(query);
FAISS_THROW_IF_NOT(q.size() == nbit);
index->vt->apply_noalloc(1, query, q.data());
if (index->threshold_type == IndexIVFSpectralHash::Thresh_global) {
binarize_with_freq(nbit, freq, q.data(), zero.data(), qcode.data());
hc.set(qcode.data(), code_size);
}
}
void set_list(idx_t list_no, float /*coarse_dis*/) override {
this->list_no = list_no;
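// with per-list thresholds (centroid or median variants) the query must be
// re-binarized against this list's thresholds before Hamming comparisons;
// for Thresh_global the code computed in set_query is reused unchanged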
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
const float* c = index->trained.data() + list_no * nbit;
binarize_with_freq(nbit, freq, q.data(), c, qcode.data());
hc.set(qcode.data(), code_size);
}
}
float distance_to_code(const uint8_t* code) const final {
return hc.hamming(code);
}
size_t scan_codes(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float* simi,
idx_t* idxi,
size_t k) const override {
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming(codes);
if (dis < simi[0]) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
maxheap_replace_top(k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
return nup;
}
void scan_codes_range(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
float radius,
RangeQueryResult& res) const override {
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming(codes);
if (dis < radius) {
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
res.add(dis, id);
}
codes += code_size;
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
bool store_pairs) const {
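// use a HammingComputer specialized for the exact code size when one is
// available; otherwise fall back to the generic HammingComputerDefault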
switch (code_size) {
#define HANDLE_CODE_SIZE(cs) \
case cs: \
return new IVFScanner<HammingComputer##cs>(this, store_pairs)
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
return new IVFScanner<HammingComputerDefault>(this, store_pairs);
}
}
void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
FAISS_THROW_IF_NOT(vt_in->d_in == d);
if (own_fields) {
delete vt;
}
vt = vt_in;
threshold_type = Thresh_global;
is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
vt->is_trained;
own_fields = own;
}
/*
Check that the encoder is a single vector transform followed by a LSH
that just does thresholding.
If this is not the case, the linear transform + thresholds of the IndexLSH
should be merged into the VectorTransform (which is feasible).
*/
void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
FAISS_THROW_IF_NOT(!sub_index->rotate_data);
FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
replace_vt(encoder->chain[0], own);
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFSH_H
#define FAISS_INDEX_IVFSH_H
#include <vector>
#include <faiss/IndexIVF.h>
namespace faiss {
struct VectorTransform;
struct IndexPreTransform;
/** Inverted-list index that stores binary codes of size nbit. Before the
* binary conversion, vectors are transformed from dimension d to dimension
* nbit by vt (a random rotation by default).
*
* A per-coordinate threshold (determined by threshold_type) is subtracted
* from each coordinate, and the result is split into intervals of size
* period. One half of each interval maps to a 0 bit, the other half to a
* 1 bit.
*/
struct IndexIVFSpectralHash : IndexIVF {
/// transformation from d to nbit dim
VectorTransform* vt;
/// own the vt
bool own_fields;
/// nb of bits of the binary signature
int nbit;
/// interval size for 0s and 1s
float period;
enum ThresholdType {
Thresh_global, ///< global threshold at 0
Thresh_centroid, ///< compare to centroid
Thresh_centroid_half, ///< central interval around centroid
Thresh_median ///< median of training set
};
ThresholdType threshold_type;
/// Trained thresholds.
/// size nlist * nbit or 0 if Thresh_global
std::vector<float> trained;
IndexIVFSpectralHash(
Index* quantizer,
size_t d,
size_t nlist,
int nbit,
float period);
IndexIVFSpectralHash();
void train_residual(idx_t n, const float* x) override;
void encode_vectors(
idx_t n,
const float* x,
const idx_t* list_nos,
uint8_t* codes,
bool include_listnos = false) const override;
InvertedListScanner* get_InvertedListScanner(
bool store_pairs) const override;
/** replace the vector transform for an empty (and possibly untrained) index
*/
void replace_vt(VectorTransform* vt, bool own = false);
/** convenience function to get the VT from an index constructed by an
* index_factory (should end in "LSH") */
void replace_vt(IndexPreTransform* index, bool own = false);
~IndexIVFSpectralHash() override;
};
} // namespace faiss
#endif
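For reference, the header above is used like any other IndexIVF subclass. The sketch below is illustrative only: example_ivf_spectral_hash is a hypothetical helper, and the nlist/nbit/period/nprobe values are arbitrary; it relies on the standard train/add/search API and the threshold_type field declared above.
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFSpectralHash.h>

void example_ivf_spectral_hash(size_t d, size_t nt, const float* xt,
                               size_t nb, const float* xb,
                               size_t nq, const float* xq) {
    faiss::IndexFlatL2 coarse_quantizer(d);
    // 256 lists, 64-bit binary signatures, period 10 on the rotated data
    faiss::IndexIVFSpectralHash index(&coarse_quantizer, d, 256, 64, 10.0f);
    index.threshold_type = faiss::IndexIVFSpectralHash::Thresh_centroid;
    index.train(nt, xt);   // trains the coarse quantizer, vt and thresholds
    index.add(nb, xb);
    index.nprobe = 16;
    const faiss::Index::idx_t k = 10;
    std::vector<float> distances(nq * k);   // Hamming distances, as floats
    std::vector<faiss::Index::idx_t> labels(nq * k);
    index.search(nq, xq, k, distances.data(), labels.data());
}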