"tools/vscode:/vscode.git/clone" did not exist on "c65f3308900568cf48cc2ba891e7c52679471304"
Commit 395d2ce6 authored by huchen's avatar huchen
Browse files

init the faiss for rocm

parent 5ded39f5
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexShards.h>
#include <cinttypes>
#include <cstdio>
#include <functional>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/WorkerThread.h>
namespace faiss {
// subroutines
namespace {
typedef Index::idx_t idx_t;
// add translation to all valid labels
void translate_labels(long n, idx_t* labels, long translation) {
if (translation == 0)
return;
for (long i = 0; i < n; i++) {
if (labels[i] < 0)
continue;
labels[i] += translation;
}
}
/** merge result tables from several shards.
* @param all_distances size nshard * n * k
* @param all_labels idem
* @param translations label translations to apply, size nshard
*/
template <class IndexClass, class C>
void merge_tables(
long n,
long k,
long nshard,
typename IndexClass::distance_t* distances,
idx_t* labels,
const std::vector<typename IndexClass::distance_t>& all_distances,
const std::vector<idx_t>& all_labels,
const std::vector<long>& translations) {
if (k == 0) {
return;
}
using distance_t = typename IndexClass::distance_t;
long stride = n * k;
#pragma omp parallel
{
std::vector<int> buf(2 * nshard);
int* pointer = buf.data();
int* shard_ids = pointer + nshard;
std::vector<distance_t> buf2(nshard);
distance_t* heap_vals = buf2.data();
#pragma omp for
for (long i = 0; i < n; i++) {
// the heap maps values to the shard where they are
// produced.
const distance_t* D_in = all_distances.data() + i * k;
const idx_t* I_in = all_labels.data() + i * k;
int heap_size = 0;
for (long s = 0; s < nshard; s++) {
pointer[s] = 0;
if (I_in[stride * s] >= 0) {
heap_push<C>(
++heap_size,
heap_vals,
shard_ids,
D_in[stride * s],
s);
}
}
distance_t* D = distances + i * k;
idx_t* I = labels + i * k;
for (int j = 0; j < k; j++) {
if (heap_size == 0) {
I[j] = -1;
D[j] = C::neutral();
} else {
// pop best element
int s = shard_ids[0];
int& p = pointer[s];
D[j] = heap_vals[0];
I[j] = I_in[stride * s + p] + translations[s];
heap_pop<C>(heap_size--, heap_vals, shard_ids);
p++;
if (p < k && I_in[stride * s + p] >= 0) {
heap_push<C>(
++heap_size,
heap_vals,
shard_ids,
D_in[stride * s + p],
s);
}
}
}
}
}
}
} // anonymous namespace
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(
idx_t d,
bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {}
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(
int d,
bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(d, threaded), successive_ids(successive_ids) {}
template <typename IndexT>
IndexShardsTemplate<IndexT>::IndexShardsTemplate(
bool threaded,
bool successive_ids)
: ThreadedIndex<IndexT>(threaded), successive_ids(successive_ids) {}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::onAfterAddIndex(IndexT* index /* unused */) {
syncWithSubIndexes();
}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::onAfterRemoveIndex(
IndexT* index /* unused */) {
syncWithSubIndexes();
}
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
// true with the normal API, but should use the runOnIndex API instead
template <typename IndexT>
void IndexShardsTemplate<IndexT>::syncWithSubIndexes() {
if (!this->count()) {
this->is_trained = false;
this->ntotal = 0;
return;
}
auto firstIndex = this->at(0);
this->metric_type = firstIndex->metric_type;
this->is_trained = firstIndex->is_trained;
this->ntotal = firstIndex->ntotal;
for (int i = 1; i < this->count(); ++i) {
auto index = this->at(i);
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
FAISS_THROW_IF_NOT(this->d == index->d);
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
this->ntotal += index->ntotal;
}
}
// No metric_type for IndexBinary
template <>
void IndexShardsTemplate<IndexBinary>::syncWithSubIndexes() {
if (!this->count()) {
this->is_trained = false;
this->ntotal = 0;
return;
}
auto firstIndex = this->at(0);
this->is_trained = firstIndex->is_trained;
this->ntotal = firstIndex->ntotal;
for (int i = 1; i < this->count(); ++i) {
auto index = this->at(i);
FAISS_THROW_IF_NOT(this->d == index->d);
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
this->ntotal += index->ntotal;
}
}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::train(idx_t n, const component_t* x) {
auto fn = [n, x](int no, IndexT* index) {
if (index->verbose) {
printf("begin train shard %d on %" PRId64 " points\n", no, n);
}
index->train(n, x);
if (index->verbose) {
printf("end train shard %d\n", no);
}
};
this->runOnIndex(fn);
syncWithSubIndexes();
}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::add(idx_t n, const component_t* x) {
add_with_ids(n, x, nullptr);
}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::add_with_ids(
idx_t n,
const component_t* x,
const idx_t* xids) {
FAISS_THROW_IF_NOT_MSG(
!(successive_ids && xids),
"It makes no sense to pass in ids and "
"request them to be shifted");
if (successive_ids) {
FAISS_THROW_IF_NOT_MSG(
!xids,
"It makes no sense to pass in ids and "
"request them to be shifted");
FAISS_THROW_IF_NOT_MSG(
this->ntotal == 0,
"when adding to IndexShards with sucessive_ids, "
"only add() in a single pass is supported");
}
idx_t nshard = this->count();
const idx_t* ids = xids;
std::vector<idx_t> aids;
if (!ids && !successive_ids) {
aids.resize(n);
for (idx_t i = 0; i < n; i++) {
aids[i] = this->ntotal + i;
}
ids = aids.data();
}
size_t components_per_vec =
sizeof(component_t) == 1 ? (this->d + 7) / 8 : this->d;
auto fn = [n, ids, x, nshard, components_per_vec](int no, IndexT* index) {
idx_t i0 = (idx_t)no * n / nshard;
idx_t i1 = ((idx_t)no + 1) * n / nshard;
auto x0 = x + i0 * components_per_vec;
if (index->verbose) {
printf("begin add shard %d on %" PRId64 " points\n", no, n);
}
if (ids) {
index->add_with_ids(i1 - i0, x0, ids + i0);
} else {
index->add(i1 - i0, x0);
}
if (index->verbose) {
printf("end add shard %d on %" PRId64 " points\n", no, i1 - i0);
}
};
this->runOnIndex(fn);
syncWithSubIndexes();
}
template <typename IndexT>
void IndexShardsTemplate<IndexT>::search(
idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT(k > 0);
long nshard = this->count();
std::vector<distance_t> all_distances(nshard * k * n);
std::vector<idx_t> all_labels(nshard * k * n);
auto fn = [n, k, x, &all_distances, &all_labels](
int no, const IndexT* index) {
if (index->verbose) {
printf("begin query shard %d on %" PRId64 " points\n", no, n);
}
index->search(
n,
x,
k,
all_distances.data() + no * k * n,
all_labels.data() + no * k * n);
if (index->verbose) {
printf("end query shard %d\n", no);
}
};
this->runOnIndex(fn);
std::vector<long> translations(nshard, 0);
// Because we just called runOnIndex above, it is safe to access the
// sub-index ntotal here
if (successive_ids) {
translations[0] = 0;
for (int s = 0; s + 1 < nshard; s++) {
translations[s + 1] = translations[s] + this->at(s)->ntotal;
}
}
if (this->metric_type == METRIC_L2) {
merge_tables<IndexT, CMin<distance_t, int>>(
n,
k,
nshard,
distances,
labels,
all_distances,
all_labels,
translations);
} else {
merge_tables<IndexT, CMax<distance_t, int>>(
n,
k,
nshard,
distances,
labels,
all_distances,
all_labels,
translations);
}
}
// explicit instantiations
template struct IndexShardsTemplate<Index>;
template struct IndexShardsTemplate<IndexBinary>;
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
#include <faiss/impl/ThreadedIndex.h>
namespace faiss {
/**
* Index that concatenates the results from several sub-indexes
*/
template <typename IndexT>
struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
/**
* The dimension that all sub-indices must share will be the dimension of
* the first sub-index added
*
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShardsTemplate(
bool threaded = false,
bool successive_ids = true);
/**
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShardsTemplate(
idx_t d,
bool threaded = false,
bool successive_ids = true);
/// int version, to avoid the ambiguity of an int dimension being
/// implicitly converted to bool
explicit IndexShardsTemplate(
int d,
bool threaded = false,
bool successive_ids = true);
/// Alias for addIndex()
void add_shard(IndexT* index) {
this->addIndex(index);
}
/// Alias for removeIndex()
void remove_shard(IndexT* index) {
this->removeIndex(index);
}
/// supported only for sub-indices that implement add_with_ids
void add(idx_t n, const component_t* x) override;
/**
* Cases (successive_ids, xids):
* - true, non-NULL ERROR: it makes no sense to pass in ids and
* request them to be shifted
* - true, NULL OK, but should be called only once (calls add()
* on sub-indexes).
* - false, non-NULL OK: will call add_with_ids with passed in xids
* distributed evenly over shards
* - false, NULL OK: will call add_with_ids on each sub-index,
* starting at ntotal
*/
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
override;
void search(
idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const override;
void train(idx_t n, const component_t* x) override;
bool successive_ids;
/// Synchronize the top-level index (IndexShards) with data in the
/// sub-indices
void syncWithSubIndexes();
protected:
/// Called just after an index is added
void onAfterAddIndex(IndexT* index) override;
/// Called just after an index is removed
void onAfterRemoveIndex(IndexT* index) override;
};
using IndexShards = IndexShardsTemplate<Index>;
using IndexBinaryShards = IndexShardsTemplate<IndexBinary>;
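/* Usage sketch: query several pre-built sub-indexes as one index. The
 * sub-index type (IndexFlatL2), the dimension and the array names
 * (xb, xq, D, I) below are illustrative assumptions.
 *
 *   faiss::IndexFlatL2 shard0(64), shard1(64);
 *   faiss::IndexShards shards(64, true, true); // threaded, successive_ids
 *   shards.add_shard(&shard0);
 *   shards.add_shard(&shard1);
 *   shards.add(nb, xb);              // split evenly over the shards
 *   shards.search(nq, xq, 10, D, I); // merged, id-translated results
 */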
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/MatrixStats.h>
#include <stdarg.h> /* va_list, va_start, va_arg, va_end */
#include <faiss/utils/utils.h>
#include <cmath>
#include <cstdio>
namespace faiss {
/*********************************************************************
* MatrixStats
*********************************************************************/
MatrixStats::PerDimStats::PerDimStats()
: n(0),
n_nan(0),
n_inf(0),
n0(0),
min(HUGE_VALF),
max(-HUGE_VALF),
sum(0),
sum2(0),
mean(NAN),
stddev(NAN) {}
void MatrixStats::PerDimStats::add(float x) {
n++;
if (std::isnan(x)) {
n_nan++;
return;
}
if (!std::isfinite(x)) {
n_inf++;
return;
}
if (x == 0)
n0++;
if (x < min)
min = x;
if (x > max)
max = x;
sum += x;
sum2 += (double)x * (double)x;
}
void MatrixStats::PerDimStats::compute_mean_std() {
n_valid = n - n_nan - n_inf;
mean = sum / n_valid;
double var = sum2 / n_valid - mean * mean;
if (var < 0)
var = 0;
stddev = sqrt(var);
}
void MatrixStats::do_comment(const char* fmt, ...) {
va_list ap;
/* Determine required size */
va_start(ap, fmt);
size_t size = vsnprintf(buf, nbuf, fmt, ap);
va_end(ap);
nbuf -= size;
buf += size;
}
MatrixStats::MatrixStats(size_t n, size_t d, const float* x)
: n(n),
d(d),
n_collision(0),
n_valid(0),
n0(0),
min_norm2(HUGE_VAL),
max_norm2(0) {
std::vector<char> comment_buf(10000);
buf = comment_buf.data();
nbuf = comment_buf.size();
do_comment("analyzing %ld vectors of size %ld\n", n, d);
if (d > 1024) {
do_comment(
"indexing this many dimensions is hard, "
"please consider dimensionality reducution (with PCAMatrix)\n");
}
size_t nbytes = sizeof(x[0]) * d;
per_dim_stats.resize(d);
for (size_t i = 0; i < n; i++) {
const float* xi = x + d * i;
double sum2 = 0;
for (size_t j = 0; j < d; j++) {
per_dim_stats[j].add(xi[j]);
sum2 += xi[j] * (double)xi[j];
}
if (std::isfinite(sum2)) {
n_valid++;
if (sum2 == 0) {
n0++;
} else {
if (sum2 < min_norm2)
min_norm2 = sum2;
if (sum2 > max_norm2)
max_norm2 = sum2;
}
}
{ // check hash
uint64_t hash = hash_bytes((const uint8_t*)xi, nbytes);
auto elt = occurrences.find(hash);
if (elt == occurrences.end()) {
Occurrence occ = {i, 1};
occurrences[hash] = occ;
} else {
if (!memcmp(xi, x + elt->second.first * d, nbytes)) {
elt->second.count++;
} else {
n_collision++;
// a list of colliding vectors would be more precise, but overkill here
}
}
}
}
// invalid vector stats
if (n_valid == n) {
do_comment("no NaN or Infs in data\n");
} else {
do_comment(
"%ld vectors contain NaN or Inf "
"(or have too large components), "
"expect bad results with indexing!\n",
n - n_valid);
}
// copies in dataset
if (occurrences.size() == n) {
do_comment("all vectors are distinct\n");
} else {
do_comment(
"%ld vectors are distinct (%.2f%%)\n",
occurrences.size(),
occurrences.size() * 100.0 / n);
if (n_collision > 0) {
do_comment(
"%ld collisions in hash table, "
"counts may be invalid\n",
n_collision);
}
Occurrence max = {0, 0};
for (auto it = occurrences.begin(); it != occurrences.end(); ++it) {
if (it->second.count > max.count) {
max = it->second;
}
}
do_comment("vector %ld has %ld copies\n", max.first, max.count);
}
{ // norm stats
min_norm2 = sqrt(min_norm2);
max_norm2 = sqrt(max_norm2);
do_comment(
"range of L2 norms=[%g, %g] (%ld null vectors)\n",
min_norm2,
max_norm2,
n0);
if (max_norm2 < min_norm2 * 1.0001) {
do_comment(
"vectors are normalized, inner product and "
"L2 search are equivalent\n");
}
if (max_norm2 > min_norm2 * 100) {
do_comment(
"vectors have very large differences in norms, "
"is this normal?\n");
}
}
{ // per dimension stats
double max_std = 0, min_std = HUGE_VAL;
size_t n_dangerous_range = 0, n_0_range = 0, n0 = 0;
for (size_t j = 0; j < d; j++) {
PerDimStats& st = per_dim_stats[j];
st.compute_mean_std();
n0 += st.n0;
if (st.max == st.min) {
n_0_range++;
} else if (st.max < 1.001 * st.min) {
n_dangerous_range++;
}
if (st.stddev > max_std)
max_std = st.stddev;
if (st.stddev < min_std)
min_std = st.stddev;
}
if (n0 == 0) {
do_comment("matrix contains no 0s\n");
} else {
do_comment(
"matrix contains %.2f %% 0 entries\n",
n0 * 100.0 / (n * d));
}
if (n_0_range == 0) {
do_comment("no constant dimensions\n");
} else {
do_comment(
"%ld dimensions are constant: they can be removed\n",
n_0_range);
}
if (n_dangerous_range == 0) {
do_comment("no dimension has a too large mean\n");
} else {
do_comment(
"%ld dimensions are too large "
"wrt. their variance, may loose precision "
"in IndexFlatL2 (use CenteringTransform)\n",
n_dangerous_range);
}
do_comment("stddevs per dimension are in [%g %g]\n", min_std, max_std);
size_t n_small_var = 0;
for (size_t j = 0; j < d; j++) {
const PerDimStats& st = per_dim_stats[j];
if (st.stddev < max_std * 1e-4) {
n_small_var++;
}
}
if (n_small_var > 0) {
do_comment(
"%ld dimensions have negligible stddev wrt. "
"the largest dimension, they could be ignored",
n_small_var);
}
}
comments = comment_buf.data();
buf = nullptr;
nbuf = 0;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <stdint.h>
#include <string>
#include <unordered_map>
#include <vector>
namespace faiss {
/** Reports some statistics on a dataset and comments on them.
*
* It is a class rather than a function so that all stats can also be
* accessed from code */
struct MatrixStats {
MatrixStats(size_t n, size_t d, const float* x);
std::string comments;
// raw statistics
size_t n, d;
size_t n_collision, n_valid, n0;
double min_norm2, max_norm2;
struct PerDimStats {
size_t n, n_nan, n_inf, n0;
float min, max;
double sum, sum2;
size_t n_valid;
double mean, stddev;
PerDimStats();
void add(float x);
void compute_mean_std();
};
std::vector<PerDimStats> per_dim_stats;
struct Occurrence {
size_t first;
size_t count;
};
std::unordered_map<uint64_t, Occurrence> occurrences;
char* buf;
size_t nbuf;
void do_comment(const char* fmt, ...);
};
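/* Usage sketch: analyze a dataset and print the generated comments. The
 * array name xb and the sizes nb, d are illustrative assumptions.
 *
 *   faiss::MatrixStats stats(nb, d, xb);
 *   printf("%s", stats.comments.c_str());
 *   // the raw statistics remain accessible, e.g. stats.min_norm2
 */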
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/MetaIndexes.h>
#include <stdint.h>
#include <cinttypes>
#include <cstdio>
#include <limits>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/WorkerThread.h>
namespace faiss {
namespace {} // namespace
/*****************************************************
* IndexIDMap implementation
*******************************************************/
template <typename IndexT>
IndexIDMapTemplate<IndexT>::IndexIDMapTemplate(IndexT* index)
: index(index), own_fields(false) {
FAISS_THROW_IF_NOT_MSG(index->ntotal == 0, "index must be empty on input");
this->is_trained = index->is_trained;
this->metric_type = index->metric_type;
this->verbose = index->verbose;
this->d = index->d;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::add(
idx_t,
const typename IndexT::component_t*) {
FAISS_THROW_MSG(
"add does not make sense with IndexIDMap, "
"use add_with_ids");
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::train(
idx_t n,
const typename IndexT::component_t* x) {
index->train(n, x);
this->is_trained = index->is_trained;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::reset() {
index->reset();
id_map.clear();
this->ntotal = 0;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::add_with_ids(
idx_t n,
const typename IndexT::component_t* x,
const typename IndexT::idx_t* xids) {
index->add(n, x);
for (idx_t i = 0; i < n; i++)
id_map.push_back(xids[i]);
this->ntotal = index->ntotal;
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::search(
idx_t n,
const typename IndexT::component_t* x,
idx_t k,
typename IndexT::distance_t* distances,
typename IndexT::idx_t* labels) const {
index->search(n, x, k, distances, labels);
idx_t* li = labels;
#pragma omp parallel for
for (idx_t i = 0; i < n * k; i++) {
li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
}
}
template <typename IndexT>
void IndexIDMapTemplate<IndexT>::range_search(
typename IndexT::idx_t n,
const typename IndexT::component_t* x,
typename IndexT::distance_t radius,
RangeSearchResult* result) const {
index->range_search(n, x, radius, result);
#pragma omp parallel for
for (idx_t i = 0; i < result->lims[result->nq]; i++) {
result->labels[i] = result->labels[i] < 0 ? result->labels[i]
: id_map[result->labels[i]];
}
}
namespace {
struct IDTranslatedSelector : IDSelector {
const std::vector<int64_t>& id_map;
const IDSelector& sel;
IDTranslatedSelector(
const std::vector<int64_t>& id_map,
const IDSelector& sel)
: id_map(id_map), sel(sel) {}
bool is_member(idx_t id) const override {
return sel.is_member(id_map[id]);
}
};
} // namespace
template <typename IndexT>
size_t IndexIDMapTemplate<IndexT>::remove_ids(const IDSelector& sel) {
// remove in sub-index first
IDTranslatedSelector sel2(id_map, sel);
size_t nremove = index->remove_ids(sel2);
int64_t j = 0;
for (idx_t i = 0; i < this->ntotal; i++) {
if (sel.is_member(id_map[i])) {
// remove
} else {
id_map[j] = id_map[i];
j++;
}
}
FAISS_ASSERT(j == index->ntotal);
this->ntotal = j;
id_map.resize(this->ntotal);
return nremove;
}
template <typename IndexT>
IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate() {
if (own_fields)
delete index;
}
/*****************************************************
* IndexIDMap2 implementation
*******************************************************/
template <typename IndexT>
IndexIDMap2Template<IndexT>::IndexIDMap2Template(IndexT* index)
: IndexIDMapTemplate<IndexT>(index) {}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::add_with_ids(
idx_t n,
const typename IndexT::component_t* x,
const typename IndexT::idx_t* xids) {
size_t prev_ntotal = this->ntotal;
IndexIDMapTemplate<IndexT>::add_with_ids(n, x, xids);
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
rev_map[this->id_map[i]] = i;
}
}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::construct_rev_map() {
rev_map.clear();
for (size_t i = 0; i < this->ntotal; i++) {
rev_map[this->id_map[i]] = i;
}
}
template <typename IndexT>
size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel) {
// This is quite inefficient
size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids(sel);
construct_rev_map();
return nremove;
}
template <typename IndexT>
void IndexIDMap2Template<IndexT>::reconstruct(
idx_t key,
typename IndexT::component_t* recons) const {
try {
this->index->reconstruct(rev_map.at(key), recons);
} catch (const std::out_of_range& e) {
FAISS_THROW_FMT("key %" PRId64 " not found", key);
}
}
// explicit template instantiations
template struct IndexIDMapTemplate<Index>;
template struct IndexIDMapTemplate<IndexBinary>;
template struct IndexIDMap2Template<Index>;
template struct IndexIDMap2Template<IndexBinary>;
/*****************************************************
* IndexSplitVectors implementation
*******************************************************/
IndexSplitVectors::IndexSplitVectors(idx_t d, bool threaded)
: Index(d), own_fields(false), threaded(threaded), sum_d(0) {}
void IndexSplitVectors::add_sub_index(Index* index) {
sub_indexes.push_back(index);
sync_with_sub_indexes();
}
void IndexSplitVectors::sync_with_sub_indexes() {
if (sub_indexes.empty())
return;
Index* index0 = sub_indexes[0];
sum_d = index0->d;
metric_type = index0->metric_type;
is_trained = index0->is_trained;
ntotal = index0->ntotal;
for (int i = 1; i < sub_indexes.size(); i++) {
Index* index = sub_indexes[i];
FAISS_THROW_IF_NOT(metric_type == index->metric_type);
FAISS_THROW_IF_NOT(ntotal == index->ntotal);
sum_d += index->d;
}
}
void IndexSplitVectors::add(idx_t /*n*/, const float* /*x*/) {
FAISS_THROW_MSG("not implemented");
}
void IndexSplitVectors::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const {
FAISS_THROW_IF_NOT_MSG(k == 1, "search implemented only for k=1");
FAISS_THROW_IF_NOT_MSG(
sum_d == d, "not enough indexes compared to # dimensions");
int64_t nshard = sub_indexes.size();
float* all_distances = new float[nshard * k * n];
idx_t* all_labels = new idx_t[nshard * k * n];
ScopeDeleter<float> del(all_distances);
ScopeDeleter<idx_t> del2(all_labels);
auto query_func = [n,
x,
k,
distances,
labels,
all_distances,
all_labels,
this](int no) {
const IndexSplitVectors* index = this;
float* distances1 = no == 0 ? distances : all_distances + no * k * n;
idx_t* labels1 = no == 0 ? labels : all_labels + no * k * n;
if (index->verbose)
printf("begin query shard %d on %" PRId64 " points\n", no, n);
const Index* sub_index = index->sub_indexes[no];
int64_t sub_d = sub_index->d, d = index->d;
idx_t ofs = 0;
for (int i = 0; i < no; i++)
ofs += index->sub_indexes[i]->d;
float* sub_x = new float[sub_d * n];
ScopeDeleter<float> del1(sub_x);
for (idx_t i = 0; i < n; i++)
memcpy(sub_x + i * sub_d, x + ofs + i * d, sub_d * sizeof(sub_x[0]));
sub_index->search(n, sub_x, k, distances1, labels1);
if (index->verbose)
printf("end query shard %d\n", no);
};
if (!threaded) {
for (int i = 0; i < nshard; i++) {
query_func(i);
}
} else {
std::vector<std::unique_ptr<WorkerThread>> threads;
std::vector<std::future<bool>> v;
for (int i = 0; i < nshard; i++) {
threads.emplace_back(new WorkerThread());
WorkerThread* wt = threads.back().get();
v.emplace_back(wt->add([i, query_func]() { query_func(i); }));
}
// Blocking wait for completion
for (auto& func : v) {
func.get();
}
}
int64_t factor = 1;
for (int i = 0; i < nshard; i++) {
if (i > 0) { // results of 0 are already in the table
const float* distances_i = all_distances + i * k * n;
const idx_t* labels_i = all_labels + i * k * n;
for (int64_t j = 0; j < n; j++) {
if (labels[j] >= 0 && labels_i[j] >= 0) {
labels[j] += labels_i[j] * factor;
distances[j] += distances_i[j];
} else {
labels[j] = -1;
distances[j] = std::numeric_limits<float>::quiet_NaN();
}
}
}
factor *= sub_indexes[i]->ntotal;
}
}
void IndexSplitVectors::train(idx_t /*n*/, const float* /*x*/) {
FAISS_THROW_MSG("not implemented");
}
void IndexSplitVectors::reset() {
FAISS_THROW_MSG("not implemented");
}
IndexSplitVectors::~IndexSplitVectors() {
if (own_fields) {
for (int s = 0; s < sub_indexes.size(); s++)
delete sub_indexes[s];
}
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef META_INDEXES_H
#define META_INDEXES_H
#include <faiss/Index.h>
#include <faiss/IndexReplicas.h>
#include <faiss/IndexShards.h>
#include <unordered_map>
#include <vector>
namespace faiss {
/** Index that translates search results to ids */
template <typename IndexT>
struct IndexIDMapTemplate : IndexT {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
IndexT* index; ///! the sub-index
bool own_fields; ///! whether pointers are deleted in the destructor
std::vector<idx_t> id_map;
explicit IndexIDMapTemplate(IndexT* index);
/// @param xids if non-null, ids to store for the vectors (size n)
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
override;
/// this will fail. Use add_with_ids
void add(idx_t n, const component_t* x) override;
void search(
idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const override;
void train(idx_t n, const component_t* x) override;
void reset() override;
/// remove ids; assumes the sub-index renumbers sequentially on removal
/// (as IndexFlat does)
size_t remove_ids(const IDSelector& sel) override;
void range_search(
idx_t n,
const component_t* x,
distance_t radius,
RangeSearchResult* result) const override;
~IndexIDMapTemplate() override;
IndexIDMapTemplate() {
own_fields = false;
index = nullptr;
}
};
using IndexIDMap = IndexIDMapTemplate<Index>;
using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
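/* Usage sketch: attach user-provided 64-bit ids to an index that does not
 * support add_with_ids natively. The wrapped index type and the arrays
 * (xb, ids, xq, D, I) are illustrative assumptions.
 *
 *   faiss::IndexFlatL2 flat(d);
 *   faiss::IndexIDMap index(&flat);
 *   index.add_with_ids(nb, xb, ids); // ids: int64 array of size nb
 *   index.search(nq, xq, 10, D, I);  // I contains the user-provided ids
 */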
/** same as IndexIDMap but also provides an efficient reconstruction
* implementation via a 2-way index */
template <typename IndexT>
struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
using idx_t = typename IndexT::idx_t;
using component_t = typename IndexT::component_t;
using distance_t = typename IndexT::distance_t;
std::unordered_map<idx_t, idx_t> rev_map;
explicit IndexIDMap2Template(IndexT* index);
/// make the rev_map from scratch
void construct_rev_map();
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
override;
size_t remove_ids(const IDSelector& sel) override;
void reconstruct(idx_t key, component_t* recons) const override;
~IndexIDMap2Template() override {}
IndexIDMap2Template() {}
};
using IndexIDMap2 = IndexIDMap2Template<Index>;
using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
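/* Usage sketch: same as IndexIDMap, plus reconstruction keyed by the
 * user-provided id. All names below are illustrative assumptions.
 *
 *   faiss::IndexFlatL2 flat(d);
 *   faiss::IndexIDMap2 index2(&flat);
 *   index2.add_with_ids(nb, xb, ids);
 *   std::vector<float> recons(d);
 *   index2.reconstruct(ids[0], recons.data()); // throws on unknown ids
 */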
/** splits input vectors in segments and assigns each segment to a sub-index
* used to distribute a MultiIndexQuantizer
*/
struct IndexSplitVectors : Index {
bool own_fields;
bool threaded;
std::vector<Index*> sub_indexes;
idx_t sum_d; ///< sum of dimensions seen so far
explicit IndexSplitVectors(idx_t d, bool threaded = false);
void add_sub_index(Index*);
void sync_with_sub_indexes();
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void train(idx_t n, const float* x) override;
void reset() override;
~IndexSplitVectors() override;
};
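/* Usage sketch: each sub-index handles a contiguous slice of the input
 * dimensions; the sub-index dimensions must sum to d and only k == 1 is
 * supported at search time. sub0, sub1 and the arrays are illustrative
 * assumptions (the sub-indexes must already contain the same number of
 * vectors).
 *
 *   faiss::IndexSplitVectors split(d, false);
 *   split.add_sub_index(&sub0); // covers dimensions [0, sub0.d)
 *   split.add_sub_index(&sub1); // covers dimensions [sub0.d, d)
 *   split.search(nq, xq, 1, D, I);
 */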
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_METRIC_TYPE_H
#define FAISS_METRIC_TYPE_H
namespace faiss {
/// The metric space for vector comparison for Faiss indices and algorithms.
///
/// Most algorithms support both inner product and L2, with the flat
/// (brute-force) indices supporting additional metric types for vector
/// comparison.
enum MetricType {
METRIC_INNER_PRODUCT = 0, ///< maximum inner product search
METRIC_L2 = 1, ///< squared L2 search
METRIC_L1, ///< L1 (aka cityblock)
METRIC_Linf, ///< infinity distance
METRIC_Lp, ///< L_p distance, p is given by a faiss::Index
/// metric_arg
/// some additional metrics defined in scipy.spatial.distance
METRIC_Canberra = 20,
METRIC_BrayCurtis,
METRIC_JensenShannon,
};
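/* Usage sketch: the metric is chosen when constructing an index; flat
 * (brute-force) indexes also accept the additional metrics. d is an
 * illustrative assumption.
 *
 *   faiss::IndexFlat ip_index(d, faiss::METRIC_INNER_PRODUCT);
 *   faiss::IndexFlat l1_index(d, faiss::METRIC_L1);
 */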
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/VectorTransform.h>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <memory>
#include <faiss/IndexPQ.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
using namespace faiss;
extern "C" {
// this is to keep the clang syntax checker happy
#ifndef FINTEGER
#define FINTEGER int
#endif
/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
int sgemm_(
const char* transa,
const char* transb,
FINTEGER* m,
FINTEGER* n,
FINTEGER* k,
const float* alpha,
const float* a,
FINTEGER* lda,
const float* b,
FINTEGER* ldb,
float* beta,
float* c,
FINTEGER* ldc);
int dgemm_(
const char* transa,
const char* transb,
FINTEGER* m,
FINTEGER* n,
FINTEGER* k,
const double* alpha,
const double* a,
FINTEGER* lda,
const double* b,
FINTEGER* ldb,
double* beta,
double* c,
FINTEGER* ldc);
int ssyrk_(
const char* uplo,
const char* trans,
FINTEGER* n,
FINTEGER* k,
float* alpha,
float* a,
FINTEGER* lda,
float* beta,
float* c,
FINTEGER* ldc);
/* Lapack functions from http://www.netlib.org/clapack/old/single/ */
int ssyev_(
const char* jobz,
const char* uplo,
FINTEGER* n,
float* a,
FINTEGER* lda,
float* w,
float* work,
FINTEGER* lwork,
FINTEGER* info);
int dsyev_(
const char* jobz,
const char* uplo,
FINTEGER* n,
double* a,
FINTEGER* lda,
double* w,
double* work,
FINTEGER* lwork,
FINTEGER* info);
int sgesvd_(
const char* jobu,
const char* jobvt,
FINTEGER* m,
FINTEGER* n,
float* a,
FINTEGER* lda,
float* s,
float* u,
FINTEGER* ldu,
float* vt,
FINTEGER* ldvt,
float* work,
FINTEGER* lwork,
FINTEGER* info);
int dgesvd_(
const char* jobu,
const char* jobvt,
FINTEGER* m,
FINTEGER* n,
double* a,
FINTEGER* lda,
double* s,
double* u,
FINTEGER* ldu,
double* vt,
FINTEGER* ldvt,
double* work,
FINTEGER* lwork,
FINTEGER* info);
}
/*********************************************
* VectorTransform
*********************************************/
float* VectorTransform::apply(Index::idx_t n, const float* x) const {
float* xt = new float[n * d_out];
apply_noalloc(n, x, xt);
return xt;
}
void VectorTransform::train(idx_t, const float*) {
// does nothing by default
}
void VectorTransform::reverse_transform(idx_t, const float*, float*) const {
FAISS_THROW_MSG("reverse transform not implemented");
}
/*********************************************
* LinearTransform
*********************************************/
/// both d_in > d_out and d_out < d_in are supported
LinearTransform::LinearTransform(int d_in, int d_out, bool have_bias)
: VectorTransform(d_in, d_out),
have_bias(have_bias),
is_orthonormal(false),
verbose(false) {
is_trained = false; // will be trained when A and b are initialized
}
void LinearTransform::apply_noalloc(Index::idx_t n, const float* x, float* xt)
const {
FAISS_THROW_IF_NOT_MSG(is_trained, "Transformation not trained yet");
float c_factor;
if (have_bias) {
FAISS_THROW_IF_NOT_MSG(b.size() == d_out, "Bias not initialized");
float* xi = xt;
for (int i = 0; i < n; i++)
for (int j = 0; j < d_out; j++)
*xi++ = b[j];
c_factor = 1.0;
} else {
c_factor = 0.0;
}
FAISS_THROW_IF_NOT_MSG(
A.size() == d_out * d_in, "Transformation matrix not initialized");
float one = 1;
FINTEGER nbiti = d_out, ni = n, di = d_in;
sgemm_("Transposed",
"Not transposed",
&nbiti,
&ni,
&di,
&one,
A.data(),
&di,
x,
&di,
&c_factor,
xt,
&nbiti);
}
void LinearTransform::transform_transpose(idx_t n, const float* y, float* x)
const {
if (have_bias) { // allocate buffer to store bias-corrected data
float* y_new = new float[n * d_out];
const float* yr = y;
float* yw = y_new;
for (idx_t i = 0; i < n; i++) {
for (int j = 0; j < d_out; j++) {
*yw++ = *yr++ - b[j];
}
}
y = y_new;
}
{
FINTEGER dii = d_in, doi = d_out, ni = n;
float one = 1.0, zero = 0.0;
sgemm_("Not",
"Not",
&dii,
&ni,
&doi,
&one,
A.data(),
&dii,
y,
&doi,
&zero,
x,
&dii);
}
if (have_bias)
delete[] y;
}
void LinearTransform::set_is_orthonormal() {
if (d_out > d_in) {
// not clear what we should do in this case
is_orthonormal = false;
return;
}
if (d_out == 0) { // borderline case, unnormalized matrix
is_orthonormal = true;
return;
}
double eps = 4e-5;
FAISS_ASSERT(A.size() >= d_out * d_in);
{
std::vector<float> ATA(d_out * d_out);
FINTEGER dii = d_in, doi = d_out;
float one = 1.0, zero = 0.0;
sgemm_("Transposed",
"Not",
&doi,
&doi,
&dii,
&one,
A.data(),
&dii,
A.data(),
&dii,
&zero,
ATA.data(),
&doi);
is_orthonormal = true;
for (long i = 0; i < d_out; i++) {
for (long j = 0; j < d_out; j++) {
float v = ATA[i + j * d_out];
if (i == j)
v -= 1;
if (fabs(v) > eps) {
is_orthonormal = false;
}
}
}
}
}
void LinearTransform::reverse_transform(idx_t n, const float* xt, float* x)
const {
if (is_orthonormal) {
transform_transpose(n, xt, x);
} else {
FAISS_THROW_MSG(
"reverse transform not implemented for non-orthonormal matrices");
}
}
void LinearTransform::print_if_verbose(
const char* name,
const std::vector<double>& mat,
int n,
int d) const {
if (!verbose)
return;
printf("matrix %s: %d*%d [\n", name, n, d);
FAISS_THROW_IF_NOT(mat.size() >= n * d);
for (int i = 0; i < n; i++) {
for (int j = 0; j < d; j++) {
printf("%10.5g ", mat[i * d + j]);
}
printf("\n");
}
printf("]\n");
}
/*********************************************
* RandomRotationMatrix
*********************************************/
void RandomRotationMatrix::init(int seed) {
if (d_out <= d_in) {
A.resize(d_out * d_in);
float* q = A.data();
float_randn(q, d_out * d_in, seed);
matrix_qr(d_in, d_out, q);
} else {
// use tight-frame transformation
A.resize(d_out * d_out);
float* q = A.data();
float_randn(q, d_out * d_out, seed);
matrix_qr(d_out, d_out, q);
// remove columns
int i, j;
for (i = 0; i < d_out; i++) {
for (j = 0; j < d_in; j++) {
q[i * d_in + j] = q[i * d_out + j];
}
}
A.resize(d_in * d_out);
}
is_orthonormal = true;
is_trained = true;
}
void RandomRotationMatrix::train(Index::idx_t /*n*/, const float* /*x*/) {
// initialize with some arbitrary seed
init(12345);
}
/*********************************************
* PCAMatrix
*********************************************/
PCAMatrix::PCAMatrix(
int d_in,
int d_out,
float eigen_power,
bool random_rotation)
: LinearTransform(d_in, d_out, true),
eigen_power(eigen_power),
random_rotation(random_rotation) {
is_trained = false;
max_points_per_d = 1000;
balanced_bins = 0;
epsilon = 0;
}
namespace {
/// Compute the eigenvalue decomposition of symmetric matrix cov,
/// dimensions d_in-by-d_in. Output eigenvectors in cov.
void eig(size_t d_in, double* cov, double* eigenvalues, int verbose) {
{ // compute eigenvalues and vectors
FINTEGER info = 0, lwork = -1, di = d_in;
double workq;
dsyev_("Vectors as well",
"Upper",
&di,
cov,
&di,
eigenvalues,
&workq,
&lwork,
&info);
lwork = FINTEGER(workq);
double* work = new double[lwork];
dsyev_("Vectors as well",
"Upper",
&di,
cov,
&di,
eigenvalues,
work,
&lwork,
&info);
delete[] work;
if (info != 0) {
fprintf(stderr,
"WARN ssyev info returns %d, "
"a very bad PCA matrix is learnt\n",
int(info));
// do not throw exception, as the matrix could still be useful
}
if (verbose && d_in <= 10) {
printf("info=%ld new eigvals=[", long(info));
for (int j = 0; j < d_in; j++)
printf("%g ", eigenvalues[j]);
printf("]\n");
double* ci = cov;
printf("eigenvecs=\n");
for (int i = 0; i < d_in; i++) {
for (int j = 0; j < d_in; j++)
printf("%10.4g ", *ci++);
printf("\n");
}
}
}
// reverse the order of eigenvectors & values
for (int i = 0; i < d_in / 2; i++) {
std::swap(eigenvalues[i], eigenvalues[d_in - 1 - i]);
double* v1 = cov + i * d_in;
double* v2 = cov + (d_in - 1 - i) * d_in;
for (int j = 0; j < d_in; j++)
std::swap(v1[j], v2[j]);
}
}
} // namespace
void PCAMatrix::train(Index::idx_t n, const float* x) {
const float* x_in = x;
x = fvecs_maybe_subsample(
d_in, (size_t*)&n, max_points_per_d * d_in, x, verbose);
ScopeDeleter<float> del_x(x != x_in ? x : nullptr);
// compute mean
mean.clear();
mean.resize(d_in, 0.0);
if (have_bias) { // we may want to skip the bias
const float* xi = x;
for (int i = 0; i < n; i++) {
for (int j = 0; j < d_in; j++)
mean[j] += *xi++;
}
for (int j = 0; j < d_in; j++)
mean[j] /= n;
}
if (verbose) {
printf("mean=[");
for (int j = 0; j < d_in; j++)
printf("%g ", mean[j]);
printf("]\n");
}
if (n >= d_in) {
// compute covariance matrix, store it in PCA matrix
PCAMat.resize(d_in * d_in);
float* cov = PCAMat.data();
{ // initialize with mean * mean^T term
float* ci = cov;
for (int i = 0; i < d_in; i++) {
for (int j = 0; j < d_in; j++)
*ci++ = -n * mean[i] * mean[j];
}
}
{
FINTEGER di = d_in, ni = n;
float one = 1.0;
ssyrk_("Up",
"Non transposed",
&di,
&ni,
&one,
(float*)x,
&di,
&one,
cov,
&di);
}
if (verbose && d_in <= 10) {
float* ci = cov;
printf("cov=\n");
for (int i = 0; i < d_in; i++) {
for (int j = 0; j < d_in; j++)
printf("%10g ", *ci++);
printf("\n");
}
}
std::vector<double> covd(d_in * d_in);
for (size_t i = 0; i < d_in * d_in; i++)
covd[i] = cov[i];
std::vector<double> eigenvaluesd(d_in);
eig(d_in, covd.data(), eigenvaluesd.data(), verbose);
for (size_t i = 0; i < d_in * d_in; i++)
PCAMat[i] = covd[i];
eigenvalues.resize(d_in);
for (size_t i = 0; i < d_in; i++)
eigenvalues[i] = eigenvaluesd[i];
} else {
std::vector<float> xc(n * d_in);
for (size_t i = 0; i < n; i++)
for (size_t j = 0; j < d_in; j++)
xc[i * d_in + j] = x[i * d_in + j] - mean[j];
// compute Gram matrix
std::vector<float> gram(n * n);
{
FINTEGER di = d_in, ni = n;
float one = 1.0, zero = 0.0;
ssyrk_("Up",
"Transposed",
&ni,
&di,
&one,
xc.data(),
&di,
&zero,
gram.data(),
&ni);
}
if (verbose && d_in <= 10) {
float* ci = gram.data();
printf("gram=\n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++)
printf("%10g ", *ci++);
printf("\n");
}
}
std::vector<double> gramd(n * n);
for (size_t i = 0; i < n * n; i++)
gramd[i] = gram[i];
std::vector<double> eigenvaluesd(n);
// eig will fill in only the n first eigenvals
eig(n, gramd.data(), eigenvaluesd.data(), verbose);
PCAMat.resize(d_in * n);
for (size_t i = 0; i < n * n; i++)
gram[i] = gramd[i];
eigenvalues.resize(d_in);
// fill in only the n first ones
for (size_t i = 0; i < n; i++)
eigenvalues[i] = eigenvaluesd[i];
{ // compute PCAMat = x' * v
FINTEGER di = d_in, ni = n;
float one = 1.0;
sgemm_("Non",
"Non Trans",
&di,
&ni,
&ni,
&one,
xc.data(),
&di,
gram.data(),
&ni,
&one,
PCAMat.data(),
&di);
}
if (verbose && d_in <= 10) {
float* ci = PCAMat.data();
printf("PCAMat=\n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < d_in; j++)
printf("%10g ", *ci++);
printf("\n");
}
}
fvec_renorm_L2(d_in, n, PCAMat.data());
}
prepare_Ab();
is_trained = true;
}
void PCAMatrix::copy_from(const PCAMatrix& other) {
FAISS_THROW_IF_NOT(other.is_trained);
mean = other.mean;
eigenvalues = other.eigenvalues;
PCAMat = other.PCAMat;
prepare_Ab();
is_trained = true;
}
void PCAMatrix::prepare_Ab() {
FAISS_THROW_IF_NOT_FMT(
d_out * d_in <= PCAMat.size(),
"PCA matrix cannot output %d dimensions from %d ",
d_out,
d_in);
if (!random_rotation) {
A = PCAMat;
A.resize(d_out * d_in); // strip off useless dimensions
// first scale the components
if (eigen_power != 0) {
float* ai = A.data();
for (int i = 0; i < d_out; i++) {
float factor = pow(eigenvalues[i] + epsilon, eigen_power);
for (int j = 0; j < d_in; j++)
*ai++ *= factor;
}
}
if (balanced_bins != 0) {
FAISS_THROW_IF_NOT(d_out % balanced_bins == 0);
int dsub = d_out / balanced_bins;
std::vector<float> Ain;
std::swap(A, Ain);
A.resize(d_out * d_in);
std::vector<float> accu(balanced_bins);
std::vector<int> counter(balanced_bins);
// greedy assignment
for (int i = 0; i < d_out; i++) {
// find best bin
int best_j = -1;
float min_w = 1e30;
for (int j = 0; j < balanced_bins; j++) {
if (counter[j] < dsub && accu[j] < min_w) {
min_w = accu[j];
best_j = j;
}
}
int row_dst = best_j * dsub + counter[best_j];
accu[best_j] += eigenvalues[i];
counter[best_j]++;
memcpy(&A[row_dst * d_in], &Ain[i * d_in], d_in * sizeof(A[0]));
}
if (verbose) {
printf(" bin accu=[");
for (int i = 0; i < balanced_bins; i++)
printf("%g ", accu[i]);
printf("]\n");
}
}
} else {
FAISS_THROW_IF_NOT_MSG(
balanced_bins == 0,
"both balancing bins and applying a random rotation "
"does not make sense");
RandomRotationMatrix rr(d_out, d_out);
rr.init(5);
// apply scaling on the rotation matrix (right multiplication)
if (eigen_power != 0) {
for (int i = 0; i < d_out; i++) {
float factor = pow(eigenvalues[i], eigen_power);
for (int j = 0; j < d_out; j++)
rr.A[j * d_out + i] *= factor;
}
}
A.resize(d_in * d_out);
{
FINTEGER dii = d_in, doo = d_out;
float one = 1.0, zero = 0.0;
sgemm_("Not",
"Not",
&dii,
&doo,
&doo,
&one,
PCAMat.data(),
&dii,
rr.A.data(),
&doo,
&zero,
A.data(),
&dii);
}
}
b.clear();
b.resize(d_out);
for (int i = 0; i < d_out; i++) {
float accu = 0;
for (int j = 0; j < d_in; j++)
accu -= mean[j] * A[j + i * d_in];
b[i] = accu;
}
is_orthonormal = eigen_power == 0;
}
/*********************************************
* ITQMatrix
*********************************************/
ITQMatrix::ITQMatrix(int d)
: LinearTransform(d, d, false), max_iter(50), seed(123) {}
/** translated from fbcode/deeplearning/catalyzer/catalyzer/quantizers.py */
void ITQMatrix::train(Index::idx_t n, const float* xf) {
size_t d = d_in;
std::vector<double> rotation(d * d);
if (init_rotation.size() == d * d) {
memcpy(rotation.data(),
init_rotation.data(),
d * d * sizeof(rotation[0]));
} else {
RandomRotationMatrix rrot(d, d);
rrot.init(seed);
for (size_t i = 0; i < d * d; i++) {
rotation[i] = rrot.A[i];
}
}
std::vector<double> x(n * d);
for (size_t i = 0; i < n * d; i++) {
x[i] = xf[i];
}
std::vector<double> rotated_x(n * d), cov_mat(d * d);
std::vector<double> u(d * d), vt(d * d), singvals(d);
for (int i = 0; i < max_iter; i++) {
print_if_verbose("rotation", rotation, d, d);
{ // rotated_data = np.dot(training_data, rotation)
FINTEGER di = d, ni = n;
double one = 1, zero = 0;
dgemm_("N",
"N",
&di,
&ni,
&di,
&one,
rotation.data(),
&di,
x.data(),
&di,
&zero,
rotated_x.data(),
&di);
}
print_if_verbose("rotated_x", rotated_x, n, d);
// binarize
for (size_t j = 0; j < n * d; j++) {
rotated_x[j] = rotated_x[j] < 0 ? -1 : 1;
}
// covariance matrix
{ // rotated_data = np.dot(training_data, rotation)
FINTEGER di = d, ni = n;
double one = 1, zero = 0;
dgemm_("N",
"T",
&di,
&di,
&ni,
&one,
rotated_x.data(),
&di,
x.data(),
&di,
&zero,
cov_mat.data(),
&di);
}
print_if_verbose("cov_mat", cov_mat, d, d);
// SVD
{
FINTEGER di = d;
FINTEGER lwork = -1, info;
double lwork1;
// workspace query
dgesvd_("A",
"A",
&di,
&di,
cov_mat.data(),
&di,
singvals.data(),
u.data(),
&di,
vt.data(),
&di,
&lwork1,
&lwork,
&info);
FAISS_THROW_IF_NOT(info == 0);
lwork = size_t(lwork1);
std::vector<double> work(lwork);
dgesvd_("A",
"A",
&di,
&di,
cov_mat.data(),
&di,
singvals.data(),
u.data(),
&di,
vt.data(),
&di,
work.data(),
&lwork,
&info);
FAISS_THROW_IF_NOT_FMT(info == 0, "sgesvd returned info=%d", info);
}
print_if_verbose("u", u, d, d);
print_if_verbose("vt", vt, d, d);
// update rotation
{
FINTEGER di = d;
double one = 1, zero = 0;
dgemm_("N",
"T",
&di,
&di,
&di,
&one,
u.data(),
&di,
vt.data(),
&di,
&zero,
rotation.data(),
&di);
}
print_if_verbose("final rot", rotation, d, d);
}
A.resize(d * d);
for (size_t i = 0; i < d; i++) {
for (size_t j = 0; j < d; j++) {
A[i + d * j] = rotation[j + d * i];
}
}
is_trained = true;
}
ITQTransform::ITQTransform(int d_in, int d_out, bool do_pca)
: VectorTransform(d_in, d_out),
do_pca(do_pca),
itq(d_out),
pca_then_itq(d_in, d_out, false) {
if (!do_pca) {
FAISS_THROW_IF_NOT(d_in == d_out);
}
max_train_per_dim = 10;
is_trained = false;
}
void ITQTransform::train(idx_t n, const float* x) {
FAISS_THROW_IF_NOT(!is_trained);
const float* x_in = x;
size_t max_train_points = std::max(d_in * max_train_per_dim, 32768);
x = fvecs_maybe_subsample(d_in, (size_t*)&n, max_train_points, x);
ScopeDeleter<float> del_x(x != x_in ? x : nullptr);
std::unique_ptr<float[]> x_norm(new float[n * d_in]);
{ // normalize
int d = d_in;
mean.resize(d, 0);
for (idx_t i = 0; i < n; i++) {
for (idx_t j = 0; j < d; j++) {
mean[j] += x[i * d + j];
}
}
for (idx_t j = 0; j < d; j++) {
mean[j] /= n;
}
for (idx_t i = 0; i < n; i++) {
for (idx_t j = 0; j < d; j++) {
x_norm[i * d + j] = x[i * d + j] - mean[j];
}
}
fvec_renorm_L2(d_in, n, x_norm.get());
}
// train PCA
PCAMatrix pca(d_in, d_out);
float* x_pca;
std::unique_ptr<float[]> x_pca_del;
if (do_pca) {
pca.have_bias = false; // for consistency with reference implem
pca.train(n, x_norm.get());
x_pca = pca.apply(n, x_norm.get());
x_pca_del.reset(x_pca);
} else {
x_pca = x_norm.get();
}
// train ITQ
itq.train(n, x_pca);
// merge PCA and ITQ
if (do_pca) {
FINTEGER di = d_out, dini = d_in;
float one = 1, zero = 0;
pca_then_itq.A.resize(d_in * d_out);
sgemm_("N",
"N",
&dini,
&di,
&di,
&one,
pca.A.data(),
&dini,
itq.A.data(),
&di,
&zero,
pca_then_itq.A.data(),
&dini);
} else {
pca_then_itq.A = itq.A;
}
pca_then_itq.is_trained = true;
is_trained = true;
}
void ITQTransform::apply_noalloc(Index::idx_t n, const float* x, float* xt)
const {
FAISS_THROW_IF_NOT_MSG(is_trained, "Transformation not trained yet");
std::unique_ptr<float[]> x_norm(new float[n * d_in]);
{ // normalize
int d = d_in;
for (idx_t i = 0; i < n; i++) {
for (idx_t j = 0; j < d; j++) {
x_norm[i * d + j] = x[i * d + j] - mean[j];
}
}
// this is not really useful if we are going to binarize right
// afterwards but OK
fvec_renorm_L2(d_in, n, x_norm.get());
}
pca_then_itq.apply_noalloc(n, x_norm.get(), xt);
}
/*********************************************
* OPQMatrix
*********************************************/
OPQMatrix::OPQMatrix(int d, int M, int d2)
: LinearTransform(d, d2 == -1 ? d : d2, false),
M(M),
niter(50),
niter_pq(4),
niter_pq_0(40),
verbose(false),
pq(nullptr) {
is_trained = false;
// OPQ is quite expensive to train, so set this right.
max_train_points = 256 * 256;
pq = nullptr;
}
void OPQMatrix::train(Index::idx_t n, const float* x) {
const float* x_in = x;
x = fvecs_maybe_subsample(d_in, (size_t*)&n, max_train_points, x, verbose);
ScopeDeleter<float> del_x(x != x_in ? x : nullptr);
// To support d_out > d_in, we pad input vectors with 0s to d_out
size_t d = d_out <= d_in ? d_in : d_out;
size_t d2 = d_out;
#if 0
// what this test shows: the only way of getting bit-exact
// reproducible results with sgeqrf and sgesvd seems to be forcing
// single-threading.
{ // test repro
std::vector<float> r (d * d);
float * rotation = r.data();
float_randn (rotation, d * d, 1234);
printf("CS0: %016lx\n",
ivec_checksum (128*128, (int*)rotation));
matrix_qr (d, d, rotation);
printf("CS1: %016lx\n",
ivec_checksum (128*128, (int*)rotation));
return;
}
#endif
if (verbose) {
printf("OPQMatrix::train: training an OPQ rotation matrix "
"for M=%d from %" PRId64 " vectors in %dD -> %dD\n",
M,
n,
d_in,
d_out);
}
std::vector<float> xtrain(n * d);
// center x
{
std::vector<float> sum(d);
const float* xi = x;
for (size_t i = 0; i < n; i++) {
for (int j = 0; j < d_in; j++)
sum[j] += *xi++;
}
for (int i = 0; i < d; i++)
sum[i] /= n;
float* yi = xtrain.data();
xi = x;
for (size_t i = 0; i < n; i++) {
for (int j = 0; j < d_in; j++)
*yi++ = *xi++ - sum[j];
yi += d - d_in;
}
}
float* rotation;
if (A.size() == 0) {
A.resize(d * d);
rotation = A.data();
if (verbose)
printf(" OPQMatrix::train: making random %zd*%zd rotation\n",
d,
d);
float_randn(rotation, d * d, 1234);
matrix_qr(d, d, rotation);
// we use only the d * d2 upper part of the matrix
A.resize(d * d2);
} else {
FAISS_THROW_IF_NOT(A.size() == d * d2);
rotation = A.data();
}
std::vector<float> xproj(d2 * n), pq_recons(d2 * n), xxr(d * n),
tmp(d * d * 4);
ProductQuantizer pq_default(d2, M, 8);
ProductQuantizer& pq_regular = pq ? *pq : pq_default;
std::vector<uint8_t> codes(pq_regular.code_size * n);
double t0 = getmillisecs();
for (int iter = 0; iter < niter; iter++) {
{ // torch.mm(xtrain, rotation:t())
FINTEGER di = d, d2i = d2, ni = n;
float zero = 0, one = 1;
sgemm_("Transposed",
"Not transposed",
&d2i,
&ni,
&di,
&one,
rotation,
&di,
xtrain.data(),
&di,
&zero,
xproj.data(),
&d2i);
}
pq_regular.cp.max_points_per_centroid = 1000;
pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq;
pq_regular.verbose = verbose;
pq_regular.train(n, xproj.data());
if (verbose) {
printf(" encode / decode\n");
}
if (pq_regular.assign_index) {
pq_regular.compute_codes_with_assign_index(
xproj.data(), codes.data(), n);
} else {
pq_regular.compute_codes(xproj.data(), codes.data(), n);
}
pq_regular.decode(codes.data(), pq_recons.data(), n);
float pq_err = fvec_L2sqr(pq_recons.data(), xproj.data(), n * d2) / n;
if (verbose)
printf(" Iteration %d (%d PQ iterations):"
"%.3f s, obj=%g\n",
iter,
pq_regular.cp.niter,
(getmillisecs() - t0) / 1000.0,
pq_err);
{
float *u = tmp.data(), *vt = &tmp[d * d];
float* sing_val = &tmp[2 * d * d];
FINTEGER di = d, d2i = d2, ni = n;
float one = 1, zero = 0;
if (verbose) {
printf(" X * recons\n");
}
// torch.mm(xtrain:t(), pq_recons)
sgemm_("Not",
"Transposed",
&d2i,
&di,
&ni,
&one,
pq_recons.data(),
&d2i,
xtrain.data(),
&di,
&zero,
xxr.data(),
&d2i);
FINTEGER lwork = -1, info = -1;
float worksz;
// workspace query
sgesvd_("All",
"All",
&d2i,
&di,
xxr.data(),
&d2i,
sing_val,
vt,
&d2i,
u,
&di,
&worksz,
&lwork,
&info);
lwork = int(worksz);
std::vector<float> work(lwork);
// u and vt swapped
sgesvd_("All",
"All",
&d2i,
&di,
xxr.data(),
&d2i,
sing_val,
vt,
&d2i,
u,
&di,
work.data(),
&lwork,
&info);
sgemm_("Transposed",
"Transposed",
&di,
&d2i,
&d2i,
&one,
u,
&di,
vt,
&d2i,
&zero,
rotation,
&di);
}
pq_regular.train_type = ProductQuantizer::Train_hot_start;
}
// remove the zero padding: keep only the first d_in columns of A
if (d > d_in) {
for (long i = 0; i < d_out; i++)
memmove(&A[i * d_in], &A[i * d], sizeof(A[0]) * d_in);
A.resize(d_in * d_out);
}
is_trained = true;
is_orthonormal = true;
}
/*********************************************
* NormalizationTransform
*********************************************/
NormalizationTransform::NormalizationTransform(int d, float norm)
: VectorTransform(d, d), norm(norm) {}
NormalizationTransform::NormalizationTransform()
: VectorTransform(-1, -1), norm(-1) {}
void NormalizationTransform::apply_noalloc(idx_t n, const float* x, float* xt)
const {
if (norm == 2.0) {
memcpy(xt, x, sizeof(x[0]) * n * d_in);
fvec_renorm_L2(d_in, n, xt);
} else {
FAISS_THROW_MSG("not implemented");
}
}
void NormalizationTransform::reverse_transform(
idx_t n,
const float* xt,
float* x) const {
memcpy(x, xt, sizeof(xt[0]) * n * d_in);
}
/*********************************************
* CenteringTransform
*********************************************/
CenteringTransform::CenteringTransform(int d) : VectorTransform(d, d) {
is_trained = false;
}
void CenteringTransform::train(Index::idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(n > 0, "need at least one training vector");
mean.resize(d_in, 0);
for (idx_t i = 0; i < n; i++) {
for (size_t j = 0; j < d_in; j++) {
mean[j] += *x++;
}
}
for (size_t j = 0; j < d_in; j++) {
mean[j] /= n;
}
is_trained = true;
}
void CenteringTransform::apply_noalloc(idx_t n, const float* x, float* xt)
const {
FAISS_THROW_IF_NOT(is_trained);
for (idx_t i = 0; i < n; i++) {
for (size_t j = 0; j < d_in; j++) {
*xt++ = *x++ - mean[j];
}
}
}
void CenteringTransform::reverse_transform(idx_t n, const float* xt, float* x)
const {
FAISS_THROW_IF_NOT(is_trained);
for (idx_t i = 0; i < n; i++) {
for (size_t j = 0; j < d_in; j++) {
*x++ = *xt++ + mean[j];
}
}
}
/*********************************************
* RemapDimensionsTransform
*********************************************/
RemapDimensionsTransform::RemapDimensionsTransform(
int d_in,
int d_out,
const int* map_in)
: VectorTransform(d_in, d_out) {
map.resize(d_out);
for (int i = 0; i < d_out; i++) {
map[i] = map_in[i];
FAISS_THROW_IF_NOT(map[i] == -1 || (map[i] >= 0 && map[i] < d_in));
}
}
RemapDimensionsTransform::RemapDimensionsTransform(
int d_in,
int d_out,
bool uniform)
: VectorTransform(d_in, d_out) {
map.resize(d_out, -1);
if (uniform) {
if (d_in < d_out) {
for (int i = 0; i < d_in; i++) {
map[i * d_out / d_in] = i;
}
} else {
for (int i = 0; i < d_out; i++) {
map[i] = i * d_in / d_out;
}
}
} else {
for (int i = 0; i < d_in && i < d_out; i++)
map[i] = i;
}
}
void RemapDimensionsTransform::apply_noalloc(idx_t n, const float* x, float* xt)
const {
for (idx_t i = 0; i < n; i++) {
for (int j = 0; j < d_out; j++) {
xt[j] = map[j] < 0 ? 0 : x[map[j]];
}
x += d_in;
xt += d_out;
}
}
void RemapDimensionsTransform::reverse_transform(
idx_t n,
const float* xt,
float* x) const {
memset(x, 0, sizeof(*x) * n * d_in);
for (idx_t i = 0; i < n; i++) {
for (int j = 0; j < d_out; j++) {
if (map[j] >= 0)
x[map[j]] = xt[j];
}
x += d_in;
xt += d_out;
}
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_VECTOR_TRANSFORM_H
#define FAISS_VECTOR_TRANSFORM_H
/** Defines a few objects that apply transformations to a set of
* vectors. Often these are pre-processing steps.
*/
#include <stdint.h>
#include <vector>
#include <faiss/Index.h>
namespace faiss {
/** Any transformation applied on a set of vectors */
struct VectorTransform {
typedef Index::idx_t idx_t;
int d_in; ///! input dimension
int d_out; ///! output dimension
explicit VectorTransform(int d_in = 0, int d_out = 0)
: d_in(d_in), d_out(d_out), is_trained(true) {}
/// set if the VectorTransform does not require training, or if
/// training is done already
bool is_trained;
/** Perform training on a representative set of vectors. Does
* nothing by default.
*
* @param n nb of training vectors
* @param x training vectors, size n * d
*/
virtual void train(idx_t n, const float* x);
/** apply the transformation and return the result in a newly allocated matrix
* @param x size n * d_in
* @return size n * d_out
*/
float* apply(idx_t n, const float* x) const;
/// same as apply, but result is pre-allocated
virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
/// reverse transformation. May not be implemented or may return
/// approximate result
virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
virtual ~VectorTransform() {}
};
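/* Usage sketch of the calling convention, for any trained transform vt
 * (vt, x, x_approx and the sizes are illustrative assumptions):
 *
 *   float* xt = vt.apply(n, x);            // new[]-allocated, size n * d_out
 *   // ... use xt, e.g. feed it to an index of dimension d_out ...
 *   vt.reverse_transform(n, xt, x_approx); // only if the transform supports it
 *   delete[] xt;
 */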
/** Generic linear transformation, with bias term applied on output
* y = A * x + b
*/
struct LinearTransform : VectorTransform {
bool have_bias; ///! whether to use the bias term
/// check if matrix A is orthonormal (enables reverse_transform)
bool is_orthonormal;
/// Transformation matrix, size d_out * d_in
std::vector<float> A;
/// bias vector, size d_out
std::vector<float> b;
/// both d_in > d_out and d_out < d_in are supported
explicit LinearTransform(
int d_in = 0,
int d_out = 0,
bool have_bias = false);
/// same as apply, but result is pre-allocated
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// compute x = A^T * (x - b)
/// this is the reverse transform if A has orthonormal rows
void transform_transpose(idx_t n, const float* y, float* x) const;
/// works only if is_orthonormal
void reverse_transform(idx_t n, const float* xt, float* x) const override;
/// compute A^T * A to set the is_orthonormal flag
void set_is_orthonormal();
bool verbose;
void print_if_verbose(
const char* name,
const std::vector<double>& mat,
int n,
int d) const;
~LinearTransform() override {}
};
/// Randomly rotate a set of vectors
struct RandomRotationMatrix : LinearTransform {
/// both d_in > d_out and d_out < d_in are supported
RandomRotationMatrix(int d_in, int d_out)
: LinearTransform(d_in, d_out, false) {}
/// must be called before the transform is used
void init(int seed);
// initializes with an arbitrary seed
void train(idx_t n, const float* x) override;
RandomRotationMatrix() {}
};
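/* Usage sketch: apply a reproducible random rotation (here also reducing
 * the dimension). The sizes and array names are illustrative assumptions.
 *
 *   faiss::RandomRotationMatrix rot(256, 64);
 *   rot.init(1234);                  // fixed seed, no training data needed
 *   std::vector<float> xt(n * 64);
 *   rot.apply_noalloc(n, x, xt.data());
 */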
/** Applies a principal component analysis on a set of vectors,
 * with optional whitening and random rotation. */
struct PCAMatrix : LinearTransform {
/** after transformation the components are multiplied by
* eigenvalues^eigen_power
*
* =0: no whitening
* =-0.5: full whitening
*/
float eigen_power;
/// value added to eigenvalues to avoid division by 0 when whitening
float epsilon;
/// random rotation after PCA
bool random_rotation;
/// ratio between # training vectors and dimension
size_t max_points_per_d;
/// try to distribute output eigenvectors in this many bins
int balanced_bins;
/// Mean, size d_in
std::vector<float> mean;
/// eigenvalues of covariance matrix (= squared singular values)
std::vector<float> eigenvalues;
/// PCA matrix, size d_in * d_in
std::vector<float> PCAMat;
// the final matrix is computed after random rotation and/or whitening
explicit PCAMatrix(
int d_in = 0,
int d_out = 0,
float eigen_power = 0,
bool random_rotation = false);
/// train on n vectors. If n < d_in then the eigenvector matrix
/// will be completed with 0s
void train(idx_t n, const float* x) override;
/// copy pre-trained PCA matrix
void copy_from(const PCAMatrix& other);
/// called after mean, PCAMat and eigenvalues are computed
void prepare_Ab();
};
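/* Usage sketch: PCA with full whitening (illustration only; `x_train` and `x`
 * are hypothetical float buffers of the stated sizes):
 *
 *   faiss::PCAMatrix pca(256, 64);        // d_in = 256, d_out = 64
 *   pca.eigen_power = -0.5;               // full whitening
 *   pca.train(nt, x_train);               // nt vectors, size nt * 256
 *   std::vector<float> out(n * 64);
 *   pca.apply_noalloc(n, x, out.data());  // write into a pre-allocated buffer
 */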
/** ITQ implementation from
*
* Iterative quantization: A procrustean approach to learning binary codes
* for large-scale image retrieval,
*
* Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
* PAMI'12.
*/
struct ITQMatrix : LinearTransform {
int max_iter;
int seed;
// force initialization of the rotation (for debugging)
std::vector<double> init_rotation;
explicit ITQMatrix(int d = 0);
void train(idx_t n, const float* x) override;
};
/** The full ITQ transform, including normalizations and PCA transformation
*/
struct ITQTransform : VectorTransform {
std::vector<float> mean;
bool do_pca;
ITQMatrix itq;
/// max training points per dimension
int max_train_per_dim;
// concatenation of PCA + ITQ transformation
LinearTransform pca_then_itq;
explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
void train(idx_t n, const float* x) override;
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
};
struct ProductQuantizer;
/** Applies a rotation to align the dimensions with a PQ to minimize
* the reconstruction error. Can be used before an IndexPQ or an
* IndexIVFPQ. The method is the non-parametric version described in:
*
* "Optimized Product Quantization for Approximate Nearest Neighbor Search"
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
*
*/
struct OPQMatrix : LinearTransform {
int M; ///< nb of subquantizers
int niter; ///< Number of outer training iterations
int niter_pq; ///< Number of training iterations for the PQ
int niter_pq_0; ///< same, for the first outer iteration
/// if there are too many training points, resample
size_t max_train_points;
bool verbose;
/// if non-NULL, use this product quantizer for training
/// should be constructed with (d_out, M, _)
ProductQuantizer* pq;
/// if d2 != -1, output vectors of this dimension
explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
void train(idx_t n, const float* x) override;
};
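/* Usage sketch: OPQ rotation in front of a PQ index via IndexPreTransform
 * (illustration only; assumes faiss/IndexPQ.h and faiss/IndexPreTransform.h
 * are included and that `x_train` / `x` are hypothetical training/database
 * buffers of dimension 128):
 *
 *   auto* opq = new faiss::OPQMatrix(128, 16);   // d = 128, M = 16 sub-vectors
 *   auto* pq  = new faiss::IndexPQ(128, 16, 8);  // 16 sub-quantizers, 8 bits each
 *   faiss::IndexPreTransform index(opq, pq);
 *   index.own_fields = true;                     // index deletes opq and pq
 *   index.train(nt, x_train);                    // trains OPQ, then PQ on rotated data
 *   index.add(n, x);
 */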
/** Remap dimensions for input vectors, possibly inserting 0s.
 * Strictly speaking this is also a linear transform, but we don't want
 * to compute it with matrix multiplies. */
struct RemapDimensionsTransform : VectorTransform {
/// map from output dimension to input, size d_out
/// -1 -> set output to 0
std::vector<int> map;
RemapDimensionsTransform(int d_in, int d_out, const int* map);
/// remap input to output, skipping or inserting dimensions as needed
/// if uniform: distribute dimensions uniformly
/// otherwise just take the first d_out ones.
RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// reverse transform correct only when the mapping is a permutation
void reverse_transform(idx_t n, const float* xt, float* x) const override;
RemapDimensionsTransform() {}
};
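/* Usage sketch: pad 3-dimensional vectors to 4 dimensions, filling the extra
 * component with 0 (illustration only; `x` is a hypothetical buffer of size n * 3):
 *
 *   const int map[4] = {0, 1, 2, -1};            // -1 -> output component set to 0
 *   faiss::RemapDimensionsTransform remap(3, 4, map);
 *   std::vector<float> out(n * 4);
 *   remap.apply_noalloc(n, x, out.data());
 */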
/** per-vector normalization */
struct NormalizationTransform : VectorTransform {
float norm;
explicit NormalizationTransform(int d, float norm = 2.0);
NormalizationTransform();
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// Identity transform, since the normalization is not invertible
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
/** Subtract the mean of each component from the vectors. */
struct CenteringTransform : VectorTransform {
/// Mean, size d_in = d_out
std::vector<float> mean;
explicit CenteringTransform(int d = 0);
/// train on n vectors.
void train(idx_t n, const float* x) override;
/// subtract the mean
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// add the mean
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
} // namespace faiss
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/clone_index.h>
#include <cstdio>
#include <cstdlib>
#include <faiss/impl/FaissAssert.h>
#include <faiss/Index2Layer.h>
#include <faiss/IndexAdditiveQuantizer.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexIVFPQR.h>
#include <faiss/IndexIVFSpectralHash.h>
#include <faiss/IndexLSH.h>
#include <faiss/IndexLattice.h>
#include <faiss/IndexNSG.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/MetaIndexes.h>
#include <faiss/VectorTransform.h>
namespace faiss {
/*************************************************************
* cloning functions
**************************************************************/
Index* clone_index(const Index* index) {
Cloner cl;
return cl.clone_Index(index);
}
// assumes there is a copy constructor ready. Always try from most
// specific to most general. Most indexes don't have complicated
// structs, the default copy constructor often just works.
#define TRYCLONE(classname, obj) \
if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
return new classname(*clo); \
} else
VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
TRYCLONE(RemapDimensionsTransform, vt)
TRYCLONE(OPQMatrix, vt)
TRYCLONE(PCAMatrix, vt)
TRYCLONE(ITQMatrix, vt)
TRYCLONE(RandomRotationMatrix, vt)
TRYCLONE(LinearTransform, vt) {
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
}
return nullptr;
}
IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
TRYCLONE(IndexIVFPQR, ivf)
TRYCLONE(IndexIVFPQ, ivf)
TRYCLONE(IndexIVFFlat, ivf)
TRYCLONE(IndexIVFScalarQuantizer, ivf) {
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
}
return nullptr;
}
Index* Cloner::clone_Index(const Index* index) {
TRYCLONE(IndexPQ, index)
TRYCLONE(IndexLSH, index)
TRYCLONE(IndexFlatL2, index)
TRYCLONE(IndexFlatIP, index)
TRYCLONE(IndexFlat, index)
TRYCLONE(IndexLattice, index)
TRYCLONE(IndexResidualQuantizer, index)
TRYCLONE(IndexScalarQuantizer, index)
TRYCLONE(MultiIndexQuantizer, index)
TRYCLONE(ResidualCoarseQuantizer, index)
if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
IndexIVF* res = clone_IndexIVF(ivf);
if (ivf->invlists == nullptr) {
res->invlists = nullptr;
} else if (
auto* ails = dynamic_cast<const ArrayInvertedLists*>(
ivf->invlists)) {
res->invlists = new ArrayInvertedLists(*ails);
res->own_invlists = true;
} else {
FAISS_THROW_MSG(
"clone not supported for this type of inverted lists");
}
res->own_fields = true;
res->quantizer = clone_Index(ivf->quantizer);
return res;
} else if (
const IndexPreTransform* ipt =
dynamic_cast<const IndexPreTransform*>(index)) {
IndexPreTransform* res = new IndexPreTransform();
res->d = ipt->d;
res->ntotal = ipt->ntotal;
res->is_trained = ipt->is_trained;
res->metric_type = ipt->metric_type;
res->metric_arg = ipt->metric_arg;
res->index = clone_Index(ipt->index);
for (int i = 0; i < ipt->chain.size(); i++)
res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
res->own_fields = true;
return res;
} else if (
const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
IndexIDMap* res = new IndexIDMap(*idmap);
res->own_fields = true;
res->index = clone_Index(idmap->index);
return res;
} else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
IndexHNSW* res = new IndexHNSW(*ihnsw);
res->own_fields = true;
res->storage = clone_Index(ihnsw->storage);
return res;
} else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
IndexNSG* res = new IndexNSG(*insg);
// copy the dynamically allocated graph
auto& new_graph = res->nsg.final_graph;
auto& old_graph = insg->nsg.final_graph;
new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
res->own_fields = true;
res->storage = clone_Index(insg->storage);
return res;
} else if (
const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
Index2Layer* res = new Index2Layer(*i2l);
res->q1.own_fields = true;
res->q1.quantizer = clone_Index(i2l->q1.quantizer);
return res;
} else {
FAISS_THROW_MSG("clone not supported for this type of Index");
}
return nullptr;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
// Cloning code for indexes
#pragma once
namespace faiss {
struct Index;
struct IndexIVF;
struct VectorTransform;
/* cloning functions */
Index* clone_index(const Index*);
/** Cloner class, useful to override classes with other cloning
* functions. The cloning function above just calls
* Cloner::clone_Index. */
struct Cloner {
virtual VectorTransform* clone_VectorTransform(const VectorTransform*);
virtual Index* clone_Index(const Index*);
virtual IndexIVF* clone_IndexIVF(const IndexIVF*);
virtual ~Cloner() {}
};
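/* Usage sketch (illustration only; `index` is any existing faiss::Index*
 * supported by Cloner, e.g. an IndexFlat or IndexIVFPQ):
 *
 *   faiss::Index* copy = faiss::clone_index(index);   // deep copy
 *   // ... the copy can be modified independently of the original ...
 *   delete copy;
 */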
} // namespace faiss
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set(FAISS_GPU_SRC
GpuAutoTune.cpp
GpuCloner.cpp
GpuClonerOptions.cpp
# HC
#GpuDistance.cu
GpuIcmEncoder.cu
GpuIndex.cu
GpuIndexBinaryFlat.cu
GpuIndexFlat.cu
GpuIndexIVF.cu
GpuIndexIVFFlat.cu
GpuIndexIVFPQ.cu
# HC
#GpuIndexIVFScalarQuantizer.cu
GpuResources.cpp
StandardGpuResources.cpp
# HC
#impl/BinaryDistance.cu
impl/BinaryFlatIndex.cu
impl/BroadcastSum.cu
impl/Distance.cu
# HC
#impl/FlatIndex.cu
impl/IVFAppend.cu
impl/IVFBase.cu
impl/IVFFlat.cu
impl/IVFFlatScan.cu
# HC
#impl/IVFInterleaved.cu
#impl/IVFPQ.cu
impl/IVFUtils.cu
#impl/IVFUtilsSelect1.cu
#impl/IVFUtilsSelect2.cu
impl/InterleavedCodes.cpp
# HC
#impl/L2Norm.cu
#impl/L2Select.cu
#impl/PQScanMultiPassPrecomputed.cu
impl/RemapIndices.cpp
impl/VectorResidual.cu
# HC
#impl/scan/IVFInterleaved1.cu
#impl/scan/IVFInterleaved32.cu
#impl/scan/IVFInterleaved64.cu
#impl/scan/IVFInterleaved128.cu
#impl/scan/IVFInterleaved256.cu
#impl/scan/IVFInterleaved512.cu
#impl/scan/IVFInterleaved1024.cu
#impl/scan/IVFInterleaved2048.cu
#impl/IcmEncoder.cu
#utils/BlockSelectFloat.cu
utils/DeviceUtils.cu
utils/StackDeviceMemory.cpp
utils/Timer.cpp
utils/WarpSelectFloat.cu
# HC
#utils/blockselect/BlockSelectFloat1.cu
#utils/blockselect/BlockSelectFloat32.cu
#utils/blockselect/BlockSelectFloat64.cu
#utils/blockselect/BlockSelectFloat128.cu
#utils/blockselect/BlockSelectFloat256.cu
#utils/blockselect/BlockSelectFloatF512.cu
#utils/blockselect/BlockSelectFloatF1024.cu
#utils/blockselect/BlockSelectFloatF2048.cu
#utils/blockselect/BlockSelectFloatT512.cu
#utils/blockselect/BlockSelectFloatT1024.cu
#utils/blockselect/BlockSelectFloatT2048.cu
#utils/warpselect/WarpSelectFloat1.cu
#utils/warpselect/WarpSelectFloat32.cu
#utils/warpselect/WarpSelectFloat64.cu
#utils/warpselect/WarpSelectFloat128.cu
#utils/warpselect/WarpSelectFloat256.cu
#utils/warpselect/WarpSelectFloatF512.cu
#utils/warpselect/WarpSelectFloatF1024.cu
#utils/warpselect/WarpSelectFloatF2048.cu
#utils/warpselect/WarpSelectFloatT512.cu
#utils/warpselect/WarpSelectFloatT1024.cu
#utils/warpselect/WarpSelectFloatT2048.cu
)
set(FAISS_GPU_HEADERS
GpuAutoTune.h
GpuCloner.h
GpuClonerOptions.h
GpuDistance.h
GpuIcmEncoder.h
GpuFaissAssert.h
GpuIndex.h
GpuIndexBinaryFlat.h
GpuIndexFlat.h
GpuIndexIVF.h
GpuIndexIVFFlat.h
GpuIndexIVFPQ.h
GpuIndexIVFScalarQuantizer.h
GpuIndicesOptions.h
GpuResources.h
StandardGpuResources.h
impl/BinaryDistance.cuh
impl/BinaryFlatIndex.cuh
impl/BroadcastSum.cuh
impl/Distance.cuh
impl/DistanceUtils.cuh
impl/FlatIndex.cuh
impl/GeneralDistance.cuh
impl/GpuScalarQuantizer.cuh
impl/IVFAppend.cuh
impl/IVFBase.cuh
impl/IVFFlat.cuh
impl/IVFFlatScan.cuh
impl/IVFInterleaved.cuh
impl/IVFPQ.cuh
impl/IVFUtils.cuh
impl/InterleavedCodes.h
impl/L2Norm.cuh
impl/L2Select.cuh
impl/PQCodeDistances-inl.cuh
impl/PQCodeDistances.cuh
impl/PQCodeLoad.cuh
impl/PQScanMultiPassNoPrecomputed-inl.cuh
impl/PQScanMultiPassNoPrecomputed.cuh
impl/PQScanMultiPassPrecomputed.cuh
impl/RemapIndices.h
impl/VectorResidual.cuh
impl/scan/IVFInterleavedImpl.cuh
impl/IcmEncoder.cuh
utils/BlockSelectKernel.cuh
utils/Comparators.cuh
utils/ConversionOperators.cuh
utils/CopyUtils.cuh
utils/DeviceDefs.cuh
utils/DeviceTensor-inl.cuh
utils/DeviceTensor.cuh
utils/DeviceUtils.h
utils/DeviceVector.cuh
utils/Float16.cuh
utils/HostTensor-inl.cuh
utils/HostTensor.cuh
utils/Limits.cuh
utils/LoadStoreOperators.cuh
utils/MathOperators.cuh
utils/MatrixMult-inl.cuh
utils/MatrixMult.cuh
utils/MergeNetworkBlock.cuh
utils/MergeNetworkUtils.cuh
utils/MergeNetworkWarp.cuh
utils/NoTypeTensor.cuh
utils/Pair.cuh
utils/PtxUtils.cuh
utils/ReductionOperators.cuh
utils/Reductions.cuh
utils/Select.cuh
utils/StackDeviceMemory.h
utils/StaticUtils.h
utils/Tensor-inl.cuh
utils/Tensor.cuh
utils/ThrustAllocator.cuh
utils/Timer.h
utils/Transpose.cuh
utils/WarpPackedBits.cuh
utils/WarpSelectKernel.cuh
utils/WarpShuffles.cuh
utils/blockselect/BlockSelectImpl.cuh
utils/warpselect/WarpSelectImpl.cuh
)
# Export FAISS_GPU_HEADERS variable to parent scope.
set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE)
target_sources(faiss PRIVATE ${FAISS_GPU_SRC})
target_sources(faiss_avx2 PRIVATE ${FAISS_GPU_SRC})
foreach(header ${FAISS_GPU_HEADERS})
get_filename_component(dir ${header} DIRECTORY )
install(FILES ${header}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/faiss/gpu/${dir}
)
endforeach()
# HC
#find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss)
target_link_libraries(faiss_avx2)
#target_link_libraries(faiss_avx2 PRIVATE hipblas)
#target_compile_options(faiss PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
#target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/gpu/GpuAutoTune.h>
#include <typeinfo>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexReplicas.h>
#include <faiss/IndexShards.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
namespace gpu {
using namespace ::faiss;
/**********************************************************
 * Parameters to auto-tune on GpuIndexes
**********************************************************/
#define DC(classname) auto ix = dynamic_cast<const classname*>(index)
void GpuParameterSpace::initialize(const Index* index) {
if (DC(IndexPreTransform)) {
index = ix->index;
}
if (DC(IndexReplicas)) {
if (ix->count() == 0)
return;
index = ix->at(0);
}
if (DC(IndexShards)) {
if (ix->count() == 0)
return;
index = ix->at(0);
}
if (DC(GpuIndexIVF)) {
ParameterRange& pr = add_range("nprobe");
for (int i = 0; i < 12; i++) {
size_t nprobe = 1 << i;
if (nprobe >= ix->getNumLists() || nprobe > getMaxKSelection())
break;
pr.values.push_back(nprobe);
}
}
// not sure we should call the parent initializer
}
#undef DC
// non-const version
#define DC(classname) auto* ix = dynamic_cast<classname*>(index)
void GpuParameterSpace::set_index_parameter(
Index* index,
const std::string& name,
double val) const {
if (DC(IndexReplicas)) {
for (int i = 0; i < ix->count(); i++)
set_index_parameter(ix->at(i), name, val);
return;
}
if (name == "nprobe") {
if (DC(GpuIndexIVF)) {
ix->setNumProbes(int(val));
return;
}
}
if (name == "use_precomputed_table") {
if (DC(GpuIndexIVFPQ)) {
ix->setPrecomputedCodes(bool(val));
return;
}
}
// maybe normal index parameters apply?
ParameterSpace::set_index_parameter(index, name, val);
}
} // namespace gpu
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/AutoTune.h>
#include <faiss/Index.h>
namespace faiss {
namespace gpu {
/// parameter space and setters for GPU indexes
struct GpuParameterSpace : faiss::ParameterSpace {
/// initialize with reasonable parameters for the index
void initialize(const faiss::Index* index) override;
/// set a combination of parameters on an index
void set_index_parameter(
faiss::Index* index,
const std::string& name,
double val) const override;
};
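/* Usage sketch (illustration only; `gpu_index` is a hypothetical, already
 * built faiss::gpu::GpuIndexIVF*):
 *
 *   faiss::gpu::GpuParameterSpace params;
 *   params.initialize(gpu_index);                        // discovers e.g. nprobe values
 *   params.set_index_parameter(gpu_index, "nprobe", 32); // tune at run time
 */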
} // namespace gpu
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/gpu/GpuCloner.h>
#include <faiss/impl/FaissAssert.h>
#include <typeinfo>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexReplicas.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/MetaIndexes.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/index_io.h>
namespace faiss {
namespace gpu {
/**********************************************************
* Cloning to CPU
**********************************************************/
void ToCPUCloner::merge_index(Index* dst, Index* src, bool successive_ids) {
if (auto ifl = dynamic_cast<IndexFlat*>(dst)) {
auto ifl2 = dynamic_cast<const IndexFlat*>(src);
FAISS_ASSERT(ifl2);
FAISS_ASSERT(successive_ids);
ifl->add(ifl2->ntotal, ifl2->get_xb());
} else if (auto ifl = dynamic_cast<IndexIVFFlat*>(dst)) {
auto ifl2 = dynamic_cast<IndexIVFFlat*>(src);
FAISS_ASSERT(ifl2);
ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
} else if (auto ifl = dynamic_cast<IndexIVFScalarQuantizer*>(dst)) {
auto ifl2 = dynamic_cast<IndexIVFScalarQuantizer*>(src);
FAISS_ASSERT(ifl2);
ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
} else if (auto ifl = dynamic_cast<IndexIVFPQ*>(dst)) {
auto ifl2 = dynamic_cast<IndexIVFPQ*>(src);
FAISS_ASSERT(ifl2);
ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0);
} else {
FAISS_ASSERT(!"merging not implemented for this type of class");
}
}
Index* ToCPUCloner::clone_Index(const Index* index) {
if (auto ifl = dynamic_cast<const GpuIndexFlat*>(index)) {
IndexFlat* res = new IndexFlat();
ifl->copyTo(res);
return res;
} else if (auto ifl = dynamic_cast<const GpuIndexIVFFlat*>(index)) {
IndexIVFFlat* res = new IndexIVFFlat();
ifl->copyTo(res);
return res;
} else if (
auto ifl = dynamic_cast<const GpuIndexIVFScalarQuantizer*>(index)) {
IndexIVFScalarQuantizer* res = new IndexIVFScalarQuantizer();
ifl->copyTo(res);
return res;
} else if (auto ipq = dynamic_cast<const GpuIndexIVFPQ*>(index)) {
IndexIVFPQ* res = new IndexIVFPQ();
ipq->copyTo(res);
return res;
// for IndexShards and IndexReplicas we assume that the
// objective is to make a single component out of them
// (inverse op of ToGpuClonerMultiple)
} else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
int nshard = ish->count();
FAISS_ASSERT(nshard > 0);
Index* res = clone_Index(ish->at(0));
for (int i = 1; i < ish->count(); i++) {
Index* res_i = clone_Index(ish->at(i));
merge_index(res, res_i, ish->successive_ids);
delete res_i;
}
return res;
} else if (auto ipr = dynamic_cast<const IndexReplicas*>(index)) {
// just clone one of the replicas
FAISS_ASSERT(ipr->count() > 0);
return clone_Index(ipr->at(0));
} else {
return Cloner::clone_Index(index);
}
}
faiss::Index* index_gpu_to_cpu(const faiss::Index* gpu_index) {
ToCPUCloner cl;
return cl.clone_Index(gpu_index);
}
/**********************************************************
* Cloning to 1 GPU
**********************************************************/
ToGpuCloner::ToGpuCloner(
GpuResourcesProvider* prov,
int device,
const GpuClonerOptions& options)
: GpuClonerOptions(options), provider(prov), device(device) {}
Index* ToGpuCloner::clone_Index(const Index* index) {
using idx_t = Index::idx_t;
if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.storeTransposed = storeTransposed;
return new GpuIndexFlat(provider, ifl, config);
} else if (
dynamic_cast<const IndexScalarQuantizer*>(index) &&
static_cast<const IndexScalarQuantizer*>(index)->sq.qtype ==
ScalarQuantizer::QT_fp16) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = true;
GpuIndexFlat* gif = new GpuIndexFlat(
provider, index->d, index->metric_type, config);
// transfer data by blocks
idx_t bs = 1024 * 1024;
for (idx_t i0 = 0; i0 < index->ntotal; i0 += bs) {
idx_t i1 = std::min(i0 + bs, index->ntotal);
std::vector<float> buffer((i1 - i0) * index->d);
index->reconstruct_n(i0, i1 - i0, buffer.data());
gif->add(i1 - i0, buffer.data());
}
assert(gif->getNumVecs() == index->ntotal);
return gif;
} else if (auto ifl = dynamic_cast<const faiss::IndexIVFFlat*>(index)) {
GpuIndexIVFFlatConfig config;
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
config.flatConfig.storeTransposed = storeTransposed;
GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
if (reserveVecs > 0 && ifl->ntotal == 0) {
res->reserveMemory(reserveVecs);
}
res->copyFrom(ifl);
return res;
} else if (
auto ifl = dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(
index)) {
GpuIndexIVFScalarQuantizerConfig config;
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
config.flatConfig.storeTransposed = storeTransposed;
GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
provider,
ifl->d,
ifl->nlist,
ifl->sq.qtype,
ifl->metric_type,
ifl->by_residual,
config);
if (reserveVecs > 0 && ifl->ntotal == 0) {
res->reserveMemory(reserveVecs);
}
res->copyFrom(ifl);
return res;
} else if (auto ipq = dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
if (verbose) {
printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ "
"indicesOptions=%d "
"usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n",
ipq->ntotal,
indicesOptions,
usePrecomputed,
useFloat16,
reserveVecs);
}
GpuIndexIVFPQConfig config;
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
config.flatConfig.storeTransposed = storeTransposed;
config.useFloat16LookupTables = useFloat16;
config.usePrecomputedTables = usePrecomputed;
GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
if (reserveVecs > 0 && ipq->ntotal == 0) {
res->reserveMemory(reserveVecs);
}
return res;
} else {
// default: use CPU cloner
return Cloner::clone_Index(index);
}
}
faiss::Index* index_cpu_to_gpu(
GpuResourcesProvider* provider,
int device,
const faiss::Index* index,
const GpuClonerOptions* options) {
GpuClonerOptions defaults;
ToGpuCloner cl(provider, device, options ? *options : defaults);
return cl.clone_Index(index);
}
/**********************************************************
* Cloning to multiple GPUs
**********************************************************/
ToGpuClonerMultiple::ToGpuClonerMultiple(
std::vector<GpuResourcesProvider*>& provider,
std::vector<int>& devices,
const GpuMultipleClonerOptions& options)
: GpuMultipleClonerOptions(options) {
FAISS_ASSERT(provider.size() == devices.size());
for (int i = 0; i < provider.size(); i++) {
sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
}
}
ToGpuClonerMultiple::ToGpuClonerMultiple(
const std::vector<ToGpuCloner>& sub_cloners,
const GpuMultipleClonerOptions& options)
: GpuMultipleClonerOptions(options), sub_cloners(sub_cloners) {}
void ToGpuClonerMultiple::copy_ivf_shard(
const IndexIVF* index_ivf,
IndexIVF* idx2,
long n,
long i) {
if (shard_type == 2) {
long i0 = i * index_ivf->ntotal / n;
long i1 = (i + 1) * index_ivf->ntotal / n;
if (verbose)
printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
index_ivf->copy_subset_to(*idx2, 2, i0, i1);
FAISS_ASSERT(idx2->ntotal == i1 - i0);
} else if (shard_type == 1) {
if (verbose)
printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
index_ivf->copy_subset_to(*idx2, 1, n, i);
} else {
FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
}
}
Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
long n = sub_cloners.size();
auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
auto index_ivfsq =
dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index);
auto index_flat = dynamic_cast<const faiss::IndexFlat*>(index);
FAISS_THROW_IF_NOT_MSG(
index_ivfpq || index_ivfflat || index_flat || index_ivfsq,
"IndexShards implemented only for "
"IndexIVFFlat, IndexIVFScalarQuantizer, "
"IndexFlat and IndexIVFPQ");
std::vector<faiss::Index*> shards(n);
for (long i = 0; i < n; i++) {
// make a shallow copy
if (reserveVecs)
sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
if (index_ivfpq) {
faiss::IndexIVFPQ idx2(
index_ivfpq->quantizer,
index_ivfpq->d,
index_ivfpq->nlist,
index_ivfpq->code_size,
index_ivfpq->pq.nbits);
idx2.metric_type = index_ivfpq->metric_type;
idx2.pq = index_ivfpq->pq;
idx2.nprobe = index_ivfpq->nprobe;
idx2.use_precomputed_table = 0;
idx2.is_trained = index->is_trained;
copy_ivf_shard(index_ivfpq, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_ivfflat) {
faiss::IndexIVFFlat idx2(
index_ivfflat->quantizer,
index->d,
index_ivfflat->nlist,
index_ivfflat->metric_type);
idx2.nprobe = index_ivfflat->nprobe;
idx2.is_trained = index->is_trained;
copy_ivf_shard(index_ivfflat, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_ivfsq) {
faiss::IndexIVFScalarQuantizer idx2(
index_ivfsq->quantizer,
index->d,
index_ivfsq->nlist,
index_ivfsq->sq.qtype,
index_ivfsq->metric_type,
index_ivfsq->by_residual);
idx2.nprobe = index_ivfsq->nprobe;
idx2.is_trained = index->is_trained;
idx2.sq = index_ivfsq->sq;
copy_ivf_shard(index_ivfsq, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_flat) {
faiss::IndexFlat idx2(index->d, index->metric_type);
shards[i] = sub_cloners[i].clone_Index(&idx2);
if (index->ntotal > 0) {
long i0 = index->ntotal * i / n;
long i1 = index->ntotal * (i + 1) / n;
shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
}
}
}
bool successive_ids = index_flat != nullptr;
faiss::IndexShards* res =
new faiss::IndexShards(index->d, true, successive_ids);
for (int i = 0; i < n; i++) {
res->add_shard(shards[i]);
}
res->own_fields = true;
FAISS_ASSERT(index->ntotal == res->ntotal);
return res;
}
Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
long n = sub_cloners.size();
if (n == 1)
return sub_cloners[0].clone_Index(index);
if (dynamic_cast<const IndexFlat*>(index) ||
dynamic_cast<const faiss::IndexIVFFlat*>(index) ||
dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index) ||
dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
if (!shard) {
IndexReplicas* res = new IndexReplicas();
for (auto& sub_cloner : sub_cloners) {
res->addIndex(sub_cloner.clone_Index(index));
}
res->own_fields = true;
return res;
} else {
return clone_Index_to_shards(index);
}
} else if (auto miq = dynamic_cast<const MultiIndexQuantizer*>(index)) {
if (verbose) {
printf("cloning MultiIndexQuantizer: "
"will be valid only for search k=1\n");
}
const ProductQuantizer& pq = miq->pq;
IndexSplitVectors* splitv = new IndexSplitVectors(pq.d, true);
splitv->own_fields = true;
for (int m = 0; m < pq.M; m++) {
// which GPU(s) will be assigned to this sub-quantizer
long i0 = m * n / pq.M;
long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
std::vector<ToGpuCloner> sub_cloners_2;
sub_cloners_2.insert(
sub_cloners_2.begin(),
sub_cloners.begin() + i0,
sub_cloners.begin() + i1);
ToGpuClonerMultiple cm(sub_cloners_2, *this);
IndexFlatL2 idxc(pq.dsub);
idxc.add(pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub);
Index* idx2 = cm.clone_Index(&idxc);
splitv->add_sub_index(idx2);
}
return splitv;
} else {
return Cloner::clone_Index(index);
}
}
faiss::Index* index_cpu_to_gpu_multiple(
std::vector<GpuResourcesProvider*>& provider,
std::vector<int>& devices,
const faiss::Index* index,
const GpuMultipleClonerOptions* options) {
GpuMultipleClonerOptions defaults;
ToGpuClonerMultiple cl(provider, devices, options ? *options : defaults);
return cl.clone_Index(index);
}
GpuProgressiveDimIndexFactory::GpuProgressiveDimIndexFactory(int ngpu) {
FAISS_THROW_IF_NOT(ngpu >= 1);
devices.resize(ngpu);
vres.resize(ngpu);
for (int i = 0; i < ngpu; i++) {
vres[i] = new StandardGpuResources();
devices[i] = i;
}
ncall = 0;
}
GpuProgressiveDimIndexFactory::~GpuProgressiveDimIndexFactory() {
for (int i = 0; i < vres.size(); i++) {
delete vres[i];
}
}
Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
IndexFlatL2 index(dim);
ncall++;
return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
}
} // namespace gpu
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <vector>
#include <faiss/Clustering.h>
#include <faiss/Index.h>
#include <faiss/clone_index.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndicesOptions.h>
namespace faiss {
namespace gpu {
class GpuResourcesProvider;
/// Cloner specialized for GPU -> CPU
struct ToCPUCloner : faiss::Cloner {
void merge_index(Index* dst, Index* src, bool successive_ids);
Index* clone_Index(const Index* index) override;
};
/// Cloner specialized for CPU -> 1 GPU
struct ToGpuCloner : faiss::Cloner, GpuClonerOptions {
GpuResourcesProvider* provider;
int device;
ToGpuCloner(
GpuResourcesProvider* prov,
int device,
const GpuClonerOptions& options);
Index* clone_Index(const Index* index) override;
};
/// Cloner specialized for CPU -> multiple GPUs
struct ToGpuClonerMultiple : faiss::Cloner, GpuMultipleClonerOptions {
std::vector<ToGpuCloner> sub_cloners;
ToGpuClonerMultiple(
std::vector<GpuResourcesProvider*>& provider,
std::vector<int>& devices,
const GpuMultipleClonerOptions& options);
ToGpuClonerMultiple(
const std::vector<ToGpuCloner>& sub_cloners,
const GpuMultipleClonerOptions& options);
void copy_ivf_shard(
const IndexIVF* index_ivf,
IndexIVF* idx2,
long n,
long i);
Index* clone_Index_to_shards(const Index* index);
/// main function
Index* clone_Index(const Index* index) override;
};
/// converts any GPU index inside gpu_index to a CPU index
faiss::Index* index_gpu_to_cpu(const faiss::Index* gpu_index);
/// converts any CPU index that can be converted to GPU
faiss::Index* index_cpu_to_gpu(
GpuResourcesProvider* provider,
int device,
const faiss::Index* index,
const GpuClonerOptions* options = nullptr);
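/* Usage sketch for the single-GPU round trip (illustration only; `cpu_index`
 * is a hypothetical trained faiss::Index* and error handling is omitted):
 *
 *   faiss::gpu::StandardGpuResources res;     // from faiss/gpu/StandardGpuResources.h
 *   faiss::gpu::GpuClonerOptions opts;
 *   opts.useFloat16 = true;                   // optional
 *   faiss::Index* gpu_index =
 *           faiss::gpu::index_cpu_to_gpu(&res, 0, cpu_index, &opts);
 *   // ... search on the GPU ...
 *   faiss::Index* back_on_cpu = faiss::gpu::index_gpu_to_cpu(gpu_index);
 */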
faiss::Index* index_cpu_to_gpu_multiple(
std::vector<GpuResourcesProvider*>& provider,
std::vector<int>& devices,
const faiss::Index* index,
const GpuMultipleClonerOptions* options = nullptr);
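/* Usage sketch: shard a CPU index across two GPUs (illustration only; `res0`,
 * `res1` are hypothetical StandardGpuResources instances and `cpu_index` a
 * trained faiss::Index*):
 *
 *   std::vector<faiss::gpu::GpuResourcesProvider*> res = {&res0, &res1};
 *   std::vector<int> devices = {0, 1};
 *   faiss::gpu::GpuMultipleClonerOptions opts;
 *   opts.shard = true;                        // split the vectors across the GPUs
 *   faiss::Index* multi =
 *           faiss::gpu::index_cpu_to_gpu_multiple(res, devices, cpu_index, &opts);
 */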
/// index factory for the ProgressiveDimClustering object
struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
GpuMultipleClonerOptions options;
std::vector<GpuResourcesProvider*> vres;
std::vector<int> devices;
int ncall;
explicit GpuProgressiveDimIndexFactory(int ngpu);
Index* operator()(int dim) override;
virtual ~GpuProgressiveDimIndexFactory() override;
};
} // namespace gpu
} // namespace faiss