Commit 395d2ce6 authored by huchen

init the faiss for rocm

parent 5ded39f5
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/IndexBinaryFlat.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuResources.h>
#include <memory>
namespace faiss {
namespace gpu {
class BinaryFlatIndex;
struct GpuIndexBinaryFlatConfig : public GpuIndexConfig {};
/// A GPU version of IndexBinaryFlat for brute-force comparison of bit vectors
/// via Hamming distance
class GpuIndexBinaryFlat : public IndexBinary {
public:
/// Construct from a pre-existing faiss::IndexBinaryFlat instance, copying
/// data over to the given GPU
GpuIndexBinaryFlat(
GpuResourcesProvider* resources,
const faiss::IndexBinaryFlat* index,
GpuIndexBinaryFlatConfig config = GpuIndexBinaryFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexBinaryFlat(
GpuResourcesProvider* resources,
int dims,
GpuIndexBinaryFlatConfig config = GpuIndexBinaryFlatConfig());
~GpuIndexBinaryFlat() override;
/// Returns the device that this index is resident on
int getDevice() const;
/// Returns a reference to our GpuResources object that manages memory,
/// stream and handle resources on the GPU
std::shared_ptr<GpuResources> getResources();
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
void copyFrom(const faiss::IndexBinaryFlat* index);
/// Copy ourselves to the given CPU index; will overwrite all data
/// in the index instance
void copyTo(faiss::IndexBinaryFlat* index) const;
void add(faiss::IndexBinary::idx_t n, const uint8_t* x) override;
void reset() override;
void search(
faiss::IndexBinary::idx_t n,
const uint8_t* x,
faiss::IndexBinary::idx_t k,
int32_t* distances,
faiss::IndexBinary::idx_t* labels) const override;
void reconstruct(faiss::IndexBinary::idx_t key, uint8_t* recons)
const override;
protected:
/// Called from search when the input data is on the CPU;
/// potentially allows for pinned memory usage
void searchFromCpuPaged_(
int n,
const uint8_t* x,
int k,
int32_t* outDistancesData,
int* outIndicesData) const;
void searchNonPaged_(
int n,
const uint8_t* x,
int k,
int32_t* outDistancesData,
int* outIndicesData) const;
protected:
/// Manages streams, cuBLAS handles and scratch memory for devices
std::shared_ptr<GpuResources> resources_;
/// Configuration options
const GpuIndexBinaryFlatConfig binaryFlatConfig_;
/// Holds our GPU data containing the list of vectors
std::unique_ptr<BinaryFlatIndex> data_;
};
} // namespace gpu
} // namespace faiss
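For orientation, a minimal usage sketch of the GpuIndexBinaryFlat header above follows. It is not part of this commit, and it assumes StandardGpuResources from upstream Faiss (faiss/gpu/StandardGpuResources.h) as the GpuResourcesProvider implementation.
// Hedged example: build an empty GpuIndexBinaryFlat, add binary vectors,
// and run a Hamming-distance search. Not part of this diff.
#include <faiss/gpu/GpuIndexBinaryFlat.h>
#include <faiss/gpu/StandardGpuResources.h> // assumed from upstream Faiss
#include <vector>
int main() {
    int dims = 256;                       // dimension in bits; a multiple of 8
    faiss::gpu::StandardGpuResources res; // implements GpuResourcesProvider
    faiss::gpu::GpuIndexBinaryFlatConfig config;
    config.device = 0;                    // field inherited from GpuIndexConfig
    faiss::gpu::GpuIndexBinaryFlat index(&res, dims, config);
    // 1000 database vectors of dims / 8 bytes each
    std::vector<uint8_t> db(1000 * (dims / 8));
    for (size_t i = 0; i < db.size(); ++i) {
        db[i] = static_cast<uint8_t>(i * 37);
    }
    index.add(1000, db.data());
    // Query with the first vector; distances are Hamming distances
    int k = 4;
    std::vector<int32_t> distances(k);
    std::vector<faiss::IndexBinary::idx_t> labels(k);
    index.search(1, db.data(), k, distances.data(), labels.data());
    return 0;
}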
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/StaticUtils.h>
#include <faiss/gpu/impl/FlatIndex.cuh>
#include <faiss/gpu/utils/ConversionOperators.cuh>
#include <faiss/gpu/utils/CopyUtils.cuh>
#include <faiss/gpu/utils/Float16.cuh>
#include <limits>
namespace faiss {
namespace gpu {
GpuIndexFlat::GpuIndexFlat(
GpuResourcesProvider* provider,
const faiss::IndexFlat* index,
GpuIndexFlatConfig config)
: GpuIndex(
provider->getResources(),
index->d,
index->metric_type,
index->metric_arg,
config),
flatConfig_(config) {
// Flat index doesn't need training
this->is_trained = true;
copyFrom(index);
}
GpuIndexFlat::GpuIndexFlat(
std::shared_ptr<GpuResources> resources,
const faiss::IndexFlat* index,
GpuIndexFlatConfig config)
: GpuIndex(
resources,
index->d,
index->metric_type,
index->metric_arg,
config),
flatConfig_(config) {
// Flat index doesn't need training
this->is_trained = true;
copyFrom(index);
}
GpuIndexFlat::GpuIndexFlat(
GpuResourcesProvider* provider,
int dims,
faiss::MetricType metric,
GpuIndexFlatConfig config)
: GpuIndex(provider->getResources(), dims, metric, 0, config),
flatConfig_(config) {
// Flat index doesn't need training
this->is_trained = true;
// Construct index
DeviceScope scope(config_.device);
data_.reset(new FlatIndex(
resources_.get(),
dims,
flatConfig_.useFloat16,
flatConfig_.storeTransposed,
config_.memorySpace));
}
GpuIndexFlat::GpuIndexFlat(
std::shared_ptr<GpuResources> resources,
int dims,
faiss::MetricType metric,
GpuIndexFlatConfig config)
: GpuIndex(resources, dims, metric, 0, config), flatConfig_(config) {
// Flat index doesn't need training
this->is_trained = true;
// Construct index
DeviceScope scope(config_.device);
data_.reset(new FlatIndex(
resources_.get(),
dims,
flatConfig_.useFloat16,
flatConfig_.storeTransposed,
config_.memorySpace));
}
GpuIndexFlat::~GpuIndexFlat() {}
void GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
DeviceScope scope(config_.device);
GpuIndex::copyFrom(index);
// GPU code has 32 bit indices
FAISS_THROW_IF_NOT_FMT(
index->ntotal <= (Index::idx_t)std::numeric_limits<int>::max(),
"GPU index only supports up to %zu indices; "
"attempting to copy CPU index with %zu parameters",
(size_t)std::numeric_limits<int>::max(),
(size_t)index->ntotal);
data_.reset();
data_.reset(new FlatIndex(
resources_.get(),
this->d,
flatConfig_.useFloat16,
flatConfig_.storeTransposed,
config_.memorySpace));
// The index could be empty
if (index->ntotal > 0) {
data_->add(
index->get_xb(),
index->ntotal,
resources_->getDefaultStream(config_.device));
}
}
void GpuIndexFlat::copyTo(faiss::IndexFlat* index) const {
DeviceScope scope(config_.device);
GpuIndex::copyTo(index);
index->code_size = sizeof(float) * this->d;
FAISS_ASSERT(data_);
FAISS_ASSERT(data_->getSize() == this->ntotal);
index->codes.resize(this->ntotal * index->code_size);
auto stream = resources_->getDefaultStream(config_.device);
if (this->ntotal > 0) {
if (flatConfig_.useFloat16) {
auto vecFloat32 = data_->getVectorsFloat32Copy(stream);
fromDevice(vecFloat32, index->get_xb(), stream);
} else {
fromDevice(data_->getVectorsFloat32Ref(), index->get_xb(), stream);
}
}
}
size_t GpuIndexFlat::getNumVecs() const {
return this->ntotal;
}
void GpuIndexFlat::reset() {
DeviceScope scope(config_.device);
// Free the underlying memory
data_->reset();
this->ntotal = 0;
}
void GpuIndexFlat::train(Index::idx_t n, const float* x) {
// nothing to do
}
void GpuIndexFlat::add(Index::idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(this->is_trained, "Index not trained");
// For now, only support <= max int results
FAISS_THROW_IF_NOT_FMT(
n <= (Index::idx_t)std::numeric_limits<int>::max(),
"GPU index only supports up to %d indices",
std::numeric_limits<int>::max());
if (n == 0) {
// nothing to add
return;
}
DeviceScope scope(config_.device);
// To avoid multiple re-allocations, ensure we have enough storage
// available
data_->reserve(n, resources_->getDefaultStream(config_.device));
// If we're not operating in float16 mode, we don't need the input
// data to be resident on our device; we can add directly.
if (!flatConfig_.useFloat16) {
addImpl_(n, x, nullptr);
} else {
// Otherwise, perform the paging
GpuIndex::add(n, x);
}
}
bool GpuIndexFlat::addImplRequiresIDs_() const {
return false;
}
void GpuIndexFlat::addImpl_(int n, const float* x, const Index::idx_t* ids) {
FAISS_ASSERT(data_);
FAISS_ASSERT(n > 0);
// We do not support add_with_ids
FAISS_THROW_IF_NOT_MSG(!ids, "add_with_ids not supported");
// Due to GPU indexing in int32, we can't store more than this
// number of vectors on a GPU
FAISS_THROW_IF_NOT_FMT(
this->ntotal + n <= (Index::idx_t)std::numeric_limits<int>::max(),
"GPU index only supports up to %zu indices",
(size_t)std::numeric_limits<int>::max());
data_->add(x, n, resources_->getDefaultStream(config_.device));
this->ntotal += n;
}
void GpuIndexFlat::searchImpl_(
int n,
const float* x,
int k,
float* distances,
Index::idx_t* labels) const {
auto stream = resources_->getDefaultStream(config_.device);
// Input and output data are already resident on the GPU
Tensor<float, 2, true> queries(const_cast<float*>(x), {n, (int)this->d});
Tensor<float, 2, true> outDistances(distances, {n, k});
Tensor<Index::idx_t, 2, true> outLabels(labels, {n, k});
// FlatIndex only supports int indices
DeviceTensor<int, 2, true> outIntLabels(
resources_.get(), makeTempAlloc(AllocType::Other, stream), {n, k});
data_->query(
queries,
k,
metric_type,
metric_arg,
outDistances,
outIntLabels,
true);
// Convert int to idx_t
convertTensor<int, Index::idx_t, 2>(stream, outIntLabels, outLabels);
}
void GpuIndexFlat::reconstruct(Index::idx_t key, float* out) const {
DeviceScope scope(config_.device);
FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
auto stream = resources_->getDefaultStream(config_.device);
if (flatConfig_.useFloat16) {
// FIXME jhj: kernel for copy
auto vec = data_->getVectorsFloat32Copy(key, 1, stream);
fromDevice(vec.data(), out, this->d, stream);
} else {
auto vec = data_->getVectorsFloat32Ref()[key];
fromDevice(vec.data(), out, this->d, stream);
}
}
void GpuIndexFlat::reconstruct_n(Index::idx_t i0, Index::idx_t num, float* out)
const {
DeviceScope scope(config_.device);
FAISS_THROW_IF_NOT_MSG(i0 < this->ntotal, "index out of bounds");
FAISS_THROW_IF_NOT_MSG(i0 + num - 1 < this->ntotal, "num out of bounds");
auto stream = resources_->getDefaultStream(config_.device);
if (flatConfig_.useFloat16) {
// FIXME jhj: kernel for copy
auto vec = data_->getVectorsFloat32Copy(i0, num, stream);
fromDevice(vec.data(), out, num * this->d, stream);
} else {
auto vec = data_->getVectorsFloat32Ref()[i0];
fromDevice(vec.data(), out, this->d * num, stream);
}
}
void GpuIndexFlat::compute_residual(
const float* x,
float* residual,
Index::idx_t key) const {
compute_residual_n(1, x, residual, &key);
}
void GpuIndexFlat::compute_residual_n(
Index::idx_t n,
const float* xs,
float* residuals,
const Index::idx_t* keys) const {
FAISS_THROW_IF_NOT_FMT(
n <= (Index::idx_t)std::numeric_limits<int>::max(),
"GPU index only supports up to %zu indices",
(size_t)std::numeric_limits<int>::max());
auto stream = resources_->getDefaultStream(config_.device);
DeviceScope scope(config_.device);
auto vecsDevice = toDeviceTemporary<float, 2>(
resources_.get(),
config_.device,
const_cast<float*>(xs),
stream,
{(int)n, (int)this->d});
auto idsDevice = toDeviceTemporary<Index::idx_t, 1>(
resources_.get(),
config_.device,
const_cast<Index::idx_t*>(keys),
stream,
{(int)n});
auto residualDevice = toDeviceTemporary<float, 2>(
resources_.get(),
config_.device,
residuals,
stream,
{(int)n, (int)this->d});
// Convert idx_t to int
auto keysInt = convertTensorTemporary<Index::idx_t, int, 1>(
resources_.get(), stream, idsDevice);
FAISS_ASSERT(data_);
data_->computeResidual(vecsDevice, keysInt, residualDevice);
fromDevice<float, 2>(residualDevice, residuals, stream);
}
//
// GpuIndexFlatL2
//
GpuIndexFlatL2::GpuIndexFlatL2(
GpuResourcesProvider* provider,
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config)
: GpuIndexFlat(provider, index, config) {}
GpuIndexFlatL2::GpuIndexFlatL2(
std::shared_ptr<GpuResources> resources,
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config)
: GpuIndexFlat(resources, index, config) {}
GpuIndexFlatL2::GpuIndexFlatL2(
GpuResourcesProvider* provider,
int dims,
GpuIndexFlatConfig config)
: GpuIndexFlat(provider, dims, faiss::METRIC_L2, config) {}
GpuIndexFlatL2::GpuIndexFlatL2(
std::shared_ptr<GpuResources> resources,
int dims,
GpuIndexFlatConfig config)
: GpuIndexFlat(resources, dims, faiss::METRIC_L2, config) {}
void GpuIndexFlatL2::copyFrom(faiss::IndexFlat* index) {
FAISS_THROW_IF_NOT_MSG(
index->metric_type == metric_type,
"Cannot copy a GpuIndexFlatL2 from an index of "
"different metric_type");
GpuIndexFlat::copyFrom(index);
}
void GpuIndexFlatL2::copyTo(faiss::IndexFlat* index) {
FAISS_THROW_IF_NOT_MSG(
index->metric_type == metric_type,
"Cannot copy a GpuIndexFlatL2 to an index of "
"different metric_type");
GpuIndexFlat::copyTo(index);
}
//
// GpuIndexFlatIP
//
GpuIndexFlatIP::GpuIndexFlatIP(
GpuResourcesProvider* provider,
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config)
: GpuIndexFlat(provider, index, config) {}
GpuIndexFlatIP::GpuIndexFlatIP(
std::shared_ptr<GpuResources> resources,
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config)
: GpuIndexFlat(resources, index, config) {}
GpuIndexFlatIP::GpuIndexFlatIP(
GpuResourcesProvider* provider,
int dims,
GpuIndexFlatConfig config)
: GpuIndexFlat(provider, dims, faiss::METRIC_INNER_PRODUCT, config) {}
GpuIndexFlatIP::GpuIndexFlatIP(
std::shared_ptr<GpuResources> resources,
int dims,
GpuIndexFlatConfig config)
: GpuIndexFlat(resources, dims, faiss::METRIC_INNER_PRODUCT, config) {}
void GpuIndexFlatIP::copyFrom(faiss::IndexFlat* index) {
FAISS_THROW_IF_NOT_MSG(
index->metric_type == metric_type,
"Cannot copy a GpuIndexFlatIP from an index of "
"different metric_type");
GpuIndexFlat::copyFrom(index);
}
void GpuIndexFlatIP::copyTo(faiss::IndexFlat* index) {
// The passed in index must be IP
FAISS_THROW_IF_NOT_MSG(
index->metric_type == metric_type,
"Cannot copy a GpuIndexFlatIP to an index of "
"different metric_type");
GpuIndexFlat::copyTo(index);
}
} // namespace gpu
} // namespace faiss
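The copyFrom()/copyTo() implementations above define the CPU-to-GPU round trip for flat indices. Below is a hedged sketch of that round trip; it is not part of this diff, and StandardGpuResources is assumed from upstream Faiss.
// Hedged example: copy a CPU IndexFlatL2 onto the GPU (the constructor
// calls copyFrom()) and back (copyTo() converts float16 storage to float32).
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h> // assumed from upstream Faiss
#include <vector>
int main() {
    int d = 64;
    faiss::IndexFlatL2 cpuIndex(d);
    std::vector<float> xb(100 * static_cast<size_t>(d));
    for (size_t i = 0; i < xb.size(); ++i) {
        xb[i] = static_cast<float>(i % 113) / 113.0f;
    }
    cpuIndex.add(100, xb.data());
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatConfig config;
    config.useFloat16 = true; // store vectors on-device as float16
    // ntotal must fit in int32 (see the FAISS_THROW_IF_NOT_FMT in copyFrom)
    faiss::gpu::GpuIndexFlat gpuIndex(&res, &cpuIndex, config);
    faiss::IndexFlatL2 roundTrip(d);
    gpuIndex.copyTo(&roundTrip); // float16 data comes back as float32
    return 0;
}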
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/gpu/GpuIndex.h>
#include <memory>
namespace faiss {
struct IndexFlat;
struct IndexFlatL2;
struct IndexFlatIP;
} // namespace faiss
namespace faiss {
namespace gpu {
class FlatIndex;
struct GpuIndexFlatConfig : public GpuIndexConfig {
inline GpuIndexFlatConfig() : useFloat16(false), storeTransposed(false) {}
/// Whether or not data is stored as float16
bool useFloat16;
/// Whether or not data is stored (transparently) in a transposed
/// layout, enabling use of the NN GEMM call, which is ~10% faster.
/// This will improve the speed of the flat index, but will
/// substantially slow down any add() calls made, as all data must
/// be transposed, and will increase storage requirements (we store
/// data in both transposed and non-transposed layouts).
bool storeTransposed;
};
/// Wrapper around the GPU implementation that looks like
/// faiss::IndexFlat; copies over centroid data from a given
/// faiss::IndexFlat
class GpuIndexFlat : public GpuIndex {
public:
/// Construct from a pre-existing faiss::IndexFlat instance, copying
/// data over to the given GPU
GpuIndexFlat(
GpuResourcesProvider* provider,
const faiss::IndexFlat* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlat(
std::shared_ptr<GpuResources> resources,
const faiss::IndexFlat* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlat(
GpuResourcesProvider* provider,
int dims,
faiss::MetricType metric,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlat(
std::shared_ptr<GpuResources> resources,
int dims,
faiss::MetricType metric,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
~GpuIndexFlat() override;
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
void copyFrom(const faiss::IndexFlat* index);
/// Copy ourselves to the given CPU index; will overwrite all data
/// in the index instance
void copyTo(faiss::IndexFlat* index) const;
/// Returns the number of vectors we contain
size_t getNumVecs() const;
/// Clears all vectors from this index
void reset() override;
/// This index is not trained, so this does nothing
void train(Index::idx_t n, const float* x) override;
/// Overrides to avoid excessive copies
void add(Index::idx_t, const float* x) override;
/// Reconstruction methods; prefer the batch reconstruct as it will
/// be more efficient
void reconstruct(Index::idx_t key, float* out) const override;
/// Batch reconstruction method
void reconstruct_n(Index::idx_t i0, Index::idx_t num, float* out)
const override;
/// Compute residual
void compute_residual(const float* x, float* residual, Index::idx_t key)
const override;
/// Compute residual (batch mode)
void compute_residual_n(
Index::idx_t n,
const float* xs,
float* residuals,
const Index::idx_t* keys) const override;
/// For internal access
inline FlatIndex* getGpuData() {
return data_.get();
}
protected:
/// Flat index does not require IDs as there is no storage available for
/// them
bool addImplRequiresIDs_() const override;
/// Called from GpuIndex for add
void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
/// Called from GpuIndex for search
void searchImpl_(
int n,
const float* x,
int k,
float* distances,
Index::idx_t* labels) const override;
protected:
/// Our configuration options
const GpuIndexFlatConfig flatConfig_;
/// Holds our GPU data containing the list of vectors
std::unique_ptr<FlatIndex> data_;
};
/// Wrapper around the GPU implementation that looks like
/// faiss::IndexFlatL2; copies over centroid data from a given
/// faiss::IndexFlat
class GpuIndexFlatL2 : public GpuIndexFlat {
public:
/// Construct from a pre-existing faiss::IndexFlatL2 instance, copying
/// data over to the given GPU
GpuIndexFlatL2(
GpuResourcesProvider* provider,
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlatL2(
std::shared_ptr<GpuResources> resources,
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlatL2(
GpuResourcesProvider* provider,
int dims,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlatL2(
std::shared_ptr<GpuResources> resources,
int dims,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
void copyFrom(faiss::IndexFlat* index);
/// Copy ourselves to the given CPU index; will overwrite all data
/// in the index instance
void copyTo(faiss::IndexFlat* index);
};
/// Wrapper around the GPU implementation that looks like
/// faiss::IndexFlatIP; copies over centroid data from a given
/// faiss::IndexFlat
class GpuIndexFlatIP : public GpuIndexFlat {
public:
/// Construct from a pre-existing faiss::IndexFlatIP instance, copying
/// data over to the given GPU
GpuIndexFlatIP(
GpuResourcesProvider* provider,
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlatIP(
std::shared_ptr<GpuResources> resources,
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlatIP(
GpuResourcesProvider* provider,
int dims,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
GpuIndexFlatIP(
std::shared_ptr<GpuResources> resources,
int dims,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
void copyFrom(faiss::IndexFlat* index);
/// Copy ourselves to the given CPU index; will overwrite all data
/// in the index instance
void copyTo(faiss::IndexFlat* index);
};
} // namespace gpu
} // namespace faiss
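A minimal sketch of the empty-construction path declared in this header: create a GpuIndexFlatL2 directly on the GPU, add vectors, and search. StandardGpuResources is again assumed from upstream Faiss; storeTransposed trades slower add() for faster queries, as the config comment above notes.
// Hedged example: empty construction, add, and k-NN search on the GPU.
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h> // assumed from upstream Faiss
#include <vector>
int main() {
    int d = 128;
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatConfig config;
    config.storeTransposed = true; // faster queries, slower add()
    faiss::gpu::GpuIndexFlatL2 index(&res, d, config);
    std::vector<float> xb(10000 * static_cast<size_t>(d));
    for (size_t i = 0; i < xb.size(); ++i) {
        xb[i] = static_cast<float>(i % 251) / 251.0f;
    }
    index.add(10000, xb.data());
    int k = 5;
    std::vector<float> distances(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}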
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVF.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/gpu/utils/Float16.cuh>
namespace faiss {
namespace gpu {
GpuIndexIVF::GpuIndexIVF(
GpuResourcesProvider* provider,
int dims,
faiss::MetricType metric,
float metricArg,
int nlistIn,
GpuIndexIVFConfig config)
: GpuIndex(provider->getResources(), dims, metric, metricArg, config),
nlist(nlistIn),
nprobe(1),
quantizer(nullptr),
ivfConfig_(config) {
init_();
// Only IP and L2 are supported for now
if (!(metric_type == faiss::METRIC_L2 ||
metric_type == faiss::METRIC_INNER_PRODUCT)) {
FAISS_THROW_FMT("unsupported metric type %d", (int)metric_type);
}
}
void GpuIndexIVF::init_() {
FAISS_THROW_IF_NOT_MSG(nlist > 0, "nlist must be > 0");
// Spherical by default if the metric is inner_product
if (metric_type == faiss::METRIC_INNER_PRODUCT) {
cp.spherical = true;
}
// here we set a low # iterations because this is typically used
// for large clusterings
cp.niter = 10;
cp.verbose = verbose;
if (!quantizer) {
// Construct an empty quantizer
GpuIndexFlatConfig config = ivfConfig_.flatConfig;
// FIXME: inherit our same device
config.device = config_.device;
if (metric_type == faiss::METRIC_L2) {
quantizer = new GpuIndexFlatL2(resources_, d, config);
} else if (metric_type == faiss::METRIC_INNER_PRODUCT) {
quantizer = new GpuIndexFlatIP(resources_, d, config);
} else {
// unknown metric type
FAISS_THROW_FMT("unsupported metric type %d", (int)metric_type);
}
}
}
GpuIndexIVF::~GpuIndexIVF() {
delete quantizer;
}
GpuIndexFlat* GpuIndexIVF::getQuantizer() {
return quantizer;
}
void GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) {
DeviceScope scope(config_.device);
GpuIndex::copyFrom(index);
FAISS_ASSERT(index->nlist > 0);
FAISS_THROW_IF_NOT_FMT(
index->nlist <= (Index::idx_t)std::numeric_limits<int>::max(),
"GPU index only supports %zu inverted lists",
(size_t)std::numeric_limits<int>::max());
nlist = index->nlist;
FAISS_THROW_IF_NOT_FMT(
index->nprobe > 0 && index->nprobe <= getMaxKSelection(),
"GPU index only supports nprobe <= %zu; passed %zu",
(size_t)getMaxKSelection(),
index->nprobe);
nprobe = index->nprobe;
// The metric type may have changed as well, so we might have to
// change our quantizer
delete quantizer;
quantizer = nullptr;
// Construct an empty quantizer
GpuIndexFlatConfig config = ivfConfig_.flatConfig;
// FIXME: inherit our same device
config.device = config_.device;
if (index->metric_type == faiss::METRIC_L2) {
// FIXME: 2 different float16 options?
quantizer = new GpuIndexFlatL2(resources_, this->d, config);
} else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) {
// FIXME: 2 different float16 options?
quantizer = new GpuIndexFlatIP(resources_, this->d, config);
} else {
// unknown metric type
FAISS_ASSERT(false);
}
if (!index->is_trained) {
// copied in GpuIndex::copyFrom
FAISS_ASSERT(!is_trained && ntotal == 0);
return;
}
// copied in GpuIndex::copyFrom
// ntotal can exceed max int, but the number of vectors per inverted
// list cannot exceed this. We check this in the subclasses.
FAISS_ASSERT(is_trained && (ntotal == index->ntotal));
// Since we're trained, the quantizer must have data
FAISS_ASSERT(index->quantizer->ntotal > 0);
// Right now, we can only handle IndexFlat or derived classes
auto qFlat = dynamic_cast<faiss::IndexFlat*>(index->quantizer);
FAISS_THROW_IF_NOT_MSG(
qFlat,
"Only IndexFlat is supported for the coarse quantizer "
"for copying from an IndexIVF into a GpuIndexIVF");
quantizer->copyFrom(qFlat);
}
void GpuIndexIVF::copyTo(faiss::IndexIVF* index) const {
DeviceScope scope(config_.device);
//
// Index information
//
GpuIndex::copyTo(index);
//
// IndexIVF information
//
index->nlist = nlist;
index->nprobe = nprobe;
// Construct and copy the appropriate quantizer
faiss::IndexFlat* q = nullptr;
if (this->metric_type == faiss::METRIC_L2) {
q = new faiss::IndexFlatL2(this->d);
} else if (this->metric_type == faiss::METRIC_INNER_PRODUCT) {
q = new faiss::IndexFlatIP(this->d);
} else {
// we should have one of the above metrics
FAISS_ASSERT(false);
}
FAISS_ASSERT(quantizer);
quantizer->copyTo(q);
if (index->own_fields) {
delete index->quantizer;
}
index->quantizer = q;
index->quantizer_trains_alone = 0;
index->own_fields = true;
index->cp = this->cp;
index->make_direct_map(false);
}
int GpuIndexIVF::getNumLists() const {
return nlist;
}
void GpuIndexIVF::setNumProbes(int nprobe) {
FAISS_THROW_IF_NOT_FMT(
nprobe > 0 && nprobe <= getMaxKSelection(),
"GPU index only supports nprobe <= %d; passed %d",
getMaxKSelection(),
nprobe);
this->nprobe = nprobe;
}
int GpuIndexIVF::getNumProbes() const {
return nprobe;
}
bool GpuIndexIVF::addImplRequiresIDs_() const {
// All IVF indices have storage for IDs
return true;
}
void GpuIndexIVF::trainQuantizer_(Index::idx_t n, const float* x) {
if (n == 0) {
// nothing to do
return;
}
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (this->verbose) {
printf("IVF quantizer does not need training.\n");
}
return;
}
if (this->verbose) {
printf("Training IVF quantizer on %ld vectors in %dD\n", n, d);
}
DeviceScope scope(config_.device);
// leverage the CPU-side k-means code, which works for the GPU
// flat index as well
quantizer->reset();
Clustering clus(this->d, nlist, this->cp);
clus.verbose = verbose;
clus.train(n, x, *quantizer);
quantizer->is_trained = true;
FAISS_ASSERT(quantizer->ntotal == nlist);
}
} // namespace gpu
} // namespace faiss
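GpuIndexIVF itself is abstract (getListLength() and friends are pure virtual), so the copyFrom() and setNumProbes() paths above are normally exercised through a concrete subclass. A hedged sketch using GpuIndexIVFFlat, which exists in upstream Faiss but is not part of this diff:
// Hedged example: copy a trained CPU IndexIVFFlat to the GPU and tune
// nprobe. GpuIndexIVFFlat and StandardGpuResources are assumed from
// upstream Faiss; neither is defined in this diff.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>
int main() {
    int d = 64, nlist = 100;
    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFFlat cpuIndex(&coarse, d, nlist, faiss::METRIC_L2);
    std::vector<float> xt(5000 * static_cast<size_t>(d));
    for (size_t i = 0; i < xt.size(); ++i) {
        xt[i] = static_cast<float>(i % 199) / 199.0f;
    }
    cpuIndex.train(5000, xt.data());
    cpuIndex.add(5000, xt.data());
    faiss::gpu::StandardGpuResources res;
    // The constructor copies the CPU index, including its flat quantizer
    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, &cpuIndex);
    gpuIndex.setNumProbes(8); // must be <= getMaxKSelection()
    return 0;
}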
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/Clustering.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndicesOptions.h>
namespace faiss {
struct IndexIVF;
}
namespace faiss {
namespace gpu {
class GpuIndexFlat;
struct GpuIndexIVFConfig : public GpuIndexConfig {
inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
/// Index storage options for the GPU
IndicesOptions indicesOptions;
/// Configuration for the coarse quantizer object
GpuIndexFlatConfig flatConfig;
};
class GpuIndexIVF : public GpuIndex {
public:
GpuIndexIVF(
GpuResourcesProvider* provider,
int dims,
faiss::MetricType metric,
float metricArg,
int nlist,
GpuIndexIVFConfig config = GpuIndexIVFConfig());
~GpuIndexIVF() override;
private:
/// Shared initialization functions
void init_();
public:
/// Copy what we need from the CPU equivalent
void copyFrom(const faiss::IndexIVF* index);
/// Copy what we have to the CPU equivalent
void copyTo(faiss::IndexIVF* index) const;
/// Returns the number of inverted lists we're managing
int getNumLists() const;
/// Returns the number of vectors present in a particular inverted list
virtual int getListLength(int listId) const = 0;
/// Return the encoded vector data contained in a particular inverted list,
/// for debugging purposes.
/// If gpuFormat is true, the data is returned as it is encoded in the
/// GPU-side representation.
/// Otherwise, it is converted to the CPU-side Faiss-compliant format,
/// while the native GPU format may differ.
virtual std::vector<uint8_t> getListVectorData(
int listId,
bool gpuFormat = false) const = 0;
/// Return the vector indices contained in a particular inverted list, for
/// debugging purposes.
virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
/// Return the quantizer we're using
GpuIndexFlat* getQuantizer();
/// Sets the number of list probes per query
void setNumProbes(int nprobe);
/// Returns our current number of list probes per query
int getNumProbes() const;
protected:
bool addImplRequiresIDs_() const override;
void trainQuantizer_(Index::idx_t n, const float* x);
public:
/// Exposing this like the CPU version for manipulation
ClusteringParameters cp;
/// Exposing this like the CPU version for query
int nlist;
/// Exposing this like the CPU version for manipulation
int nprobe;
/// Exposing this like the CPU version for query
GpuIndexFlat* quantizer;
protected:
/// Our configuration options
const GpuIndexIVFConfig ivfConfig_;
};
} // namespace gpu
} // namespace faiss
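Finally, a short sketch of the debugging accessors declared above (getListLength(), getListIndices(), getListVectorData()), again through the assumed upstream GpuIndexIVFFlat subclass; this is illustrative only and not part of the commit.
// Hedged example: inspect an inverted list after training and adding.
// GpuIndexIVFFlat and StandardGpuResources are assumed from upstream Faiss.
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>
int main() {
    int d = 32, nlist = 16;
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlat index(&res, d, nlist, faiss::METRIC_L2);
    std::vector<float> xb(2000 * static_cast<size_t>(d));
    for (size_t i = 0; i < xb.size(); ++i) {
        xb[i] = static_cast<float>(i % 97) / 97.0f;
    }
    index.train(2000, xb.data()); // clusters the coarse quantizer
    index.add(2000, xb.data());
    int len = index.getListLength(0);               // vectors in list 0
    std::vector<faiss::Index::idx_t> ids = index.getListIndices(0);
    std::vector<uint8_t> codes =
            index.getListVectorData(0, /*gpuFormat=*/false);
    (void)len; (void)ids; (void)codes;
    return 0;
}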