Commit 395d2ce6 authored by huchen

init the faiss for rocm

parent 5ded39f5
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Simplistic RPC implementation.
Exposes all functions of a Server object.
Uses pickle for serialization and the socket interface.
"""
import os, pdb, pickle, time, errno, sys, _thread, traceback, socket, threading, gc
import logging
LOG = logging.getLogger(__name__)
# default
PORT=12032
#########################################################################
# simple I/O functions
def inline_send_handle(f, conn):
st = os.fstat(f.fileno())
size = st.st_size
pickle.dump(size, conn)
conn.write(f.read(size))
def inline_send_string(s, conn):
size = len(s)
pickle.dump(size, conn)
conn.write(s)
class FileSock:
" wraps a socket so that it is usable by pickle/cPickle "
def __init__(self,sock):
self.sock = sock
self.nr=0
    def write(self, buf):
        # send in chunks of at most 512 KiB
        bs = 512 * 1024
        ns = 0
        while ns < len(buf):
            sent = self.sock.send(buf[ns:ns + bs])
            ns += sent
    def read(self, bs=512 * 1024):
        self.nr += 1
        # accumulate chunks until bs bytes were received or EOF
        b = []
        nb = 0
        while nb < bs:
            rb = self.sock.recv(bs - nb)
            if not rb:
                break
            b.append(rb)
            nb += len(rb)
        return b''.join(b)
    def readline(self):
        """reads one byte at a time; may be optimized..."""
        s = bytes()
        while True:
            c = self.read(1)
            s += c
            if len(c) == 0 or chr(c[0]) == '\n':
                return s
class ClientExit(Exception):
pass
class ServerException(Exception):
pass
class Server:
"""
server protocol. Methods from classes that subclass Server can be called
transparently from a client
"""
def __init__(self, s, logf=sys.stderr, log_prefix=''):
self.logf = logf
self.log_prefix = log_prefix
# connection
self.conn = s
self.fs = FileSock(s)
def log(self, s):
        self.logf.write("Server log %s: %s\n" % (self.log_prefix, s))
def one_function(self):
"""
Executes a single function with associated I/O.
Protocol:
- the arguments and results are serialized with the pickle protocol
- client sends : (fname,args)
fname = method name to call
args = tuple of arguments
        - server sends result: (st, ret)
              st = None, or a string describing the exception raised
                   during execution
              ret = the return value, or None if st is not None
"""
try:
(fname,args)=pickle.load(self.fs)
except EOFError:
raise ClientExit("read args")
self.log("executing method %s"%(fname))
st = None
ret = None
        try:
            f = getattr(self, fname)
        except AttributeError:
            st = AttributeError("unknown method " + fname)
            self.log("unknown method")
        else:
            try:
                ret = f(*args)
            except Exception as e:
                # due to a bug (in mod_python?), ServerException cannot be
                # unpickled, so send the traceback string and rebuild the
                # exception on the client side
                st = "".join(traceback.format_tb(sys.exc_info()[2])) + str(e)
                self.log("exception in method")
                traceback.print_exc(50, self.logf)
                self.logf.flush()
LOG.info("return")
try:
            pickle.dump((st, ret), self.fs, protocol=4)
except EOFError:
raise ClientExit("function return")
def exec_loop(self):
""" main execution loop. Loops and handles exit states"""
self.log("in exec_loop")
try:
while True:
self.one_function()
except ClientExit as e:
self.log("ClientExit %s"%e)
except socket.error as e:
self.log("socket error %s"%e)
traceback.print_exc(50,self.logf)
except EOFError:
self.log("EOF during communication")
traceback.print_exc(50,self.logf)
except BaseException:
# unexpected
traceback.print_exc(50,sys.stderr)
sys.exit(1)
LOG.info("exit sever")
def exec_loop_cleanup(self):
pass
###################################################################
# spying stuff
def get_ps_stats(self):
ret=''
f=os.popen("echo ============ `hostname` uptime:; uptime;"+
"echo ============ self:; "+
"ps -p %d -o pid,vsize,rss,%%cpu,nlwp,psr; "%os.getpid()+
"echo ============ run queue:;"+
"ps ar -o user,pid,%cpu,%mem,ni,nlwp,psr,vsz,rss,cputime,command")
for l in f:
ret+=l
return ret
class Client:
"""
Methods of the server object can be called transparently. Exceptions are
re-raised.
"""
def __init__(self, HOST, port=PORT, v6=False):
socktype = socket.AF_INET6 if v6 else socket.AF_INET
sock = socket.socket(socktype, socket.SOCK_STREAM)
LOG.info("connecting", HOST, port, socktype)
sock.connect((HOST, port))
self.sock = sock
self.fs = FileSock(sock)
def generic_fun(self, fname, args):
pickle.dump((fname, args), self.fs, protocol=4)
return self.get_result()
def get_result(self):
(st, ret) = pickle.load(self.fs)
        if st is not None:
raise ServerException(st)
else:
return ret
def __getattr__(self,name):
return lambda *x: self.generic_fun(name,x)
def run_server(new_handler, port=PORT, report_to_file=None, v6=False):
    HOST = ''  # symbolic name meaning all available interfaces
socktype = socket.AF_INET6 if v6 else socket.AF_INET
s = socket.socket(socktype, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
LOG.info("bind %s:%d" % (HOST, port))
s.bind((HOST, port))
s.listen(5)
LOG.info("accepting connections")
    if report_to_file is not None:
        LOG.info('storing host+port in %s', report_to_file)
        with open(report_to_file, 'w') as f:
            f.write('%s:%d ' % (socket.gethostname(), port))
while True:
try:
conn, addr = s.accept()
        except socket.error as e:
            if e.errno == errno.EINTR:
                continue
raise
        LOG.info('Connected by %s', addr)
ibs = new_handler(conn)
tid = _thread.start_new_thread(ibs.exec_loop,())
LOG.info("tid",tid)
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
This is a set of function wrappers that override the default numpy versions.
Interoperability functions for pytorch and Faiss: Importing this will allow
pytorch Tensors (CPU or GPU) to be used as arguments to Faiss indexes and
other functions. Torch GPU tensors can only be used with Faiss GPU indexes.
If this is imported with a package that supports Faiss GPU, the necessary
stream synchronization with the current pytorch stream will be automatically
performed.
Numpy ndarrays can continue to be used in the Faiss python interface after
importing this file. All arguments must be uniformly either numpy ndarrays
or Torch tensors; no mixing is allowed.
"""
import faiss
import torch
import contextlib
import inspect
import sys
import numpy as np
def swig_ptr_from_UInt8Tensor(x):
""" gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
assert x.is_contiguous()
assert x.dtype == torch.uint8
return faiss.cast_integer_to_uint8_ptr(
x.storage().data_ptr() + x.storage_offset())
def swig_ptr_from_HalfTensor(x):
""" gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
assert x.is_contiguous()
assert x.dtype == torch.float16
    # no canonical half type in C/C++, so return a void*
    return faiss.cast_integer_to_void_ptr(
        x.storage().data_ptr() + x.storage_offset() * 2)  # 2 bytes per fp16
def swig_ptr_from_FloatTensor(x):
""" gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
assert x.is_contiguous()
assert x.dtype == torch.float32
return faiss.cast_integer_to_float_ptr(
x.storage().data_ptr() + x.storage_offset() * 4)
def swig_ptr_from_IntTensor(x):
""" gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
assert x.is_contiguous()
assert x.dtype == torch.int32, 'dtype=%s' % x.dtype
    return faiss.cast_integer_to_int_ptr(
        x.storage().data_ptr() + x.storage_offset() * 4)  # 4 bytes per int32
def swig_ptr_from_IndicesTensor(x):
""" gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
assert x.is_contiguous()
assert x.dtype == torch.int64, 'dtype=%s' % x.dtype
return faiss.cast_integer_to_idx_t_ptr(
x.storage().data_ptr() + x.storage_offset() * 8)
@contextlib.contextmanager
def using_stream(res, pytorch_stream=None):
""" Creates a scoping object to make Faiss GPU use the same stream
as pytorch, based on torch.cuda.current_stream().
Or, a specific pytorch stream can be passed in as a second
argument, in which case we will use that stream.
"""
if pytorch_stream is None:
pytorch_stream = torch.cuda.current_stream()
# This is the cudaStream_t that we wish to use
cuda_stream_s = faiss.cast_integer_to_cudastream_t(pytorch_stream.cuda_stream)
# So we can revert GpuResources stream state upon exit
prior_dev = torch.cuda.current_device()
prior_stream = res.getDefaultStream(torch.cuda.current_device())
res.setDefaultStream(torch.cuda.current_device(), cuda_stream_s)
# Do the user work
try:
yield
finally:
res.setDefaultStream(prior_dev, prior_stream)
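# Example (a sketch, assuming a GPU build of Faiss and an existing GPU index):
#   res = faiss.StandardGpuResources()
#   with using_stream(res):
#       gpu_index.add(xb)   # Faiss work is ordered on torch.cuda.current_stream()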
def torch_replace_method(the_class, name, replacement,
ignore_missing=False, ignore_no_base=False):
try:
orig_method = getattr(the_class, name)
except AttributeError:
if ignore_missing:
return
raise
if orig_method.__name__ == 'torch_replacement_' + name:
# replacement was done in parent class
return
# We should already have the numpy replacement methods patched
assert ignore_no_base or (orig_method.__name__ == 'replacement_' + name)
setattr(the_class, name + '_numpy', orig_method)
setattr(the_class, name, replacement)
def handle_torch_Index(the_class):
def torch_replacement_add(self, x):
if type(x) is np.ndarray:
# forward to faiss __init__.py base method
return self.add_numpy(x)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.add_c(n, x_ptr)
else:
# CPU torch
self.add_c(n, x_ptr)
def torch_replacement_add_with_ids(self, x, ids):
if type(x) is np.ndarray:
# forward to faiss __init__.py base method
return self.add_with_ids_numpy(x, ids)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
assert type(ids) is torch.Tensor
assert ids.shape == (n, ), 'not same number of vectors as ids'
ids_ptr = swig_ptr_from_IndicesTensor(ids)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.add_with_ids_c(n, x_ptr, ids_ptr)
else:
# CPU torch
self.add_with_ids_c(n, x_ptr, ids_ptr)
def torch_replacement_assign(self, x, k, labels=None):
if type(x) is np.ndarray:
# forward to faiss __init__.py base method
return self.assign_numpy(x, k, labels)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if labels is None:
labels = torch.empty(n, k, device=x.device, dtype=torch.int64)
else:
assert type(labels) is torch.Tensor
assert labels.shape == (n, k)
L_ptr = swig_ptr_from_IndicesTensor(labels)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.assign_c(n, x_ptr, L_ptr, k)
else:
# CPU torch
self.assign_c(n, x_ptr, L_ptr, k)
return labels
def torch_replacement_train(self, x):
if type(x) is np.ndarray:
# forward to faiss __init__.py base method
return self.train_numpy(x)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.train_c(n, x_ptr)
else:
# CPU torch
self.train_c(n, x_ptr)
def torch_replacement_search(self, x, k, D=None, I=None):
if type(x) is np.ndarray:
# forward to faiss __init__.py base method
return self.search_numpy(x, k, D, I)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if D is None:
D = torch.empty(n, k, device=x.device, dtype=torch.float32)
else:
assert type(D) is torch.Tensor
assert D.shape == (n, k)
D_ptr = swig_ptr_from_FloatTensor(D)
if I is None:
I = torch.empty(n, k, device=x.device, dtype=torch.int64)
else:
assert type(I) is torch.Tensor
assert I.shape == (n, k)
I_ptr = swig_ptr_from_IndicesTensor(I)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.search_c(n, x_ptr, k, D_ptr, I_ptr)
else:
# CPU torch
self.search_c(n, x_ptr, k, D_ptr, I_ptr)
return D, I
def torch_replacement_search_and_reconstruct(self, x, k, D=None, I=None, R=None):
if type(x) is np.ndarray:
# Forward to faiss __init__.py base method
return self.search_and_reconstruct_numpy(x, k, D, I, R)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if D is None:
D = torch.empty(n, k, device=x.device, dtype=torch.float32)
else:
assert type(D) is torch.Tensor
assert D.shape == (n, k)
D_ptr = swig_ptr_from_FloatTensor(D)
if I is None:
I = torch.empty(n, k, device=x.device, dtype=torch.int64)
else:
assert type(I) is torch.Tensor
assert I.shape == (n, k)
I_ptr = swig_ptr_from_IndicesTensor(I)
if R is None:
R = torch.empty(n, k, d, device=x.device, dtype=torch.float32)
else:
assert type(R) is torch.Tensor
assert R.shape == (n, k, d)
R_ptr = swig_ptr_from_FloatTensor(R)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.search_and_reconstruct_c(n, x_ptr, k, D_ptr, I_ptr, R_ptr)
else:
# CPU torch
self.search_and_reconstruct_c(n, x_ptr, k, D_ptr, I_ptr, R_ptr)
return D, I, R
def torch_replacement_remove_ids(self, x):
# Not yet implemented
assert type(x) is not torch.Tensor, 'remove_ids not yet implemented for torch'
return self.remove_ids_numpy(x)
def torch_replacement_reconstruct(self, key, x=None):
        # No tensor inputs are required, but since this module has been
        # imported we assume that the default should be torch tensors. If we
        # are passed a numpy array, assume the user is overriding this default
if (x is not None) and (type(x) is np.ndarray):
# Forward to faiss __init__.py base method
return self.reconstruct_numpy(key, x)
# If the index is a CPU index, the default device is CPU, otherwise we
# produce a GPU tensor
device = torch.device('cpu')
if hasattr(self, 'getDevice'):
# same device as the index
device = torch.device('cuda', self.getDevice())
if x is None:
x = torch.empty(self.d, device=device, dtype=torch.float32)
else:
assert type(x) is torch.Tensor
assert x.shape == (self.d, )
x_ptr = swig_ptr_from_FloatTensor(x)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.reconstruct_c(key, x_ptr)
else:
# CPU torch
self.reconstruct_c(key, x_ptr)
return x
def torch_replacement_reconstruct_n(self, n0, ni, x=None):
        # No tensor inputs are required, but since this module has been
        # imported we assume that the default should be torch tensors. If we
        # are passed a numpy array, assume the user is overriding this default
if (x is not None) and (type(x) is np.ndarray):
# Forward to faiss __init__.py base method
return self.reconstruct_n_numpy(n0, ni, x)
# If the index is a CPU index, the default device is CPU, otherwise we
# produce a GPU tensor
device = torch.device('cpu')
if hasattr(self, 'getDevice'):
# same device as the index
device = torch.device('cuda', self.getDevice())
if x is None:
x = torch.empty(ni, self.d, device=device, dtype=torch.float32)
else:
assert type(x) is torch.Tensor
assert x.shape == (ni, self.d)
x_ptr = swig_ptr_from_FloatTensor(x)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.reconstruct_n_c(n0, ni, x_ptr)
else:
# CPU torch
self.reconstruct_n_c(n0, ni, x_ptr)
return x
def torch_replacement_update_vectors(self, keys, x):
if type(keys) is np.ndarray:
# Forward to faiss __init__.py base method
return self.update_vectors_numpy(keys, x)
assert type(keys) is torch.Tensor
(n, ) = keys.shape
keys_ptr = swig_ptr_from_IndicesTensor(keys)
assert type(x) is torch.Tensor
assert x.shape == (n, self.d)
x_ptr = swig_ptr_from_FloatTensor(x)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.update_vectors_c(n, keys_ptr, x_ptr)
else:
# CPU torch
self.update_vectors_c(n, keys_ptr, x_ptr)
# Until the GPU version is implemented, we do not support pre-allocated
# output buffers
def torch_replacement_range_search(self, x, thresh):
if type(x) is np.ndarray:
# Forward to faiss __init__.py base method
return self.range_search_numpy(x, thresh)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
assert not x.is_cuda, 'Range search using GPU tensor not yet implemented'
assert not hasattr(self, 'getDevice'), 'Range search on GPU index not yet implemented'
res = faiss.RangeSearchResult(n)
self.range_search_c(n, x_ptr, thresh, res)
# get pointers and copy them
# FIXME: no rev_swig_ptr equivalent for torch.Tensor, just convert
# np to torch
# NOTE: torch does not support np.uint64, just np.int64
lims = torch.from_numpy(faiss.rev_swig_ptr(res.lims, n + 1).copy().astype('int64'))
nd = int(lims[-1])
D = torch.from_numpy(faiss.rev_swig_ptr(res.distances, nd).copy())
I = torch.from_numpy(faiss.rev_swig_ptr(res.labels, nd).copy())
return lims, D, I
def torch_replacement_sa_encode(self, x, codes=None):
if type(x) is np.ndarray:
# Forward to faiss __init__.py base method
return self.sa_encode_numpy(x, codes)
assert type(x) is torch.Tensor
n, d = x.shape
assert d == self.d
x_ptr = swig_ptr_from_FloatTensor(x)
if codes is None:
codes = torch.empty(n, self.sa_code_size(), dtype=torch.uint8)
else:
assert codes.shape == (n, self.sa_code_size())
codes_ptr = swig_ptr_from_UInt8Tensor(codes)
if x.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.sa_encode_c(n, x_ptr, codes_ptr)
else:
# CPU torch
self.sa_encode_c(n, x_ptr, codes_ptr)
return codes
def torch_replacement_sa_decode(self, codes, x=None):
if type(codes) is np.ndarray:
# Forward to faiss __init__.py base method
return self.sa_decode_numpy(codes, x)
assert type(codes) is torch.Tensor
n, cs = codes.shape
assert cs == self.sa_code_size()
codes_ptr = swig_ptr_from_UInt8Tensor(codes)
if x is None:
x = torch.empty(n, self.d, dtype=torch.float32)
else:
assert type(x) is torch.Tensor
assert x.shape == (n, self.d)
x_ptr = swig_ptr_from_FloatTensor(x)
if codes.is_cuda:
assert hasattr(self, 'getDevice'), 'GPU tensor on CPU index not allowed'
# On the GPU, use proper stream ordering
with using_stream(self.getResources()):
self.sa_decode_c(n, codes_ptr, x_ptr)
else:
# CPU torch
self.sa_decode_c(n, codes_ptr, x_ptr)
return x
torch_replace_method(the_class, 'add', torch_replacement_add)
torch_replace_method(the_class, 'add_with_ids', torch_replacement_add_with_ids)
torch_replace_method(the_class, 'assign', torch_replacement_assign)
torch_replace_method(the_class, 'train', torch_replacement_train)
torch_replace_method(the_class, 'search', torch_replacement_search)
torch_replace_method(the_class, 'remove_ids', torch_replacement_remove_ids)
torch_replace_method(the_class, 'reconstruct', torch_replacement_reconstruct)
torch_replace_method(the_class, 'reconstruct_n', torch_replacement_reconstruct_n)
torch_replace_method(the_class, 'range_search', torch_replacement_range_search)
torch_replace_method(the_class, 'update_vectors', torch_replacement_update_vectors,
ignore_missing=True)
torch_replace_method(the_class, 'search_and_reconstruct',
torch_replacement_search_and_reconstruct, ignore_missing=True)
torch_replace_method(the_class, 'sa_encode', torch_replacement_sa_encode)
torch_replace_method(the_class, 'sa_decode', torch_replacement_sa_decode)
faiss_module = sys.modules['faiss']
# Re-patch anything that inherits from faiss.Index to add the torch bindings
for symbol in dir(faiss_module):
obj = getattr(faiss_module, symbol)
if inspect.isclass(obj):
the_class = obj
if issubclass(the_class, faiss.Index):
handle_torch_Index(the_class)
# allows torch tensor usage with bfKnn
def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRIC_L2):
if type(xb) is np.ndarray:
# Forward to faiss __init__.py base method
return faiss.knn_gpu_numpy(res, xq, xb, k, D, I, metric)
nb, d = xb.size()
if xb.is_contiguous():
xb_row_major = True
elif xb.t().is_contiguous():
xb = xb.t()
xb_row_major = False
else:
raise TypeError('matrix should be row or column-major')
if xb.dtype == torch.float32:
xb_type = faiss.DistanceDataType_F32
xb_ptr = swig_ptr_from_FloatTensor(xb)
elif xb.dtype == torch.float16:
xb_type = faiss.DistanceDataType_F16
xb_ptr = swig_ptr_from_HalfTensor(xb)
else:
raise TypeError('xb must be f32 or f16')
nq, d2 = xq.size()
assert d2 == d
if xq.is_contiguous():
xq_row_major = True
elif xq.t().is_contiguous():
xq = xq.t()
xq_row_major = False
else:
raise TypeError('matrix should be row or column-major')
if xq.dtype == torch.float32:
xq_type = faiss.DistanceDataType_F32
xq_ptr = swig_ptr_from_FloatTensor(xq)
elif xq.dtype == torch.float16:
xq_type = faiss.DistanceDataType_F16
xq_ptr = swig_ptr_from_HalfTensor(xq)
else:
raise TypeError('xq must be f32 or f16')
if D is None:
D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
else:
assert D.shape == (nq, k)
# interface takes void*, we need to check this
assert (D.dtype == torch.float32)
if I is None:
I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
else:
assert I.shape == (nq, k)
if I.dtype == torch.int64:
I_type = faiss.IndicesDataType_I64
I_ptr = swig_ptr_from_IndicesTensor(I)
    elif I.dtype == torch.int32:
I_type = faiss.IndicesDataType_I32
I_ptr = swig_ptr_from_IntTensor(I)
else:
raise TypeError('I must be i64 or i32')
D_ptr = swig_ptr_from_FloatTensor(D)
args = faiss.GpuDistanceParams()
args.metric = metric
args.k = k
args.dims = d
args.vectors = xb_ptr
args.vectorsRowMajor = xb_row_major
args.vectorType = xb_type
args.numVectors = nb
args.queries = xq_ptr
args.queriesRowMajor = xq_row_major
args.queryType = xq_type
args.numQueries = nq
args.outDistances = D_ptr
args.outIndices = I_ptr
args.outIndicesType = I_type
with using_stream(res):
faiss.bfKnn(res, args)
return D, I
torch_replace_method(faiss_module, 'knn_gpu', torch_replacement_knn_gpu, True, True)
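# Example (a sketch, assuming a GPU build of Faiss and an available device):
#   res = faiss.StandardGpuResources()
#   xb = torch.rand(10000, 64, device='cuda')
#   xq = torch.rand(100, 64, device='cuda')
#   D, I = faiss.knn_gpu(res, xq, xb, 10)   # brute-force 10-NN via bfKnn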
# allows torch tensor usage with bfKnn for all pairwise distances
def torch_replacement_pairwise_distance_gpu(res, xq, xb, D=None, metric=faiss.METRIC_L2):
if type(xb) is np.ndarray:
# Forward to faiss __init__.py base method
return faiss.pairwise_distance_gpu_numpy(res, xq, xb, D, metric)
nb, d = xb.size()
if xb.is_contiguous():
xb_row_major = True
elif xb.t().is_contiguous():
xb = xb.t()
xb_row_major = False
else:
raise TypeError('xb matrix should be row or column-major')
if xb.dtype == torch.float32:
xb_type = faiss.DistanceDataType_F32
xb_ptr = swig_ptr_from_FloatTensor(xb)
elif xb.dtype == torch.float16:
xb_type = faiss.DistanceDataType_F16
xb_ptr = swig_ptr_from_HalfTensor(xb)
else:
raise TypeError('xb must be float32 or float16')
nq, d2 = xq.size()
assert d2 == d
if xq.is_contiguous():
xq_row_major = True
elif xq.t().is_contiguous():
xq = xq.t()
xq_row_major = False
else:
raise TypeError('xq matrix should be row or column-major')
if xq.dtype == torch.float32:
xq_type = faiss.DistanceDataType_F32
xq_ptr = swig_ptr_from_FloatTensor(xq)
elif xq.dtype == torch.float16:
xq_type = faiss.DistanceDataType_F16
xq_ptr = swig_ptr_from_HalfTensor(xq)
else:
raise TypeError('xq must be float32 or float16')
if D is None:
D = torch.empty(nq, nb, device=xb.device, dtype=torch.float32)
else:
assert D.shape == (nq, nb)
# interface takes void*, we need to check this
assert (D.dtype == torch.float32)
D_ptr = swig_ptr_from_FloatTensor(D)
args = faiss.GpuDistanceParams()
args.metric = metric
    args.k = -1  # k = -1 requests all pairwise distances
args.dims = d
args.vectors = xb_ptr
args.vectorsRowMajor = xb_row_major
args.vectorType = xb_type
args.numVectors = nb
args.queries = xq_ptr
args.queriesRowMajor = xq_row_major
args.queryType = xq_type
args.numQueries = nq
args.outDistances = D_ptr
with using_stream(res):
faiss.bfKnn(res, args)
return D
torch_replace_method(faiss_module, 'pairwise_distance_gpu', torch_replacement_pairwise_distance_gpu, True, True)
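# Example (a sketch, same assumptions as the knn_gpu example above):
#   D = faiss.pairwise_distance_gpu(res, xq, xb)  # (nq, nb) distance matrix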
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
I/O functions in fvecs, bvecs, ivecs formats.
The formats are defined here: http://corpus-texmex.irisa.fr/
"""
import numpy as np
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
def ivecs_mmap(fname):
a = np.memmap(fname, dtype='int32', mode='r')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:]
def fvecs_mmap(fname):
return ivecs_mmap(fname).view('float32')
def bvecs_mmap(fname):
x = np.memmap(fname, dtype='uint8', mode='r')
d = x[:4].view('int32')[0]
return x.reshape(-1, d + 4)[:, 4:]
def ivecs_write(fname, m):
n, d = m.shape
m1 = np.empty((n, d + 1), dtype='int32')
m1[:, 0] = d
m1[:, 1:] = m
m1.tofile(fname)
def fvecs_write(fname, m):
m = m.astype('float32')
ivecs_write(fname, m.view('int32'))
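# Round-trip sketch (illustrative; the file name below is hypothetical):
if __name__ == '__main__':
    m = np.random.rand(5, 8).astype('float32')
    fvecs_write('/tmp/example.fvecs', m)
    assert np.allclose(fvecs_read('/tmp/example.fvecs'), m)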
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
add_executable(demo_imi_flat EXCLUDE_FROM_ALL demo_imi_flat.cpp)
target_link_libraries(demo_imi_flat PRIVATE faiss)
add_executable(demo_imi_pq EXCLUDE_FROM_ALL demo_imi_pq.cpp)
target_link_libraries(demo_imi_pq PRIVATE faiss)
add_executable(demo_ivfpq_indexing EXCLUDE_FROM_ALL demo_ivfpq_indexing.cpp)
target_link_libraries(demo_ivfpq_indexing PRIVATE faiss)
add_executable(demo_nndescent EXCLUDE_FROM_ALL demo_nndescent.cpp)
target_link_libraries(demo_nndescent PRIVATE faiss)
add_executable(demo_sift1M EXCLUDE_FROM_ALL demo_sift1M.cpp)
target_link_libraries(demo_sift1M PRIVATE faiss)
add_executable(demo_weighted_kmeans EXCLUDE_FROM_ALL demo_weighted_kmeans.cpp)
target_link_libraries(demo_weighted_kmeans PRIVATE faiss)
Demos for a few Faiss functionalities
=====================================
demo_auto_tune.py
-----------------
Demonstrates the auto-tuning functionality of Faiss
demo_ondisk_ivf.py
------------------
Shows how to construct a Faiss index that stores the inverted file
data on disk, e.g. when it does not fit in RAM. The script works on a
small dataset (sift1M) for demonstration and proceeds in stages:
0: train on the dataset
1-4: build 4 indexes, each containing 1/4 of the dataset. This can be
done in parallel on several machines
5: merge the 4 indexes into one that is written directly to disk
(so the merged index need not fit in RAM)
6: load and test the index
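Each stage is run as a separate process, as in: python demo_ondisk_ivf.py 0
(stages 1-4 are independent of each other and can run in parallel).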
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import print_function
import os
import time
import numpy as np
try:
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
graphical_output = True
except ImportError:
graphical_output = False
import faiss
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype="int32")
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
def plot_OperatingPoints(ops, nq, **kwargs):
ops = ops.optimal_pts
n = ops.size() * 2 - 1
pyplot.plot([ops.at( i // 2).perf for i in range(n)],
[ops.at((i + 1) // 2).t / nq * 1000 for i in range(n)],
**kwargs)
#################################################################
# prepare common data for all indexes
#################################################################
t0 = time.time()
print("load data")
xt = fvecs_read("sift1M/sift_learn.fvecs")
xb = fvecs_read("sift1M/sift_base.fvecs")
xq = fvecs_read("sift1M/sift_query.fvecs")
d = xt.shape[1]
print("load GT")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
gt = gt.astype('int64')
k = gt.shape[1]
print("prepare criterion")
# criterion = 1-recall at 1
crit = faiss.OneRecallAtRCriterion(xq.shape[0], 1)
crit.set_groundtruth(None, gt)
crit.nnn = k
# indexes that are useful when there is no limitation on memory usage
unlimited_mem_keys = [
"IMI2x10,Flat", "IMI2x11,Flat",
"IVF4096,Flat", "IVF16384,Flat",
"PCA64,IMI2x10,Flat"]
# memory limited to 16 bytes / vector
keys_mem_16 = [
'IMI2x10,PQ16', 'IVF4096,PQ16',
'IMI2x10,PQ8+8', 'OPQ16_64,IMI2x10,PQ16'
]
# limited to 32 bytes / vector
keys_mem_32 = [
'IMI2x10,PQ32', 'IVF4096,PQ32', 'IVF16384,PQ32',
'IMI2x10,PQ16+16',
'OPQ32,IVF4096,PQ32', 'IVF4096,PQ16+16', 'OPQ16,IMI2x10,PQ16+16'
]
# indexes that can run on the GPU
keys_gpu = [
"PCA64,IVF4096,Flat",
"PCA64,Flat", "Flat", "IVF4096,Flat", "IVF16384,Flat",
"IVF4096,PQ32"]
keys_to_test = unlimited_mem_keys
use_gpu = False
if use_gpu:
    # if this fails, it means that the GPU version was not compiled in
    assert faiss.StandardGpuResources, \
        "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed"
res = faiss.StandardGpuResources()
dev_no = 0
# remember results from other index types
op_per_key = []
# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()
for index_key in keys_to_test:
print("============ key", index_key)
# make the index described by the key
index = faiss.index_factory(d, index_key)
if use_gpu:
# transfer to GPU (may be partial)
index = faiss.index_cpu_to_gpu(res, dev_no, index)
params = faiss.GpuParameterSpace()
else:
params = faiss.ParameterSpace()
params.initialize(index)
print("[%.3f s] train & add" % (time.time() - t0))
index.train(xt)
index.add(xb)
print("[%.3f s] explore op points" % (time.time() - t0))
# find operating points for this index
opi = params.explore(index, xq, crit)
print("[%.3f s] result operating points:" % (time.time() - t0))
opi.display()
# update best operating points so far
op.merge_with(opi, index_key + " ")
op_per_key.append((index_key, opi))
if graphical_output:
# graphical output (to tmp/ subdirectory)
fig = pyplot.figure(figsize=(12, 9))
pyplot.xlabel("1-recall at 1")
pyplot.ylabel("search time (ms/query, %d threads)" % faiss.omp_get_max_threads())
pyplot.gca().set_yscale('log')
pyplot.grid()
for i2, opi2 in op_per_key:
plot_OperatingPoints(opi2, crit.nq, label = i2, marker = 'o')
# plot_OperatingPoints(op, crit.nq, label = 'best', marker = 'o', color = 'r')
pyplot.legend(loc=2)
fig.savefig('tmp/demo_auto_tune.png')
print("[%.3f s] final result:" % (time.time() - t0))
op.display()
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import numpy as np
import faiss
from faiss.contrib.client_server import run_index_server, ClientIndex
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
#################################################################
# Main program
#################################################################
stage = int(sys.argv[1])
tmpdir = '/tmp/'
if stage == 0:
# train the index
xt = fvecs_read("sift1M/sift_learn.fvecs")
index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
print("training index")
index.train(xt)
print("write " + tmpdir + "trained.index")
faiss.write_index(index, tmpdir + "trained.index")
if 1 <= stage <= 4:
# add 1/4 of the database to 4 independent indexes
bno = stage - 1
xb = fvecs_read("sift1M/sift_base.fvecs")
i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4)
index = faiss.read_index(tmpdir + "trained.index")
print("adding vectors %d:%d" % (i0, i1))
index.add_with_ids(xb[i0:i1], np.arange(i0, i1))
print("write " + tmpdir + "block_%d.index" % bno)
faiss.write_index(index, tmpdir + "block_%d.index" % bno)
machine_ports = [
('localhost', 12010),
('localhost', 12011),
('localhost', 12012),
('localhost', 12013),
]
v6 = False
if 5 <= stage <= 8:
    # load an index slice and launch an index server for it
bno = stage - 5
fname = tmpdir + "block_%d.index" % bno
print("read " + fname)
index = faiss.read_index(fname)
port = machine_ports[bno][1]
run_index_server(index, port, v6=v6)
if stage == 9:
client_index = ClientIndex(machine_ports)
print('index size:', client_index.ntotal)
client_index.set_nprobe(16)
# load query vectors and ground-truth
xq = fvecs_read("sift1M/sift_query.fvecs")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
D, I = client_index.search(xq, 5)
recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0])
print("recall@1: %.3f" % recall_at_1)
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexPQ.h>
#include <faiss/index_io.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 128;
// size of the database we plan to index
size_t nb = 1000 * 1000;
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
//---------------------------------------------------------------
// Define the core quantizer
// We choose a multiple inverted index for faster training with less data
// and because it usually offers best accuracy/speed trade-offs
//
    // We assume here that the lifespan of this coarse quantizer will cover
    // the lifespan of the inverted-file index IndexIVFFlat below
    // With dynamic allocation, one may give the responsibility to free the
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
//
// Note: a regular clustering algorithm would be defined as:
// faiss::IndexFlatL2 coarse_quantizer (d);
//
    // Use nhash=2 subquantizers to define the product coarse quantizer
    // Number of bits: we will have 2^nbits_subq centroids per subquantizer,
    // meaning (2^nbits_subq)^nhash distinct inverted lists
size_t nhash = 2;
size_t nbits_subq = int(log2(nb + 1) / 2); // good choice in general
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
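    // e.g. with nb = 1M: nbits_subq = int(log2(1e6 + 1) / 2) = 9, so
    // ncentroids = 2^(2*9) = 262144 inverted lists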
faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
nhash,
nbits_subq,
ncentroids,
nb);
// the coarse quantizer should not be dealloced before the index
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
faiss::IndexIVFFlat index(&coarse_quantizer, d, ncentroids, metric);
index.quantizer_trains_alone = true;
    // define the number of probes. 2048 is for high-dim, overkill in practice
// Use 4-1024 depending on the trade-off speed accuracy that you want
index.nprobe = 2048;
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
{ // training
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
for (size_t i = 0; i < nt * d; i++) {
trainvecs[i] = distrib(rng);
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
for (size_t i = 0; i < nb * d; i++) {
database[i] = distrib(rng);
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
index.add(nb, database.data());
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1244;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::Index::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
}
return 0;
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexPQ.h>
#include <faiss/index_io.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 64;
// size of the database we plan to index
size_t nb = 1000 * 1000;
size_t add_bs = 10000; // # size of the blocks to add
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
//---------------------------------------------------------------
// Define the core quantizer
// We choose a multiple inverted index for faster training with less data
// and because it usually offers best accuracy/speed trade-offs
//
    // We assume here that the lifespan of this coarse quantizer will cover
    // the lifespan of the inverted-file index IndexIVFPQ below
    // With dynamic allocation, one may give the responsibility to free the
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
//
// Note: a regular clustering algorithm would be defined as:
// faiss::IndexFlatL2 coarse_quantizer (d);
//
    // Use nhash=2 subquantizers to define the product coarse quantizer
    // Number of bits: we will have 2^nbits_subq centroids per subquantizer,
    // meaning (2^nbits_subq)^nhash distinct inverted lists
//
// The parameter bytes_per_code is determined by the memory
// constraint, the dataset will use nb * (bytes_per_code + 8)
// bytes.
//
// The parameter nbits_subq is determined by the size of the dataset to
// index.
//
size_t nhash = 2;
size_t nbits_subq = 9;
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
int bytes_per_code = 16;
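    // e.g. here: ncentroids = 2^(2*9) = 262144 inverted lists, and the
    // codes + ids take about nb * (16 + 8) = 24 MB for nb = 1M vectors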
faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
nhash,
nbits_subq,
ncentroids,
nb);
// the coarse quantizer should not be dealloced before the index
    // bytes_per_code = nb of bytes per PQ code (d must be a multiple of this)
    // 8 = nb of bits per sub-code (almost always 8)
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
faiss::IndexIVFPQ index(
&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
index.quantizer_trains_alone = true;
// define the number of probes. 2048 is for high-dim, overkill in practice
// Use 4-1024 depending on the trade-off speed accuracy that you want
index.nprobe = 2048;
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
{ // training.
// The distribution of the training vectors should be the same
// as the database vectors. It could be a sub-sample of the
// database vectors, if sampling is not biased. Here we just
// randomly generate the vectors.
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
for (size_t i = 0; i < nt; i++) {
for (size_t j = 0; j < d; j++) {
trainvecs[i * d + j] = distrib(rng);
}
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
// the index can be re-loaded later with
// faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
faiss::write_index(&index, "/tmp/trained_index.faissindex");
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
std::vector<faiss::Index::idx_t> ids(nb);
for (size_t i = 0; i < nb; i++) {
for (size_t j = 0; j < d; j++) {
database[i * d + j] = distrib(rng);
}
ids[i] = 8760000000L + i;
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
for (size_t begin = 0; begin < nb; begin += add_bs) {
size_t end = std::min(begin + add_bs, nb);
index.add_with_ids(
end - begin,
database.data() + d * begin,
ids.data() + begin);
}
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1244;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
// A few notes on the internal format of the index:
//
    // - the posting lists for PQ codes are index.codes, which is a
// std::vector < std::vector<uint8_t> >
// if n is the length of posting list #i, codes[i] has length
// bytes_per_code * n
//
// - the corresponding ids are stored in index.ids
//
// - given a vector float *x, finding which k centroids are
    //   closest to it (i.e. the nearest centroids) can be done with
//
// faiss::Index::idx_t *centroid_ids = new faiss::Index::idx_t[k];
// float *distances = new float[k];
    //   index.quantizer->search(1, x, k, distances, centroid_ids);
//
faiss::write_index(&index, "/tmp/populated_index.faissindex");
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::Index::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
}
return 0;
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/index_io.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 128;
// size of the database we plan to index
size_t nb = 200 * 1000;
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
// make the index object and train it
faiss::IndexFlatL2 coarse_quantizer(d);
// a reasonable number of centroids to index nb vectors
int ncentroids = int(4 * sqrt(nb));
// the coarse quantizer should not be dealloced before the index
// 4 = nb of bytes per code (d must be a multiple of this)
// 8 = nb of bits per sub-code (almost always 8)
faiss::IndexIVFPQ index(&coarse_quantizer, d, ncentroids, 4, 8);
std::mt19937 rng;
{ // training
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
std::uniform_real_distribution<> distrib;
for (size_t i = 0; i < nt * d; i++) {
trainvecs[i] = distrib(rng);
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
{ // I/O demo
const char* outfilename = "/tmp/index_trained.faissindex";
printf("[%.3f s] storing the pre-trained index to %s\n",
elapsed() - t0,
outfilename);
write_index(&index, outfilename);
}
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
std::uniform_real_distribution<> distrib;
for (size_t i = 0; i < nb * d; i++) {
database[i] = distrib(rng);
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
index.add(nb, database.data());
printf("[%.3f s] imbalance factor: %g\n",
elapsed() - t0,
index.invlists->imbalance_factor());
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1243;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::Index::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
printf("note that the nearest neighbor is not at "
"distance 0 due to quantization errors\n");
}
return 0;
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/IndexNNDescent.h>
using namespace std::chrono;
int main(void) {
// dimension of the vectors to index
int d = 64;
int K = 64;
// size of the database we plan to index
size_t nb = 10000;
std::mt19937 rng(12345);
// make the index object and train it
faiss::IndexNNDescentFlat index(d, K, faiss::METRIC_L2);
index.nndescent.S = 10;
index.nndescent.R = 32;
index.nndescent.L = K;
index.nndescent.iter = 10;
index.verbose = true;
    // generate ground-truth labels with a brute-force IndexFlat
faiss::IndexFlat bruteforce(d, faiss::METRIC_L2);
std::vector<float> database(nb * d);
for (size_t i = 0; i < nb * d; i++) {
database[i] = rng() % 1024;
}
{ // populating the database
index.add(nb, database.data());
bruteforce.add(nb, database.data());
}
size_t nq = 1000;
{ // searching the database
printf("Searching ...\n");
index.nndescent.search_L = 50;
std::vector<float> queries(nq * d);
for (size_t i = 0; i < nq * d; i++) {
queries[i] = rng() % 1024;
}
int k = 5;
std::vector<faiss::IndexNNDescent::idx_t> nns(k * nq);
std::vector<faiss::IndexFlat::idx_t> gt_nns(k * nq);
std::vector<float> dis(k * nq);
auto start = high_resolution_clock::now();
index.search(nq, queries.data(), k, dis.data(), nns.data());
auto end = high_resolution_clock::now();
// find exact kNNs by brute force search
bruteforce.search(nq, queries.data(), k, dis.data(), gt_nns.data());
int recalls = 0;
for (size_t i = 0; i < nq; ++i) {
for (int n = 0; n < k; n++) {
for (int m = 0; m < k; m++) {
if (nns[i * k + n] == gt_nns[i * k + m]) {
recalls += 1;
}
}
}
}
float recall = 1.0f * recalls / (k * nq);
auto t = duration_cast<microseconds>(end - start).count();
int qps = nq * 1.0f * 1000 * 1000 / t;
printf("Recall@%d: %f, QPS: %d\n", k, recall, qps);
}
}
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import numpy as np
import faiss
from faiss.contrib.ondisk import merge_ondisk
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
#################################################################
# Main program
#################################################################
stage = int(sys.argv[1])
tmpdir = '/tmp/'
if stage == 0:
# train the index
xt = fvecs_read("sift1M/sift_learn.fvecs")
index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
print("training index")
index.train(xt)
print("write " + tmpdir + "trained.index")
faiss.write_index(index, tmpdir + "trained.index")
if 1 <= stage <= 4:
# add 1/4 of the database to 4 independent indexes
bno = stage - 1
xb = fvecs_read("sift1M/sift_base.fvecs")
i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4)
index = faiss.read_index(tmpdir + "trained.index")
print("adding vectors %d:%d" % (i0, i1))
index.add_with_ids(xb[i0:i1], np.arange(i0, i1))
print("write " + tmpdir + "block_%d.index" % bno)
faiss.write_index(index, tmpdir + "block_%d.index" % bno)
if stage == 5:
print('loading trained index')
# construct the output index
index = faiss.read_index(tmpdir + "trained.index")
block_fnames = [
tmpdir + "block_%d.index" % bno
for bno in range(4)
]
merge_ondisk(index, block_fnames, tmpdir + "merged_index.ivfdata")
print("write " + tmpdir + "populated.index")
faiss.write_index(index, tmpdir + "populated.index")
if stage == 6:
# perform a search from disk
print("read " + tmpdir + "populated.index")
index = faiss.read_index(tmpdir + "populated.index")
index.nprobe = 16
# load query vectors and ground-truth
xq = fvecs_read("sift1M/sift_query.fvecs")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
D, I = index.search(xq, 5)
recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0])
print("recall@1: %.3f" % recall_at_1)
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/time.h>
#include <faiss/AutoTune.h>
#include <faiss/index_factory.h>
/**
* To run this demo, please download the ANN_SIFT1M dataset from
*
* http://corpus-texmex.irisa.fr/
*
 * and unzip it to the subdirectory sift1M.
**/
/*****************************************************
* I/O functions for fvecs and ivecs
*****************************************************/
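// Each fvecs entry is an int32 dimension d followed by d float32
// components, i.e. (d + 1) * 4 bytes per vector; ivecs uses the same
// layout with int32 components (see http://corpus-texmex.irisa.fr/)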
float* fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
FILE* f = fopen(fname, "r");
if (!f) {
fprintf(stderr, "could not open %s\n", fname);
perror("");
abort();
}
int d;
fread(&d, 1, sizeof(int), f);
assert((d > 0 && d < 1000000) || !"unreasonable dimension");
fseek(f, 0, SEEK_SET);
struct stat st;
fstat(fileno(f), &st);
size_t sz = st.st_size;
assert(sz % ((d + 1) * 4) == 0 || !"weird file size");
size_t n = sz / ((d + 1) * 4);
*d_out = d;
*n_out = n;
float* x = new float[n * (d + 1)];
size_t nr = fread(x, sizeof(float), n * (d + 1), f);
assert(nr == n * (d + 1) || !"could not read whole file");
// shift array to remove row headers
for (size_t i = 0; i < n; i++)
memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x));
fclose(f);
return x;
}
// not very clean, but works as long as sizeof(int) == sizeof(float)
int* ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
return (int*)fvecs_read(fname, d_out, n_out);
}
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// this is typically the fastest one.
const char* index_key = "IVF4096,Flat";
// these ones have better memory usage
// const char *index_key = "Flat";
// const char *index_key = "PQ32";
// const char *index_key = "PCA80,Flat";
// const char *index_key = "IVF4096,PQ8+16";
// const char *index_key = "IVF4096,PQ32";
// const char *index_key = "IMI2x8,PQ32";
// const char *index_key = "IMI2x8,PQ8+16";
// const char *index_key = "OPQ16_64,IMI2x8,PQ8+16";
faiss::Index* index;
size_t d;
{
printf("[%.3f s] Loading train set\n", elapsed() - t0);
size_t nt;
float* xt = fvecs_read("sift1M/sift_learn.fvecs", &d, &nt);
printf("[%.3f s] Preparing index \"%s\" d=%ld\n",
elapsed() - t0,
index_key,
d);
index = faiss::index_factory(d, index_key);
printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
index->train(nt, xt);
delete[] xt;
}
{
printf("[%.3f s] Loading database\n", elapsed() - t0);
size_t nb, d2;
float* xb = fvecs_read("sift1M/sift_base.fvecs", &d2, &nb);
assert(d == d2 || !"dataset does not have same dimension as train set");
printf("[%.3f s] Indexing database, size %ld*%ld\n",
elapsed() - t0,
nb,
d);
index->add(nb, xb);
delete[] xb;
}
size_t nq;
float* xq;
{
printf("[%.3f s] Loading queries\n", elapsed() - t0);
size_t d2;
xq = fvecs_read("sift1M/sift_query.fvecs", &d2, &nq);
assert(d == d2 || !"query does not have same dimension as train set");
}
size_t k; // nb of results per query in the GT
faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors
{
printf("[%.3f s] Loading ground truth for %ld queries\n",
elapsed() - t0,
nq);
// load ground-truth and convert int to long
size_t nq2;
int* gt_int = ivecs_read("sift1M/sift_groundtruth.ivecs", &k, &nq2);
assert(nq2 == nq || !"incorrect nb of ground truth entries");
gt = new faiss::Index::idx_t[k * nq];
        for (size_t i = 0; i < k * nq; i++) {
gt[i] = gt_int[i];
}
delete[] gt_int;
}
// Result of the auto-tuning
std::string selected_params;
{ // run auto-tuning
printf("[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
"criterion, with k=%ld nq=%ld\n",
elapsed() - t0,
k,
nq);
faiss::OneRecallAtRCriterion crit(nq, 1);
crit.set_groundtruth(k, nullptr, gt);
crit.nnn = k; // by default, the criterion will request only 1 NN
printf("[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
faiss::ParameterSpace params;
params.initialize(index);
printf("[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
elapsed() - t0,
params.parameter_ranges.size(),
params.n_combinations());
faiss::OperatingPoints ops;
params.explore(index, nq, xq, crit, &ops);
printf("[%.3f s] Found the following operating points: \n",
elapsed() - t0);
ops.display();
// keep the first parameter that obtains > 0.5 1-recall@1
for (int i = 0; i < ops.optimal_pts.size(); i++) {
if (ops.optimal_pts[i].perf > 0.5) {
selected_params = ops.optimal_pts[i].key;
break;
}
}
        assert(!selected_params.empty() ||
               !"could not find good enough op point");
}
{ // Use the found configuration to perform a search
faiss::ParameterSpace params;
printf("[%.3f s] Setting parameter configuration \"%s\" on index\n",
elapsed() - t0,
selected_params.c_str());
params.set_index_parameters(index, selected_params.c_str());
printf("[%.3f s] Perform a search on %ld queries\n",
elapsed() - t0,
nq);
// output buffers
faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k];
float* D = new float[nq * k];
index->search(nq, xq, k, D, I);
printf("[%.3f s] Compute recalls\n", elapsed() - t0);
// evaluate result by hand.
int n_1 = 0, n_10 = 0, n_100 = 0;
for (int i = 0; i < nq; i++) {
int gt_nn = gt[i * k];
for (int j = 0; j < k; j++) {
if (I[i * k + j] == gt_nn) {
if (j < 1)
n_1++;
if (j < 10)
n_10++;
if (j < 100)
n_100++;
}
}
}
printf("R@1 = %.4f\n", n_1 / float(nq));
printf("R@10 = %.4f\n", n_10 / float(nq));
printf("R@100 = %.4f\n", n_100 / float(nq));
delete[] I;
delete[] D;
}
delete[] xq;
delete[] gt;
delete index;
return 0;
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
namespace {
enum WeightedKMeansType {
WKMT_FlatL2,
WKMT_FlatIP,
WKMT_FlatIP_spherical,
WKMT_HNSW,
};
float weighted_kmeans_clustering(
size_t d,
size_t n,
size_t k,
const float* input,
const float* weights,
float* centroids,
WeightedKMeansType index_num) {
using namespace faiss;
Clustering clus(d, k);
clus.verbose = true;
std::unique_ptr<Index> index;
switch (index_num) {
case WKMT_FlatL2:
index.reset(new IndexFlatL2(d));
break;
case WKMT_FlatIP:
index.reset(new IndexFlatIP(d));
break;
case WKMT_FlatIP_spherical:
index.reset(new IndexFlatIP(d));
clus.spherical = true;
break;
case WKMT_HNSW: {
IndexHNSWFlat* ihnsw = new IndexHNSWFlat(d, 32);
ihnsw->hnsw.efSearch = 128;
index.reset(ihnsw);
break;
}
}
clus.train(n, input, *index.get(), weights);
// on output the index contains the centroids.
memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
return clus.iteration_stats.back().obj;
}
int d = 32;
float sigma = 0.1;
#define BIGTEST
#ifdef BIGTEST
// the production setup = setting of https://fb.quip.com/CWgnAAYbwtgs
int nc = 200000;
int n_big = 4;
int n_small = 2;
#else
int nc = 5;
int n_big = 100;
int n_small = 10;
#endif
int n; // number of training points
void generate_trainset(
std::vector<float>& ccent,
std::vector<float>& x,
std::vector<float>& weights) {
// same sampling as test_build_blocks.py test_weighted
ccent.resize(d * 2 * nc);
faiss::float_randn(ccent.data(), d * 2 * nc, 123);
faiss::fvec_renorm_L2(d, 2 * nc, ccent.data());
n = nc * n_big + nc * n_small;
x.resize(d * n);
weights.resize(n);
faiss::float_randn(x.data(), x.size(), 1234);
float* xi = x.data();
float* w = weights.data();
for (int ci = 0; ci < nc * 2; ci++) { // loop over centroids
int np = ci < nc ? n_big : n_small; // nb of points around this centroid
for (int i = 0; i < np; i++) {
for (int j = 0; j < d; j++) {
xi[j] = xi[j] * sigma + ccent[ci * d + j];
}
*w++ = ci < nc ? 0.1 : 10;
xi += d;
}
}
}
} // namespace
int main(int argc, char** argv) {
std::vector<float> ccent;
std::vector<float> x;
std::vector<float> weights;
printf("generate training set\n");
generate_trainset(ccent, x, weights);
std::vector<float> centroids;
centroids.resize(nc * d);
int the_index_num = -1;
int the_with_weights = -1;
if (argc == 3) {
the_index_num = atoi(argv[1]);
the_with_weights = atoi(argv[2]);
}
for (int index_num = WKMT_FlatL2; index_num <= WKMT_HNSW; index_num++) {
if (the_index_num >= 0 && index_num != the_index_num) {
continue;
}
for (int with_weights = 0; with_weights <= 1; with_weights++) {
if (the_with_weights >= 0 && with_weights != the_with_weights) {
continue;
}
printf("=================== index_num=%d Run %s weights\n",
index_num,
with_weights ? "with" : "without");
weighted_kmeans_clustering(
d,
n,
nc,
x.data(),
with_weights ? weights.data() : nullptr,
centroids.data(),
(WeightedKMeansType)index_num);
{ // compute distance of the true cluster centers to the learned centroids
faiss::IndexFlatL2 cent_index(d);
cent_index.add(nc, centroids.data());
std::vector<float> dis(nc * 2);
std::vector<faiss::Index::idx_t> idx(nc * 2);
cent_index.search(
nc * 2, ccent.data(), 1, dis.data(), idx.data());
float dis1 = 0, dis2 = 0;
for (int i = 0; i < nc; i++) {
dis1 += dis[i];
}
printf("average distance of points from big clusters: %g\n",
dis1 / nc);
for (int i = 0; i < nc; i++) {
dis2 += dis[i + nc];
}
printf("average distance of points from small clusters: %g\n",
dis2 / nc);
}
}
}
return 0;
}
[flake8]
# Ignore flakes about ambiguous variable name `I`.
ignore = E741
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
/*
* implementation of Hyper-parameter auto-tuning
*/
#include <faiss/AutoTune.h>
#include <cinttypes>
#include <cmath>
#include <typeinfo>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexIVFPQR.h>
#include <faiss/IndexLSH.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexRefine.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/MetaIndexes.h>
#include <faiss/VectorTransform.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexBinaryHNSW.h>
#include <faiss/IndexBinaryIVF.h>
namespace faiss {
AutoTuneCriterion::AutoTuneCriterion(idx_t nq, idx_t nnn)
: nq(nq), nnn(nnn), gt_nnn(0) {}
void AutoTuneCriterion::set_groundtruth(
int gt_nnn,
const float* gt_D_in,
const idx_t* gt_I_in) {
this->gt_nnn = gt_nnn;
if (gt_D_in) { // allow null for this, as it is often not used
gt_D.resize(nq * gt_nnn);
memcpy(gt_D.data(), gt_D_in, sizeof(gt_D[0]) * nq * gt_nnn);
}
gt_I.resize(nq * gt_nnn);
memcpy(gt_I.data(), gt_I_in, sizeof(gt_I[0]) * nq * gt_nnn);
}
OneRecallAtRCriterion::OneRecallAtRCriterion(idx_t nq, idx_t R)
: AutoTuneCriterion(nq, R), R(R) {}
double OneRecallAtRCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= 1 && nnn >= R),
"ground truth not initialized");
idx_t n_ok = 0;
for (idx_t q = 0; q < nq; q++) {
idx_t gt_nn = gt_I[q * gt_nnn];
const idx_t* I_line = I + q * nnn;
for (int i = 0; i < R; i++) {
if (I_line[i] == gt_nn) {
n_ok++;
break;
}
}
}
return n_ok / double(nq);
}
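// Worked example (hypothetical values): with nq = 2, R = 2, gt_nnn = 1,
// ground truth gt_I = {7, 3} and search results I = {7, 9, 5, 3}
// (nnn = 2 per query), query 0 finds its true NN at rank 0 and query 1
// finds its NN at rank 1, so evaluate() returns 2 / 2 = 1.0.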
IntersectionCriterion::IntersectionCriterion(idx_t nq, idx_t R)
: AutoTuneCriterion(nq, R), R(R) {}
double IntersectionCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= R && nnn >= R),
"ground truth not initialized");
int64_t n_ok = 0;
#pragma omp parallel for reduction(+ : n_ok)
for (idx_t q = 0; q < nq; q++) {
n_ok += ranklist_intersection_size(
R, &gt_I[q * gt_nnn], R, I + q * nnn);
}
return n_ok / double(nq * R);
}
/***************************************************************
* OperatingPoints
***************************************************************/
OperatingPoints::OperatingPoints() {
clear();
}
void OperatingPoints::clear() {
all_pts.clear();
optimal_pts.clear();
/// default point: doing nothing gives 0 performance and takes 0 time
OperatingPoint op = {0, 0, "", -1};
optimal_pts.push_back(op);
}
/// add a performance measure
bool OperatingPoints::add(
double perf,
double t,
const std::string& key,
size_t cno) {
OperatingPoint op = {perf, t, key, int64_t(cno)};
all_pts.push_back(op);
if (perf == 0) {
return false; // no method for 0 accuracy is faster than doing nothing
}
std::vector<OperatingPoint>& a = optimal_pts;
if (perf > a.back().perf) {
// keep unconditionally
a.push_back(op);
} else if (perf == a.back().perf) {
if (t < a.back().t) {
a.back() = op;
} else {
return false;
}
} else {
int i;
// stricto sensu this should be a bisection
for (i = 0; i < a.size(); i++) {
if (a[i].perf >= perf)
break;
}
assert(i < a.size());
if (t < a[i].t) {
if (a[i].perf == perf) {
a[i] = op;
} else {
a.insert(a.begin() + i, op);
}
} else {
return false;
}
}
{ // remove non-optimal points from array
int i = a.size() - 1;
while (i > 0) {
if (a[i].t < a[i - 1].t)
a.erase(a.begin() + (i - 1));
i--;
}
}
return true;
}
int OperatingPoints::merge_with(
const OperatingPoints& other,
const std::string& prefix) {
int n_add = 0;
for (int i = 0; i < other.all_pts.size(); i++) {
const OperatingPoint& op = other.all_pts[i];
if (add(op.perf, op.t, prefix + op.key, op.cno))
n_add++;
}
return n_add;
}
/// get time required to obtain a given performance measure
double OperatingPoints::t_for_perf(double perf) const {
const std::vector<OperatingPoint>& a = optimal_pts;
if (perf > a.back().perf)
return 1e50;
int i0 = -1, i1 = a.size() - 1;
while (i0 + 1 < i1) {
int imed = (i0 + i1 + 1) / 2;
if (a[imed].perf < perf)
i0 = imed;
else
i1 = imed;
}
return a[i1].t;
}
void OperatingPoints::all_to_gnuplot(const char* fname) const {
FILE* f = fopen(fname, "w");
if (!f) {
fprintf(stderr, "cannot open %s", fname);
perror("");
abort();
}
for (int i = 0; i < all_pts.size(); i++) {
const OperatingPoint& op = all_pts[i];
fprintf(f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
}
fclose(f);
}
void OperatingPoints::optimal_to_gnuplot(const char* fname) const {
FILE* f = fopen(fname, "w");
if (!f) {
fprintf(stderr, "cannot open %s", fname);
perror("");
abort();
}
double prev_perf = 0.0;
for (int i = 0; i < optimal_pts.size(); i++) {
const OperatingPoint& op = optimal_pts[i];
fprintf(f, "%g %g\n", prev_perf, op.t);
fprintf(f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
prev_perf = op.perf;
}
fclose(f);
}
void OperatingPoints::display(bool only_optimal) const {
const std::vector<OperatingPoint>& pts =
only_optimal ? optimal_pts : all_pts;
printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n",
all_pts.size(),
optimal_pts.size());
for (int i = 0; i < pts.size(); i++) {
const OperatingPoint& op = pts[i];
const char* star = "";
if (!only_optimal) {
for (int j = 0; j < optimal_pts.size(); j++) {
if (op.cno == optimal_pts[j].cno) {
star = "*";
break;
}
}
}
printf("cno=%" PRId64 " key=%s perf=%.4f t=%.3f %s\n",
op.cno,
op.key.c_str(),
op.perf,
op.t,
star);
}
}
/***************************************************************
* ParameterSpace
***************************************************************/
ParameterSpace::ParameterSpace()
: verbose(1),
n_experiments(500),
batchsize(1 << 30),
thread_over_batches(false),
min_test_duration(0) {}
/* not keeping this constructor as inheritors will call the parent
initialize()
*/
#if 0
ParameterSpace::ParameterSpace (Index *index):
verbose (1), n_experiments (500),
batchsize (1<<30), thread_over_batches (false)
{
initialize(index);
}
#endif
size_t ParameterSpace::n_combinations() const {
size_t n = 1;
for (int i = 0; i < parameter_ranges.size(); i++)
n *= parameter_ranges[i].values.size();
return n;
}
/// get string representation of the combination
std::string ParameterSpace::combination_name(size_t cno) const {
char buf[1000], *wp = buf;
*wp = 0;
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange& pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
wp += snprintf(
wp,
buf + 1000 - wp,
"%s%s=%g",
i == 0 ? "" : ",",
pr.name.c_str(),
pr.values[j]);
}
return std::string(buf);
}
bool ParameterSpace::combination_ge(size_t c1, size_t c2) const {
for (int i = 0; i < parameter_ranges.size(); i++) {
int nval = parameter_ranges[i].values.size();
size_t j1 = c1 % nval;
size_t j2 = c2 % nval;
if (!(j1 >= j2))
return false;
c1 /= nval;
c2 /= nval;
}
return true;
}
#define DC(classname) \
const classname* ix = dynamic_cast<const classname*>(index)
static void init_pq_ParameterRange(
const ProductQuantizer& pq,
ParameterRange& pr) {
if (pq.code_size % 4 == 0) {
// Polysemous not supported for code sizes that are not a
// multiple of 4
for (int i = 2; i <= pq.code_size * 8 / 2; i += 2)
pr.values.push_back(i);
}
pr.values.push_back(pq.code_size * 8);
}
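// Worked example: for a PQ with code_size = 8 (64-bit codes), this yields
// ht values 2, 4, ..., 32 plus 64; setting ht = 64 (= code_size * 8) later
// disables polysemous filtering entirely in set_index_parameter.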
ParameterRange& ParameterSpace::add_range(const std::string& name) {
for (auto& pr : parameter_ranges) {
if (pr.name == name) {
return pr;
}
}
parameter_ranges.push_back(ParameterRange());
parameter_ranges.back().name = name;
return parameter_ranges.back();
}
/// initialize with reasonable parameters for this type of index
void ParameterSpace::initialize(const Index* index) {
if (DC(IndexPreTransform)) {
index = ix->index;
}
if (DC(IndexRefine)) {
ParameterRange& pr = add_range("k_factor_rf");
for (int i = 0; i <= 6; i++) {
pr.values.push_back(1 << i);
}
index = ix->base_index;
}
if (DC(IndexPreTransform)) {
index = ix->index;
}
if (DC(IndexIVF)) {
{
ParameterRange& pr = add_range("nprobe");
for (int i = 0; i < 13; i++) {
size_t nprobe = 1 << i;
if (nprobe >= ix->nlist)
break;
pr.values.push_back(nprobe);
}
}
ParameterSpace ivf_pspace;
ivf_pspace.initialize(ix->quantizer);
for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
ParameterRange& pr = add_range("quantizer_" + p.name);
pr.values = p.values;
}
}
if (DC(IndexPQ)) {
ParameterRange& pr = add_range("ht");
init_pq_ParameterRange(ix->pq, pr);
}
if (DC(IndexIVFPQ)) {
ParameterRange& pr = add_range("ht");
init_pq_ParameterRange(ix->pq, pr);
}
if (DC(IndexIVF)) {
const MultiIndexQuantizer* miq =
dynamic_cast<const MultiIndexQuantizer*>(ix->quantizer);
if (miq) {
ParameterRange& pr_max_codes = add_range("max_codes");
for (int i = 8; i < 20; i++) {
pr_max_codes.values.push_back(1 << i);
}
pr_max_codes.values.push_back(
std::numeric_limits<double>::infinity());
}
}
if (DC(IndexIVFPQR)) {
ParameterRange& pr = add_range("k_factor");
for (int i = 0; i <= 6; i++) {
pr.values.push_back(1 << i);
}
}
if (dynamic_cast<const IndexHNSW*>(index)) {
ParameterRange& pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back(1 << i);
}
}
}
#undef DC
// non-const version
#define DC(classname) classname* ix = dynamic_cast<classname*>(index)
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters(Index* index, size_t cno) const {
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange& pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
double val = pr.values[j];
set_index_parameter(index, pr.name, val);
}
}
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters(
Index* index,
const char* description_in) const {
std::string description(description_in);
char* ptr;
for (char* tok = strtok_r(&description[0], " ,", &ptr); tok;
tok = strtok_r(nullptr, " ,", &ptr)) {
char name[100];
double val;
int ret = sscanf(tok, "%99[^=]=%lf", name, &val);
FAISS_THROW_IF_NOT_FMT(
ret == 2, "could not interpret parameters %s", tok);
set_index_parameter(index, name, val);
}
}
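// Example (sketch): the description string is a comma- or space-separated
// list of name=value pairs; given an index that accepts these parameters
// (e.g. an IVFPQ wrapped in a refinement index), one could write:
//
//   faiss::ParameterSpace ps;
//   ps.set_index_parameters(index, "nprobe=32,ht=256,k_factor_rf=4");
//
// Unknown parameter names make set_index_parameter throw.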
void ParameterSpace::set_index_parameter(
Index* index,
const std::string& name,
double val) const {
if (verbose > 1) {
printf(" set_index_parameter %s=%g\n", name.c_str(), val);
}
if (name == "verbose") {
index->verbose = int(val);
// and fall through to also enable it on sub-indexes
}
if (DC(IndexIDMap)) {
set_index_parameter(ix->index, name, val);
return;
}
if (DC(IndexPreTransform)) {
set_index_parameter(ix->index, name, val);
return;
}
if (DC(ThreadedIndex<Index>)) {
// call on all sub-indexes
auto fn = [this, name, val](int /* no */, Index* subIndex) {
set_index_parameter(subIndex, name, val);
};
ix->runOnIndex(fn);
return;
}
if (DC(IndexRefine)) {
if (name == "k_factor_rf") {
ix->k_factor = int(val);
return;
}
// otherwise it is for the sub-index
set_index_parameter(ix->base_index, name, val);
return;
}
if (name == "verbose") {
index->verbose = int(val);
return; // last verbose that we could find
}
if (name == "nprobe") {
if (DC(IndexIVF)) {
ix->nprobe = int(val);
return;
}
}
if (name == "ht") {
if (DC(IndexPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->search_type = IndexPQ::ST_PQ;
} else {
ix->search_type = IndexPQ::ST_polysemous;
ix->polysemous_ht = int(val);
}
return;
} else if (DC(IndexIVFPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->polysemous_ht = 0;
} else {
ix->polysemous_ht = int(val);
}
return;
}
}
if (name == "k_factor") {
if (DC(IndexIVFPQR)) {
ix->k_factor = val;
return;
}
}
if (name == "max_codes") {
if (DC(IndexIVF)) {
ix->max_codes = std::isfinite(val) ? size_t(val) : 0;
return;
}
}
if (name == "efSearch") {
if (DC(IndexHNSW)) {
ix->hnsw.efSearch = int(val);
return;
}
if (DC(IndexIVF)) {
if (IndexHNSW* cq = dynamic_cast<IndexHNSW*>(ix->quantizer)) {
cq->hnsw.efSearch = int(val);
return;
}
}
}
if (name.find("quantizer_") == 0) {
if (DC(IndexIVF)) {
std::string sub_name = name.substr(strlen("quantizer_"));
set_index_parameter(ix->quantizer, sub_name, val);
return;
}
}
FAISS_THROW_FMT(
"ParameterSpace::set_index_parameter:"
"could not set parameter %s",
name.c_str());
}
void ParameterSpace::display() const {
printf("ParameterSpace, %zd parameters, %zd combinations:\n",
parameter_ranges.size(),
n_combinations());
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange& pr = parameter_ranges[i];
printf(" %s: ", pr.name.c_str());
char sep = '[';
for (int j = 0; j < pr.values.size(); j++) {
printf("%c %g", sep, pr.values[j]);
sep = ',';
}
printf("]\n");
}
}
void ParameterSpace::update_bounds(
size_t cno,
const OperatingPoint& op,
double* upper_bound_perf,
double* lower_bound_t) const {
if (combination_ge(cno, op.cno)) {
if (op.t > *lower_bound_t)
*lower_bound_t = op.t;
}
if (combination_ge(op.cno, cno)) {
if (op.perf < *upper_bound_perf)
*upper_bound_perf = op.perf;
}
}
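// Worked example (hypothetical space with one parameter nprobe in
// {1, 2, 4}, so cno = 0, 1, 2): suppose the point op with cno = 1
// (nprobe=2) measured perf = 0.8 and t = 1.0 s. For candidate cno = 2,
// combination_ge(2, 1) holds, so *lower_bound_t rises to 1.0 s (a larger
// nprobe cannot be faster); for candidate cno = 0, combination_ge(1, 0)
// holds, so *upper_bound_perf drops to 0.8 (a smaller nprobe cannot be
// more accurate). explore() uses these bounds to skip experiments that
// cannot produce a new optimal point.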
void ParameterSpace::explore(
Index* index,
size_t nq,
const float* xq,
const AutoTuneCriterion& crit,
OperatingPoints* ops) const {
FAISS_THROW_IF_NOT_MSG(
nq == crit.nq, "criterion does not have the same nb of queries");
size_t n_comb = n_combinations();
if (n_experiments == 0) {
for (size_t cno = 0; cno < n_comb; cno++) {
set_index_parameters(index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs();
index->search(nq, xq, crit.nnn, D.data(), I.data());
double t_search = (getmillisecs() - t0) / 1e3;
double perf = crit.evaluate(D.data(), I.data());
bool keep = ops->add(perf, t_search, combination_name(cno), cno);
if (verbose)
printf(" %zd/%zd: %s perf=%.3f t=%.3f s %s\n",
cno,
n_comb,
combination_name(cno).c_str(),
perf,
t_search,
keep ? "*" : "");
}
return;
}
int n_exp = n_experiments;
if (n_exp > n_comb)
n_exp = n_comb;
FAISS_THROW_IF_NOT(n_comb == 1 || n_exp > 2);
std::vector<int> perm(n_comb);
// make sure the slowest and fastest experiment are run
perm[0] = 0;
if (n_comb > 1) {
perm[1] = n_comb - 1;
rand_perm(&perm[2], n_comb - 2, 1234);
for (int i = 2; i < perm.size(); i++)
perm[i]++;
}
for (size_t xp = 0; xp < n_exp; xp++) {
size_t cno = perm[xp];
if (verbose)
printf(" %zd/%d: cno=%zd %s ",
xp,
n_exp,
cno,
combination_name(cno).c_str());
{
double lower_bound_t = 0.0;
double upper_bound_perf = 1.0;
for (int i = 0; i < ops->all_pts.size(); i++) {
update_bounds(
cno,
ops->all_pts[i],
&upper_bound_perf,
&lower_bound_t);
}
double best_t = ops->t_for_perf(upper_bound_perf);
if (verbose)
printf("bounds [perf<=%.3f t>=%.3f] %s",
upper_bound_perf,
lower_bound_t,
best_t <= lower_bound_t ? "skip\n" : "");
if (best_t <= lower_bound_t)
continue;
}
set_index_parameters(index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs();
int nrun = 0;
double t_search;
do {
if (thread_over_batches) {
#pragma omp parallel for
for (Index::idx_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq)
q1 = nq;
index->search(
q1 - q0,
xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
} else {
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq)
q1 = nq;
index->search(
q1 - q0,
xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
}
nrun++;
t_search = (getmillisecs() - t0) / 1e3;
} while (t_search < min_test_duration);
t_search /= nrun;
double perf = crit.evaluate(D.data(), I.data());
bool keep = ops->add(perf, t_search, combination_name(cno), cno);
if (verbose)
printf(" perf %.3f t %.3f (%d %s) %s\n",
perf,
t_search,
nrun,
nrun >= 2 ? "runs" : "run",
keep ? "*" : "");
}
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_AUTO_TUNE_H
#define FAISS_AUTO_TUNE_H
#include <stdint.h>
#include <unordered_map>
#include <vector>
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
namespace faiss {
/**
* Evaluation criterion. Returns a performance measure in [0,1],
* higher is better.
*/
struct AutoTuneCriterion {
typedef Index::idx_t idx_t;
idx_t nq; ///< nb of queries this criterion is evaluated on
idx_t nnn; ///< nb of NNs that the query should request
idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
AutoTuneCriterion(idx_t nq, idx_t nnn);
/** Initializes the gt_D and gt_I vectors. Must be called before evaluating
*
* @param gt_D_in size nq * gt_nnn
* @param gt_I_in size nq * gt_nnn
*/
void set_groundtruth(
int gt_nnn,
const float* gt_D_in,
const idx_t* gt_I_in);
/** Evaluate the criterion.
*
* @param D size nq * nnn
* @param I size nq * nnn
* @return the criterion, between 0 and 1. Larger is better.
*/
virtual double evaluate(const float* D, const idx_t* I) const = 0;
virtual ~AutoTuneCriterion() {}
};
struct OneRecallAtRCriterion : AutoTuneCriterion {
idx_t R;
OneRecallAtRCriterion(idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~OneRecallAtRCriterion() override {}
};
struct IntersectionCriterion : AutoTuneCriterion {
idx_t R;
IntersectionCriterion(idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~IntersectionCriterion() override {}
};
/**
* Maintains a list of experimental results. Each operating point is a
 * (perf, t, key) triplet, where higher perf and lower t are
* better. The key field is an arbitrary identifier for the operating point.
*
* Includes primitives to extract the Pareto-optimal operating points in the
* (perf, t) space.
*/
struct OperatingPoint {
double perf; ///< performance measure (output of a Criterion)
double t; ///< corresponding execution time (ms)
std::string key; ///< key that identifies this op pt
int64_t cno; ///< integer identifier
};
struct OperatingPoints {
/// all operating points
std::vector<OperatingPoint> all_pts;
/// optimal operating points, sorted by perf
std::vector<OperatingPoint> optimal_pts;
// begins with a single operating point: t=0, perf=0
OperatingPoints();
/// add operating points from other to this, with a prefix to the keys
int merge_with(
const OperatingPoints& other,
const std::string& prefix = "");
void clear();
/// add a performance measure. Return whether it is an optimal point
bool add(double perf, double t, const std::string& key, size_t cno = 0);
/// get time required to obtain a given performance measure
double t_for_perf(double perf) const;
/// easy-to-read output
void display(bool only_optimal = true) const;
/// output to a format easy to digest by gnuplot
void all_to_gnuplot(const char* fname) const;
void optimal_to_gnuplot(const char* fname) const;
};
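/* Example (sketch, hypothetical measurements):
 *
 *   faiss::OperatingPoints ops;
 *   ops.add(0.50, 0.010, "nprobe=1", 0); // kept: best perf so far
 *   ops.add(0.80, 0.100, "nprobe=4", 2); // kept: higher perf, slower
 *   ops.add(0.70, 0.200, "nprobe=2", 1); // rejected: dominated by nprobe=4
 *   double t = ops.t_for_perf(0.75);     // == 0.100, via the nprobe=4 point
 */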
/// possible values of a parameter, sorted from least to most expensive/accurate
struct ParameterRange {
std::string name;
std::vector<double> values;
};
/** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
*/
struct ParameterSpace {
/// all tunable parameters
std::vector<ParameterRange> parameter_ranges;
// exploration parameters
/// verbosity during exploration
int verbose;
/// nb of experiments during optimization (0 = try all combinations)
int n_experiments;
/// maximum number of queries to submit at a time.
size_t batchsize;
/// use multithreading over batches (useful to benchmark
/// independent single-searches)
bool thread_over_batches;
/// run tests several times until they reach at least this
/// duration (to avoid jittering in MT mode)
double min_test_duration;
ParameterSpace();
/// nb of combinations, = product of values sizes
size_t n_combinations() const;
/// returns whether combinations c1 >= c2 in the tuple sense
bool combination_ge(size_t c1, size_t c2) const;
/// get string representation of the combination
std::string combination_name(size_t cno) const;
/// print a description on stdout
void display() const;
/// add a new parameter (or return it if it exists)
ParameterRange& add_range(const std::string& name);
/// initialize with reasonable parameters for the index
virtual void initialize(const Index* index);
/// set a combination of parameters on an index
void set_index_parameters(Index* index, size_t cno) const;
/// set a combination of parameters described by a string
void set_index_parameters(Index* index, const char* param_string) const;
/// set one of the parameters, returns whether setting was successful
virtual void set_index_parameter(
Index* index,
const std::string& name,
double val) const;
/** find an upper bound on the performance and a lower bound on t
* for configuration cno given another operating point op */
void update_bounds(
size_t cno,
const OperatingPoint& op,
double* upper_bound_perf,
double* lower_bound_t) const;
/** explore operating points
* @param index index to run on
* @param xq query vectors (size nq * index.d)
* @param crit selection criterion
* @param ops resulting operating points
*/
void explore(
Index* index,
size_t nq,
const float* xq,
const AutoTuneCriterion& crit,
OperatingPoints* ops) const;
virtual ~ParameterSpace() {}
};
} // namespace faiss
#endif
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set(FAISS_SRC
AutoTune.cpp
Clustering.cpp
IVFlib.cpp
Index.cpp
Index2Layer.cpp
IndexAdditiveQuantizer.cpp
IndexBinary.cpp
IndexBinaryFlat.cpp
IndexBinaryFromFloat.cpp
IndexBinaryHNSW.cpp
IndexBinaryHash.cpp
IndexBinaryIVF.cpp
IndexFlat.cpp
IndexFlatCodes.cpp
IndexHNSW.cpp
IndexIVF.cpp
IndexIVFAdditiveQuantizer.cpp
IndexIVFFlat.cpp
IndexIVFPQ.cpp
IndexIVFPQFastScan.cpp
IndexIVFPQR.cpp
IndexIVFSpectralHash.cpp
IndexLSH.cpp
IndexNNDescent.cpp
IndexLattice.cpp
IndexNSG.cpp
IndexPQ.cpp
IndexPQFastScan.cpp
IndexPreTransform.cpp
IndexRefine.cpp
IndexReplicas.cpp
IndexScalarQuantizer.cpp
IndexShards.cpp
MatrixStats.cpp
MetaIndexes.cpp
VectorTransform.cpp
clone_index.cpp
index_factory.cpp
impl/AuxIndexStructures.cpp
impl/FaissException.cpp
impl/HNSW.cpp
impl/NSG.cpp
impl/PolysemousTraining.cpp
impl/ProductQuantizer.cpp
impl/AdditiveQuantizer.cpp
impl/ResidualQuantizer.cpp
impl/LocalSearchQuantizer.cpp
impl/ScalarQuantizer.cpp
impl/index_read.cpp
impl/index_write.cpp
impl/io.cpp
impl/kmeans1d.cpp
impl/lattice_Zn.cpp
impl/pq4_fast_scan.cpp
impl/pq4_fast_scan_search_1.cpp
impl/pq4_fast_scan_search_qbs.cpp
impl/NNDescent.cpp
invlists/BlockInvertedLists.cpp
invlists/DirectMap.cpp
invlists/InvertedLists.cpp
invlists/InvertedListsIOHook.cpp
utils/Heap.cpp
utils/WorkerThread.cpp
utils/distances.cpp
utils/distances_simd.cpp
utils/extra_distances.cpp
utils/hamming.cpp
utils/partitioning.cpp
utils/quantize_lut.cpp
utils/random.cpp
utils/utils.cpp
)
set(FAISS_HEADERS
AutoTune.h
Clustering.h
IVFlib.h
Index.h
Index2Layer.h
IndexAdditiveQuantizer.h
IndexBinary.h
IndexBinaryFlat.h
IndexBinaryFromFloat.h
IndexBinaryHNSW.h
IndexBinaryHash.h
IndexBinaryIVF.h
IndexFlat.h
IndexFlatCodes.h
IndexHNSW.h
IndexIVF.h
IndexIVFAdditiveQuantizer.h
IndexIVFFlat.h
IndexIVFPQ.h
IndexIVFPQFastScan.h
IndexIVFPQR.h
IndexIVFSpectralHash.h
IndexLSH.h
IndexLattice.h
IndexNNDescent.h
IndexNSG.h
IndexPQ.h
IndexPQFastScan.h
IndexPreTransform.h
IndexRefine.h
IndexReplicas.h
IndexScalarQuantizer.h
IndexShards.h
MatrixStats.h
MetaIndexes.h
MetricType.h
VectorTransform.h
clone_index.h
index_factory.h
index_io.h
impl/AdditiveQuantizer.h
impl/AuxIndexStructures.h
impl/FaissAssert.h
impl/FaissException.h
impl/HNSW.h
impl/LocalSearchQuantizer.h
impl/NNDescent.h
impl/NSG.h
impl/PolysemousTraining.h
impl/ProductQuantizer-inl.h
impl/ProductQuantizer.h
impl/ResidualQuantizer.h
impl/ResultHandler.h
impl/ScalarQuantizer.h
impl/ThreadedIndex-inl.h
impl/ThreadedIndex.h
impl/io.h
impl/io_macros.h
impl/kmeans1d.h
impl/lattice_Zn.h
impl/platform_macros.h
impl/pq4_fast_scan.h
impl/simd_result_handlers.h
invlists/BlockInvertedLists.h
invlists/DirectMap.h
invlists/InvertedLists.h
invlists/InvertedListsIOHook.h
utils/AlignedTable.h
utils/Heap.h
utils/WorkerThread.h
utils/distances.h
utils/extra_distances-inl.h
utils/extra_distances.h
utils/hamming-inl.h
utils/hamming.h
utils/ordered_key_value.h
utils/partitioning.h
utils/quantize_lut.h
utils/random.h
utils/simdlib.h
utils/simdlib_avx2.h
utils/simdlib_emulated.h
utils/simdlib_neon.h
utils/utils.h
)
if(NOT WIN32)
list(APPEND FAISS_SRC invlists/OnDiskInvertedLists.cpp)
list(APPEND FAISS_HEADERS invlists/OnDiskInvertedLists.h)
endif()
# Export FAISS_HEADERS variable to parent scope.
set(FAISS_HEADERS ${FAISS_HEADERS} PARENT_SCOPE)
add_library(faiss ${FAISS_SRC})
add_library(faiss_avx2 ${FAISS_SRC})
if(NOT FAISS_OPT_LEVEL STREQUAL "avx2")
set_target_properties(faiss_avx2 PROPERTIES EXCLUDE_FROM_ALL TRUE)
endif()
if(NOT WIN32)
target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mpopcnt>)
else()
# MSVC enables FMA with /arch:AVX2; no separate flags for F16C, POPCNT
# Ref. FMA (under /arch:AVX2): https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64
# Ref. F16C (2nd paragraph): https://walbourn.github.io/directxmath-avx2/
# Ref. POPCNT: https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
endif()
# Handle `#include <faiss/foo.h>`.
target_include_directories(faiss PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
# Handle `#include <faiss/foo.h>`.
target_include_directories(faiss_avx2 PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
set_target_properties(faiss PROPERTIES
POSITION_INDEPENDENT_CODE ON
WINDOWS_EXPORT_ALL_SYMBOLS ON
)
set_target_properties(faiss_avx2 PROPERTIES
POSITION_INDEPENDENT_CODE ON
WINDOWS_EXPORT_ALL_SYMBOLS ON
)
if(WIN32)
target_compile_definitions(faiss PRIVATE FAISS_MAIN_LIB)
target_compile_definitions(faiss_avx2 PRIVATE FAISS_MAIN_LIB)
endif()
target_compile_definitions(faiss PRIVATE FINTEGER=int)
target_compile_definitions(faiss_avx2 PRIVATE FINTEGER=int)
find_package(OpenMP REQUIRED)
target_link_libraries(faiss PRIVATE OpenMP::OpenMP_CXX)
target_link_libraries(faiss_avx2 PRIVATE OpenMP::OpenMP_CXX)
find_package(MKL)
if(MKL_FOUND)
target_link_libraries(faiss PRIVATE ${MKL_LIBRARIES})
target_link_libraries(faiss_avx2 PRIVATE ${MKL_LIBRARIES})
else()
find_package(BLAS REQUIRED)
target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES})
target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES})
find_package(LAPACK REQUIRED)
target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES})
target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES})
endif()
install(TARGETS faiss
EXPORT faiss-targets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
if(FAISS_OPT_LEVEL STREQUAL "avx2")
install(TARGETS faiss_avx2
EXPORT faiss-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
endif()
foreach(header ${FAISS_HEADERS})
get_filename_component(dir ${header} DIRECTORY)
install(FILES ${header}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/faiss/${dir}
)
endforeach()
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/cmake/faiss-config-version.cmake"
VERSION ${CMAKE_PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)
configure_file(${PROJECT_SOURCE_DIR}/cmake/faiss-config.cmake.in
${PROJECT_BINARY_DIR}/cmake/faiss-config.cmake
COPYONLY
)
install(FILES ${PROJECT_BINARY_DIR}/cmake/faiss-config.cmake
${PROJECT_BINARY_DIR}/cmake/faiss-config-version.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/faiss
)
install(EXPORT faiss-targets
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/faiss
)
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Clustering.h>
#include <faiss/VectorTransform.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <omp.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/kmeans1d.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
namespace faiss {
ClusteringParameters::ClusteringParameters()
: niter(25),
nredo(1),
verbose(false),
spherical(false),
int_centroids(false),
update_index(false),
frozen_centroids(false),
min_points_per_centroid(39),
max_points_per_centroid(256),
seed(1234),
decode_block_size(32768) {}
// 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
Clustering::Clustering(int d, int k) : d(d), k(k) {}
Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
: ClusteringParameters(cp), d(d), k(k) {}
static double imbalance_factor(int n, int k, int64_t* assign) {
std::vector<int> hist(k, 0);
for (int i = 0; i < n; i++)
hist[assign[i]]++;
double tot = 0, uf = 0;
for (int i = 0; i < k; i++) {
tot += hist[i];
uf += hist[i] * (double)hist[i];
}
uf = uf * k / (tot * tot);
return uf;
}
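// Worked example: n = 4, k = 2. A balanced assignment hist = {2, 2} gives
// uf = (4 + 4) * 2 / 16 = 1.0 (the optimum); a skewed assignment
// hist = {3, 1} gives uf = (9 + 1) * 2 / 16 = 1.25.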
void Clustering::post_process_centroids() {
if (spherical) {
fvec_renorm_L2(d, k, centroids.data());
}
if (int_centroids) {
for (size_t i = 0; i < centroids.size(); i++)
centroids[i] = roundf(centroids[i]);
}
}
void Clustering::train(
idx_t nx,
const float* x_in,
Index& index,
const float* weights) {
train_encoded(
nx,
reinterpret_cast<const uint8_t*>(x_in),
nullptr,
index,
weights);
}
namespace {
using idx_t = Clustering::idx_t;
idx_t subsample_training_set(
const Clustering& clus,
idx_t nx,
const uint8_t* x,
size_t line_size,
const float* weights,
uint8_t** x_out,
float** weights_out) {
if (clus.verbose) {
printf("Sampling a subset of %zd / %" PRId64 " for training\n",
clus.k * clus.max_points_per_centroid,
nx);
}
std::vector<int> perm(nx);
rand_perm(perm.data(), nx, clus.seed);
nx = clus.k * clus.max_points_per_centroid;
uint8_t* x_new = new uint8_t[nx * line_size];
*x_out = x_new;
for (idx_t i = 0; i < nx; i++) {
memcpy(x_new + i * line_size, x + perm[i] * line_size, line_size);
}
if (weights) {
float* weights_new = new float[nx];
for (idx_t i = 0; i < nx; i++) {
weights_new[i] = weights[perm[i]];
}
*weights_out = weights_new;
} else {
*weights_out = nullptr;
}
return nx;
}
/** compute centroids as (weighted) sum of training points
*
* @param x training vectors, size n * code_size (from codec)
* @param codec how to decode the vectors (if NULL then cast to float*)
* @param weights per-training vector weight, size n (or NULL)
* @param assign nearest centroid for each training vector, size n
* @param k_frozen do not update the k_frozen first centroids
* @param centroids centroid vectors (output only), size k * d
* @param hassign histogram of assignments per centroid (size k),
* should be 0 on input
*
*/
void compute_centroids(
size_t d,
size_t k,
size_t n,
size_t k_frozen,
const uint8_t* x,
const Index* codec,
const int64_t* assign,
const float* weights,
float* hassign,
float* centroids) {
k -= k_frozen;
centroids += k_frozen * d;
memset(centroids, 0, sizeof(*centroids) * d * k);
size_t line_size = codec ? codec->sa_code_size() : d * sizeof(float);
#pragma omp parallel
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// this thread is taking care of centroids c0:c1
size_t c0 = (k * rank) / nt;
size_t c1 = (k * (rank + 1)) / nt;
std::vector<float> decode_buffer(d);
for (size_t i = 0; i < n; i++) {
int64_t ci = assign[i];
assert(ci >= 0 && ci < k + k_frozen);
ci -= k_frozen;
if (ci >= c0 && ci < c1) {
float* c = centroids + ci * d;
const float* xi;
if (!codec) {
xi = reinterpret_cast<const float*>(x + i * line_size);
} else {
float* xif = decode_buffer.data();
codec->sa_decode(1, x + i * line_size, xif);
xi = xif;
}
if (weights) {
float w = weights[i];
hassign[ci] += w;
for (size_t j = 0; j < d; j++) {
c[j] += xi[j] * w;
}
} else {
hassign[ci] += 1.0;
for (size_t j = 0; j < d; j++) {
c[j] += xi[j];
}
}
}
}
}
#pragma omp parallel for
for (idx_t ci = 0; ci < k; ci++) {
if (hassign[ci] == 0) {
continue;
}
float norm = 1 / hassign[ci];
float* c = centroids + ci * d;
for (size_t j = 0; j < d; j++) {
c[j] *= norm;
}
}
}
// a bit above machine epsilon for float16
#define EPS (1 / 1024.)
/** Handle empty clusters by splitting larger ones.
*
* It works by slightly changing the centroids to make 2 clusters from
* a single one. Takes the same arguments as compute_centroids.
*
 * @return nb of splitting operations (larger is worse)
*/
int split_clusters(
size_t d,
size_t k,
size_t n,
size_t k_frozen,
float* hassign,
float* centroids) {
k -= k_frozen;
centroids += k_frozen * d;
/* Take care of empty clusters */
size_t nsplit = 0;
RandomGenerator rng(1234);
for (size_t ci = 0; ci < k; ci++) {
if (hassign[ci] == 0) { /* need to redefine a centroid */
size_t cj;
for (cj = 0; 1; cj = (cj + 1) % k) {
/* probability to pick this cluster for split */
float p = (hassign[cj] - 1.0) / (float)(n - k);
float r = rng.rand_float();
if (r < p) {
break; /* found our cluster to be split */
}
}
memcpy(centroids + ci * d,
centroids + cj * d,
sizeof(*centroids) * d);
/* small symmetric perturbation */
for (size_t j = 0; j < d; j++) {
if (j % 2 == 0) {
centroids[ci * d + j] *= 1 + EPS;
centroids[cj * d + j] *= 1 - EPS;
} else {
centroids[ci * d + j] *= 1 - EPS;
centroids[cj * d + j] *= 1 + EPS;
}
}
/* assume even split of the cluster */
hassign[ci] = hassign[cj] / 2;
hassign[cj] -= hassign[ci];
nsplit++;
}
}
return nsplit;
}
} // namespace
void Clustering::train_encoded(
idx_t nx,
const uint8_t* x_in,
const Index* codec,
Index& index,
const float* weights) {
FAISS_THROW_IF_NOT_FMT(
nx >= k,
"Number of training points (%" PRId64
") should be at least "
"as large as number of clusters (%zd)",
nx,
k);
FAISS_THROW_IF_NOT_FMT(
(!codec || codec->d == d),
"Codec dimension %d not the same as data dimension %d",
int(codec->d),
int(d));
FAISS_THROW_IF_NOT_FMT(
index.d == d,
"Index dimension %d not the same as data dimension %d",
int(index.d),
int(d));
double t0 = getmillisecs();
if (!codec) {
// Check for NaNs in input data. Normally it is the user's
// responsibility, but it may spare us some hard-to-debug
// reports.
const float* x = reinterpret_cast<const float*>(x_in);
for (size_t i = 0; i < nx * d; i++) {
FAISS_THROW_IF_NOT_MSG(
std::isfinite(x[i]), "input contains NaN's or Inf's");
}
}
const uint8_t* x = x_in;
std::unique_ptr<uint8_t[]> del1;
std::unique_ptr<float[]> del3;
size_t line_size = codec ? codec->sa_code_size() : sizeof(float) * d;
if (nx > k * max_points_per_centroid) {
uint8_t* x_new;
float* weights_new;
nx = subsample_training_set(
*this, nx, x, line_size, weights, &x_new, &weights_new);
del1.reset(x_new);
x = x_new;
del3.reset(weights_new);
weights = weights_new;
} else if (nx < k * min_points_per_centroid) {
fprintf(stderr,
"WARNING clustering %" PRId64
" points to %zd centroids: "
"please provide at least %" PRId64 " training points\n",
nx,
k,
idx_t(k) * min_points_per_centroid);
}
if (nx == k) {
// this is a corner case, just copy training set to clusters
if (verbose) {
printf("Number of training points (%" PRId64
") same as number of "
"clusters, just copying\n",
nx);
}
centroids.resize(d * k);
if (!codec) {
memcpy(centroids.data(), x_in, sizeof(float) * d * k);
} else {
codec->sa_decode(nx, x_in, centroids.data());
}
// one fake iteration...
ClusteringIterationStats stats = {0.0, 0.0, 0.0, 1.0, 0};
iteration_stats.push_back(stats);
index.reset();
index.add(k, centroids.data());
return;
}
if (verbose) {
printf("Clustering %" PRId64
" points in %zdD to %zd clusters, "
"redo %d times, %d iterations\n",
nx,
d,
k,
nredo,
niter);
if (codec) {
printf("Input data encoded in %zd bytes per vector\n",
codec->sa_code_size());
}
}
std::unique_ptr<idx_t[]> assign(new idx_t[nx]);
std::unique_ptr<float[]> dis(new float[nx]);
// remember best iteration for redo
bool lower_is_better = index.metric_type != METRIC_INNER_PRODUCT;
float best_obj = lower_is_better ? HUGE_VALF : -HUGE_VALF;
std::vector<ClusteringIterationStats> best_iteration_stats;
std::vector<float> best_centroids;
// support input centroids
FAISS_THROW_IF_NOT_MSG(
centroids.size() % d == 0,
"size of provided input centroids not a multiple of dimension");
size_t n_input_centroids = centroids.size() / d;
if (verbose && n_input_centroids > 0) {
printf(" Using %zd centroids provided as input (%sfrozen)\n",
n_input_centroids,
frozen_centroids ? "" : "not ");
}
double t_search_tot = 0;
if (verbose) {
printf(" Preprocessing in %.2f s\n", (getmillisecs() - t0) / 1000.);
}
t0 = getmillisecs();
// temporary buffer to decode vectors during the optimization
std::vector<float> decode_buffer(codec ? d * decode_block_size : 0);
for (int redo = 0; redo < nredo; redo++) {
if (verbose && nredo > 1) {
printf("Outer iteration %d / %d\n", redo, nredo);
}
// initialize (remaining) centroids with random points from the dataset
centroids.resize(d * k);
std::vector<int> perm(nx);
rand_perm(perm.data(), nx, seed + 1 + redo * 15486557L);
if (!codec) {
for (int i = n_input_centroids; i < k; i++) {
memcpy(&centroids[i * d], x + perm[i] * line_size, line_size);
}
} else {
for (int i = n_input_centroids; i < k; i++) {
codec->sa_decode(1, x + perm[i] * line_size, &centroids[i * d]);
}
}
post_process_centroids();
// prepare the index
if (index.ntotal != 0) {
index.reset();
}
if (!index.is_trained) {
index.train(k, centroids.data());
}
index.add(k, centroids.data());
// k-means iterations
float obj = 0;
for (int i = 0; i < niter; i++) {
double t0s = getmillisecs();
if (!codec) {
index.search(
nx,
reinterpret_cast<const float*>(x),
1,
dis.get(),
assign.get());
} else {
// search by blocks of decode_block_size vectors
size_t code_size = codec->sa_code_size();
for (size_t i0 = 0; i0 < nx; i0 += decode_block_size) {
size_t i1 = i0 + decode_block_size;
if (i1 > nx) {
i1 = nx;
}
codec->sa_decode(
i1 - i0, x + code_size * i0, decode_buffer.data());
index.search(
i1 - i0,
decode_buffer.data(),
1,
dis.get() + i0,
assign.get() + i0);
}
}
InterruptCallback::check();
t_search_tot += getmillisecs() - t0s;
// accumulate objective
obj = 0;
for (int j = 0; j < nx; j++) {
obj += dis[j];
}
// update the centroids
std::vector<float> hassign(k);
size_t k_frozen = frozen_centroids ? n_input_centroids : 0;
compute_centroids(
d,
k,
nx,
k_frozen,
x,
codec,
assign.get(),
weights,
hassign.data(),
centroids.data());
int nsplit = split_clusters(
d, k, nx, k_frozen, hassign.data(), centroids.data());
// collect statistics
ClusteringIterationStats stats = {
obj,
(getmillisecs() - t0) / 1000.0,
t_search_tot / 1000,
imbalance_factor(nx, k, assign.get()),
nsplit};
iteration_stats.push_back(stats);
if (verbose) {
printf(" Iteration %d (%.2f s, search %.2f s): "
"objective=%g imbalance=%.3f nsplit=%d \r",
i,
stats.time,
stats.time_search,
stats.obj,
stats.imbalance_factor,
nsplit);
fflush(stdout);
}
post_process_centroids();
// add centroids to index for the next iteration (or for output)
index.reset();
if (update_index) {
index.train(k, centroids.data());
}
index.add(k, centroids.data());
InterruptCallback::check();
}
if (verbose)
printf("\n");
if (nredo > 1) {
if ((lower_is_better && obj < best_obj) ||
(!lower_is_better && obj > best_obj)) {
if (verbose) {
printf("Objective improved: keep new clusters\n");
}
best_centroids = centroids;
best_iteration_stats = iteration_stats;
best_obj = obj;
}
index.reset();
}
}
if (nredo > 1) {
centroids = best_centroids;
iteration_stats = best_iteration_stats;
index.reset();
index.add(k, best_centroids.data());
}
}
Clustering1D::Clustering1D(int k) : Clustering(1, k) {}
Clustering1D::Clustering1D(int k, const ClusteringParameters& cp)
: Clustering(1, k, cp) {}
void Clustering1D::train_exact(idx_t n, const float* x) {
const float* xt = x;
std::unique_ptr<uint8_t[]> del;
if (n > k * max_points_per_centroid) {
uint8_t* x_new;
float* weights_new;
n = subsample_training_set(
*this,
n,
(uint8_t*)x,
sizeof(float) * d,
nullptr,
&x_new,
&weights_new);
del.reset(x_new);
xt = (float*)x_new;
}
centroids.resize(k);
double uf = kmeans1d(xt, n, k, centroids.data());
ClusteringIterationStats stats = {0.0, 0.0, 0.0, uf, 0};
iteration_stats.push_back(stats);
}
float kmeans_clustering(
size_t d,
size_t n,
size_t k,
const float* x,
float* centroids) {
Clustering clus(d, k);
clus.verbose = d * n * k > (1L << 30);
// display logs if > 1Gflop per iteration
IndexFlatL2 index(d);
clus.train(n, x, index);
memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
return clus.iteration_stats.back().obj;
}
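// Example usage (sketch, hypothetical sizes and data):
//
//   size_t d = 16, n = 10000, k = 100;
//   std::vector<float> x(n * d), centroids(k * d);
//   faiss::float_rand(x.data(), x.size(), 42); // random training set
//   float err = faiss::kmeans_clustering(
//           d, n, k, x.data(), centroids.data());
//
// err is the final objective: the sum over training points of the
// (squared L2) distance to their nearest centroid.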
/******************************************************************************
* ProgressiveDimClustering implementation
******************************************************************************/
ProgressiveDimClusteringParameters::ProgressiveDimClusteringParameters() {
progressive_dim_steps = 10;
apply_pca = true; // seems a good idea to do this by default
niter = 10; // reduce nb of iterations per step
}
Index* ProgressiveDimIndexFactory::operator()(int dim) {
return new IndexFlatL2(dim);
}
ProgressiveDimClustering::ProgressiveDimClustering(int d, int k) : d(d), k(k) {}
ProgressiveDimClustering::ProgressiveDimClustering(
int d,
int k,
const ProgressiveDimClusteringParameters& cp)
: ProgressiveDimClusteringParameters(cp), d(d), k(k) {}
namespace {
using idx_t = Index::idx_t;
void copy_columns(idx_t n, idx_t d1, const float* src, idx_t d2, float* dest) {
idx_t d = std::min(d1, d2);
for (idx_t i = 0; i < n; i++) {
memcpy(dest, src, sizeof(float) * d);
src += d1;
dest += d2;
}
}
} // namespace
void ProgressiveDimClustering::train(
idx_t n,
const float* x,
ProgressiveDimIndexFactory& factory) {
int d_prev = 0;
PCAMatrix pca(d, d);
std::vector<float> xbuf;
if (apply_pca) {
if (verbose) {
printf("Training PCA transform\n");
}
pca.train(n, x);
if (verbose) {
printf("Apply PCA\n");
}
xbuf.resize(n * d);
pca.apply_noalloc(n, x, xbuf.data());
x = xbuf.data();
}
for (int iter = 0; iter < progressive_dim_steps; iter++) {
int di = int(pow(d, (1. + iter) / progressive_dim_steps));
if (verbose) {
printf("Progressive dim step %d: cluster in dimension %d\n",
iter,
di);
}
std::unique_ptr<Index> clustering_index(factory(di));
Clustering clus(di, k, *this);
if (d_prev > 0) {
// copy warm-start centroids (padded with 0s)
clus.centroids.resize(k * di);
copy_columns(
k, d_prev, centroids.data(), di, clus.centroids.data());
}
std::vector<float> xsub(n * di);
copy_columns(n, d, x, di, xsub.data());
clus.train(n, xsub.data(), *clustering_index.get());
centroids = clus.centroids;
iteration_stats.insert(
iteration_stats.end(),
clus.iteration_stats.begin(),
clus.iteration_stats.end());
d_prev = di;
}
if (apply_pca) {
if (verbose) {
printf("Revert PCA transform on centroids\n");
}
std::vector<float> cent_transformed(d * k);
pca.reverse_transform(k, centroids.data(), cent_transformed.data());
cent_transformed.swap(centroids);
}
}
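// Worked example of the dimension schedule: di = int(pow(d,
// (1 + iter) / progressive_dim_steps)) grows geometrically from roughly
// d^(1/steps) up to d. E.g. with d = 256 and progressive_dim_steps = 2,
// the two steps cluster in dimension ~16 and then 256, each warm-started
// from the previous step's centroids (padded with zeros).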
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_CLUSTERING_H
#define FAISS_CLUSTERING_H
#include <faiss/Index.h>
#include <vector>
namespace faiss {
/** Class for the clustering parameters. Can be passed to the
* constructor of the Clustering object.
*/
struct ClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best
bool verbose;
bool spherical; ///< do we want normalized centroids?
bool int_centroids; ///< round centroids coordinates to integer
bool update_index; ///< re-train index after each iteration?
bool frozen_centroids; ///< use the centroids provided as input and do not
///< change them during iterations
int min_points_per_centroid; ///< otherwise you get a warning
int max_points_per_centroid; ///< to limit size of dataset
int seed; ///< seed for the random number generator
size_t decode_block_size; ///< how many vectors at a time to decode
/// sets reasonable defaults
ClusteringParameters();
};
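/* Example (sketch): overriding a few defaults before clustering
 *
 *   faiss::ClusteringParameters cp;
 *   cp.niter = 50;       // more k-means iterations than the default 25
 *   cp.spherical = true; // L2-normalize centroids after each update
 *   faiss::Clustering clus(d, k, cp); // d, k chosen by the caller
 */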
struct ClusteringIterationStats {
float obj; ///< objective values (sum of distances reported by index)
double time; ///< seconds for iteration
double time_search; ///< seconds for just search
double imbalance_factor; ///< imbalance factor of iteration
int nsplit; ///< number of cluster splits
};
/** K-means clustering based on assignment - centroid update iterations
*
* The clustering is based on an Index object that assigns training
* points to the centroids. Therefore, at each iteration the centroids
* are added to the index.
*
 * On output, the centroids table is set to the latest version
 * of the centroids and they are also added to the index. If the
 * centroids table is not empty on input, it is also used for
* initialization.
*
*/
struct Clustering : ClusteringParameters {
typedef Index::idx_t idx_t;
size_t d; ///< dimension of the vectors
size_t k; ///< nb of centroids
/** centroids (k * d)
* if centroids are set on input to train, they will be used as
* initialization
*/
std::vector<float> centroids;
/// stats at every iteration of clustering
std::vector<ClusteringIterationStats> iteration_stats;
Clustering(int d, int k);
Clustering(int d, int k, const ClusteringParameters& cp);
/** run k-means training
*
* @param x training vectors, size n * d
* @param index index used for assignment
* @param x_weights weight associated to each vector: NULL or size n
*/
virtual void train(
idx_t n,
const float* x,
faiss::Index& index,
const float* x_weights = nullptr);
/** run with encoded vectors
*
 * in addition to train()'s parameters, takes a codec as parameter
* to decode the input vectors.
*
* @param codec codec used to decode the vectors (nullptr =
 *              vectors are in fact floats)
*/
void train_encoded(
idx_t nx,
const uint8_t* x_in,
const Index* codec,
Index& index,
const float* weights = nullptr);
/// Post-process the centroids after each centroid update.
/// includes optional L2 normalization and nearest integer rounding
void post_process_centroids();
virtual ~Clustering() {}
};
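/* Example (sketch): warm-starting from user-provided centroids. Filling
 * the centroids table before train() uses those vectors as initialization;
 * with frozen_centroids they are kept fixed across iterations:
 *
 *   faiss::Clustering clus(d, k);
 *   clus.centroids.assign(init.begin(), init.end()); // n_init * d floats
 *   clus.frozen_centroids = true;
 *   faiss::IndexFlatL2 index(d);
 *   clus.train(n, x, index); // x: n * d training vectors
 */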
/** Exact 1D clustering algorithm
*
* Since it does not use an index, it does not overload the train() function
*/
struct Clustering1D : Clustering {
explicit Clustering1D(int k);
Clustering1D(int k, const ClusteringParameters& cp);
void train_exact(idx_t n, const float* x);
virtual ~Clustering1D() {}
};
struct ProgressiveDimClusteringParameters : ClusteringParameters {
int progressive_dim_steps; ///< number of incremental steps
bool apply_pca; ///< apply PCA on input
ProgressiveDimClusteringParameters();
};
/** generates an index suitable for clustering when called */
struct ProgressiveDimIndexFactory {
/// ownership transferred to caller
virtual Index* operator()(int dim);
virtual ~ProgressiveDimIndexFactory() {}
};
/** K-means clustering with progressive dimensions used
*
* The clustering first happens in dim 1, then with exponentially increasing
* dimension until d (I steps). This is typically applied after a PCA
* transformation (optional). Reference:
*
* "Improved Residual Vector Quantization for High-dimensional Approximate
* Nearest Neighbor Search"
*
* Shicong Liu, Hongtao Lu, Junru Shao, AAAI'15
*
* https://arxiv.org/abs/1509.05195
*/
struct ProgressiveDimClustering : ProgressiveDimClusteringParameters {
using idx_t = Index::idx_t;
size_t d; ///< dimension of the vectors
size_t k; ///< nb of centroids
/** centroids (k * d) */
std::vector<float> centroids;
/// stats at every iteration of clustering
std::vector<ClusteringIterationStats> iteration_stats;
ProgressiveDimClustering(int d, int k);
ProgressiveDimClustering(
int d,
int k,
const ProgressiveDimClusteringParameters& cp);
void train(idx_t n, const float* x, ProgressiveDimIndexFactory& factory);
virtual ~ProgressiveDimClustering() {}
};
/** simplified interface
*
* @param d dimension of the data
* @param n nb of training vectors
* @param k nb of output centroids
* @param x training set (size n * d)
* @param centroids output centroids (size k * d)
* @return final quantization error
*/
float kmeans_clustering(
size_t d,
size_t n,
size_t k,
const float* x,
float* centroids);
} // namespace faiss
#endif