Commit 395d2ce6 authored by huchen's avatar huchen
Browse files

init the faiss for rocm

parent 5ded39f5
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import faiss
import argparse
from multiprocessing.dummy import Pool as ThreadPool
def ivecs_mmap(fname):
    """Memory-map an .ivecs file and expose it as a 2-D int32 array."""
    raw = np.memmap(fname, dtype='int32', mode='r')
    dim = raw[0]
    # each row stores its dimension followed by `dim` values; strip the prefix
    table = raw.reshape(-1, dim + 1)
    return table[:, 1:]
def fvecs_mmap(fname):
    """Memory-map an .fvecs file by reinterpreting the ivecs layout as float32."""
    as_ints = ivecs_mmap(fname)
    return as_ints.view('float32')
def produce_batches(args):
    """Yield (ids, vectors) batches over rows args.i0:args.i1 of args.input."""
    data = fvecs_mmap(args.input)
    if args.i1 == -1:
        # -1 means "up to the end of the file"
        args.i1 = len(data)
    print("Iterating on vectors %d:%d from %s by batches of size %d" % (
        args.i0, args.i1, args.input, args.bs))
    for start in range(args.i0, args.i1, args.bs):
        stop = min(start + args.bs, args.i1)
        yield np.arange(start, stop), data[start:stop]
def rate_limited_iter(l):
    'a thread pre-processes the next element'
    # single worker thread that computes next(l) in the background so the
    # consumer can overlap its processing with production of the next batch
    pool = ThreadPool(1)
    res = None

    def next_or_None():
        # fetch the next element; None signals exhaustion of the iterator
        try:
            return next(l)
        except StopIteration:
            return None

    while True:
        # schedule the fetch of the *next* element before yielding the
        # current one: one element of prefetch
        res_next = pool.apply_async(next_or_None)
        if res is not None:
            res = res.get()
            if res is None:
                return
            yield res
        res = res_next
# location of the Deep1B dataset files (fvecs format)
deep1bdir = "/datasets01_101/simsearch/041218/deep1b/"
# working directory where trained / filled indexes are stored
workdir = "/checkpoint/matthijs/ondisk_distributed/"
def main():
    """Fill a pre-trained (empty) index with a vertical slice of the database.

    Reads vectors args.i0:args.i1 from the input fvecs file by batches,
    adds them with their ids to the index loaded from args.inputindex,
    and writes the filled index to args.o.
    """
    parser = argparse.ArgumentParser(
        description='make index for a subset of the data')

    def aa(*args, **kwargs):
        # shorthand: add an argument to the current group
        group.add_argument(*args, **kwargs)

    group = parser.add_argument_group('index type')
    aa('--inputindex',
       default=workdir + 'trained.faissindex',
       help='empty input index to fill in')
    aa('--nt', default=-1, type=int, help='nb of openmp threads to use')

    group = parser.add_argument_group('db options')
    aa('--input', default=deep1bdir + "base.fvecs")
    aa('--bs', default=2**18, type=int,
       help='batch size for db access')
    aa('--i0', default=0, type=int, help='lower bound to index')
    aa('--i1', default=-1, type=int, help='upper bound of vectors to index')

    group = parser.add_argument_group('output')
    aa('-o', default='/tmp/x', help='output index')
    aa('--keepquantizer', default=False, action='store_true',
       help='by default we remove the data from the quantizer to save space')

    args = parser.parse_args()
    print('args=', args)

    print('start accessing data')
    src = produce_batches(args)

    print('loading index', args.inputindex)
    index = faiss.read_index(args.inputindex)

    if args.nt != -1:
        faiss.omp_set_num_threads(args.nt)

    t0 = time.time()
    ntot = 0
    # one batch of prefetch: disk reads overlap with the add() calls
    for ids, x in rate_limited_iter(src):
        print('add %d:%d (%.3f s)' % (ntot, ntot + ids.size, time.time() - t0))
        index.add_with_ids(np.ascontiguousarray(x, dtype='float32'), ids)
        ntot += ids.size

    index_ivf = faiss.extract_index_ivf(index)
    print('invlists stats: imbalance %.3f' % index_ivf.invlists.imbalance_factor())
    index_ivf.invlists.print_stats()

    if not args.keepquantizer:
        # drop the vectors stored in the coarse quantizer: once the
        # invlists are filled only the assignment structure is needed
        print('resetting quantizer content')
        index_ivf = faiss.extract_index_ivf(index)
        index_ivf.quantizer.reset()

    print('store output', args.o)
    faiss.write_index(index, args.o)


if __name__ == '__main__':
    main()
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import faiss
# dataset and working-directory locations
deep1bdir = "/datasets01_101/simsearch/041218/deep1b/"
workdir = "/checkpoint/matthijs/ondisk_distributed/"

print('Load centroids')
centroids = np.load(workdir + '1M_centroids.npy')
ncent, d = centroids.shape

print('apply random rotation')
# fixed-seed random rotation so the transform is reproducible; balances
# the variance across dimensions before scalar quantization
rrot = faiss.RandomRotationMatrix(d, d)
rrot.init(1234)
centroids = rrot.apply_py(centroids)

print('make HNSW index as quantizer')
# the coarse quantizer is an HNSW graph over the (rotated) centroids
quantizer = faiss.IndexHNSWFlat(d, 32)
quantizer.hnsw.efSearch = 1024
quantizer.hnsw.efConstruction = 200
quantizer.add(centroids)

print('build index')
index = faiss.IndexPreTransform(
    rrot,
    faiss.IndexIVFScalarQuantizer(
        quantizer, d, ncent, faiss.ScalarQuantizer.QT_6bit
    )
)
def ivecs_mmap(fname):
    """Map an .ivecs file into memory as a (n, d) int32 array."""
    flat = np.memmap(fname, dtype='int32', mode='r')
    d = flat[0]
    # rows are [d, v0 .. v(d-1)]; drop the leading dimension column
    return flat.reshape(-1, d + 1)[:, 1:]
def fvecs_mmap(fname):
    """Map an .fvecs file: same layout as ivecs, payload viewed as float32."""
    return ivecs_mmap(fname).view('float32')
print('finish training index')
# train rotation + scalar quantizer on a 256k-vector sample of the learn set
xt = fvecs_mmap(deep1bdir + 'learn.fvecs')
xt = np.ascontiguousarray(xt[:256 * 1000], dtype='float32')
index.train(xt)

print('write output')
faiss.write_index(index, workdir + 'trained.faissindex')
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import faiss
import argparse
from multiprocessing.dummy import Pool as ThreadPool
if __name__ == '__main__':
    # Merge the inverted lists of several IVF indexes into a single
    # on-disk inverted-list file, optionally restricted to a list range.
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputs', nargs='*', required=True,
                        help='input indexes to merge')
    parser.add_argument('--l0', type=int, default=0)
    parser.add_argument('--l1', type=int, default=-1)
    # BUGFIX: was missing type=int, so a user-supplied value arrived as a
    # string; it was also only printed, never applied
    parser.add_argument('--nt', type=int, default=-1,
                        help='nb threads')
    parser.add_argument('--output', required=True,
                        help='output index filename')
    parser.add_argument('--outputIL',
                        help='output invfile filename')
    args = parser.parse_args()

    if args.nt != -1:
        print('set nb of threads to', args.nt)
        faiss.omp_set_num_threads(args.nt)

    ils = faiss.InvertedListsPtrVector()
    # keep Python references alive so SWIG does not deallocate the lists
    ils_dont_dealloc = []

    pool = ThreadPool(20)

    def load_index(fname):
        # load one input index mmapped / read-only; (fname, None) on failure
        print("loading", fname)
        try:
            index = faiss.read_index(
                fname, faiss.IO_FLAG_MMAP | faiss.IO_FLAG_READ_ONLY)
        except RuntimeError as e:
            print('could not load %s: %s' % (fname, e))
            return fname, None
        print(" %d entries" % index.ntotal)
        return fname, index

    index0 = None
    for _, index in pool.imap(load_index, args.inputs):
        if index is None:
            continue
        index_ivf = faiss.extract_index_ivf(index)
        # detach the invlists from the index and take ownership
        il = faiss.downcast_InvertedLists(index_ivf.invlists)
        index_ivf.invlists = None
        il.this.own()
        ils_dont_dealloc.append(il)
        if (args.l0, args.l1) != (0, -1):
            print('restricting to lists %d:%d' % (args.l0, args.l1))
            il.crop_invlists(args.l0, args.l1)
        ils.push_back(il)
        # keep the first successfully loaded index as the output skeleton
        if index0 is None:
            index0 = index

    print("loaded %d invlists" % ils.size())

    if not args.outputIL:
        args.outputIL = args.output + '_invlists'

    il0 = ils.at(0)

    il = faiss.OnDiskInvertedLists(
        il0.nlist, il0.code_size,
        args.outputIL)

    print("perform merge")
    ntotal = il.merge_from(ils.data(), ils.size(), True)

    print("swap into index0")
    index0_ivf = faiss.extract_index_ivf(index0)
    index0_ivf.nlist = il0.nlist
    index0_ivf.ntotal = index0.ntotal = ntotal
    index0_ivf.invlists = il
    # the on-disk lists outlive the index object; don't let it free them
    index0_ivf.own_invlists = False

    print("write", args.output)
    faiss.write_index(index0, args.output)
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Simplistic RPC implementation.
Exposes all functions of a Server object.
Uses pickle for serialization and the socket interface.
"""
import os,pdb,pickle,time,errno,sys,_thread,traceback,socket,threading,gc
# default TCP port the RPC server listens on
PORT = 12032
#########################################################################
# simple I/O functions
def inline_send_handle(f, conn):
    """Send the contents of open file `f` over `conn`: pickled size, then raw bytes."""
    nbytes = os.fstat(f.fileno()).st_size
    pickle.dump(nbytes, conn)
    conn.write(f.read(nbytes))
def inline_send_string(s, conn):
    """Send buffer `s` over `conn`: pickled length followed by the payload."""
    pickle.dump(len(s), conn)
    conn.write(s)
class FileSock:
    " wraps a socket so that it is usable by pickle/cPickle "

    def __init__(self, sock):
        self.sock = sock
        self.nr = 0   # number of read() calls (debugging aid)

    def write(self, buf):
        """Send the whole buffer, in chunks of at most 512 KiB."""
        bs = 512 * 1024
        ns = 0
        while ns < len(buf):
            sent = self.sock.send(buf[ns:ns + bs])
            ns += sent

    def read(self, bs=512 * 1024):
        """Read up to bs bytes (possibly fewer if the peer closes)."""
        self.nr += 1
        chunks = []
        nb = 0
        # BUGFIX: the loop condition used to be len(chunks) < bs, which
        # compared the chunk *count* to the byte budget; it only terminated
        # through an extra recv(0) call returning b'' once nb reached bs.
        while nb < bs:
            rb = self.sock.recv(bs - nb)
            if not rb:
                break
            chunks.append(rb)
            nb += len(rb)
        return b''.join(chunks)

    def readline(self):
        """Read up to and including the next newline. May be optimized..."""
        s = bytes()
        while True:
            c = self.read(1)
            s += c
            if len(c) == 0 or chr(c[0]) == '\n':
                return s
class ClientExit(Exception):
    """Raised server-side when the client closes the connection."""
    pass


class ServerException(Exception):
    """Raised client-side to relay an error that occurred on the server."""
    pass
class Server:
    """
    server protocol. Methods from classes that subclass Server can be called
    transparently from a client
    """

    def __init__(self, s, logf=sys.stderr, log_prefix=''):
        # s: an already-accepted, connected socket
        self.logf = logf
        self.log_prefix = log_prefix
        # connection
        self.conn = s
        self.fs = FileSock(s)

    def log(self, s):
        # write one prefixed line to the log stream
        self.logf.write("Sever log %s: %s\n" % (self.log_prefix, s))

    def one_function(self):
        """
        Executes a single function with associated I/O.
        Protocol:
        - the arguments and results are serialized with the pickle protocol
        - client sends : (fname,args)
        fname = method name to call
        args = tuple of arguments
        - server sends result: (rid,st,ret)
        rid = request id
        st = None, or exception if there was during execution
        ret = return value or None if st!=None
        """
        try:
            (fname, args) = pickle.load(self.fs)
        except EOFError:
            # client hung up before sending a request
            raise ClientExit("read args")
        self.log("executing method %s" % (fname))
        st = None
        ret = None
        try:
            f = getattr(self, fname)
        except AttributeError:
            st = AttributeError("unknown method " + fname)
            self.log("unknown method ")
        # NOTE(review): if the method name was unknown, `f` is unbound here,
        # so the call below raises NameError which overwrites `st`; the
        # client still receives an error, just not the AttributeError above
        try:
            ret = f(*args)
        except Exception as e:
            # due to a bug (in mod_python?), ServerException cannot be
            # unpickled, so send the string and make the exception on the client side
            # st=ServerException(
            #   "".join(traceback.format_tb(sys.exc_info()[2]))+
            #   str(e))
            st = "".join(traceback.format_tb(sys.exc_info()[2])) + str(e)
            self.log("exception in method")
            traceback.print_exc(50, self.logf)
            self.logf.flush()
        print("return")
        try:
            pickle.dump((st, ret), self.fs, protocol=4)
        except EOFError:
            raise ClientExit("function return")

    def exec_loop(self):
        """ main execution loop. Loops and handles exit states"""
        self.log("in exec_loop")
        try:
            while True:
                self.one_function()
        except ClientExit as e:
            # normal termination: client closed the connection
            self.log("ClientExit %s" % e)
        except socket.error as e:
            self.log("socket error %s" % e)
            traceback.print_exc(50, self.logf)
        except EOFError:
            self.log("EOF during communication")
            traceback.print_exc(50, self.logf)
        except BaseException:
            # unexpected
            traceback.print_exc(50, sys.stderr)
            sys.exit(1)
        print("exit sever")

    def exec_loop_cleanup(self):
        # hook for subclasses; not invoked anywhere in this file
        pass

    ###################################################################
    # spying stuff

    def get_ps_stats(self):
        """Return uptime / process / run-queue stats of the server host."""
        ret = ''
        f = os.popen("echo ============ `hostname` uptime:; uptime;" +
                     "echo ============ self:; " +
                     "ps -p %d -o pid,vsize,rss,%%cpu,nlwp,psr; " % os.getpid() +
                     "echo ============ run queue:;" +
                     "ps ar -o user,pid,%cpu,%mem,ni,nlwp,psr,vsz,rss,cputime,command")
        for l in f:
            ret += l
        return ret
class Client:
    """
    Methods of the server object can be called transparently. Exceptions are
    re-raised.
    """

    def __init__(self, HOST, port=PORT, v6=False):
        socktype = socket.AF_INET6 if v6 else socket.AF_INET
        sock = socket.socket(socktype, socket.SOCK_STREAM)
        print("connecting", HOST, port, socktype)
        sock.connect((HOST, port))
        self.sock = sock
        self.fs = FileSock(sock)

    def generic_fun(self, fname, args):
        # send the (method name, argument tuple) request and wait for the reply
        pickle.dump((fname, args), self.fs, protocol=4)
        return self.get_result()

    def get_result(self):
        # replies are (error-or-None, return value); re-raise server errors
        (st, ret) = pickle.load(self.fs)
        if st != None:
            raise ServerException(st)
        else:
            return ret

    def __getattr__(self, name):
        # any attribute access becomes a remote method call
        return lambda *x: self.generic_fun(name, x)
def run_server(new_handler, port=PORT, report_to_file=None, v6=False):
    """Accept connections forever, one handler thread per connection.

    new_handler: callable mapping an accepted socket to an object with an
    exec_loop() method (typically a Server subclass).
    report_to_file: optional path where "host:port " is written once the
    server is listening, so clients can discover it.
    """
    HOST = ''  # Symbolic name meaning the local host
    socktype = socket.AF_INET6 if v6 else socket.AF_INET
    s = socket.socket(socktype, socket.SOCK_STREAM)
    # allow immediate rebinding of the port after a restart
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    print("bind %s:%d" % (HOST, port))
    s.bind((HOST, port))
    s.listen(5)
    print("accepting connections")

    if report_to_file is not None:
        print('storing host+port in', report_to_file)
        # BUGFIX: close the file instead of leaking the handle
        with open(report_to_file, 'w') as f:
            f.write('%s:%d ' % (socket.gethostname(), port))

    while True:
        try:
            conn, addr = s.accept()
        except socket.error as e:
            # BUGFIX: on Python 3, socket.error is OSError and is not
            # subscriptable -- the old `e[1] == 'Interrupted system call'`
            # raised TypeError. Test errno instead (PEP 475 makes this
            # mostly redundant on 3.5+, but it is harmless).
            if e.errno == errno.EINTR:
                continue
            raise
        print('Connected by', addr, end=' ')
        ibs = new_handler(conn)
        tid = _thread.start_new_thread(ibs.exec_loop, ())
        print("tid", tid)
#! /bin/bash
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
set -e

# which stage of the pipeline to run (first positional argument)
todo=$1

# other options can be transmitted
shift

# the training data of the Deep1B dataset
deep1bdir=/datasets01_101/simsearch/041218/deep1b
traindata=$deep1bdir/learn.fvecs

# this is for small tests
nvec=1000000
k=4000

# for the real run
# nvec=50000000
# k=1000000

# working directory for the real run
workdir=/checkpoint/matthijs/ondisk_distributed

mkdir -p $workdir/{vslices,hslices}

if [ -z "$todo" ]; then
    echo "nothing to do"
    exit 1
elif [ $todo == test_kmeans_0 ]; then
    # non distributed baseline
    python distributed_kmeans.py \
         --indata $traindata --i1 $nvec \
         --k $k

elif [ $todo == test_kmeans_1 ]; then
    # using all the machine's GPUs
    python distributed_kmeans.py \
         --indata $traindata --i1 $nvec \
         --k $k --gpu -1

elif [ $todo == test_kmeans_2 ]; then
    # distributed run, with one local server per GPU
    ngpu=$( echo /dev/nvidia? | wc -w )
    baseport=12012

    # kill background processes on exit of this script
    trap 'kill -HUP 0' 0

    hostports=''

    for((gpu=0;gpu<ngpu;gpu++)); do
        # range of vectors to assign to each server
        i0=$((nvec * gpu / ngpu))
        i1=$((nvec * (gpu + 1) / ngpu))
        port=$(( baseport + gpu ))

        echo "start server $gpu for range $i0:$i1"

        python distributed_kmeans.py \
             --indata $traindata \
             --i0 $i0 --i1 $i1 \
             --server --gpu $gpu \
             --port $port --ipv4 &

        hostports="$hostports localhost:$port"
    done

    # lame way of making sure all servers are running
    sleep 5s

    python distributed_kmeans.py \
         --client --servers "$hostports" \
         --k $k --ipv4
elif [ $todo == slurm_distributed_kmeans ]; then
    # launch one distributed_kmeans server per slurm task
    nserv=5

    srun -n$nserv \
         --time=48:00:00 \
         --cpus-per-task=40 --gres=gpu:4 --mem=100G \
         --partition=priority --comment='priority is the only one that works' \
         -l bash $( realpath $0 ) slurm_within_kmeans_server

elif [ $todo == slurm_within_kmeans_server ]; then
    # runs inside one slurm task; rank 0 additionally acts as the client
    nserv=$SLURM_NPROCS
    [ ! -z "$nserv" ] || (echo "should be run by slurm"; exit 1)
    rank=$SLURM_PROCID

    baseport=12012

    # range of vectors served by this rank
    i0=$((nvec * rank / nserv))
    i1=$((nvec * (rank + 1) / nserv))
    port=$(( baseport + rank ))

    echo "host $(hostname) start server $rank for range $i0:$i1 port $port"

    if [ $rank != 0 ]; then
        python -u distributed_kmeans.py \
             --indata $traindata \
             --i0 $i0 --i1 $i1 \
             --server --gpu -1 \
             --port $port --ipv4
    else
        # master process
        # kill background processes on exit of this script
        trap 'kill -HUP 0' 0

        python -u distributed_kmeans.py \
             --indata $traindata \
             --i0 $i0 --i1 $i1 \
             --server --gpu -1 \
             --port $port --ipv4 &

        # Slurm has a somewhat convoluted way of specifying the nodes
        # assigned to each task. This is to parse the SLURM_TASKS_PER_NODE variable
        function parse_tasks_per_node () {
            local blocks=$1
            for block in ${blocks//,/ }; do
                # blocks look like either "3" or "2(x4)" (2 tasks on 4 nodes)
                if [ ${block/x/} != $block ]; then
                    tpn="${block%(*}"
                    repeat=${block#*x}
                    repeat=${repeat%?}
                    for((i=0;i<repeat;i++)); do
                        echo $tpn
                    done
                else
                    echo $block
                fi
            done
        }

        hostports=""
        port=$baseport
        echo VARS $SLURM_TASKS_PER_NODE $SLURM_JOB_NODELIST
        tasks_per_node=( $( parse_tasks_per_node $SLURM_TASKS_PER_NODE ) )
        nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) )
        n=${#nodes[*]}

        # build the host:port list of all servers
        for((i=0;i<n;i++)); do
            hostname=${nodes[i]}
            for((j=0;j<tasks_per_node[i];j++)); do
                hostports="$hostports $hostname:$port"
                ((port++))
            done
        done

        echo HOSTPORTS $hostports

        # lame way of making sure all servers are running
        sleep 20s

        # run client
        python distributed_kmeans.py \
             --client --servers "$hostports" \
             --k $k --ipv4 "$@"

        echo "Done, kill the job"
        scancel $SLURM_JOBID

    fi
elif [ $todo == deep1b_clustering ]; then
    # also set nvec=500M and k=10M in the top of the file
    nserv=20

    srun -n$nserv \
         --time=48:00:00 \
         --cpus-per-task=40 --gres=gpu:4 --mem=100G \
         --partition=priority --comment='priority is the only one that works' \
         -l bash $( realpath $0 ) slurm_within_kmeans_server \
         --out $workdir/1M_centroids.npy

elif [ $todo == make_index_vslices ]; then
    # vslice: slice per database shards
    nvec=1000000000
    nslice=200

    for((i=0;i<nslice;i++)); do
        i0=$((nvec * i / nslice))
        i1=$((nvec * (i + 1) / nslice))

        # make the script to be run by sbatch
        cat > $workdir/vslices/slice$i.bash <<EOF
#!/bin/bash
srun python -u make_index_vslice.py \
     --inputindex $workdir/trained.faissindex \
     --input $deep1bdir/base.fvecs \
     --nt 40 \
     --i0 $i0 --i1 $i1 \
     -o $workdir/vslices/slice$i.faissindex
EOF
        # specify resources for script and run it
        sbatch -n1 \
               --time=48:00:00 \
               --cpus-per-task=40 --gres=gpu:0 --mem=200G \
               --output=$workdir/vslices/slice$i.log \
               --job-name=vslice$i.c \
               $workdir/vslices/slice$i.bash

        echo "logs in $workdir/vslices/slice$i.log"
    done

elif [ $todo == make_index_hslices ]; then
    # hslice: slice per inverted lists
    nlist=1000000
    nslice=50

    for((i=0;i<nslice;i++)); do
        i0=$((nlist * i / nslice))
        i1=$((nlist * (i + 1) / nslice))

        # make the script to be run by sbatch
        cat > $workdir/hslices/slice$i.bash <<EOF
#!/bin/bash
srun python -u merge_to_ondisk.py \
     --input $workdir/vslices/slice{0..199}.faissindex \
     --nt 20 \
     --l0 $i0 --l1 $i1 \
     --output $workdir/hslices/slice$i.faissindex \
     --outputIL $workdir/hslices/slice$i.invlists
EOF
        # specify resources for script and run it
        sbatch -n1 \
               --time=48:00:00 \
               --cpus-per-task=20 --gres=gpu:0 --mem=200G \
               --output=$workdir/hslices/slice$i.log \
               --job-name=hslice$i.a \
               --constraint=pascal \
               $workdir/hslices/slice$i.bash

        echo "logs in $workdir/hslices/slice$i.log"
    done

elif [ $todo == run_search_servers ]; then
    # start the search servers (one per slurm task)
    nserv=3
    srun -n$nserv \
         --time=48:00:00 \
         --cpus-per-task=64 --gres=gpu:0 --mem=100G \
         --constraint=pascal \
         --partition=priority --comment='priority is the only one that works' \
         -l python -u search_server.py --port 12012

else
    echo "unknown todo $todo"
    exit 1
fi
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import rpc
import combined_index
import argparse
############################################################
# Server implementation
############################################################
class MyServer(rpc.Server):
    """ Assign version that can be exposed via RPC """

    def __init__(self, s, index):
        # s: connected socket, handled by the rpc.Server machinery
        rpc.Server.__init__(self, s)
        self.index = index

    def __getattr__(self, f):
        # delegate unknown attributes to the wrapped index, so all index
        # methods become remotely callable through the RPC protocol
        return getattr(self.index, f)
def main():
    """Start an RPC server exposing a CombinedIndexDeep1B instance."""
    parser = argparse.ArgumentParser()

    def aa(*args, **kwargs):
        # shorthand: add an argument to the current group
        group.add_argument(*args, **kwargs)

    group = parser.add_argument_group('server options')
    aa('--port', default=12012, type=int, help='server port')
    aa('--when_ready_dir', default=None,
       help='store host:port to this file when ready')
    aa('--ipv4', default=False, action='store_true', help='force ipv4')
    aa('--rank', default=0, type=int,
       help='rank used as index in the client table')

    args = parser.parse_args()

    when_ready = None
    if args.when_ready_dir:
        # one readiness file per rank, polled by whoever launched us
        when_ready = '%s/%d' % (args.when_ready_dir, args.rank)

    print('loading index')
    index = combined_index.CombinedIndexDeep1B()

    print('starting server')
    rpc.run_server(
        lambda s: MyServer(s, index),
        args.port, report_to_file=when_ready,
        v6=not args.ipv4)


if __name__ == '__main__':
    main()
############################################################
# Client implementation
############################################################
from multiprocessing.dummy import Pool as ThreadPool
import faiss
import numpy as np
class ResultHeap:
    """ Combine query results from a sliced dataset (for k-nn search) """

    def __init__(self, nq, k):
        " nq: number of query vectors, k: number of results per query "
        self.I = np.zeros((nq, k), dtype='int64')
        self.D = np.zeros((nq, k), dtype='float32')
        self.nq, self.k = nq, k
        # faiss max-heap structure operating in place on the D / I buffers
        heaps = faiss.float_maxheap_array_t()
        heaps.k = k
        heaps.nh = nq
        heaps.val = faiss.swig_ptr(self.D)
        heaps.ids = faiss.swig_ptr(self.I)
        heaps.heapify()
        self.heaps = heaps

    def add_batch_result(self, D, I, i0):
        # D, I: (nq, k) results from one slice; i0 is the slice's id
        # offset, added to I in place before pushing into the heaps
        assert D.shape == (self.nq, self.k)
        assert I.shape == (self.nq, self.k)
        I += i0
        self.heaps.addn_with_ids(
            self.k, faiss.swig_ptr(D),
            faiss.swig_ptr(I), self.k)

    def finalize(self):
        # sort each heap so results come out in increasing distance order
        self.heaps.reorder()
def distribute_weights(weights, nbin):
    """ assign a set of weights to a smaller set of bins to balance them """
    heaviest_first = weights.argsort()[::-1]
    bins = np.zeros(nbin)
    assign = np.ones(weights.size, dtype=int)
    # greedy balancing: drop the next-heaviest weight into the currently
    # lightest bin
    for idx in heaviest_first:
        target = bins.argmin()
        assign[idx] = target
        bins[target] += weights[idx]
    return bins, assign
class SplitPerListIndex:
    """manages a local index, that does the coarse quantization and a set
    of sub_indexes. The sub_indexes search a subset of the inverted
    lists. The SplitPerListIndex merges results from the sub-indexes"""

    def __init__(self, index, sub_indexes):
        self.index = index
        self.code_size = faiss.extract_index_ivf(index.index).code_size
        self.sub_indexes = sub_indexes
        self.ni = len(self.sub_indexes)
        # pool of threads. Each thread manages one sub-index.
        self.pool = ThreadPool(self.ni)
        self.verbose = False

    def set_nprobe(self, nprobe):
        # propagate the setting to the local index and all sub-indexes
        self.index.set_nprobe(nprobe)
        self.pool.map(
            lambda i: self.sub_indexes[i].set_nprobe(nprobe),
            range(self.ni)
        )

    def set_omp_num_threads(self, nt):
        faiss.omp_set_num_threads(nt)
        self.pool.map(
            lambda idx: idx.set_omp_num_threads(nt),
            self.sub_indexes
        )

    def set_parallel_mode(self, pm):
        self.index.set_parallel_mode(pm)
        self.pool.map(
            lambda idx: idx.set_parallel_mode(pm),
            self.sub_indexes
        )

    def set_prefetch_nthread(self, nt):
        self.index.set_prefetch_nthread(nt)
        self.pool.map(
            lambda idx: idx.set_prefetch_nthread(nt),
            self.sub_indexes
        )

    def balance_lists(self, list_nos):
        """Assign each queried inverted list to a sub-index so that the
        total amount of data scanned per sub-index is balanced."""
        big_il = self.index.big_il
        weights = np.array([big_il.list_size(int(i))
                            for i in list_nos.ravel()])
        bins, assign = distribute_weights(weights, self.ni)
        if self.verbose:
            print('bins weight range %d:%d total %d (%.2f MiB)' % (
                bins.min(), bins.max(), bins.sum(),
                bins.sum() * (self.code_size + 8) / 2 ** 20))
        self.nscan = bins.sum()
        return assign.reshape(list_nos.shape)

    def search(self, x, k):
        # coarse-quantize once locally; each sub-index then scans only the
        # lists assigned to it (the other entries are masked with -1)
        xqo, list_nos, coarse_dis = self.index.transform_and_assign(x)
        assign = self.balance_lists(list_nos)

        def do_query(i):
            sub_index = self.sub_indexes[i]
            list_nos_i = list_nos.copy()
            list_nos_i[assign != i] = -1
            t0 = time.time()
            Di, Ii = sub_index.ivf_search_preassigned(
                xqo, list_nos_i, coarse_dis, k)
            if self.verbose:
                print('client %d: %.3f s' % (i, time.time() - t0))
            return Di, Ii

        # merge per-sub-index top-k lists through a result heap
        rh = ResultHeap(x.shape[0], k)
        for Di, Ii in self.pool.imap(do_query, range(self.ni)):
            rh.add_batch_result(Di, Ii, 0)
        rh.finalize()
        return rh.D, rh.I

    def range_search(self, x, radius):
        xqo, list_nos, coarse_dis = self.index.transform_and_assign(x)
        assign = self.balance_lists(list_nos)
        nq = len(x)

        def do_query(i):
            sub_index = self.sub_indexes[i]
            list_nos_i = list_nos.copy()
            list_nos_i[assign != i] = -1
            t0 = time.time()
            limi, Di, Ii = sub_index.ivf_range_search_preassigned(
                xqo, list_nos_i, coarse_dis, radius)
            if self.verbose:
                print('slice %d: %.3f s' % (i, time.time() - t0))
            return limi, Di, Ii

        # per-query result fragments, concatenated over sub-indexes
        D = [[] for i in range(nq)]
        I = [[] for i in range(nq)]
        sizes = np.zeros(nq, dtype=int)
        for lims, Di, Ii in self.pool.imap(do_query, range(self.ni)):
            for i in range(nq):
                l0, l1 = lims[i:i + 2]
                D[i].append(Di[l0:l1])
                I[i].append(Ii[l0:l1])
                sizes[i] += l1 - l0
        # repackage into the standard faiss range-search (lims, D, I) format
        lims = np.zeros(nq + 1, dtype=int)
        lims[1:] = np.cumsum(sizes)
        D = np.hstack([j for i in D for j in i])
        I = np.hstack([j for i in I for j in i])
        return lims, D, I
#! /usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import print_function
import numpy as np
import time
import faiss
import sys
# Get command-line arguments
k = int(sys.argv[1])
ngpu = int(sys.argv[2])
# Load Leon's file format
def load_mnist(fname):
    """Load an MNIST idx3-ubyte image file into a (nim, xd, yd) uint8 array.

    The 16-byte header contains 4 big-endian int32s: magic, nim, xd, yd.
    """
    print("load", fname)
    # BUGFIX: open in binary mode (np.fromfile on a text-mode handle is
    # unreliable on Python 3) and close the handle via the context manager
    with open(fname, 'rb') as f:
        header = np.fromfile(f, dtype='int8', count=4 * 4)
        # byte-reverse each 4-byte row to decode the big-endian int32s
        # NOTE(review): this trick assumes a little-endian host
        header = header.reshape(4, 4)[:, ::-1].copy().view('int32')
        print(header)
        # skip header[0] (magic number); avoid int() on shape-(1,) rows,
        # which is deprecated in recent numpy
        nim, xd, yd = (int(v) for v in header.ravel()[1:])
        data = np.fromfile(f, count=nim * xd * yd,
                           dtype='uint8')
    print(data.shape, nim, xd, yd)
    data = data.reshape(nim, xd, yd)
    return data
# root directory of the MNIST8m data
# BUGFIX: the trailing '/' was missing, so the concatenation below produced
# the bogus path '/path/to/mnist/datamnist8m/...'
basedir = "/path/to/mnist/data/"

x = load_mnist(basedir + 'mnist8m/mnist8m-patterns-idx3-ubyte')

print("reshape")
# flatten each image to a vector and convert to float32 for faiss
x = x.reshape(x.shape[0], -1).astype('float32')
def train_kmeans(x, k, ngpu):
    "Runs kmeans on one or several GPUs"
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    # one GPU resource object per device
    res = [faiss.StandardGpuResources() for i in range(ngpu)]

    flat_config = []
    for i in range(ngpu):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if ngpu == 1:
        index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
    else:
        # one flat index per GPU, queried through a replica set
        indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                   for i in range(ngpu)]
        index = faiss.IndexReplicas()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    # objective value (total quantization error) per iteration
    obj = faiss.vector_float_to_array(clus.obj)
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
print("run")
t0 = time.time()
train_kmeans(x, k, ngpu)
t1 = time.time()
print("total runtime: %.3f s" % (t1 - t0))
README for the link & code implementation
=========================================
What is this?
-------------
Link & code is an indexing method that combines HNSW indexing with
compression and exploits the neighborhood structure of the similarity
graph to improve the reconstruction. It is described in
```
@inproceedings{link_and_code,
author = {Matthijs Douze and Alexandre Sablayrolles and Herv\'e J\'egou},
title = {Link and code: Fast indexing with graphs and compact regression codes},
booktitle = {CVPR},
year = {2018}
}
```
ArXiV [here](https://arxiv.org/abs/1804.09996)
Code structure
--------------
The test runs with 3 files:
- `bench_link_and_code.py`: driver script
- `datasets.py`: code to load the datasets. The example code runs on the
deep1b and bigann datasets. See the [toplevel README](../README.md)
on how to download them. They should be put in a directory, edit
datasets.py to set the path.
- `neighbor_codec.py`: this is where the representation is trained.
The code runs on top of Faiss. The HNSW index can be extended with a
`ReconstructFromNeighbors` C++ object that refines the distances. The
training is implemented in Python.
Reproducing Table 2 in the paper
--------------------------------
The results of table 2 (accuracy on deep100M) in the paper can be
obtained with:
```
python bench_link_and_code.py \
--db deep100M \
--M0 6 \
--indexkey OPQ36_144,HNSW32_PQ36 \
--indexfile $bdir/deep100M_PQ36_L6.index \
--beta_nsq 4 \
--beta_centroids $bdir/deep100M_PQ36_L6_nsq4.npy \
--neigh_recons_codes $bdir/deep100M_PQ36_L6_nsq4_codes.npy \
--k_reorder 0,5 --efSearch 1,1024
```
Set `bdir` to a scratch directory.
Explanation of the flags:
- `--db deep1M`: dataset to process
- `--M0 6`: number of links on the base level (L6)
- `--indexkey OPQ36_144,HNSW32_PQ36`: Faiss index key to construct the
HNSW structure. It means that vectors are transformed by OPQ and
encoded with PQ 36x8 (with an intermediate size of 144D). The HNSW
level>0 nodes have 32 links (these are "cheap" to store
because there are fewer nodes in the upper levels).
- `--indexfile $bdir/deep1M_PQ36_M6.index`: name of the index file
(without information for the L&C extension)
- `--beta_nsq 4`: number of bytes to allocate for the codes (M in the
paper)
- `--beta_centroids $bdir/deep1M_PQ36_M6_nsq4.npy`: filename to store
the trained beta centroids
- `--neigh_recons_codes $bdir/deep1M_PQ36_M6_nsq4_codes.npy`: filename
for the encoded weights (beta) of the combination
- `--k_reorder 0,5`: number of results to reorder. 0 = baseline
without reordering, 5 = value used throughout the paper
- `--efSearch 1,1024`: number of nodes to visit (T in the paper)
The script will proceed with the following steps:
0. load dataset (and possibly compute the ground-truth if the
ground-truth file is not provided)
1. train the OPQ encoder
2. build the index and store it
3. compute the residuals and train the beta vocabulary to do the reconstruction
4. encode the vertices
5. search and evaluate the search results.
With option `--exhaustive` the results of the exhaustive column can be
obtained.
The run above should output:
```
...
setting k_reorder=5
...
efSearch=1024 0.3132 ms per query, R@1: 0.4283 R@10: 0.6337 R@100: 0.6520 ndis 40941919 nreorder 50000
```
which matches the paper's table 2.
Note that in multi-threaded mode, the building of the HNSW structure
is not deterministic. Therefore, the results across runs may not be exactly the same.
Reproducing Figure 5 in the paper
---------------------------------
Figure 5 just evaluates the combination of HNSW and PQ. For example,
the operating point L6&OPQ40 can be obtained with
```
python bench_link_and_code.py \
--db deep1M \
--M0 6 \
--indexkey OPQ40_160,HNSW32_PQ40 \
--indexfile $bdir/deep1M_PQ40_M6.index \
--beta_nsq 1 --beta_k 1 \
--beta_centroids $bdir/deep1M_PQ40_M6_nsq0.npy \
--neigh_recons_codes $bdir/deep1M_PQ36_M6_nsq0_codes.npy \
--k_reorder 0 --efSearch 16,64,256,1024
```
The arguments are similar to the previous table. Note that nsq = 0 is
simulated by setting beta_nsq = 1 and beta_k = 1 (ie a code with a single
reproduction value).
The output should look like:
```
setting k_reorder=0
efSearch=16 0.0147 ms per query, R@1: 0.3409 R@10: 0.4388 R@100: 0.4394 ndis 2629735 nreorder 0
efSearch=64 0.0122 ms per query, R@1: 0.4836 R@10: 0.6490 R@100: 0.6509 ndis 4623221 nreorder 0
efSearch=256 0.0344 ms per query, R@1: 0.5730 R@10: 0.7915 R@100: 0.7951 ndis 11090176 nreorder 0
efSearch=1024 0.2656 ms per query, R@1: 0.6212 R@10: 0.8722 R@100: 0.8765 ndis 33501951 nreorder 0
```
The results with k_reorder=5 are not reported in the paper, they
represent the performance of a "free coding" version of the algorithm.
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import print_function
import os
import sys
import time
import numpy as np
import re
import faiss
from multiprocessing.dummy import Pool as ThreadPool
import pdb
import argparse
import datasets
from datasets import sanitize
import neighbor_codec
######################################################
# Command-line parsing
######################################################
parser = argparse.ArgumentParser()


def aa(*args, **kwargs):
    """Shorthand: add an argument to the option group currently in `group`."""
    group.add_argument(*args, **kwargs)


group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--compute_gt', default=False, action='store_true',
   help='compute and store the groundtruth')

# fixed typo: group title used to read 'index consturction' in --help output
group = parser.add_argument_group('index construction')
aa('--indexkey', default='HNSW32', help='index_factory type')
aa('--efConstruction', default=200, type=int,
   help='HNSW construction factor')
aa('--M0', default=-1, type=int, help='size of base level')
aa('--maxtrain', default=256 * 256, type=int,
   help='maximum number of training points')
aa('--indexfile', default='', help='file to read or write index from')
aa('--add_bs', default=-1, type=int,
   help='add elements index by batches of this size')
aa('--link_singletons', default=False, action='store_true',
   help='do a pass to link in the singletons')

group = parser.add_argument_group(
    'searching (reconstruct_from_neighbors options)')
aa('--beta_centroids', default='',
   help='file with codebook')
aa('--neigh_recons_codes', default='',
   help='file with codes for reconstruction')
aa('--beta_ntrain', default=250000, type=int, help='')
aa('--beta_k', default=256, type=int, help='beta codebook size')
aa('--beta_nsq', default=1, type=int, help='number of beta sub-vectors')
aa('--beta_niter', default=10, type=int, help='')
aa('--k_reorder', default='-1', help='')

group = parser.add_argument_group('searching')
aa('--k', default=100, type=int, help='nb of nearest neighbors')
aa('--exhaustive', default=False, action='store_true',
   help='report the exhaustive search topline')
aa('--searchthreads', default=-1, type=int,
   help='nb of threads to use at search time')
aa('--efSearch', default='', type=str,
   help='comma-separated values of efSearch to try')

args = parser.parse_args()
print("args:", args)
######################################################
# Load dataset
######################################################
# xt: training vectors, xb: database vectors, xq: queries,
# gt: ground-truth nearest-neighbor ids (may be None)
xt, xb, xq, gt = datasets.load_data(
    dataset=args.db, compute_gt=args.compute_gt)

nq, d = xq.shape
nb, d = xb.shape

######################################################
# Make index
######################################################
if os.path.exists(args.indexfile):
    # Reload a previously built and populated index from disk.
    print("reading", args.indexfile)
    index = faiss.read_index(args.indexfile)

    if isinstance(index, faiss.IndexPreTransform):
        # the HNSW index is wrapped in a vector transform (e.g. OPQ):
        # unwrap it and keep the transform as a Python callable
        index_hnsw = faiss.downcast_index(index.index)
        vec_transform = index.chain.at(0).apply_py
    else:
        index_hnsw = index
        vec_transform = lambda x:x  # identity when there is no pre-transform

    hnsw = index_hnsw.hnsw
    hnsw_stats = faiss.cvar.hnsw_stats

else:
    # Build the index from scratch: train, add vectors, store on disk.
    print("build index, key=", args.indexkey)

    index = faiss.index_factory(d, args.indexkey)

    if isinstance(index, faiss.IndexPreTransform):
        index_hnsw = faiss.downcast_index(index.index)
        vec_transform = index.chain.at(0).apply_py
    else:
        index_hnsw = index
        vec_transform = lambda x:x

    hnsw = index_hnsw.hnsw
    hnsw.efConstruction = args.efConstruction
    hnsw_stats = faiss.cvar.hnsw_stats
    index.verbose = True
    index_hnsw.verbose = True
    index_hnsw.storage.verbose = True

    if args.M0 != -1:
        print("set level 0 nb of neighbors to", args.M0)
        hnsw.set_nb_neighbors(0, args.M0)

    # train on at most --maxtrain points (contiguous float32 required)
    xt2 = sanitize(xt[:args.maxtrain])
    assert np.all(np.isfinite(xt2))

    print("train, size", xt.shape)
    t0 = time.time()
    index.train(xt2)
    print("  train in %.3f s" % (time.time() - t0))

    print("adding")
    t0 = time.time()
    if args.add_bs == -1:
        index.add(sanitize(xb))
    else:
        # add by batches to bound peak memory usage
        for i0 in range(0, nb, args.add_bs):
            i1 = min(nb, i0 + args.add_bs)
            print("  adding %d:%d / %d" % (i0, i1, nb))
            index.add(sanitize(xb[i0:i1]))

    print("  add in %.3f s" % (time.time() - t0))
    print("storing", args.indexfile)
    faiss.write_index(index, args.indexfile)
######################################################
# Train beta centroids and encode dataset
######################################################
# Only active when a --beta_centroids file is given: attaches a
# ReconstructFromNeighbors (link-and-code) codec to the HNSW index.
if args.beta_centroids:
    print("reordering links")
    index_hnsw.reorder_links()

    if os.path.exists(args.beta_centroids):
        # reuse a previously trained beta codebook
        print("load", args.beta_centroids)
        beta_centroids = np.load(args.beta_centroids)
        nsq, k, M1 = beta_centroids.shape
        # codebook width must match nb of level-0 neighbors + the point itself
        assert M1 == hnsw.nb_neighbors(0) + 1

        rfn = faiss.ReconstructFromNeighbors(index_hnsw, k, nsq)
    else:
        print("train beta centroids")
        rfn = faiss.ReconstructFromNeighbors(
            index_hnsw, args.beta_k, args.beta_nsq)

        # train on the first --beta_ntrain (transformed) database vectors
        xb_full = vec_transform(sanitize(xb[:args.beta_ntrain]))

        beta_centroids = neighbor_codec.train_beta_codebook(
            rfn, xb_full, niter=args.beta_niter)

        print("  storing", args.beta_centroids)
        np.save(args.beta_centroids, beta_centroids)

    faiss.copy_array_to_vector(beta_centroids.ravel(),
                               rfn.codebook)
    index_hnsw.reconstruct_from_neighbors = rfn

    if rfn.k == 1:
        pass     # no codes to take care of
    elif os.path.exists(args.neigh_recons_codes):
        print("loading neigh codes", args.neigh_recons_codes)
        codes = np.load(args.neigh_recons_codes)
        assert codes.size == rfn.code_size * index.ntotal
        faiss.copy_array_to_vector(codes.astype('uint8'),
                                   rfn.codes)
        rfn.ntotal = index.ntotal
    else:
        # encode the whole database with the trained codec, by batches
        print("encoding neigh codes")
        t0 = time.time()

        bs = 1000000 if args.add_bs == -1 else args.add_bs

        for i0 in range(0, nb, bs):
            i1 = min(i0 + bs, nb)
            print("   encode %d:%d / %d [%.3f s]\r" % (
                i0, i1, nb, time.time() - t0), end=' ')
            sys.stdout.flush()
            xbatch = vec_transform(sanitize(xb[i0:i1]))
            rfn.add_codes(i1 - i0, faiss.swig_ptr(xbatch))
        print()

        print("storing %s" % args.neigh_recons_codes)
        codes = faiss.vector_to_array(rfn.codes)
        np.save(args.neigh_recons_codes, codes)
######################################################
# Exhaustive evaluation
######################################################
# Topline: decompress every database vector with the codec and run an
# exact (flat) search on the reconstructions.
if args.exhaustive:
    print("exhaustive evaluation")
    xq_tr = vec_transform(sanitize(xq))
    index2 = faiss.IndexFlatL2(index_hnsw.d)
    accu_recons_error = 0.0

    if faiss.get_num_gpus() > 0:
        print("do eval on GPU")
        co = faiss.GpuMultipleClonerOptions()
        co.shard = False
        index2 = faiss.index_cpu_to_all_gpus(index2, co)

    # process in batches in case the dataset does not fit in RAM
    rh = datasets.ResultHeap(xq_tr.shape[0], 100)
    t0 = time.time()
    bs = 500000

    for i0 in range(0, nb, bs):
        i1 = min(nb, i0 + bs)
        print('  handling batch %d:%d' % (i0, i1))

        # reconstruct the slice from neighbor codes, accumulate MSE
        xb_recons = np.empty(
            (i1 - i0, index_hnsw.d), dtype='float32')
        rfn.reconstruct_n(i0, i1 - i0, faiss.swig_ptr(xb_recons))

        accu_recons_error += (
            (vec_transform(sanitize(xb[i0:i1])) -
             xb_recons)**2).sum()

        index2.reset()
        index2.add(xb_recons)
        D, I = index2.search(xq_tr, 100)
        rh.add_batch_result(D, I, i0)

    rh.finalize()
    del index2
    t1 = time.time()

    print("done in %.3f s" % (t1 - t0))
    print("total reconstruction error: ", accu_recons_error)
    print("eval retrieval:")
    datasets.evaluate_DI(rh.D, rh.I, gt)
def get_neighbors(hnsw, i, level):
    """Return the list of neighbor ids of node `i` at HNSW level `level`."""
    assert i < hnsw.levels.size()
    assert level < hnsw.levels.at(i)
    # neighbor_range fills [begin, end) offsets into the flat neighbors table
    bounds = np.empty(2, 'uint64')
    hnsw.neighbor_range(i, level,
                        faiss.swig_ptr(bounds), faiss.swig_ptr(bounds[1:]))
    begin, end = bounds
    return [hnsw.neighbors.at(j) for j in range(begin, end)]
#############################################################
# Index is ready
#############################################################
xq = sanitize(xq)

if args.searchthreads != -1:
    print("Setting nb of threads to", args.searchthreads)
    faiss.omp_set_num_threads(args.searchthreads)

if gt is None:
    print("no valid groundtruth -- exit")
    sys.exit()

# Parse the sweep lists. Filter out empty tokens: the default
# --efSearch '' used to crash on int('') -- now it simply runs no sweep.
k_reorders = [int(x) for x in args.k_reorder.split(',') if x]
efSearchs = [int(x) for x in args.efSearch.split(',') if x]

for k_reorder in k_reorders:
    if index_hnsw.reconstruct_from_neighbors:
        print("setting k_reorder=%d" % k_reorder)
        index_hnsw.reconstruct_from_neighbors.k_reorder = k_reorder

    for efSearch in efSearchs:
        print("efSearch=%-4d" % efSearch, end=' ')
        hnsw.efSearch = efSearch
        hnsw_stats.reset()
        # evaluate prints per-query time and recall@r; the stats counters
        # report distances computed / reorderings during this sweep point
        datasets.evaluate(xq, gt, index, k=args.k, endl=False)
        print("ndis %d nreorder %d" % (hnsw_stats.ndis, hnsw_stats.nreorder))
#! /usr/bin/env python2
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Common functions to load datasets and compute their ground-truth
"""
from __future__ import print_function
import time
import numpy as np
import faiss
import pdb
import sys
# set this to the directory that contains the datafiles.
# deep1b data should be at simdir + 'deep1b'
# bigann data should be at simdir + 'bigann'
simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
    """Load a whole .ivecs file into memory as an (n, d) int32 array.

    Each record is one int32 header holding the dimension d, followed by
    d int32 values; the header column is stripped from the result.
    """
    raw = np.fromfile(fname, dtype='int32')
    dim = raw[0]
    records = raw.reshape(-1, dim + 1)
    return records[:, 1:].copy()
def fvecs_read(fname):
    """Load a .fvecs file: same framing as .ivecs, but float32 payload."""
    as_ints = ivecs_read(fname)
    return as_ints.view('float32')
def ivecs_mmap(fname):
    """Memory-map an .ivecs file as an (n, d) int32 view (no copy)."""
    mapped = np.memmap(fname, dtype='int32', mode='r')
    dim = mapped[0]
    # drop the per-record dimension header column
    return mapped.reshape(-1, dim + 1)[:, 1:]
def fvecs_mmap(fname):
    """Memory-map an .fvecs file; .ivecs framing with float32 payload."""
    ints = ivecs_mmap(fname)
    return ints.view('float32')
def bvecs_mmap(fname):
    """Memory-map a .bvecs file as an (n, d) uint8 view (no copy).

    Each record starts with a 4-byte little-endian int32 dimension header.
    """
    raw = np.memmap(fname, dtype='uint8', mode='r')
    dim = raw[:4].view('int32')[0]
    return raw.reshape(-1, dim + 4)[:, 4:]
def ivecs_write(fname, m):
    """Write an (n, d) int32 array in .ivecs format (d prepended per row)."""
    n, d = m.shape
    framed = np.empty((n, d + 1), dtype='int32')
    framed[:, 0] = d
    framed[:, 1:] = m
    framed.tofile(fname)
def fvecs_write(fname, m):
    """Write a float32 array in .fvecs format (reuses the .ivecs framing)."""
    as_float = m.astype('float32')
    ivecs_write(fname, as_float.view('int32'))
#################################################################
# Dataset
#################################################################
def sanitize(x):
    "Return x as a C-contiguous float32 array, as the faiss API requires."
    return np.ascontiguousarray(x, dtype='float32')
class ResultHeap:
    """ Combine query results from a sliced dataset """

    def __init__(self, nq, k):
        " nq: number of query vectors, k: number of results per query "
        self.I = np.zeros((nq, k), dtype='int64')
        self.D = np.zeros((nq, k), dtype='float32')
        self.nq, self.k = nq, k
        heaps = faiss.float_maxheap_array_t()
        heaps.k = k
        heaps.nh = nq
        # The heap structure writes directly into self.D / self.I through
        # raw SWIG pointers; keeping the arrays as attributes keeps them
        # alive for as long as the heaps object is used.
        heaps.val = faiss.swig_ptr(self.D)
        heaps.ids = faiss.swig_ptr(self.I)
        heaps.heapify()
        self.heaps = heaps

    def add_batch_result(self, D, I, i0):
        # D, I: top-k results computed on a database slice starting at
        # offset i0; shift ids to global numbering before merging.
        assert D.shape == (self.nq, self.k)
        assert I.shape == (self.nq, self.k)
        I += i0
        # NOTE(review): the two self.k arguments are presumably the per-query
        # result count and the id stride expected by addn_with_ids -- confirm
        # against the faiss float_maxheap_array_t API.
        self.heaps.addn_with_ids(
            self.k, faiss.swig_ptr(D),
            faiss.swig_ptr(I), self.k)

    def finalize(self):
        # sort each per-query heap so D / I come out in increasing distance
        self.heaps.reorder()
def compute_GT_sliced(xb, xq, k):
    """Exact k-NN ground truth of xq in xb, computed slice by slice.

    Database slices are added to a (GPU-cloned) flat index one at a time and
    the per-slice results are merged in a ResultHeap, so xb never has to fit
    in index memory at once. Returns the (nq, k) id array.
    """
    print("compute GT")
    t0 = time.time()
    nb, d = xb.shape
    nq, d = xq.shape
    heap = ResultHeap(nq, k)
    block = 10 ** 5
    queries = sanitize(xq)
    db_gt = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
    # compute ground-truth by blocks, and merge results into the heaps
    for start in range(0, nb, block):
        stop = min(nb, start + block)
        db_gt.add(sanitize(xb[start:stop]))
        D, I = db_gt.search(queries, k)
        heap.add_batch_result(D, I, start)
        db_gt.reset()
        print("\r %d/%d, %.3f s" % (start, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print()
    heap.finalize()
    print("GT time: %.3f s" % (time.time() - t0))
    return heap.I
def do_compute_gt(xb, xq, k):
    """Brute-force k-NN ground truth of xq in xb, as an int32 id array.

    Small databases are searched in one shot; large ones go through the
    sliced variant to bound memory use.
    """
    print("computing GT")
    nb, d = xb.shape
    gpu_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
    if nb < 100 * 1000:
        print(" add")
        gpu_index.add(np.ascontiguousarray(xb, dtype='float32'))
        print(" search")
        _, nn = gpu_index.search(
            np.ascontiguousarray(xq, dtype='float32'), k)
    else:
        nn = compute_GT_sliced(xb, xq, k)
    return nn.astype('int32')
def load_data(dataset='deep1M', compute_gt=False):
    """Load the (train, base, query, groundtruth) arrays of a benchmark.

    Supported names: 'sift1M', 'bigann<n>M'/'bigann1B', 'deep<n>k/M'/'deep1B'.
    Files are looked up under the module-level `simdir` root; large bases are
    memory-mapped. When compute_gt is set (deep* only), the ground truth is
    computed by brute force and stored before being reloaded.
    """
    print("load data", dataset)

    if dataset == 'sift1M':
        basedir = simdir + 'sift1M/'
        xt = fvecs_read(basedir + "sift_learn.fvecs")
        xb = fvecs_read(basedir + "sift_base.fvecs")
        xq = fvecs_read(basedir + "sift_query.fvecs")
        gt = ivecs_read(basedir + "sift_groundtruth.ivecs")

    elif dataset.startswith('bigann'):
        basedir = simdir + 'bigann/'
        # database size in millions, e.g. "bigann10M" -> 10
        if dataset == "bigann1B":
            dbsize = 1000
        else:
            dbsize = int(dataset[6:-1])
        xb = bvecs_mmap(basedir + 'bigann_base.bvecs')
        xq = bvecs_mmap(basedir + 'bigann_query.bvecs')
        xt = bvecs_mmap(basedir + 'bigann_learn.bvecs')
        # trim xb to correct size
        xb = xb[:dbsize * 1000 * 1000]
        gt = ivecs_read(basedir + 'gnd/idx_%dM.ivecs' % dbsize)

    elif dataset.startswith("deep"):
        basedir = simdir + 'deep1b/'
        szsuf = dataset[4:]
        if szsuf == '1B':
            dbsize = 10 ** 9
        elif szsuf[-1] == 'M':
            dbsize = 10 ** 6 * int(szsuf[:-1])
        elif szsuf[-1] == 'k':
            dbsize = 1000 * int(szsuf[:-1])
        else:
            assert False, "did not recognize suffix " + szsuf
        xt = fvecs_mmap(basedir + "learn.fvecs")
        xb = fvecs_mmap(basedir + "base.fvecs")[:dbsize]
        xq = fvecs_read(basedir + "deep1B_queries.fvecs")
        gt_fname = basedir + "%s_groundtruth.ivecs" % dataset
        if compute_gt:
            gt = do_compute_gt(xb, xq, 100)
            print("store", gt_fname)
            ivecs_write(gt_fname, gt)
        gt = ivecs_read(gt_fname)

    else:
        assert False

    print("dataset %s sizes: B %s Q %s T %s" % (
        dataset, xb.shape, xq.shape, xt.shape))
    return xt, xb, xq, gt
#################################################################
# Evaluation
#################################################################
def evaluate_DI(D, I, gt):
    """Print recall@{1,10,100,...} of result ids I against gt's first column.

    D is accepted for signature symmetry with evaluate() but is not used.
    """
    nq = gt.shape[0]
    k = I.shape[1]
    rank = 1
    while rank <= k:
        hits = (I[:, :rank] == gt[:, :1]).sum()
        print("R@%d: %.4f" % (rank, hits / float(nq)), end=' ')
        rank *= 10
def evaluate(xq, gt, index, k=100, endl=True):
    """Search xq on index, print timing and recall@r, and return (D, I)."""
    t0 = time.time()
    D, I = index.search(xq, k)
    elapsed = time.time() - t0
    nq = xq.shape[0]
    print("\t %8.4f ms per query, " % (elapsed * 1000.0 / nq), end=' ')
    # recall@r against the first ground-truth column, for r = 1, 10, 100...
    rank = 1
    while rank <= k:
        recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
        print("R@%d: %.4f" % (rank, recall), end=' ')
        rank *= 10
    if endl:
        print()
    return D, I
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
This is the training code for the link and code. Especially the
neighbors_kmeans function implements the EM-algorithm to find the
appropriate weightings and cluster them.
"""
from __future__ import print_function
import time
import numpy as np
import faiss
#----------------------------------------------------------
# Utils
#----------------------------------------------------------
def sanitize(x):
    "Convert to a contiguous float32 array (what the faiss API expects)."
    return np.ascontiguousarray(x, dtype='float32')
def train_kmeans(x, k, ngpu, max_points_per_centroid=256):
    "Runs kmeans on one or several GPUs"
    # x: (n, d) training vectors; k: number of centroids; ngpu == 0 runs
    # the assignment step on CPU instead.
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20
    clus.max_points_per_centroid = max_points_per_centroid

    if ngpu == 0:
        index = faiss.IndexFlatL2(d)
    else:
        # NOTE(review): `res` (GPU resources) must stay alive while `index`
        # is in use -- keep this list until clus.train returns.
        res = [faiss.StandardGpuResources() for i in range(ngpu)]

        flat_config = []
        for i in range(ngpu):
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if ngpu == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            # one flat index per GPU, replicated behind an IndexReplicas
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(ngpu)]
            index = faiss.IndexReplicas()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    stats = clus.iteration_stats
    stats = [stats.at(i) for i in range(stats.size())]
    obj = np.array([st.obj for st in stats])
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
#----------------------------------------------------------
# Learning the codebook from neighbors
#----------------------------------------------------------
# works with both a full Inn table and dynamically generated neighbors
def get_Inn_shape(Inn):
    """Return (N, knn), whether Inn is a materialized (N, knn) neighbor
    table or an (N, knn, sq) tuple describing dynamically generated ones."""
    if type(Inn) is tuple:
        return Inn[:2]
    return Inn.shape
def get_neighbor_table(x_coded, Inn, i):
    """Neighbor vectors of element i.

    With an array Inn, rows of x_coded are gathered directly. With a tuple
    Inn = (N, knn, sq), x_coded is a ReconstructFromNeighbors object and the
    table is generated by it, restricted to sub-quantizer sq's dimensions.
    """
    if type(Inn) is tuple:
        rfn = x_coded
        table = np.zeros((rfn.M + 1, rfn.index.d), dtype='float32')
        rfn.get_neighbor_table(int(i), faiss.swig_ptr(table))
        _, _, sq = Inn
        d0 = sq * rfn.dsub
        return table[:, d0: d0 + rfn.dsub]
    return x_coded[Inn[i, :], :]
# Function that produces the best regression values from the vector
# and its neighbors
def regress_from_neighbors(x, x_coded, Inn):
    """Least-squares weights expressing each x[i] over its coded neighbors.

    Returns an (N, knn) array of regression coefficients (one row per
    vector), solved with np.linalg.lstsq per element.
    """
    (N, knn) = get_Inn_shape(Inn)
    betas = np.zeros((N, knn))
    t0 = time.time()
    # Progress interval. This was `N / 10`, a float under Python 3, which
    # made `i % (N / 10) == 0` fire erratically (py2 -> py3 fix).
    step = max(N // 10, 1)
    for i in range(N):
        xi = x[i, :]
        NNi = get_neighbor_table(x_coded, Inn, i)
        betas[i, :] = np.linalg.lstsq(NNi.transpose(), xi, rcond=0.01)[0]
        if i % step == 0:
            print("[%d:%d]  %6.3f s" % (i, i + step, time.time() - t0))
    return betas
# find the best beta minimizing ||x-x_coded[Inn,:]*beta||^2
def regress_opt_beta(x, x_coded, Inn):
    """Single beta minimizing sum_i ||x[i] - x_coded[Inn[i], :] * beta||^2."""
    (N, knn) = get_Inn_shape(Inn)
    d = x.shape[1]
    # stack every element's d equations into one tall least-squares system
    X = np.zeros((d * N))
    Y = np.zeros((d * N, knn))
    for i in range(N):
        rows = slice(i * d, (i + 1) * d)
        X[rows] = x[i, :]
        Y[rows, :] = get_neighbor_table(x_coded, Inn, i).transpose()
    return np.linalg.lstsq(Y, X, rcond=0.01)[0]
# Find the best encoding by minimizing the reconstruction error using
# a set of pre-computed beta values
def assign_beta(beta_centroids, x, x_coded, Inn, verbose=True):
    """Assign to each x[i] the beta codeword with the lowest
    reconstruction error; returns an (N,) int32 array of indices.

    With a tuple Inn the assignment is delegated to the C++ implementation.
    """
    if type(Inn) == tuple:
        return assign_beta_2(beta_centroids, x, x_coded, Inn)
    (N, knn) = Inn.shape
    x_ibeta = np.zeros((N), dtype='int32')
    t0 = time.time()
    # Progress interval; was `N / 10` (float under Python 3), making the
    # `i % ... == 0` test fire erratically (py2 -> py3 fix).
    step = max(N // 10, 1)
    for i in range(N):
        NNi = x_coded[Inn[i, :]]
        # try every codeword on this element's neighbor table and keep
        # the one with the smallest squared reconstruction error
        x_reg_all = np.dot(beta_centroids, NNi)
        err = ((x_reg_all - x[i, :]) ** 2).sum(axis=1)
        x_ibeta[i] = err.argmin()
        if verbose:
            if i % step == 0:
                print("[%d:%d]  %6.3f s" % (i, i + step, time.time() - t0))
    return x_ibeta
# Reconstruct a set of vectors using the beta_centroids, the
# assignment, the encoded neighbors identified by the list Inn (which
# includes the vector itself)
def recons_from_neighbors(beta_centroids, x_ibeta, x_coded, Inn):
    """Reconstruct vectors as beta-weighted combinations of their neighbors.

    x_ibeta[i] selects the codeword for element i; Inn lists the encoded
    neighbor ids (including the vector itself).
    NOTE(review): the output is sized like x_coded but only the first
    N = len(Inn) rows are written -- presumably N == len(x_coded); confirm.
    """
    (N, knn) = Inn.shape
    x_rec = np.zeros(x_coded.shape)
    t0 = time.time()
    # Progress interval; was the py2 leftover `N / 10` (float under py3).
    step = max(N // 10, 1)
    for i in range(N):
        NNi = x_coded[Inn[i, :]]
        x_rec[i, :] = np.dot(beta_centroids[x_ibeta[i]], NNi)
        if i % step == 0:
            print("[%d:%d]  %6.3f s" % (i, i + step, time.time() - t0))
    return x_rec
# Compute a EM-like algorithm trying at optimizing the beta such as they
# minimize the reconstruction error from the neighbors
def neighbors_kmeans(x, x_coded, Inn, K, ngpus=1, niter=5):
    """EM-like optimization of a K-entry beta codebook.

    Alternates assignment of each vector to its best codeword with a
    least-squares re-estimation of every codeword; empty clusters are
    re-seeded by splitting a random populated one. Returns the (K, knn)
    codebook.
    """
    # First compute centroids using a regular k-means algorithm
    betas = regress_from_neighbors(x, x_coded, Inn)
    beta_centroids = train_kmeans(
        sanitize(betas), K, ngpus, max_points_per_centroid=1000000)
    _, knn = get_Inn_shape(Inn)
    d = x.shape[1]
    rs = np.random.RandomState()
    for iter in range(niter):
        print('iter', iter)
        idx = assign_beta(beta_centroids, x, x_coded, Inn, verbose=False)
        hist = np.bincount(idx)
        for cl0 in np.where(hist == 0)[0]:
            print("   cluster %d empty, split" % cl0, end=' ')
            cl1 = idx[np.random.randint(idx.size)]
            pos = np.nonzero(idx == cl1)[0]
            # integer division required: `pos.size / 2` is a float under
            # Python 3 and RandomState.choice rejects non-integer sizes
            pos = rs.choice(pos, pos.size // 2)
            print("   cl %d -> %d + %d" % (
                cl1, len(pos), hist[cl1] - len(pos)))
            idx[pos] = cl0
        hist = np.bincount(idx)
        tot_err = 0
        for k in range(K):
            # re-estimate codeword k from all vectors assigned to it
            pos = np.nonzero(idx == k)[0]
            npos = pos.shape[0]
            X = np.zeros(d * npos)
            Y = np.zeros((d * npos, knn))
            for i in range(npos):
                X[i * d:(i + 1) * d] = x[pos[i], :]
                neighbor_table = get_neighbor_table(x_coded, Inn, pos[i])
                Y[i * d:(i + 1) * d, :] = neighbor_table.transpose()
            sol, residuals, _, _ = np.linalg.lstsq(Y, X, rcond=0.01)
            if residuals.size > 0:
                tot_err += residuals.sum()
            beta_centroids[k, :] = sol
        print('   err=%g' % tot_err)
    return beta_centroids
# assign the betas in C++
def assign_beta_2(beta_centroids, x, rfn, Inn):
    """Assign betas for one sub-quantizer via the C++ encoder.

    Embeds the single sub-quantizer's codebook and data into full-width
    zero buffers, lets ReconstructFromNeighbors encode them, and extracts
    the codes of the requested sub-quantizer.
    """
    _, _, sq = Inn
    if rfn.k == 1:
        # a single codeword: the assignment is trivially all zeros
        return np.zeros(x.shape[0], dtype=int)
    # pad codebook and vectors with dummy dimensions for the other sq's
    full_codebook = np.zeros(
        (rfn.nsq, rfn.k, rfn.M + 1), dtype='float32')
    full_codebook[sq] = beta_centroids
    full_x = np.zeros((len(x), rfn.d), dtype='float32')
    full_x[:, sq * rfn.dsub: (sq + 1) * rfn.dsub] = x
    # reset the encoder state before re-encoding
    rfn.codes.clear()
    rfn.ntotal = 0
    faiss.copy_array_to_vector(full_codebook.ravel(), rfn.codebook)
    rfn.add_codes(len(x), faiss.swig_ptr(full_x))
    codes = faiss.vector_to_array(rfn.codes).reshape(-1, rfn.nsq)
    return codes[:, sq]
#######################################################
# For usage from bench_storages.py
def train_beta_codebook(rfn, xb_full, niter=10):
    """Train one beta codebook per sub-quantizer of rfn; return them stacked
    as an (nsq, k, M+1) array."""
    codebooks = []
    for sq in range(rfn.nsq):
        d0, d1 = sq * rfn.dsub, (sq + 1) * rfn.dsub
        print("training subquantizer %d/%d on dimensions %d:%d" % (
            sq, rfn.nsq, d0, d1))
        centroids_sq = neighbors_kmeans(
            xb_full[:, d0:d1], rfn, (xb_full.shape[0], rfn.M + 1, sq),
            rfn.k,
            ngpus=0, niter=niter)
        codebooks.append(centroids_sq)
        # clear encoder state between sub-quantizers
        rfn.ntotal = 0
        rfn.codes.clear()
        rfn.codebook.clear()
    return np.stack(codebooks)
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "AutoTune_c.h"
#include <faiss/AutoTune.h>
#include <cstring>
#include "macros_impl.h"
using faiss::Index;
using faiss::ParameterRange;
using faiss::ParameterSpace;
/// Name of the parameter described by this range.
const char* faiss_ParameterRange_name(const FaissParameterRange* range) {
    auto impl = reinterpret_cast<const ParameterRange*>(range);
    return impl->name.c_str();
}
/// Expose the raw values array of a ParameterRange (pointer + length).
/// The pointer is invalidated by any later modification of the range.
void faiss_ParameterRange_values(
        FaissParameterRange* range,
        double** p_values,
        size_t* p_size) {
    auto& vals = reinterpret_cast<ParameterRange*>(range)->values;
    *p_size = vals.size();
    *p_values = vals.data();
}
/// Allocate a default-constructed ParameterSpace and return it via *space.
int faiss_ParameterSpace_new(FaissParameterSpace** space) {
    try {
        *space = reinterpret_cast<FaissParameterSpace*>(new ParameterSpace());
    }
    CATCH_AND_HANDLE
}
// Generates faiss_ParameterSpace_free via the project macro.
DEFINE_DESTRUCTOR(ParameterSpace)

/// Number of parameter combinations (product of per-range value counts).
size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace* space) {
    return reinterpret_cast<const ParameterSpace*>(space)->n_combinations();
}
/// Write the name of combination `cno` into char_buffer (at most `size`
/// bytes). The result is now always NUL-terminated: plain strncpy leaves
/// the buffer unterminated when the name is truncated.
int faiss_ParameterSpace_combination_name(
        const FaissParameterSpace* space,
        size_t cno,
        char* char_buffer,
        size_t size) {
    try {
        auto rep = reinterpret_cast<const ParameterSpace*>(space)
                           ->combination_name(cno);
        if (size > 0) {
            strncpy(char_buffer, rep.c_str(), size);
            char_buffer[size - 1] = '\0'; // guarantee termination on truncation
        }
    }
    CATCH_AND_HANDLE
}
/// Apply a parameter-combination string to an index.
int faiss_ParameterSpace_set_index_parameters(
        const FaissParameterSpace* space,
        FaissIndex* cindex,
        const char* param_string) {
    try {
        auto ps = reinterpret_cast<const ParameterSpace*>(space);
        ps->set_index_parameters(
                reinterpret_cast<Index*>(cindex), param_string);
    }
    CATCH_AND_HANDLE
}
/// set a combination of parameters on an index
int faiss_ParameterSpace_set_index_parameters_cno(
        const FaissParameterSpace* space,
        FaissIndex* cindex,
        size_t cno) {
    try {
        auto ps = reinterpret_cast<const ParameterSpace*>(space);
        ps->set_index_parameters(reinterpret_cast<Index*>(cindex), cno);
    }
    CATCH_AND_HANDLE
}
/// Set a single named parameter on an index.
int faiss_ParameterSpace_set_index_parameter(
        const FaissParameterSpace* space,
        FaissIndex* cindex,
        const char* name,
        double value) {
    try {
        auto ps = reinterpret_cast<const ParameterSpace*>(space);
        ps->set_index_parameter(
                reinterpret_cast<Index*>(cindex), name, value);
    }
    CATCH_AND_HANDLE
}
/// print a description on stdout
void faiss_ParameterSpace_display(const FaissParameterSpace* space) {
    auto ps = reinterpret_cast<const ParameterSpace*>(space);
    ps->display();
}
/// Add a named parameter range (or fetch the existing one); optionally
/// hand it back through p_range.
int faiss_ParameterSpace_add_range(
        FaissParameterSpace* space,
        const char* name,
        FaissParameterRange** p_range) {
    try {
        auto& range = reinterpret_cast<ParameterSpace*>(space)->add_range(name);
        if (p_range != nullptr) {
            *p_range = reinterpret_cast<FaissParameterRange*>(&range);
        }
    }
    CATCH_AND_HANDLE
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c -*-
/* Plain-C declarations mirroring the C++ faiss::ParameterRange /
 * faiss::ParameterSpace API (see AutoTune_c.cpp for the implementations). */
#ifndef FAISS_AUTO_TUNE_C_H
#define FAISS_AUTO_TUNE_C_H

#include "Index_c.h"
#include "faiss_c.h"

#ifdef __cplusplus
extern "C" {
#endif

/// possible values of a parameter, sorted from least to most expensive/accurate
FAISS_DECLARE_CLASS(ParameterRange)

FAISS_DECLARE_GETTER(ParameterRange, const char*, name)

/// Getter for the values in the range. The output values are invalidated
/// upon any other modification of the range.
void faiss_ParameterRange_values(FaissParameterRange*, double**, size_t*);

/** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
 */
FAISS_DECLARE_CLASS(ParameterSpace)
FAISS_DECLARE_DESTRUCTOR(ParameterSpace)

/// Parameter space default constructor
int faiss_ParameterSpace_new(FaissParameterSpace** space);

/// nb of combinations, = product of values sizes
size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace*);

/// get string representation of the combination
/// by writing it to the given character buffer.
/// A buffer size of 1000 ensures that the full name is collected.
int faiss_ParameterSpace_combination_name(
        const FaissParameterSpace*,
        size_t,
        char*,
        size_t);

/// set a combination of parameters described by a string
int faiss_ParameterSpace_set_index_parameters(
        const FaissParameterSpace*,
        FaissIndex*,
        const char*);

/// set a combination of parameters on an index
int faiss_ParameterSpace_set_index_parameters_cno(
        const FaissParameterSpace*,
        FaissIndex*,
        size_t);

/// set one of the parameters
int faiss_ParameterSpace_set_index_parameter(
        const FaissParameterSpace*,
        FaissIndex*,
        const char*,
        double);

/// print a description on stdout
void faiss_ParameterSpace_display(const FaissParameterSpace*);

/// add a new parameter (or return it if it exists)
int faiss_ParameterSpace_add_range(
        FaissParameterSpace*,
        const char*,
        FaissParameterRange**);

#ifdef __cplusplus
}
#endif
#endif
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)

project(faiss_c_library LANGUAGES C CXX)

set(CMAKE_C_STANDARD 11)

# Sources of the plain-C wrapper library around the C++ faiss API.
set(FAISS_C_SRC
  AutoTune_c.cpp
  Clustering_c.cpp
  IndexFlat_c.cpp
  IndexIVFFlat_c.cpp
  IndexIVF_c.cpp
  IndexLSH_c.cpp
  IndexPreTransform_c.cpp
  VectorTransform_c.cpp
  IndexShards_c.cpp
  IndexReplicas_c.cpp
  Index_c.cpp
  IndexScalarQuantizer_c.cpp
  MetaIndexes_c.cpp
  clone_index_c.cpp
  error_impl.cpp
  index_factory_c.cpp
  index_io_c.cpp
  impl/AuxIndexStructures_c.cpp
  utils/distances_c.cpp
)
add_library(faiss_c ${FAISS_C_SRC})
# PRIVATE: consumers of faiss_c do not inherit the C++ faiss target.
target_link_libraries(faiss_c PRIVATE faiss)

# Install each header under include/faiss/<p>/, preserving its
# relative subdirectory (impl/, utils/).
function(faiss_install_headers headers p)
  foreach(h ${headers})
    get_filename_component(f ${h} DIRECTORY)
    install(FILES ${h}
      DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/faiss/${p}/${f}
    )
  endforeach()
endfunction()

file(GLOB FAISS_C_API_HEADERS
  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
  "*.h"
  "impl/*.h"
  "utils/*.h")

faiss_install_headers("${FAISS_C_API_HEADERS}" c_api)

# Demo program; built only on demand (make example_c).
add_executable(example_c EXCLUDE_FROM_ALL example_c.c)
target_link_libraries(example_c PRIVATE faiss_c)

if(FAISS_ENABLE_GPU)
  add_subdirectory(gpu)
endif()
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "Clustering_c.h"
#include <faiss/Clustering.h>
#include <faiss/Index.h>
#include <vector>
#include "macros_impl.h"
extern "C" {

using faiss::Clustering;
using faiss::ClusteringIterationStats;
using faiss::ClusteringParameters;
using faiss::Index;

// Plain-C field getters generated by the project macro; each expands to
// faiss_<Class>_<field>(const Faiss<Class>*).
DEFINE_GETTER(Clustering, int, niter)
DEFINE_GETTER(Clustering, int, nredo)
DEFINE_GETTER(Clustering, int, verbose)
DEFINE_GETTER(Clustering, int, spherical)
DEFINE_GETTER(Clustering, int, int_centroids)
DEFINE_GETTER(Clustering, int, update_index)
DEFINE_GETTER(Clustering, int, frozen_centroids)
DEFINE_GETTER(Clustering, int, min_points_per_centroid)
DEFINE_GETTER(Clustering, int, max_points_per_centroid)
DEFINE_GETTER(Clustering, int, seed)
DEFINE_GETTER(Clustering, size_t, decode_block_size)

/// getter for d
DEFINE_GETTER(Clustering, size_t, d)
/// getter for k
DEFINE_GETTER(Clustering, size_t, k)

// per-iteration statistics exposed to C callers
DEFINE_GETTER(ClusteringIterationStats, float, obj)
DEFINE_GETTER(ClusteringIterationStats, double, time)
DEFINE_GETTER(ClusteringIterationStats, double, time_search)
DEFINE_GETTER(ClusteringIterationStats, double, imbalance_factor)
DEFINE_GETTER(ClusteringIterationStats, int, nsplit)
/// Fill *params with the defaults of the C++ ClusteringParameters.
void faiss_ClusteringParameters_init(FaissClusteringParameters* params) {
    ClusteringParameters defaults;
    params->niter = defaults.niter;
    params->nredo = defaults.nredo;
    params->verbose = defaults.verbose;
    params->spherical = defaults.spherical;
    params->int_centroids = defaults.int_centroids;
    params->update_index = defaults.update_index;
    params->frozen_centroids = defaults.frozen_centroids;
    params->min_points_per_centroid = defaults.min_points_per_centroid;
    params->max_points_per_centroid = defaults.max_points_per_centroid;
    params->seed = defaults.seed;
    params->decode_block_size = defaults.decode_block_size;
}
// This conversion is required because the two types are not memory-compatible
inline ClusteringParameters from_faiss_c(
        const FaissClusteringParameters* params) {
    ClusteringParameters cp;
    cp.niter = params->niter;
    cp.nredo = params->nredo;
    cp.verbose = params->verbose;
    cp.spherical = params->spherical;
    cp.int_centroids = params->int_centroids;
    cp.update_index = params->update_index;
    cp.frozen_centroids = params->frozen_centroids;
    cp.min_points_per_centroid = params->min_points_per_centroid;
    cp.max_points_per_centroid = params->max_points_per_centroid;
    cp.seed = params->seed;
    cp.decode_block_size = params->decode_block_size;
    return cp;
}
/// getter for centroids (size = k * d)
void faiss_Clustering_centroids(
        FaissClustering* clustering,
        float** centroids,
        size_t* size) {
    std::vector<float>& v =
            reinterpret_cast<Clustering*>(clustering)->centroids;
    if (centroids != nullptr) {
        *centroids = v.data();
    }
    if (size != nullptr) {
        *size = v.size();
    }
}
/// getter for iteration stats
void faiss_Clustering_iteration_stats(
        FaissClustering* clustering,
        FaissClusteringIterationStats** iteration_stats,
        size_t* size) {
    std::vector<ClusteringIterationStats>& stats =
            reinterpret_cast<Clustering*>(clustering)->iteration_stats;
    if (iteration_stats != nullptr) {
        *iteration_stats =
                reinterpret_cast<FaissClusteringIterationStats*>(stats.data());
    }
    if (size != nullptr) {
        *size = stats.size();
    }
}
/// the only mandatory parameters are k and d
int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k) {
    try {
        auto c = new Clustering(d, k);
        *p_clustering = reinterpret_cast<FaissClustering*>(c);
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Constructor taking explicit clustering parameters.
int faiss_Clustering_new_with_params(
        FaissClustering** p_clustering,
        int d,
        int k,
        const FaissClusteringParameters* cp) {
    try {
        auto c = new Clustering(d, k, from_faiss_c(cp));
        *p_clustering = reinterpret_cast<FaissClustering*>(c);
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Index is used during the assignment stage
int faiss_Clustering_train(
        FaissClustering* clustering,
        idx_t n,
        const float* x,
        FaissIndex* index) {
    try {
        auto clus = reinterpret_cast<Clustering*>(clustering);
        clus->train(n, x, *reinterpret_cast<Index*>(index));
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Release a Clustering allocated by faiss_Clustering_new*.
void faiss_Clustering_free(FaissClustering* clustering) {
    Clustering* impl = reinterpret_cast<Clustering*>(clustering);
    delete impl;
}
/// One-shot k-means over n vectors of dimension d; writes k centroids and
/// (optionally) the final quantization error to *q_error.
int faiss_kmeans_clustering(
        size_t d,
        size_t n,
        size_t k,
        const float* x,
        float* centroids,
        float* q_error) {
    try {
        const float err = faiss::kmeans_clustering(d, n, k, x, centroids);
        if (q_error != nullptr) {
            *q_error = err;
        }
        return 0;
    }
    CATCH_AND_HANDLE
}
} // extern "C"
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c -*-
#ifndef FAISS_CLUSTERING_C_H
#define FAISS_CLUSTERING_C_H
#include "Index_c.h"
#include "faiss_c.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Class for the clustering parameters. Can be passed to the
* constructor of the Clustering object.
*/
typedef struct FaissClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best
int verbose; ///< (bool)
int spherical; ///< (bool) do we want normalized centroids?
int int_centroids; ///< (bool) round centroids coordinates to integer
int update_index; ///< (bool) update index after each iteration?
int frozen_centroids; ///< (bool) use the centroids provided as input and do
///< not change them during iterations
int min_points_per_centroid; ///< otherwise you get a warning
int max_points_per_centroid; ///< to limit size of dataset
int seed; ///< seed for the random number generator
size_t decode_block_size; ///< how many vectors at a time to decode
} FaissClusteringParameters;
/// Sets the ClusteringParameters object with reasonable defaults
void faiss_ClusteringParameters_init(FaissClusteringParameters* params);
/** clustering based on assignment - centroid update iterations
*
* The clustering is based on an Index object that assigns training
* points to the centroids. Therefore, at each iteration the centroids
* are added to the index.
*
* On output, the centroids table is set to the latest version
* of the centroids and they are also added to the index. If the
 * centroids table is not empty on input, it is also used for
* initialization.
*
* To do several clusterings, just call train() several times on
* different training sets, clearing the centroid table in between.
*/
FAISS_DECLARE_CLASS(Clustering)
FAISS_DECLARE_GETTER(Clustering, int, niter)
FAISS_DECLARE_GETTER(Clustering, int, nredo)
FAISS_DECLARE_GETTER(Clustering, int, verbose)
FAISS_DECLARE_GETTER(Clustering, int, spherical)
FAISS_DECLARE_GETTER(Clustering, int, int_centroids)
FAISS_DECLARE_GETTER(Clustering, int, update_index)
FAISS_DECLARE_GETTER(Clustering, int, frozen_centroids)
FAISS_DECLARE_GETTER(Clustering, int, min_points_per_centroid)
FAISS_DECLARE_GETTER(Clustering, int, max_points_per_centroid)
FAISS_DECLARE_GETTER(Clustering, int, seed)
FAISS_DECLARE_GETTER(Clustering, size_t, decode_block_size)
/// getter for d
FAISS_DECLARE_GETTER(Clustering, size_t, d)
/// getter for k
FAISS_DECLARE_GETTER(Clustering, size_t, k)
FAISS_DECLARE_CLASS(ClusteringIterationStats)
FAISS_DECLARE_GETTER(ClusteringIterationStats, float, obj)
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, time)
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, time_search)
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, imbalance_factor)
FAISS_DECLARE_GETTER(ClusteringIterationStats, int, nsplit)
/// getter for centroids (size = k * d)
void faiss_Clustering_centroids(
FaissClustering* clustering,
float** centroids,
size_t* size);
/// getter for iteration stats
void faiss_Clustering_iteration_stats(
FaissClustering* clustering,
FaissClusteringIterationStats** iteration_stats,
size_t* size);
/// the only mandatory parameters are k and d
int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k);
int faiss_Clustering_new_with_params(
FaissClustering** p_clustering,
int d,
int k,
const FaissClusteringParameters* cp);
int faiss_Clustering_train(
FaissClustering* clustering,
idx_t n,
const float* x,
FaissIndex* index);
void faiss_Clustering_free(FaissClustering* clustering);
/** simplified interface
*
* @param d dimension of the data
* @param n nb of training vectors
* @param k nb of output centroids
* @param x training set (size n * d)
* @param centroids output centroids (size k * d)
* @param q_error final quantization error
* @return error code
*/
int faiss_kmeans_clustering(
size_t d,
size_t n,
size_t k,
const float* x,
float* centroids,
float* q_error);
#ifdef __cplusplus
}
#endif
#endif
Faiss C API
===========
Faiss provides a pure C interface, which can subsequently be used either in pure C programs or to produce bindings for programming languages with Foreign Function Interface (FFI) support. Although this is not required for the Python interface, some other programming languages (e.g. Rust and Julia) do not have SWIG support.
Compilation instructions
------------------------
The full contents of the pure C API are in the ["c_api"](c_api/) folder.
Please be sure to follow the instructions on [building the main C++ library](../INSTALL.md#step-1-compiling-the-c-faiss) first.
Then, enter the [c_api](c_api/) directory and run
`make`
This builds the dynamic library "faiss_c", containing the full implementation of Faiss and the necessary wrappers for the C interface. It does not depend on libfaiss.a or the C++ standard library. It will also build an example program `bin/example_c`.
Using the API
-------------
The C API is composed of:
- A set of C header files comprising the main Faiss interfaces, converted for use in C. Each file follows the format `«name»_c.h`, where `«name»` is the respective name from the C++ API. For example, the file [Index_c.h](./Index_c.h) file corresponds to the base `Index` API. Functions are declared with the `faiss_` prefix (e.g. `faiss_IndexFlat_new`), whereas new types have the `Faiss` prefix (e.g. `FaissIndex`, `FaissMetricType`, ...).
- A dynamic library, compiled from the sources in the same folder, encloses the implementation of the library and wrapper functions.
The index factory is available via the `faiss_index_factory` function in `AutoTune_c.h`:
```c
FaissIndex* index = NULL;
int c = faiss_index_factory(&index, 64, "Flat", METRIC_L2);
if (c) {
// operation failed
}
```
Most operations that you would find as member functions are available with the format `faiss_«classname»_«member»`.
```c
idx_t ntotal = faiss_Index_ntotal(index);
```
Since this is C, the index needs to be freed manually in the end:
```c
faiss_Index_free(index);
```
Error handling is done by examining the error code returned by operations with recoverable errors.
The code identifies the type of exception that was raised by the implementation. Fetching the
corresponding error message can be done by calling the function `faiss_get_last_error()` from
`error_c.h`. Getter functions and `free` functions do not return an error code.
```c
int c = faiss_Index_add(index, nb, xb);
if (c) {
printf("%s", faiss_get_last_error());
exit(-1);
}
```
An example is included, which is built automatically for the target `all`. It can also be built separately:
`make bin/example_c`
Building with GPU support
-------------------------
For GPU support, a separate dynamic library in the "c_api/gpu" directory needs to be built.
`make`
The "gpufaiss_c" dynamic library contains the GPU and CPU implementations of Faiss, which means that
it can be used in place of "faiss_c". The same library will dynamically link with the CUDA runtime
and cuBLAS.
Using the GPU with the C API
----------------------------
A standard GPU resources object can be obtained by the name `FaissStandardGpuResources`:
```c
FaissStandardGpuResources* gpu_res = NULL;
int c = faiss_StandardGpuResources_new(&gpu_res);
if (c) {
printf("%s", faiss_get_last_error());
exit(-1);
}
```
Similarly to the C++ API, a CPU index can be converted to a GPU index:
```c
FaissIndex* cpu_index = NULL;
int c = faiss_index_factory(&cpu_index, d, "Flat", METRIC_L2);
if (c) { /* ... */ }
FaissGpuIndex* gpu_index = NULL;
c = faiss_index_cpu_to_gpu(gpu_res, 0, cpu_index, &gpu_index);
if (c) { /* ... */ }
```
A more complete example is available by the name `bin/example_gpu_c`.
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "IndexFlat_c.h"
#include <faiss/IndexFlat.h>
#include <faiss/IndexRefine.h>
#include "macros_impl.h"
extern "C" {
using faiss::Index;
using faiss::IndexFlat;
using faiss::IndexFlat1D;
using faiss::IndexFlatIP;
using faiss::IndexFlatL2;
using faiss::IndexRefineFlat;
DEFINE_DESTRUCTOR(IndexFlat)
DEFINE_INDEX_DOWNCAST(IndexFlat)
/// Allocate a default-constructed IndexFlat and return it via *p_index.
int faiss_IndexFlat_new(FaissIndexFlat** p_index) {
    try {
        IndexFlat* impl = new IndexFlat();
        *p_index = reinterpret_cast<FaissIndexFlat*>(impl);
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an IndexFlat of dimension `d` with the given metric.
int faiss_IndexFlat_new_with(
        FaissIndexFlat** p_index,
        idx_t d,
        FaissMetricType metric) {
    try {
        auto m = static_cast<faiss::MetricType>(metric);
        *p_index = reinterpret_cast<FaissIndexFlat*>(new IndexFlat(d, m));
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Expose the index's internal vector storage. `p_size` is optional and,
/// when given, receives the storage size in number of float values.
void faiss_IndexFlat_xb(FaissIndexFlat* index, float** p_xb, size_t* p_size) {
    auto impl = reinterpret_cast<IndexFlat*>(index);
    *p_xb = impl->get_xb();
    if (p_size != nullptr) {
        // codes is a byte buffer; convert its length to a float count
        *p_size = impl->codes.size() / sizeof(float);
    }
}
/// Compute distances between the `n` queries in `x` and the stored vectors
/// selected by `labels` (n * k entries); results go to `distances`.
int faiss_IndexFlat_compute_distance_subset(
        FaissIndex* index,
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        const idx_t* labels) {
    try {
        auto impl = reinterpret_cast<IndexFlat*>(index);
        impl->compute_distance_subset(n, x, k, distances, labels);
        return 0;
    }
    CATCH_AND_HANDLE
}
DEFINE_DESTRUCTOR(IndexFlatIP)
DEFINE_INDEX_DOWNCAST(IndexFlatIP)
/// Allocate a default-constructed inner-product flat index.
int faiss_IndexFlatIP_new(FaissIndexFlatIP** p_index) {
    try {
        *p_index = reinterpret_cast<FaissIndexFlatIP*>(new IndexFlatIP());
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an inner-product flat index of dimension `d`.
int faiss_IndexFlatIP_new_with(FaissIndexFlatIP** p_index, idx_t d) {
    try {
        *p_index = reinterpret_cast<FaissIndexFlatIP*>(new IndexFlatIP(d));
        return 0;
    }
    CATCH_AND_HANDLE
}
DEFINE_DESTRUCTOR(IndexFlatL2)
DEFINE_INDEX_DOWNCAST(IndexFlatL2)
/// Allocate a default-constructed L2 flat index.
int faiss_IndexFlatL2_new(FaissIndexFlatL2** p_index) {
    try {
        *p_index = reinterpret_cast<FaissIndexFlatL2*>(new IndexFlatL2());
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an L2 flat index of dimension `d`.
int faiss_IndexFlatL2_new_with(FaissIndexFlatL2** p_index, idx_t d) {
    try {
        *p_index = reinterpret_cast<FaissIndexFlatL2*>(new IndexFlatL2(d));
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Wrap `base_index` in an IndexRefineFlat that re-ranks its results
/// with exact distances.
int faiss_IndexRefineFlat_new(
        FaissIndexRefineFlat** p_index,
        FaissIndex* base_index) {
    try {
        auto base = reinterpret_cast<faiss::Index*>(base_index);
        *p_index = reinterpret_cast<FaissIndexRefineFlat*>(
                new IndexRefineFlat(base));
        return 0;
    }
    CATCH_AND_HANDLE
}
DEFINE_DESTRUCTOR(IndexRefineFlat)
DEFINE_INDEX_DOWNCAST(IndexRefineFlat)
DEFINE_GETTER(IndexRefineFlat, int, own_fields)
DEFINE_SETTER(IndexRefineFlat, int, own_fields)
DEFINE_GETTER(IndexRefineFlat, float, k_factor)
DEFINE_SETTER(IndexRefineFlat, float, k_factor)
DEFINE_DESTRUCTOR(IndexFlat1D)
DEFINE_INDEX_DOWNCAST(IndexFlat1D)
/// Allocate a default-constructed IndexFlat1D.
int faiss_IndexFlat1D_new(FaissIndexFlat1D** p_index) {
    try {
        *p_index = reinterpret_cast<FaissIndexFlat1D*>(new IndexFlat1D());
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an IndexFlat1D; `continuous_update` is interpreted as a bool.
int faiss_IndexFlat1D_new_with(
        FaissIndexFlat1D** p_index,
        int continuous_update) {
    try {
        const bool cu = static_cast<bool>(continuous_update);
        *p_index = reinterpret_cast<FaissIndexFlat1D*>(new IndexFlat1D(cu));
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Rebuild the sorted permutation of the stored 1D values.
int faiss_IndexFlat1D_update_permutation(FaissIndexFlat1D* index) {
    try {
        auto impl = reinterpret_cast<IndexFlat1D*>(index);
        impl->update_permutation();
        return 0;
    }
    CATCH_AND_HANDLE
}
}
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c -*-
#ifndef FAISS_INDEX_FLAT_C_H
#define FAISS_INDEX_FLAT_C_H
#include "Index_c.h"
#include "faiss_c.h"
#ifdef __cplusplus
extern "C" {
#endif
// forward declaration
typedef enum FaissMetricType FaissMetricType;
/** Opaque type for IndexFlat */
FAISS_DECLARE_CLASS_INHERITED(IndexFlat, Index)
int faiss_IndexFlat_new(FaissIndexFlat** p_index);
int faiss_IndexFlat_new_with(
FaissIndexFlat** p_index,
idx_t d,
FaissMetricType metric);
/** get a pointer to the index's internal data (the `xb` field). The outputs
* become invalid after any data addition or removal operation.
*
* @param index opaque pointer to index object
* @param p_xb output, the pointer to the beginning of `xb`.
 * @param p_size output, the current size of `xb` in number of float values.
*/
void faiss_IndexFlat_xb(FaissIndexFlat* index, float** p_xb, size_t* p_size);
/** attempt a dynamic cast to a flat index, thus checking
 * whether the underlying index type is `IndexFlat`.
*
* @param index opaque pointer to index object
* @return the same pointer if the index is a flat index, NULL otherwise
*/
FAISS_DECLARE_INDEX_DOWNCAST(IndexFlat)
FAISS_DECLARE_DESTRUCTOR(IndexFlat)
/** compute distance with a subset of vectors
*
* @param index opaque pointer to index object
* @param x query vectors, size n * d
* @param labels indices of the vectors that should be compared
* for each query vector, size n * k
* @param distances
* corresponding output distances, size n * k
*/
int faiss_IndexFlat_compute_distance_subset(
FaissIndex* index,
idx_t n,
const float* x,
idx_t k,
float* distances,
const idx_t* labels);
/** Opaque type for IndexFlatIP */
FAISS_DECLARE_CLASS_INHERITED(IndexFlatIP, Index)
FAISS_DECLARE_INDEX_DOWNCAST(IndexFlatIP)
FAISS_DECLARE_DESTRUCTOR(IndexFlatIP)
int faiss_IndexFlatIP_new(FaissIndexFlatIP** p_index);
int faiss_IndexFlatIP_new_with(FaissIndexFlatIP** p_index, idx_t d);
/** Opaque type for IndexFlatL2 */
FAISS_DECLARE_CLASS_INHERITED(IndexFlatL2, Index)
FAISS_DECLARE_INDEX_DOWNCAST(IndexFlatL2)
FAISS_DECLARE_DESTRUCTOR(IndexFlatL2)
int faiss_IndexFlatL2_new(FaissIndexFlatL2** p_index);
int faiss_IndexFlatL2_new_with(FaissIndexFlatL2** p_index, idx_t d);
/** Opaque type for IndexRefineFlat
*
* Index that queries in a base_index (a fast one) and refines the
* results with an exact search, hopefully improving the results.
*/
FAISS_DECLARE_CLASS_INHERITED(IndexRefineFlat, Index)
int faiss_IndexRefineFlat_new(
FaissIndexRefineFlat** p_index,
FaissIndex* base_index);
FAISS_DECLARE_DESTRUCTOR(IndexRefineFlat)
FAISS_DECLARE_INDEX_DOWNCAST(IndexRefineFlat)
FAISS_DECLARE_GETTER_SETTER(IndexRefineFlat, int, own_fields)
/// factor between k requested in search and the k requested from
/// the base_index (should be >= 1)
FAISS_DECLARE_GETTER_SETTER(IndexRefineFlat, float, k_factor)
/** Opaque type for IndexFlat1D
*
* optimized version for 1D "vectors"
*/
FAISS_DECLARE_CLASS_INHERITED(IndexFlat1D, Index)
FAISS_DECLARE_INDEX_DOWNCAST(IndexFlat1D)
FAISS_DECLARE_DESTRUCTOR(IndexFlat1D)
int faiss_IndexFlat1D_new(FaissIndexFlat1D** p_index);
int faiss_IndexFlat1D_new_with(
FaissIndexFlat1D** p_index,
int continuous_update);
int faiss_IndexFlat1D_update_permutation(FaissIndexFlat1D* index);
#ifdef __cplusplus
}
#endif
#endif
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "IndexIVFFlat_c.h"
#include <faiss/IndexIVFFlat.h>
#include "Clustering_c.h"
#include "Index_c.h"
#include "macros_impl.h"
using faiss::Index;
using faiss::IndexIVFFlat;
using faiss::MetricType;
DEFINE_DESTRUCTOR(IndexIVFFlat)
DEFINE_INDEX_DOWNCAST(IndexIVFFlat)
/// number of possible key values
DEFINE_GETTER(IndexIVFFlat, size_t, nlist)
/// number of probes at query time
DEFINE_GETTER(IndexIVFFlat, size_t, nprobe)
DEFINE_SETTER(IndexIVFFlat, size_t, nprobe)
/// quantizer that maps vectors to inverted lists
DEFINE_GETTER_PERMISSIVE(IndexIVFFlat, FaissIndex*, quantizer)
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
DEFINE_GETTER(IndexIVFFlat, char, quantizer_trains_alone)
/// whether object owns the quantizer
DEFINE_GETTER(IndexIVFFlat, int, own_fields)
DEFINE_SETTER(IndexIVFFlat, int, own_fields)
/// Allocate a default-constructed IndexIVFFlat and return it via *p_index.
/// Returns 0 on success; errors are converted by CATCH_AND_HANDLE.
int faiss_IndexIVFFlat_new(FaissIndexIVFFlat** p_index) {
    try {
        *p_index = reinterpret_cast<FaissIndexIVFFlat*>(new IndexIVFFlat());
        // fix: the success path must return a value; flowing off the end of
        // a non-void function is undefined behavior
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an IndexIVFFlat over `quantizer` with dimension `d` and
/// `nlist` inverted lists. Returns 0 on success.
int faiss_IndexIVFFlat_new_with(
        FaissIndexIVFFlat** p_index,
        FaissIndex* quantizer,
        size_t d,
        size_t nlist) {
    try {
        auto q = reinterpret_cast<Index*>(quantizer);
        *p_index = reinterpret_cast<FaissIndexIVFFlat*>(
                new IndexIVFFlat(q, d, nlist));
        // fix: was missing, non-void function must not fall off the end
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Allocate an IndexIVFFlat over `quantizer` with dimension `d`,
/// `nlist` inverted lists and an explicit metric. Returns 0 on success.
int faiss_IndexIVFFlat_new_with_metric(
        FaissIndexIVFFlat** p_index,
        FaissIndex* quantizer,
        size_t d,
        size_t nlist,
        FaissMetricType metric) {
    try {
        auto q = reinterpret_cast<Index*>(quantizer);
        auto m = static_cast<MetricType>(metric);
        *p_index = reinterpret_cast<FaissIndexIVFFlat*>(
                new IndexIVFFlat(q, d, nlist, m));
        // fix: was missing, non-void function must not fall off the end
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Add `n` vectors with optional ids (`xids`) and optional precomputed
/// inverted-list assignments (`precomputed_idx`). Returns 0 on success.
int faiss_IndexIVFFlat_add_core(
        FaissIndexIVFFlat* index,
        idx_t n,
        const float* x,
        const idx_t* xids,
        const int64_t* precomputed_idx) {
    try {
        reinterpret_cast<IndexIVFFlat*>(index)->add_core(
                n, x, xids, precomputed_idx);
        // fix: was missing, non-void function must not fall off the end
        return 0;
    }
    CATCH_AND_HANDLE
}
/// Overwrite the `nv` stored vectors identified by `idx` with the data
/// in `v`. Returns 0 on success.
int faiss_IndexIVFFlat_update_vectors(
        FaissIndexIVFFlat* index,
        int nv,
        idx_t* idx,
        const float* v) {
    try {
        reinterpret_cast<IndexIVFFlat*>(index)->update_vectors(nv, idx, v);
        // fix: was missing, non-void function must not fall off the end
        return 0;
    }
    CATCH_AND_HANDLE
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment