Commit 3359c1f1 authored by lisj's avatar lisj
Browse files

增加GKLib

parent f2c80b44
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_INTTYPES_H_ // [
#define _MSC_INTTYPES_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include "ms_stdint.h"
// 7.8 Format conversion of integer types
// Result type of imaxdiv(): carries both parts of one intmax_t division.
typedef struct {
intmax_t quot; // quotient, numer / denom
intmax_t rem; // remainder, numer % denom
} imaxdiv_t;
// 7.8.1 Macros for format specifiers
// Each macro expands to a string literal holding the length modifier plus the
// conversion letter for one <stdint.h> type. Paste it after a "%" in a format
// string, e.g. printf("%" PRId64 "\n", value).
// NOTE(review): "I32", "I64" and "I" look like the Microsoft CRT size prefixes
// for 32-bit, 64-bit and pointer-sized integers - confirm against the CRT
// documentation of the targeted Visual C++ version.
// The fprintf macros for signed integers are:
// 8-bit types: no length modifier.
#define PRId8 "d"
#define PRIi8 "i"
#define PRIdLEAST8 "d"
#define PRIiLEAST8 "i"
#define PRIdFAST8 "d"
#define PRIiFAST8 "i"
// 16-bit types: "h" length modifier.
#define PRId16 "hd"
#define PRIi16 "hi"
#define PRIdLEAST16 "hd"
#define PRIiLEAST16 "hi"
#define PRIdFAST16 "hd"
#define PRIiFAST16 "hi"
// 32-bit types: "I32" size prefix.
#define PRId32 "I32d"
#define PRIi32 "I32i"
#define PRIdLEAST32 "I32d"
#define PRIiLEAST32 "I32i"
#define PRIdFAST32 "I32d"
#define PRIiFAST32 "I32i"
// 64-bit types: "I64" size prefix.
#define PRId64 "I64d"
#define PRIi64 "I64i"
#define PRIdLEAST64 "I64d"
#define PRIiLEAST64 "I64i"
#define PRIdFAST64 "I64d"
#define PRIiFAST64 "I64i"
// Greatest-width type: formatted exactly like the 64-bit types.
#define PRIdMAX "I64d"
#define PRIiMAX "I64i"
// Pointer-sized type: "I" size prefix.
#define PRIdPTR "Id"
#define PRIiPTR "Ii"
// The fprintf macros for unsigned integers are:
// (o = octal, u = decimal, x = lower-case hex, X = upper-case hex; the length
// modifiers mirror the signed PRId*/PRIi* macros.)
// 8-bit types: no length modifier.
#define PRIo8 "o"
#define PRIu8 "u"
#define PRIx8 "x"
#define PRIX8 "X"
#define PRIoLEAST8 "o"
#define PRIuLEAST8 "u"
#define PRIxLEAST8 "x"
#define PRIXLEAST8 "X"
#define PRIoFAST8 "o"
#define PRIuFAST8 "u"
#define PRIxFAST8 "x"
#define PRIXFAST8 "X"
// 16-bit types: "h" length modifier.
#define PRIo16 "ho"
#define PRIu16 "hu"
#define PRIx16 "hx"
#define PRIX16 "hX"
#define PRIoLEAST16 "ho"
#define PRIuLEAST16 "hu"
#define PRIxLEAST16 "hx"
#define PRIXLEAST16 "hX"
#define PRIoFAST16 "ho"
#define PRIuFAST16 "hu"
#define PRIxFAST16 "hx"
#define PRIXFAST16 "hX"
// 32-bit types: "I32" size prefix.
#define PRIo32 "I32o"
#define PRIu32 "I32u"
#define PRIx32 "I32x"
#define PRIX32 "I32X"
#define PRIoLEAST32 "I32o"
#define PRIuLEAST32 "I32u"
#define PRIxLEAST32 "I32x"
#define PRIXLEAST32 "I32X"
#define PRIoFAST32 "I32o"
#define PRIuFAST32 "I32u"
#define PRIxFAST32 "I32x"
#define PRIXFAST32 "I32X"
// 64-bit types: "I64" size prefix.
#define PRIo64 "I64o"
#define PRIu64 "I64u"
#define PRIx64 "I64x"
#define PRIX64 "I64X"
#define PRIoLEAST64 "I64o"
#define PRIuLEAST64 "I64u"
#define PRIxLEAST64 "I64x"
#define PRIXLEAST64 "I64X"
#define PRIoFAST64 "I64o"
#define PRIuFAST64 "I64u"
#define PRIxFAST64 "I64x"
#define PRIXFAST64 "I64X"
// Greatest-width type: formatted exactly like the 64-bit types.
#define PRIoMAX "I64o"
#define PRIuMAX "I64u"
#define PRIxMAX "I64x"
#define PRIXMAX "I64X"
// Pointer-sized type: "I" size prefix.
#define PRIoPTR "Io"
#define PRIuPTR "Iu"
#define PRIxPTR "Ix"
#define PRIXPTR "IX"
// The fscanf macros for signed integers are:
// Same layout as the PRI* macros, but note that the 32-bit entries use the
// "l" (long) modifier here instead of "I32".
// 8-bit types: no length modifier.
// NOTE(review): a plain "d"/"i" conversion makes scanf store through an
// int pointer; with an int8_t destination that is wider than the object -
// confirm how callers use these before relying on them.
#define SCNd8 "d"
#define SCNi8 "i"
#define SCNdLEAST8 "d"
#define SCNiLEAST8 "i"
#define SCNdFAST8 "d"
#define SCNiFAST8 "i"
// 16-bit types: "h" length modifier.
#define SCNd16 "hd"
#define SCNi16 "hi"
#define SCNdLEAST16 "hd"
#define SCNiLEAST16 "hi"
#define SCNdFAST16 "hd"
#define SCNiFAST16 "hi"
// 32-bit types: "l" length modifier.
#define SCNd32 "ld"
#define SCNi32 "li"
#define SCNdLEAST32 "ld"
#define SCNiLEAST32 "li"
#define SCNdFAST32 "ld"
#define SCNiFAST32 "li"
// 64-bit types: "I64" size prefix.
#define SCNd64 "I64d"
#define SCNi64 "I64i"
#define SCNdLEAST64 "I64d"
#define SCNiLEAST64 "I64i"
#define SCNdFAST64 "I64d"
#define SCNiFAST64 "I64i"
// Greatest-width type: scanned exactly like the 64-bit types.
#define SCNdMAX "I64d"
#define SCNiMAX "I64i"
// Pointer-sized type: 64-bit form when _WIN64 is defined, 32-bit form otherwise.
#ifdef _WIN64 // [
# define SCNdPTR "I64d"
# define SCNiPTR "I64i"
#else // _WIN64 ][
# define SCNdPTR "ld"
# define SCNiPTR "li"
#endif // _WIN64 ]
// The fscanf macros for unsigned integers are:
// (o = octal, u = decimal, x/X = hexadecimal; the length modifiers mirror the
// signed SCNd*/SCNi* macros.)
// 8-bit types: no length modifier.
// NOTE(review): as with the signed 8-bit macros, these conversions store an
// unsigned int sized value - confirm the destination objects are wide enough.
#define SCNo8 "o"
#define SCNu8 "u"
#define SCNx8 "x"
#define SCNX8 "X"
#define SCNoLEAST8 "o"
#define SCNuLEAST8 "u"
#define SCNxLEAST8 "x"
#define SCNXLEAST8 "X"
#define SCNoFAST8 "o"
#define SCNuFAST8 "u"
#define SCNxFAST8 "x"
#define SCNXFAST8 "X"
// 16-bit types: "h" length modifier.
#define SCNo16 "ho"
#define SCNu16 "hu"
#define SCNx16 "hx"
#define SCNX16 "hX"
#define SCNoLEAST16 "ho"
#define SCNuLEAST16 "hu"
#define SCNxLEAST16 "hx"
#define SCNXLEAST16 "hX"
#define SCNoFAST16 "ho"
#define SCNuFAST16 "hu"
#define SCNxFAST16 "hx"
#define SCNXFAST16 "hX"
// 32-bit types: "l" length modifier.
#define SCNo32 "lo"
#define SCNu32 "lu"
#define SCNx32 "lx"
#define SCNX32 "lX"
#define SCNoLEAST32 "lo"
#define SCNuLEAST32 "lu"
#define SCNxLEAST32 "lx"
#define SCNXLEAST32 "lX"
#define SCNoFAST32 "lo"
#define SCNuFAST32 "lu"
#define SCNxFAST32 "lx"
#define SCNXFAST32 "lX"
// 64-bit types: "I64" size prefix.
#define SCNo64 "I64o"
#define SCNu64 "I64u"
#define SCNx64 "I64x"
#define SCNX64 "I64X"
#define SCNoLEAST64 "I64o"
#define SCNuLEAST64 "I64u"
#define SCNxLEAST64 "I64x"
#define SCNXLEAST64 "I64X"
#define SCNoFAST64 "I64o"
#define SCNuFAST64 "I64u"
#define SCNxFAST64 "I64x"
#define SCNXFAST64 "I64X"
// Greatest-width type: scanned exactly like the 64-bit types.
#define SCNoMAX "I64o"
#define SCNuMAX "I64u"
#define SCNxMAX "I64x"
#define SCNXMAX "I64X"
// Pointer-sized type: 64-bit form when _WIN64 is defined, 32-bit form otherwise.
#ifdef _WIN64 // [
# define SCNoPTR "I64o"
# define SCNuPTR "I64u"
# define SCNxPTR "I64x"
# define SCNXPTR "I64X"
#else // _WIN64 ][
# define SCNoPTR "lo"
# define SCNuPTR "lu"
# define SCNxPTR "lx"
# define SCNXPTR "lX"
#endif // _WIN64 ]
// 7.8.2 Functions for greatest-width integer types
// 7.8.2.1 The imaxabs function
// Provided as a plain alias: every use of imaxabs is rewritten to _abs64.
#define imaxabs _abs64
// 7.8.2.2 The imaxdiv function
// Adapted from the div() implementation shipped in Microsoft's CRT sources
// (%MSVC.NET%\crt\src\div.c). Computes numer / denom and numer % denom and
// hands both back in one imaxdiv_t.
// Defining STATIC_IMAXDIV gives the function static linkage; otherwise it is
// declared _inline.
#ifdef STATIC_IMAXDIV // [
static
#else // STATIC_IMAXDIV ][
_inline
#endif // STATIC_IMAXDIV ]
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
{
    imaxdiv_t qr;

    qr.quot = numer / denom;
    qr.rem = numer % denom;

    // A negative numerator paired with a positive remainder means the
    // division did not truncate toward zero; shift both parts to compensate.
    if (qr.rem > 0 && numer < 0) {
        qr.quot += 1;
        qr.rem -= denom;
    }

    return qr;
}
// 7.8.2.3 The strtoimax and strtoumax functions
// Plain aliases onto the CRT's 64-bit string-to-integer conversions.
#define strtoimax _strtoi64
#define strtoumax _strtoui64
// 7.8.2.4 The wcstoimax and wcstoumax functions
// Wide-character counterparts of the two aliases above.
#define wcstoimax _wcstoi64
#define wcstoumax _wcstoui64
#endif // _MSC_INTTYPES_H_ ]
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MS_STAT_H_
#define _MS_STAT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <sys/stat.h>
/* Test macros for file types.
   Each S_ISxxx(mode) is non-zero when the file-type bits of mode (the bits
   selected by S_IFMT) equal the matching S_IFxxx constant from <sys/stat.h>. */
#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
#define S_ISDIR(mode) __S_ISTYPE((mode), S_IFDIR)
#define S_ISCHR(mode) __S_ISTYPE((mode), S_IFCHR)
/* S_IFBLK is not provided by every <sys/stat.h>; without this guard any use
   of S_ISBLK would fail to compile. When the constant is missing, no mode is
   ever reported as a block device. */
#ifdef S_IFBLK
#define S_ISBLK(mode) __S_ISTYPE((mode), S_IFBLK)
#else
#define S_ISBLK(mode) (0)
#endif
#define S_ISREG(mode) __S_ISTYPE((mode), S_IFREG)
#endif
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#if (_MSC_VER < 1300) && defined(__cplusplus)
extern "C++" {
#endif
# include <wchar.h>
#if (_MSC_VER < 1300) && defined(__cplusplus)
}
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Built directly on the compiler's sized __intN keywords.
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
// Each "least" type is simply the exact-width type of the same size.
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
// Each "fast" type is likewise the exact-width type of the same size.
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
// 64-bit when _WIN64 is defined, plain int / unsigned int otherwise.
#ifdef _WIN64 // [
typedef __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
// The widest types offered here are the 64-bit ones.
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
// In C++ these macros are only provided when the includer defines
// __STDC_LIMIT_MACROS; in C they are always provided.
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
// Expressed through the _I*_MIN/_I*_MAX/_UI*_MAX constants (this header
// includes <limits.h>); the minimums are cast to the matching typedef.
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
// Identical to the exact-width limits, since the types are identical.
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
// Identical to the exact-width limits, since the types are identical.
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
// Follows the intptr_t/uintptr_t width: 64-bit under _WIN64, 32-bit otherwise.
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
// ptrdiff_t limits: 64-bit under _WIN64, 32-bit otherwise.
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
// sig_atomic_t limits are taken to be those of int.
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
// SIZE_MAX is only defined here when no earlier header already did so.
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
// so they are only supplied here as a fallback (unsigned 16-bit range).
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
// wint_t limits: the same unsigned 16-bit range.
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Macros for integer constants
// In C++ these macros are only provided when the includer defines
// __STDC_CONSTANT_MACROS; in C they are always provided.
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
// Each macro token-pastes a sized-literal suffix (i8, ui16, ...) onto val, so
// val must be a bare integer literal, not an expression.
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
// Aliases of the 64-bit constant macros.
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]
/*!
\file pqueue.c
\brief This file implements various max-priority queues.
The priority queues are generated using the GK_MKPQUEUE macro.
\date Started 3/27/2007
\author George
\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Create the various max priority queues */
/*************************************************************************/
/* Ordering predicate handed to every instantiation below: true when key a is
   strictly greater than key b. It only exists for the duration of these
   instantiations and is #undef'ed right after them. */
#define key_gt(a, b) ((a) > (b))
/* One GK_MKPQUEUE instantiation per key type (int, int32_t, int64_t, float,
   double, gk_idx_t).
   NOTE(review): the arguments appear to be (function-name prefix, queue type,
   key/value pair type, key type, value type, pair-array allocator, largest key
   value, ordering predicate) - confirm against the GK_MKPQUEUE definition. */
GK_MKPQUEUE(gk_ipq, gk_ipq_t, gk_ikv_t, int, gk_idx_t, gk_ikvmalloc, INT_MAX, key_gt)
GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t, gk_idx_t, gk_i32kvmalloc, INT32_MAX, key_gt)
GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t, gk_idx_t, gk_i64kvmalloc, INT64_MAX, key_gt)
GK_MKPQUEUE(gk_fpq, gk_fpq_t, gk_fkv_t, float, gk_idx_t, gk_fkvmalloc, FLT_MAX, key_gt)
GK_MKPQUEUE(gk_dpq, gk_dpq_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_gt)
GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt)
#undef key_gt
/*!
\file
\brief Various routines for providing portable 32 and 64 bit random number
generators.
\date Started 5/17/2007
\author George
\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Create the various random number functions */
/*************************************************************************/
/* One GK_MKRANDOM instantiation per element type (char, int, int32_t, float,
   double, gk_idx_t, ssize_t, size_t).
   NOTE(review): the arguments appear to be (function-name prefix, type used
   for random values/ranges, element type of the arrays handled) - confirm
   against the GK_MKRANDOM definition. */
GK_MKRANDOM(gk_c, size_t, char)
GK_MKRANDOM(gk_i, size_t, int)
GK_MKRANDOM(gk_i32, size_t, int32_t)
GK_MKRANDOM(gk_f, size_t, float)
GK_MKRANDOM(gk_d, size_t, double)
GK_MKRANDOM(gk_idx, size_t, gk_idx_t)
GK_MKRANDOM(gk_z, size_t, ssize_t)
GK_MKRANDOM(gk_zu, size_t, size_t)
/*************************************************************************/
/*! GKlib's built in random number generator for portability across
different architectures */
/*************************************************************************/
#ifdef USE_GKRAND
/*
A C-program for MT19937-64 (2004/9/29 version).
Coded by Takuji Nishimura and Makoto Matsumoto.
This is a 64-bit version of Mersenne Twister pseudorandom number
generator.
Before using, initialize the state by using init_genrand64(seed)
or init_by_array64(init_key, key_length).
Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* NN is the number of 64-bit words in the generator state; MM is the offset
   between the two state words combined on each regeneration step. */
#define NN 312
#define MM 156
/* Constant XOR-ed into a regenerated word when the low bit of the combined
   word is set (see mag01 in gk_randint64). */
#define MATRIX_A 0xB5026F5AA96619E9ULL
#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
/* The array for the state vector */
static uint64_t mt[NN];
/* Index of the next state word to hand out.
   mti==NN+1 means mt[NN] is not initialized */
static int mti=NN+1;
#endif /* USE_GKRAND */
/* Seeds the random number generator.
   With USE_GKRAND the whole mt[] state vector is derived from seed and mti is
   left at NN, so the next draw regenerates the state. Without USE_GKRAND the
   seed is truncated to unsigned int and passed to srand(). */
void gk_randinit(uint64_t seed)
{
#ifdef USE_GKRAND
  int k;

  mt[0] = seed;
  for (k = 1; k < NN; k++) {
    uint64_t prev = mt[k-1];

    mt[k] = (6364136223846793005ULL * (prev ^ (prev >> 62)) + k);
  }
  mti = k;   /* k == NN here */
#else
  srand((unsigned int) seed);
#endif
}
/* Generates a non-negative pseudo-random integer returned as a uint64_t.

   With USE_GKRAND the value comes from the MT19937-64 generator and its top
   bit is masked off, so the result lies in [0, 2^63-1]. If gk_randinit() has
   not been called yet, the default seed 5489 is used.

   Without USE_GKRAND two consecutive rand() results are combined: the first
   call supplies the bits above bit 31 and the second call the low bits, so the
   attainable range depends on RAND_MAX. The two calls are made in separate
   statements because their relative order inside a single expression is
   unspecified, which would make the sequence for a given seed depend on the
   compiler. */
uint64_t gk_randint64(void)
{
#ifdef USE_GKRAND
  int i;
  unsigned long long x;
  static uint64_t mag01[2]={0ULL, MATRIX_A};

  if (mti >= NN) { /* generate NN words at one time */
    /* if gk_randinit() has not been called, */
    /* a default initial seed is used */
    if (mti == NN+1)
      gk_randinit(5489ULL);

    for (i=0; i<NN-MM; i++) {
      x = (mt[i]&UM)|(mt[i+1]&LM);
      mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
    }
    for (; i<NN-1; i++) {
      x = (mt[i]&UM)|(mt[i+1]&LM);
      mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
    }
    x = (mt[NN-1]&UM)|(mt[0]&LM);
    mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];

    mti = 0;
  }

  x = mt[mti++];

  /* tempering */
  x ^= (x >> 29) & 0x5555555555555555ULL;
  x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
  x ^= (x << 37) & 0xFFF7EEE000000000ULL;
  x ^= (x >> 43);

  /* drop the top bit so the value stays non-negative as a signed 64-bit int */
  return x & 0x7FFFFFFFFFFFFFFF;
#else
  uint64_t hi, lo;

  hi = (uint64_t) rand();   /* first draw: upper part  */
  lo = (uint64_t) rand();   /* second draw: lower part */

  return (hi << 32) | lo;
#endif
}
/* Generates a non-negative pseudo-random integer returned as a uint32_t.
   With USE_GKRAND this is the low 31 bits of gk_randint64(), i.e. a value in
   [0, 2^31-1]; otherwise it is the result of a single rand() call. */
uint32_t gk_randint32(void)
{
  uint32_t value;

#ifdef USE_GKRAND
  value = (uint32_t)(gk_randint64() & 0x7FFFFFFF);
#else
  value = (uint32_t)rand();
#endif

  return value;
}
/*!
* \file
*
* \brief Various routines that perform random-walk based operations
on graphs stored as gk_csr_t matrices.
*
* \author George Karypis
* \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Computes the (personalized) page-rank of the vertices in a graph.

    \param mat is the matrix storing the graph.
    \param lamda is the weight given to the scores propagated along the
           edges (including the mass redistributed from the sinks); the
           restart distribution receives the remaining weight (1-lamda).
    \param eps is the error tolerance for convergence: the iterations stop
           as soon as the largest absolute per-vertex change between two
           consecutive iterations drops below it.
    \param max_niter is the maximum number of allowed iterations.
    \param pr on entry stores the restart distribution of the vertices.
           This allows for the computation of personalized page-rank scores
           by appropriately setting that parameter.
           On return, pr stores the computed page ranks.

    \returns the number of iterations that were performed. This never
             exceeds max_niter and is 0 when max_niter is not positive.
*/
/**************************************************************************/
int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
{
  ssize_t i, j, nrows;
  int niter;
  double *rscale, *prold, *prnew, *prtmp;
  double fromsinks, error, delta;
  ssize_t *rowptr;
  int *rowind;
  float *rowval;

  nrows  = mat->nrows;
  rowptr = mat->rowptr;
  rowind = mat->rowind;
  rowval = mat->rowval;

  prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
  prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
  rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");

  /* compute the scaling factors to get adjacency weights into transition
     probabilities; rows whose weights do not sum to a positive value keep
     a scale of 0 and are treated as sinks below */
  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++)
      rscale[i] += rowval[j];
    if (rscale[i] > 0)
      rscale[i] = 1.0/rscale[i];
  }

  /* the restart distribution is the initial pr scores */
  for (i=0; i<nrows; i++)
    prnew[i] = pr[i];

  /* get into the PR iteration; niter counts the iterations actually run */
  niter = 0;
  while (niter < max_niter) {
    niter++;

    gk_SWAP(prnew, prold, prtmp);
    gk_dset(nrows, 0.0, prnew);

    /* determine the total current PR score of the sinks so that you
       can distribute them to all nodes according to the restart
       distribution. */
    for (fromsinks=0.0, i=0; i<nrows; i++) {
      if (rscale[i] == 0)
        fromsinks += prold[i];
    }

    /* push random-walk scores to the outlinks */
    for (i=0; i<nrows; i++) {
      for (j=rowptr[i]; j<rowptr[i+1]; j++)
        prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
    }

    /* apply the restart conditions */
    for (i=0; i<nrows; i++) {
      prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
    }

    /* compute the error as the largest absolute change of any score */
    for (error=0.0, i=0; i<nrows; i++) {
      delta = fabs(prnew[i]-prold[i]);
      if (delta > error)
        error = delta;
    }

    if (error < eps)
      break;
  }

  /* store the computed pr scores into pr for output */
  for (i=0; i<nrows; i++)
    pr[i] = prnew[i];

  gk_free((void **)&prnew, &prold, &rscale, LTERM);

  return niter;
}
#!/usr/bin/perl -w
#
# Replicates a graph into <ncopies> layers and links consecutive layers.
#
# Input  : <gfile>. Its first line starts with the number of vertices; every
#          following line lists the neighbours of the next vertex, vertices
#          being numbered from 1 (presumably the METIS graph format - verify
#          against the files this is used with).
# Output : one "src dst" pair per line on stdout, vertices numbered from 0.
#          Vertex u of layer i is printed as i*nvtxs + u - 1. Every edge is
#          printed in both directions.
#
use strict;

die "Usage $0 <gfile> <ncopies>\n" unless @ARGV == 2;

my ($filein, $ncopies) = @ARGV;

# Three-argument open with a lexical handle: the file name is used verbatim
# and is never interpreted as a mode prefix or a pipe.
open(my $fpin, '<', $filein) or die "Could not open $filein. $!\n";

# Header line: only the vertex count is needed; the edge count is ignored.
my $header = <$fpin>;
die "Could not read the header line of $filein.\n" unless defined $header;
chomp($header);
my ($nvtxs) = split(' ', $header);

my $u = 1;
while (my $line = <$fpin>) {
  chomp($line);
  my @edges = split(' ', $line);

  # put the within layer edges (each undirected edge is handled once, from
  # its lower-numbered endpoint)
  foreach my $v (@edges) {
    next if $v < $u;
    for (my $i=0; $i<$ncopies; $i++) {
      printf("%d %d\n", $i*$nvtxs+$u-1, $i*$nvtxs+$v-1);
      printf("%d %d\n", $i*$nvtxs+$v-1, $i*$nvtxs+$u-1);
    }
  }

  # put the vertex across layer edges (u in layer i <-> u in layer i+1)
  for (my $i=0; $i<$ncopies-1; $i++) {
    printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$u-1);
    printf("%d %d\n", ($i+1)*$nvtxs+$u-1, $i*$nvtxs+$u-1);
  }

  # put the adjacent across layer edges (u in layer i <-> every other
  # neighbour of u in layer i+1; which neighbours are kept alternates with
  # the layer number)
  for (my $i=0; $i<$ncopies-1; $i++) {
    my $j = 0;
    foreach my $v (@edges) {
      $j++;
      next if (($j+$i)%2 == 0);
      printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$v-1);
      printf("%d %d\n", ($i+1)*$nvtxs+$v-1, $i*$nvtxs+$u-1);
    }
  }

  $u++;
}
close($fpin);
/*
*
* Sequence handler library by Huzefa Rangwala
* Date : 03.01.2007
*
*
*
*/
#include <GKlib.h>
/*********************************************************/
/*! \brief Resets a <tt>gk_seq_t</tt> to the empty state

    All counters are set to 0 and all pointers to NULL. nsymbols is reset as
    well because gk_seq_free() reads it.

    \param seq is the sequence to initialize; it must not be NULL.
*/
/***********************************************************************/
void gk_seq_init(gk_seq_t *seq)
{
  seq->len      = 0;
  seq->nsymbols = 0;
  seq->sequence = NULL;
  seq->pssm     = NULL;
  seq->psfm     = NULL;
  seq->name     = NULL;
}
/***********************************************************************/
/*! \brief Builds the index<->character translation tables for an alphabet

    Both tables have 256 entries that start out as -1. For every position i
    of the alphabet, i2c[i] is set to the i-th character and c2i[] of that
    character is set to i. If a character occurs more than once, its last
    position wins in c2i.

    \param alphabet is the NUL-terminated list of symbols (e.g. amino acids
           or nucleotides).
    \returns the newly allocated gk_i2cc2i_t.
*/
/*********************************************************************/
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
{
  int nsymbols;
  gk_idx_t i;
  gk_i2cc2i_t *t;

  nsymbols = strlen(alphabet);

  t      = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
  t->n   = nsymbols;
  t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
  t->c2i = gk_imalloc(256, "gk_i2c_create_common");

  gk_cset(256, -1, t->i2c);
  gk_iset(256, -1, t->c2i);

  for (i=0; i<nsymbols; i++) {
    t->i2c[i] = alphabet[i];
    /* go through unsigned char so that characters with the high bit set
       index the table in 128..255 instead of producing a negative index */
    t->c2i[(unsigned char)alphabet[i]] = i;
  }

  return t;
}
/*********************************************************************/
/*! \brief This function reads a pssm in the format of gkmod pssm
\param file_name is the name of the pssm file
\returns gk_seq_t
*/
/********************************************************************/
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
{
gk_seq_t *seq;
gk_idx_t i, j, ii;
size_t ntokens, nbytes, len;
FILE *fpin;
gk_Tokens_t tokens;
static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
static int PSSMWIDTH = 20;
char *header, line[MAXLINELEN];
gk_i2cc2i_t *converter;
header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
converter = gk_i2cc2i_create_common(AAORDER);
gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
len --;
seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
gk_seq_init(seq);
seq->len = len;
seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->nsymbols = PSSMWIDTH;
seq->name = gk_getbasename(filename);
fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
/* Read the header line */
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
errexit("Unexpected end of file: %s\n", filename);
gk_strtoupper(line);
gk_strtokenize(line, " \t\n", &tokens);
for (i=0; i<PSSMWIDTH; i++)
header[i] = tokens.list[i][0];
gk_freetokenslist(&tokens);
/* Read the rest of the lines */
for (i=0, ii=0; ii<len; ii++) {
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
errexit("Unexpected end of file: %s\n", filename);
gk_strtoupper(line);
gk_strtokenize(line, " \t\n", &tokens);
seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
for (j=0; j<PSSMWIDTH; j++) {
seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
}
gk_freetokenslist(&tokens);
i++;
}
seq->len = i; /* Reset the length if certain characters were skipped */
gk_free((void **)&header, LTERM);
gk_fclose(fpin);
return seq;
}
/**************************************************************************/
/*! \brief Frees a sequence together with everything it owns

    Releases the pssm and psfm matrices, the name, the residue array and
    finally the gk_seq_t structure itself. Passing NULL is a no-op.

    \param seq is the sequence to free; it must not be used afterwards.
*/
/**************************************************************************/
void gk_seq_free(gk_seq_t *seq)
{
  if (seq == NULL)
    return;

  gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
  gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);

  gk_free((void **)&seq->name, &seq->sequence, LTERM);

  /* seq is this function's own copy of the pointer, so clearing it here does
     not reset the caller's variable */
  gk_free((void **)&seq, LTERM);
}
/*!
\file sort.c
\brief This file contains GKlib's various sorting routines
These routines are implemented using the GKSORT macro that is defined
in gk_qsort.h and is based on GNU's GLIBC qsort() implementation.
Additional sorting routines can be created in the same way that
these routines were defined.
\date Started 4/4/07
\author George
\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Sorts the n chars stored in base into increasing order. Elements are
    compared with the plain < operator. */
/*************************************************************************/
void gk_csorti(size_t n, char *base)
{
#define gk_csorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(char, base, n, gk_csorti_lt);
#undef gk_csorti_lt
}
/*************************************************************************/
/*! Sorts the n chars stored in base into decreasing order. Elements are
    compared with the plain > operator. */
/*************************************************************************/
void gk_csortd(size_t n, char *base)
{
#define gk_csortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(char, base, n, gk_csortd_gt);
#undef gk_csortd_gt
}
/*************************************************************************/
/*! Sorts the n ints stored in base into increasing order. */
/*************************************************************************/
void gk_isorti(size_t n, int *base)
{
#define gk_isorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(int, base, n, gk_isorti_lt);
#undef gk_isorti_lt
}
/*************************************************************************/
/*! Sorts the n ints stored in base into decreasing order. */
/*************************************************************************/
void gk_isortd(size_t n, int *base)
{
#define gk_isortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(int, base, n, gk_isortd_gt);
#undef gk_isortd_gt
}
/*************************************************************************/
/*! Sorts the n int32_t values stored in base into increasing order. */
/*************************************************************************/
void gk_i32sorti(size_t n, int32_t *base)
{
#define gk_i32sorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(int32_t, base, n, gk_i32sorti_lt);
#undef gk_i32sorti_lt
}
/*************************************************************************/
/*! Sorts the n int32_t values stored in base into decreasing order. */
/*************************************************************************/
void gk_i32sortd(size_t n, int32_t *base)
{
#define gk_i32sortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(int32_t, base, n, gk_i32sortd_gt);
#undef gk_i32sortd_gt
}
/*************************************************************************/
/*! Sorts the n int64_t values stored in base into increasing order. */
/*************************************************************************/
void gk_i64sorti(size_t n, int64_t *base)
{
#define gk_i64sorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(int64_t, base, n, gk_i64sorti_lt);
#undef gk_i64sorti_lt
}
/*************************************************************************/
/*! Sorts the n uint32_t values stored in base into increasing order. */
/*************************************************************************/
void gk_ui32sorti(size_t n, uint32_t *base)
{
#define gk_ui32sorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(uint32_t, base, n, gk_ui32sorti_lt);
#undef gk_ui32sorti_lt
}
/*************************************************************************/
/*! Sorts the n uint32_t values stored in base into decreasing order. */
/*************************************************************************/
void gk_ui32sortd(size_t n, uint32_t *base)
{
#define gk_ui32sortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(uint32_t, base, n, gk_ui32sortd_gt);
#undef gk_ui32sortd_gt
}
/*************************************************************************/
/*! Sorts the n uint64_t values stored in base into increasing order. */
/*************************************************************************/
void gk_ui64sorti(size_t n, uint64_t *base)
{
#define gk_ui64sorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(uint64_t, base, n, gk_ui64sorti_lt);
#undef gk_ui64sorti_lt
}
/*************************************************************************/
/*! Sorts the n uint64_t values stored in base into decreasing order. */
/*************************************************************************/
void gk_ui64sortd(size_t n, uint64_t *base)
{
#define gk_ui64sortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(uint64_t, base, n, gk_ui64sortd_gt);
#undef gk_ui64sortd_gt
}
/*************************************************************************/
/*! Sorts the n int64_t values stored in base into decreasing order. */
/*************************************************************************/
void gk_i64sortd(size_t n, int64_t *base)
{
#define gk_i64sortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(int64_t, base, n, gk_i64sortd_gt);
#undef gk_i64sortd_gt
}
/*************************************************************************/
/*! Sorts the n floats stored in base into increasing order. */
/*************************************************************************/
void gk_fsorti(size_t n, float *base)
{
#define gk_fsorti_lt(x, y) ((*x) < (*y))
  GK_MKQSORT(float, base, n, gk_fsorti_lt);
#undef gk_fsorti_lt
}
/*************************************************************************/
/*! Sorts the n floats stored in base into decreasing order. */
/*************************************************************************/
void gk_fsortd(size_t n, float *base)
{
#define gk_fsortd_gt(x, y) ((*x) > (*y))
  GK_MKQSORT(float, base, n, gk_fsortd_gt);
#undef gk_fsortd_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of double values into increasing order. */
/*************************************************************************/
void gk_dsorti(size_t n, double *base)
{
#define dbl_lt(x, y) (*(x) < *(y))
  GK_MKQSORT(double, base, n, dbl_lt);
#undef dbl_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of double values into decreasing order. */
/*************************************************************************/
void gk_dsortd(size_t n, double *base)
{
#define dbl_gt(x, y) (*(x) > *(y))
  GK_MKQSORT(double, base, n, dbl_gt);
#undef dbl_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_idx_t values into increasing
    order. */
/*************************************************************************/
void gk_idxsorti(size_t n, gk_idx_t *base)
{
#define gkidx_lt(x, y) (*(x) < *(y))
  GK_MKQSORT(gk_idx_t, base, n, gkidx_lt);
#undef gkidx_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_idx_t values into decreasing
    order. */
/*************************************************************************/
void gk_idxsortd(size_t n, gk_idx_t *base)
{
#define gkidx_gt(x, y) (*(x) > *(y))
  GK_MKQSORT(gk_idx_t, base, n, gkidx_gt);
#undef gkidx_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_ckv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_ckvsorti(size_t n, gk_ckv_t *base)
{
#define ckv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_ckv_t, base, n, ckv_lt);
#undef ckv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_ckv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_ckvsortd(size_t n, gk_ckv_t *base)
{
#define ckv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_ckv_t, base, n, ckv_gt);
#undef ckv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_ikv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_ikvsorti(size_t n, gk_ikv_t *base)
{
#define ikv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_ikv_t, base, n, ikv_lt);
#undef ikv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_ikv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_ikvsortd(size_t n, gk_ikv_t *base)
{
#define ikv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_ikv_t, base, n, ikv_gt);
#undef ikv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_i32kv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_i32kvsorti(size_t n, gk_i32kv_t *base)
{
#define i32kv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_i32kv_t, base, n, i32kv_lt);
#undef i32kv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_i32kv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_i32kvsortd(size_t n, gk_i32kv_t *base)
{
#define i32kv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_i32kv_t, base, n, i32kv_gt);
#undef i32kv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_i64kv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_i64kvsorti(size_t n, gk_i64kv_t *base)
{
#define i64kv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_i64kv_t, base, n, i64kv_lt);
#undef i64kv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_i64kv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_i64kvsortd(size_t n, gk_i64kv_t *base)
{
#define i64kv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_i64kv_t, base, n, i64kv_gt);
#undef i64kv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_zkv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_zkvsorti(size_t n, gk_zkv_t *base)
{
#define zkv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_zkv_t, base, n, zkv_lt);
#undef zkv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_zkv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_zkvsortd(size_t n, gk_zkv_t *base)
{
#define zkv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_zkv_t, base, n, zkv_gt);
#undef zkv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_zukv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_zukvsorti(size_t n, gk_zukv_t *base)
{
#define zukv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_zukv_t, base, n, zukv_lt);
#undef zukv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_zukv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_zukvsortd(size_t n, gk_zukv_t *base)
{
#define zukv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_zukv_t, base, n, zukv_gt);
#undef zukv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_fkv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_fkvsorti(size_t n, gk_fkv_t *base)
{
#define fkv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_fkv_t, base, n, fkv_lt);
#undef fkv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_fkv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_fkvsortd(size_t n, gk_fkv_t *base)
{
#define fkv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_fkv_t, base, n, fkv_gt);
#undef fkv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_dkv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_dkvsorti(size_t n, gk_dkv_t *base)
{
#define dkv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_dkv_t, base, n, dkv_lt);
#undef dkv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_dkv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_dkvsortd(size_t n, gk_dkv_t *base)
{
#define dkv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_dkv_t, base, n, dkv_gt);
#undef dkv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_skv_t records into increasing
    order of their string \c key, as ordered by strcmp(). */
/*************************************************************************/
void gk_skvsorti(size_t n, gk_skv_t *base)
{
#define skv_lt(x, y) (strcmp((x)->key, (y)->key) < 0)
  GK_MKQSORT(gk_skv_t, base, n, skv_lt);
#undef skv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_skv_t records into decreasing
    order of their string \c key, as ordered by strcmp(). */
/*************************************************************************/
void gk_skvsortd(size_t n, gk_skv_t *base)
{
#define skv_gt(x, y) (strcmp((x)->key, (y)->key) > 0)
  GK_MKQSORT(gk_skv_t, base, n, skv_gt);
#undef skv_gt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_idxkv_t records into increasing
    order of their \c key field. */
/*************************************************************************/
void gk_idxkvsorti(size_t n, gk_idxkv_t *base)
{
#define idxkv_lt(x, y) ((x)->key < (y)->key)
  GK_MKQSORT(gk_idxkv_t, base, n, idxkv_lt);
#undef idxkv_lt
}
/*************************************************************************/
/*! Sorts the n-element array base of gk_idxkv_t records into decreasing
    order of their \c key field. */
/*************************************************************************/
void gk_idxkvsortd(size_t n, gk_idxkv_t *base)
{
#define idxkv_gt(x, y) ((x)->key > (y)->key)
  GK_MKQSORT(gk_idxkv_t, base, n, idxkv_gt);
#undef idxkv_gt
}
/************************************************************************/
/*! \file
\brief Functions for manipulating strings.
Various functions for manipulating strings. Some of these functions
provide new functionality, whereas others are drop-in replacements
of standard functions (but with enhanced functionality).
\date Started 11/1/99
\author George
\version $Id: string.c 14330 2013-05-18 12:15:15Z karypis $
*/
/************************************************************************/
#include <GKlib.h>
/************************************************************************/
/*! \brief Translates or deletes characters of a string, in place.

    Every character of \c str that occurs in \c fromlist is rewritten. If
    the position of its first occurrence in \c fromlist also exists in
    \c tolist, the character is replaced by <tt>tolist[position]</tt>;
    otherwise (\c tolist is too short) the character is removed from the
    string. Characters that are not in \c fromlist are kept unchanged.
    The intent is to offer something similar to Perl's \b tr// operator.

    \param str is the string that is edited in place.
    \param fromlist is the set of characters to be replaced.
    \param tolist is the set of replacement characters.
    \returns A pointer to \c str itself.
*/
/************************************************************************/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
{
  size_t ntargets = strlen(tolist);
  char *src, *dst, *hit;

  /* dst never runs ahead of src, so unread input is never overwritten */
  for (src = dst = str; *src != '\0'; src++) {
    hit = strchr(fromlist, *src);
    if (hit == NULL)
      *dst++ = *src;                     /* not listed: keep it */
    else if ((size_t)(hit - fromlist) < ntargets)
      *dst++ = tolist[hit - fromlist];   /* listed with a counterpart */
    /* listed without a counterpart: drop the character */
  }
  *dst = '\0';

  return str;
}
/* Helper of gk_strstr_replace(): makes sure that at least `extra` more
   characters fit into *buf after the `used` ones already stored. *cap is
   the usable capacity; the allocation always holds one additional byte
   for the terminating NUL. */
static void gk_strstr_replace_reserve(char **buf, ssize_t *cap, ssize_t used,
                ssize_t extra)
{
  if (*cap - used < extra) {
    *cap += *cap + extra;
    *buf = (char *)gk_realloc(*buf, (*cap+1)*sizeof(char), "gk_strstr_replace: new_str");
  }
}

/************************************************************************/
/*! \brief Regex-based search-and-replace function

    This function is a C implementation of Perl's <tt> s//</tt>
    regular-expression based substitution function. The pattern is compiled
    as a POSIX extended regular expression.

    \param str
      is the input string on which the operation will be performed. It is
      not modified.
    \param pattern
      is the regular expression for the pattern to be matched for
      substitution.
    \param replacement
      is the replacement string, in which the possible captured pattern
      substrings are referred to as $1, $2, ..., $9. The entire matched
      pattern is referred to as $0. A reference to a group that did not
      take part in the match expands to nothing. A backslash makes the
      character that follows it literal.
    \param options
      is a string specifying options for the substitution operation.
      Currently <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global
      substitution) are supported.
    \param new_str
      is a reference to a pointer that will store a pointer to the newly
      created string that results from the substitutions. This string is
      allocated via the GKlib allocation routines and needs to be freed
      using gk_free(). The string is returned even if no substitutions were
      performed.

    \returns
      If successful, it returns 1 + the number of substitutions that were
      performed. Thus, if no substitutions were performed, the returned
      value will be 1. Otherwise it returns 0, in which case an error
      message is returned in <tt>new_str</tt>, which also needs to be freed
      afterwards.
*/
/************************************************************************/
int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
      char **new_str)
{
  ssize_t i, len, rlen, nlen, offset, noffset, sublen;
  int j, rc, flags, global, nmatches;
  char *errmsg;
  regex_t re;
  regmatch_t matches[10];

  /* Parse the options */
  flags = REG_EXTENDED;
  if (strchr(options, 'i') != NULL)
    flags = flags | REG_ICASE;
  global = (strchr(options, 'g') != NULL ? 1 : 0);

  /* Compile the regex */
  if ((rc = regcomp(&re, pattern, flags)) != 0) {
    len = regerror(rc, &re, NULL, 0);
    *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
    regerror(rc, &re, *new_str, len);
    return 0;
  }

  /* Prepare the output string */
  len = strlen(str);
  nlen = 2*len;
  noffset = 0;
  *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");

  /* Get into the matching-replacing loop */
  rlen = strlen(replacement);
  offset = 0;
  nmatches = 0;
  do {
    /* Once part of str has been consumed, str+offset is no longer the
       beginning of the line, so '^' must not match there. */
    rc = regexec(&re, str+offset, 10, matches, (offset > 0 ? REG_NOTBOL : 0));

    if (rc == REG_ESPACE) {
      errmsg = "regexec ran out of memory.";
      goto failed;
    }
    else if (rc != 0) { /* REG_NOMATCH: copy the rest of the input and stop */
      gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
      strcpy(*new_str+noffset, str+offset);
      noffset += (len-offset);
      break;
    }

    /* A match was found! */
    nmatches++;

    /* Copy the left unmatched portion of the string */
    if (matches[0].rm_so > 0) {
      gk_strstr_replace_reserve(new_str, &nlen, noffset, matches[0].rm_so);
      memcpy(*new_str+noffset, str+offset, matches[0].rm_so);
      noffset += matches[0].rm_so;
    }

    /* Go and append the replacement string */
    for (i=0; i<rlen; i++) {
      switch (replacement[i]) {
        case '\\':
          if (i+1 >= rlen) {
            errmsg = "Error in replacement string. Missing character following '\\'.";
            goto failed;
          }
          gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
          (*new_str)[noffset++] = replacement[++i];
          break;

        case '$':
          if (i+1 >= rlen) {
            errmsg = "Error in replacement string. Missing subexpression number folloing '$'.";
            goto failed;
          }
          j = (int)(replacement[++i] - '0');
          if (j < 0 || j > 9) {
            errmsg = "Error in captured subexpression specification.";
            goto failed;
          }
          /* rm_so is -1 for a group that did not participate in the match */
          if (matches[j].rm_so >= 0) {
            sublen = matches[j].rm_eo - matches[j].rm_so;
            gk_strstr_replace_reserve(new_str, &nlen, noffset, sublen);
            memcpy(*new_str+noffset, str+offset+matches[j].rm_so, sublen);
            noffset += sublen;
          }
          break;

        default:
          gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
          (*new_str)[noffset++] = replacement[i];
      }
    }

    /* Update the offset of str for the next match */
    offset += matches[0].rm_eo;

    if (!global) {
      /* Copy the right portion of the string if no 'g' option */
      gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
      strcpy(*new_str+noffset, str+offset);
      noffset += (len-offset);
    }
    else if (matches[0].rm_eo == matches[0].rm_so) {
      /* An empty match does not consume input. Step over one character by
         hand, otherwise the same empty match would be found forever. */
      if (offset >= len)
        break;
      gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
      (*new_str)[noffset++] = str[offset++];
    }
  } while (global);

  (*new_str)[noffset] = '\0';
  regfree(&re);

  return nmatches + 1;

failed:
  /* replace the partial result by the error message */
  gk_free((void **)new_str, LTERM);
  *new_str = gk_strdup(errmsg);
  regfree(&re);
  return 0;
}
/************************************************************************/
/*! \brief Prunes characters from the end of the string.

    Removes every trailing character of \c str that is a member of
    \c rmlist. Trimming stops at the first character, counted from the
    end, that is not in \c rmlist. Typical uses are the removal of trailing
    spaces, newlines, etc. The operation is destructive: \c str itself is
    shortened.

    \param str is the string that will be trimmed.
    \param rmlist contains the set of characters that will be removed.
    \returns A pointer to \c str itself.
    \sa gk_strhprune()
*/
/*************************************************************************/
char *gk_strtprune(char *str, char *rmlist)
{
  char *end = str + strlen(str);

  /* back up while the character just before `end` is a member of rmlist */
  while (end > str && strchr(rmlist, end[-1]) != NULL)
    end--;
  *end = '\0';

  return str;
}
/************************************************************************/
/*! \brief Prunes characters from the beginning of the string.

    Removes every leading character of \c str that is a member of
    \c rmlist. Trimming stops at the first character that is not in
    \c rmlist; the remainder of the string is shifted to the front.
    Typical uses are the removal of leading spaces, tabs, etc. The
    operation is destructive: \c str itself is modified.

    \param str is the string that will be trimmed.
    \param rmlist contains the set of characters that will be removed.
    \returns A pointer to \c str itself.
    \sa gk_strtprune()
*/
/*************************************************************************/
char *gk_strhprune(char *str, char *rmlist)
{
  /* length of the leading run made up only of rmlist characters */
  size_t nlead = strspn(str, rmlist);

  if (nlead > 0)  /* shift the tail, including its NUL, to the front */
    memmove(str, str + nlead, strlen(str + nlead) + 1);

  return str;
}
/************************************************************************/
/*! \brief Converts a string to upper case.

    This function converts a string to upper case using toupper(). This
    operation modifies the string itself.

    \param str is the string whose case will be changed.
    \returns A pointer to \c str itself.
    \sa gk_strtolower()
*/
/*************************************************************************/
char *gk_strtoupper(char *str)
{
  char *p;

  /* The <ctype.h> functions are only defined for values representable as
     unsigned char (or EOF), so a plain char must be converted first. */
  for (p = str; *p != '\0'; p++)
    *p = (char)toupper((unsigned char)*p);

  return str;
}
/************************************************************************/
/*! \brief Converts a string to lower case.

    This function converts a string to lower case using tolower(). This
    operation modifies the string itself.

    \param str is the string whose case will be changed.
    \returns A pointer to \c str itself.
    \sa gk_strtoupper()
*/
/*************************************************************************/
char *gk_strtolower(char *str)
{
  char *p;

  /* The <ctype.h> functions are only defined for values representable as
     unsigned char (or EOF), so a plain char must be converted first. */
  for (p = str; *p != '\0'; p++)
    *p = (char)tolower((unsigned char)*p);

  return str;
}
/************************************************************************/
/*! \brief Duplicates a string

    This function is a replacement for C's standard <em>strdup()</em>
    function. The key differences between the two are that gk_strdup():
      - uses the dynamic memory allocation routines of \e GKlib.
      - it accepts a NULL input string, for which it returns NULL.

    The string that is returned must be freed by gk_free().

    \param orgstr is the string that will be duplicated (may be NULL).
    \returns A pointer to the newly created string, or NULL if \c orgstr
             is NULL.
    \sa gk_free()
*/
/*************************************************************************/
char *gk_strdup(char *orgstr)
{
  size_t nbytes;   /* size_t, so that very long strings are not truncated */
  char *str=NULL;

  if (orgstr != NULL) {
    nbytes = strlen(orgstr)+1;   /* +1 for the terminating NUL */
    str = gk_malloc(nbytes*sizeof(char), "gk_strdup: str");
    memcpy(str, orgstr, nbytes);
  }

  return str;
}
/************************************************************************/
/*! \brief Case insensitive string comparison.

    This function compares two strings for equality by ignoring the case of
    the strings.

    \warning This function is \b not equivalent to a case-insensitive
             <em>strcmp()</em> function, as it does not return ordering
             information, and its result has the opposite truth value: it
             is non-zero when the strings match.
    \todo Remove the above warning.

    \param s1 is the first string to be compared.
    \param s2 is the second string to be compared.
    \retval 1 if the strings are identical up to case,
    \retval 0 otherwise.
*/
/*************************************************************************/
int gk_strcasecmp(char *s1, char *s2)
{
  size_t i;

  /* tolower() is only defined for unsigned char values (or EOF) */
  for (i=0; s1[i] != '\0' && s2[i] != '\0'; i++) {
    if (tolower((unsigned char)s1[i]) != tolower((unsigned char)s2[i]))
      return 0;
  }

  /* at least one string has ended; they match only if both did */
  return (s1[i] == s2[i]);
}
/************************************************************************/
/*! \brief Compare two strings in reverse order

    This function is similar to strcmp but it performs the comparison as
    if the two strings were reversed, i.e., starting from their last
    characters. When one string is a suffix of the other, the shorter one
    is the smaller.

    \param s1 is the first string to be compared.
    \param s2 is the second string to be compared.
    \retval -1, 0, 1, if the s1 < s2, s1 == s2, or s1 > s2.
*/
/*************************************************************************/
int gk_strrcmp(char *s1, char *s2)
{
  size_t n1 = strlen(s1);   /* characters of s1 not yet compared */
  size_t n2 = strlen(s2);   /* characters of s2 not yet compared */

  while (n1 > 0 && n2 > 0) {
    n1--;
    n2--;
    if (s1[n1] != s2[n2])
      return (s1[n1] < s2[n2] ? -1 : 1);
  }

  /* one string ran out: whichever has characters left is the larger */
  if (n1 < n2)
    return -1;
  if (n1 > n2)
    return 1;
  return 0;
}
/************************************************************************/
/*! \brief Converts a time_t time into a string

    This function takes a time_t-specified time and returns a
    string-formatted representation of the corresponding local time. The
    format of the string is <em>mm/dd/yyyy hh:mm:ss</em>, in which the
    hours are in military time.

    \param time is the time to be converted.
    \return It returns a pointer to a statically allocated string that is
            over-written in successive calls of this function. If the
            conversion failed, it returns NULL.
*/
/*************************************************************************/
char *gk_time2str(time_t time)
{
  static char datestr[128];
  struct tm *tm;

  /* localtime() returns NULL when the time cannot be broken down, and
     strftime() must not be handed a NULL tm */
  tm = localtime(&time);
  if (tm == NULL)
    return NULL;

  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
    return NULL;

  return datestr;
}
#if !defined(WIN32) && !defined(__MINGW32__)
/************************************************************************/
/*! \brief Converts a date/time string into its equivalent time_t value

    The string is parsed with strptime() using the format
    <em>mm/dd/yyyy hh:mm:ss</em> (hours in military time) and the resulting
    broken-down time is converted with mktime(). Fields that strptime()
    does not set are left at zero.

    \param str is the date/time string to be converted.
    \return -1 if the string could not be parsed. Otherwise the value
            produced by mktime(), except that a negative mktime() result
            is reported as 0.
*/
/*************************************************************************/
time_t gk_str2time(char *str)
{
  struct tm fields;
  time_t when;

  memset(&fields, 0, sizeof(fields));

  if (strptime(str, "%m/%d/%Y %H:%M:%S", &fields) == NULL)
    return -1;

  when = mktime(&fields);
  if (when < 0)
    return 0;

  return when;
}
#endif
/*************************************************************************
* Looks up key (ignoring case, via gk_strcasecmp) in the supplied
* StringMap array and returns the id of the first entry whose name
* matches. The array must end with an entry whose name is NULL.
* Returns -1 if no entry matches.
**************************************************************************/
int gk_GetStringID(gk_StringMap_t *strmap, char *key)
{
  gk_StringMap_t *entry;

  for (entry = strmap; entry->name != NULL; entry++) {
    /* gk_strcasecmp() is non-zero when the two strings match */
    if (gk_strcasecmp(key, entry->name) != 0)
      return entry->id;
  }

  return -1;
}
# Where the header files reside (currently disabled)
#include_directories(../)
# Build the programs: one executable per source file. Note that the
# target name does not always match the source name (gkrw <- rw.c).
add_executable(strings strings.c)
add_executable(gksort gksort.c)
add_executable(fis fis.c)
add_executable(gkrw rw.c)
add_executable(gkgraph gkgraph.c)
add_executable(csrcnv csrcnv.c)
add_executable(grKx grKx.c)
add_executable(m2mnbrs m2mnbrs.c)
add_executable(cmpnbrs cmpnbrs.c)
add_executable(splatt2svd splatt2svd.c)
# Link every program against the GKlib target. Keep this list in sync
# with the add_executable() calls above.
foreach(prog strings gksort fis gkrw gkgraph csrcnv grKx m2mnbrs cmpnbrs splatt2svd)
target_link_libraries(${prog} GKlib)
endforeach(prog)
# Install a subset of the programs into bin; the remaining ones are
# built but not installed.
install(TARGETS fis csrcnv m2mnbrs gkrw cmpnbrs RUNTIME DESTINATION bin)
/*!
\file
\brief It takes as input two CSR matrices A and B and computes how
similar AA' and A'A are to BB' and B'B, respectively in terms
of the cosine similarity of the corresponding rows.
\date 11/09/2015
\author George
\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time parameters of the program. The structure is allocated and
    filled in by parse_cmdline(). */
/*************************************************************************/
typedef struct {
int simtype; /*!< The similarity type to use (an id from simtype_options) */
int verbosity; /*!< The reporting verbosity level */
char *afile; /*!< The file storing the first input matrix (A) */
char *bfile; /*!< The file storing the second input matrix (B) */
/* timers */
double timer_global; /*!< Wall-clock timer that main() runs around the similarity computations */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Version of the program, printed in the banner by main() */
#define VER_MAJOR 0
#define VER_MINOR 1
#define VER_SUBMINOR 0
/* Command-line option codes: the values that gk_getopt_long_only()
   returns for the entries of long_options and that parse_cmdline()
   switches on */
#define CMD_SIMTYPE 10
#define CMD_VERBOSITY 70
#define CMD_HELP 100
/* The text labels for the different simtypes; main() indexes this table
   directly with params->simtype.
   NOTE(review): this assumes GK_CSR_DOTP, GK_CSR_COS and GK_CSR_JAC have
   the values 1, 2 and 3 -- confirm against the GKlib definitions. */
static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Table of the long options accepted by parse_cmdline(); it is passed to
   gk_getopt_long_only() and ends with an all-zero entry. The last field
   of each entry is the CMD_* code that parse_cmdline() switches on.
   NOTE(review): the remaining fields presumably mirror getopt_long()'s
   struct option (name, has-argument flag, flag pointer) -- confirm against
   the definition of struct gk_option. */
static struct gk_option long_options[] = {
{"simtype", 1, 0, CMD_SIMTYPE},
{"verbosity", 1, 0, CMD_VERBOSITY},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/* Maps the values accepted by -simtype to their GK_CSR_* ids. It is
   searched with gk_GetStringID() in parse_cmdline(); the entry with a
   NULL name terminates the table. */
static gk_StringMap_t simtype_options[] = {
{"dotp", GK_CSR_DOTP},
{"cos", GK_CSR_COS},
{"jac", GK_CSR_JAC},
{NULL, 0}
};
/*-------------------------------------------------------------------
* Mini help
*
* The lines printed for -help. parse_cmdline() prints entries until it
* reaches one of length zero, so the final "" terminates the table and
* visually blank lines must contain a space (" ").
*-------------------------------------------------------------------*/
static char helpstr[][100] =
{
" ",
"Usage: cmpnbrs [options] afile bfile",
" ",
" Options",
" -simtype=string",
" Specifies the type of similarity to use. Possible values are:",
" dotp - Dot-product similarity [default]",
" cos - Cosine similarity",
" jac - Jacquard similarity",
" ",
" -verbosity=int",
" Specifies the level of debugging information to be displayed.",
" Default value is 0.",
" ",
" -help",
" Prints this message.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
/* Builds the params_t structure from the command-line arguments */
params_t *parse_cmdline(int argc, char *argv[]);
/* Returns the average similarity between the neighborhoods that the rows
   have in amat and in bmat */
double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, gk_csr_t *bmat);
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
int i;
int c, option_index;
params_t *params;
params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
/* initialize the params data structure */
params->simtype = GK_CSR_DOTP;
params->verbosity = -1;
params->afile = NULL;
params->bfile = NULL;
/* Parse the command line arguments */
while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
switch (c) {
case CMD_SIMTYPE:
if (gk_optarg) {
if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
errexit("Invalid simtype of %s.\n", gk_optarg);
}
break;
case CMD_VERBOSITY:
if (gk_optarg) params->verbosity = atoi(gk_optarg);
break;
case CMD_HELP:
for (i=0; strlen(helpstr[i]) > 0; i++)
printf("%s\n", helpstr[i]);
exit(EXIT_SUCCESS);
break;
case '?':
default:
printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
exit(EXIT_FAILURE);
}
}
/* Get the input/output file info */
if (argc-gk_optind != 2) {
printf("Missing input file info.\n Use %s -help for a summary of the options.\n", argv[0]);
exit(EXIT_FAILURE);
}
params->afile = gk_strdup(argv[gk_optind++]);
params->bfile = gk_strdup(argv[gk_optind++]);
if (!gk_fexists(params->afile))
errexit("input file %s does not exist.\n", params->afile);
if (!gk_fexists(params->bfile))
errexit("input file %s does not exist.\n", params->bfile);
return params;
}
/*************************************************************************/
/*! Program entry point: reads the two matrices named on the command
    line, prints their sizes, and reports the neighborhood similarity of
    their rows (AA' vs BB') and of their columns (A'A vs B'B) together
    with the wall-clock time spent. */
/**************************************************************************/
int main(int argc, char *argv[])
{
  params_t *params = parse_cmdline(argc, argv);
  gk_csr_t *amat, *bmat, *amatt, *bmatt;
  double rowsim, colsim;

  amat = gk_csr_Read(params->afile, GK_CSR_FMT_CSR, 1, 0);
  bmat = gk_csr_Read(params->bfile, GK_CSR_FMT_CSR, 1, 0);

  /* the matrices must agree on the number of rows; make them agree on
     the number of columns too by widening the narrower one */
  GKASSERT(amat->nrows == bmat->nrows);
  amat->ncols = gk_max(amat->ncols, bmat->ncols);
  bmat->ncols = amat->ncols;

  /* the transposes are used for the column-wise (A'A vs B'B) comparison */
  amatt = gk_csr_Transpose(amat);
  bmatt = gk_csr_Transpose(bmat);

  printf("********************************************************************************\n");
  printf("cmpnbrs (%d.%d.%d) Copyright 2015, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
  printf(" simtype=%s\n",
      simtypenames[params->simtype]);
  printf(" afile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      params->afile, amat->nrows, amat->ncols, amat->rowptr[amat->nrows]);
  printf(" bfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      params->bfile, bmat->nrows, bmat->ncols, bmat->rowptr[bmat->nrows]);

  gk_clearwctimer(params->timer_global);
  gk_startwctimer(params->timer_global);

  /* each result is printed right after it has been computed */
  rowsim = ComputeNeighborhoodSimilarity(params, amat, bmat);
  printf("SIM(AA', BB'): %.5lf\t", rowsim);

  colsim = ComputeNeighborhoodSimilarity(params, amatt, bmatt);
  printf("SIM(A'A, B'B): %.5lf\n", colsim);

  gk_stopwctimer(params->timer_global);

  printf(" wclock: %.2lfs\n", gk_getwctimer(params->timer_global));
  printf("********************************************************************************\n");

  gk_csr_Free(&amat);
  gk_csr_Free(&bmat);
  gk_csr_Free(&amatt);
  gk_csr_Free(&bmatt);

  exit(EXIT_SUCCESS);
}
/*************************************************************************/
/*! Compares the neighbors of AA' vs BB'.

    For every row that is non-empty in both matrices, the similarities of
    that row to the rows of its own matrix are obtained with
    gk_csr_GetSimilarRows(), once in \c amat and once in \c bmat. The two
    resulting similarity vectors are then compared with the cosine
    measure, and the average of these cosines is returned.

    The input matrices are modified: their rows are normalized when the
    similarity type is cosine, a column index is created, and the row
    squared norms are computed.

    NOTE(review): the working arrays are shared between the two matrices
    and the row loop indexes both of them, so the two matrices are assumed
    to have the same number of rows.

    \param params supplies the similarity type and the verbosity level.
    \param amat is the first matrix.
    \param bmat is the second matrix.
    \returns The average cosine over the compared rows, or 0.0 if no row
             could be compared.
*/
/**************************************************************************/
double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat,
           gk_csr_t *bmat)
{
  int iR, iH, nahits, nbhits, ncmps;
  int32_t *marker;
  gk_fkv_t *ahits, *bhits, *cand;
  double tabsim, abdot, anorm2, bnorm2, *avec, *bvec;

  /* if cosine, make rows unit length */
  if (params->simtype == GK_CSR_COS) {
    gk_csr_Normalize(amat, GK_CSR_ROW, 2);
    gk_csr_Normalize(bmat, GK_CSR_ROW, 2);
  }

  /* create the inverted index */
  gk_csr_CreateIndex(amat, GK_CSR_COL);
  gk_csr_CreateIndex(bmat, GK_CSR_COL);

  /* compute the row squared norms */
  gk_csr_ComputeSquaredNorms(amat, GK_CSR_ROW);
  gk_csr_ComputeSquaredNorms(bmat, GK_CSR_ROW);

  /* allocate memory for the necessary working arrays */
  ahits  = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: ahits");
  bhits  = gk_fkvmalloc(bmat->nrows, "ComputeNeighborhoodSimilarity: bhits");
  marker = gk_i32smalloc(amat->nrows, -1, "ComputeNeighborhoodSimilarity: marker");
  cand   = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: cand");
  avec   = gk_dsmalloc(amat->nrows, 0.0, "ComputeNeighborhoodSimilarity: avec");
  bvec   = gk_dsmalloc(bmat->nrows, 0.0, "ComputeNeighborhoodSimilarity: bvec");

  /* find the best neighbors for each row in the two matrices and compute
     the cosine similarity between them. */
  tabsim = 0.0;
  ncmps  = 0;
  for (iR=0; iR<amat->nrows; iR++) {
    if (params->verbosity > 1)
      printf("Working on row %7d\n", iR);

    /* rows that are empty in either matrix are not compared */
    if (amat->rowptr[iR+1]-amat->rowptr[iR] == 0 ||
        bmat->rowptr[iR+1]-bmat->rowptr[iR] == 0)
      continue;

    nahits = gk_csr_GetSimilarRows(amat,
                 amat->rowptr[iR+1]-amat->rowptr[iR],
                 amat->rowind+amat->rowptr[iR],
                 amat->rowval+amat->rowptr[iR],
                 params->simtype, amat->nrows, 0.0,
                 ahits, marker, cand);

    nbhits = gk_csr_GetSimilarRows(bmat,
                 bmat->rowptr[iR+1]-bmat->rowptr[iR],
                 bmat->rowind+bmat->rowptr[iR],
                 bmat->rowval+bmat->rowptr[iR],
                 params->simtype, bmat->nrows, 0.0,
                 bhits, marker, cand);

    if (params->verbosity > 0)
      printf("Row %7d %7d %7d %8zd %8zd\n", iR, nahits, nbhits,
          amat->rowptr[iR+1]-amat->rowptr[iR], bmat->rowptr[iR+1]-bmat->rowptr[iR]);

    /* scatter the hits (val = row, key = similarity) into dense vectors */
    for (iH=0; iH<nahits; iH++)
      avec[ahits[iH].val] = ahits[iH].key;
    for (iH=0; iH<nbhits; iH++)
      bvec[bhits[iH].val] = bhits[iH].key;

    for (abdot=anorm2=bnorm2=0.0, iH=0; iH<amat->nrows; iH++) {
      abdot  += avec[iH]*bvec[iH];
      anorm2 += avec[iH]*avec[iH];
      bnorm2 += bvec[iH]*bvec[iH];
    }
    tabsim += (abdot > 0 ? abdot/sqrt(anorm2*bnorm2) : 0.0);
    ncmps++;

    /* reset only the touched entries so that the vectors are all-zero
       again for the next row */
    for (iH=0; iH<nahits; iH++)
      avec[ahits[iH].val] = 0.0;
    for (iH=0; iH<nbhits; iH++)
      bvec[bhits[iH].val] = 0.0;
  }

  gk_free((void **)&ahits, &bhits, &marker, &cand, &avec, &bvec, LTERM);

  /* if no row was comparable (every row is empty in one of the matrices,
     or there are no rows at all) there is nothing to average */
  return (ncmps > 0 ? tabsim/ncmps : 0.0);
}
/*!
\file
\brief A simple program to convert between different matrix formats that are supported
by the gk_csr_Read/gk_csr_Write functions.
\date 5/30/2013
\author George
\version \verbatim $Id: csrcnv.c 15314 2013-10-05 16:50:50Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time options of the matrix-conversion program. parse_cmdline()
    fills in one instance and main() consumes it. The pruning bounds and
    the low-filter threshold are initialized to -1, which main() treats as
    "not requested". */
/*************************************************************************/
typedef struct {
int inf, outf; /* input/output format */
int numbering; /* input numbering (output when applicable) */
int readvals; /* input values (output when applicable) */
int writevals; /* output values */
int rshuf, cshuf; /* random shuffle of rows/columns */
int symmetric; /* a symmetric shuffle */
int mincolfreq; /* column pruning: lower frequency bound */
int maxcolfreq; /* column pruning: upper frequency bound */
int minrowfreq; /* row pruning: lower frequency bound */
int maxrowfreq; /* row pruning: upper frequency bound */
float rownrmfltr; /* row-lowfilter threshold */
int compactcols; /* if to renumber columns to eliminate empty ones */
int transpose; /* transpose the output matrix */
char *infile; /* input file */
char *outfile; /* output file */
} params_t;
/*************************************************************************/
/*! Option identifiers. Each value is stored in the last field of one
    long_options[] entry and is matched by the switch statement in
    parse_cmdline(). */
/*************************************************************************/
#define CMD_NUMONE 1
#define CMD_NOREADVALS 2
#define CMD_NOWRITEVALS 3
#define CMD_RSHUF 4
#define CMD_CSHUF 5
#define CMD_SYMMETRIC 6
#define CMD_MINCOLFREQ 7
#define CMD_MAXCOLFREQ 8
#define CMD_MINROWFREQ 9
#define CMD_MAXROWFREQ 10
#define CMD_ROWNRMFLTR 11
#define CMD_COMPACTCOLS 12
#define CMD_TRANSPOSE 13
#define CMD_HELP 100
/*************************************************************************/
/*! Table of the long options handed to gk_getopt_long_only(). The first
    field is the option name and the last one is the CMD_* identifier that
    parse_cmdline() switches on. The entries whose second field is 1 are
    exactly the ones whose handlers read gk_optarg. The all-zero entry
    terminates the table.
    NOTE(review): the field layout presumably mirrors getopt's
    struct option {name, has_arg, flag, val} -- confirm in GKlib. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"numone", 0, 0, CMD_NUMONE},
{"noreadvals", 0, 0, CMD_NOREADVALS},
{"nowritevals", 0, 0, CMD_NOWRITEVALS},
{"rshuf", 0, 0, CMD_RSHUF},
{"cshuf", 0, 0, CMD_CSHUF},
{"symmetric", 0, 0, CMD_SYMMETRIC},
{"mincolfreq", 1, 0, CMD_MINCOLFREQ},
{"maxcolfreq", 1, 0, CMD_MAXCOLFREQ},
{"minrowfreq", 1, 0, CMD_MINROWFREQ},
{"maxrowfreq", 1, 0, CMD_MAXROWFREQ},
{"rownrmfltr", 1, 0, CMD_ROWNRMFLTR},
{"compactcols", 0, 0, CMD_COMPACTCOLS},
{"transpose", 0, 0, CMD_TRANSPOSE},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Full help text, printed line by line when -help is given.          */
/* Every row is a fixed 100-byte buffer, so each line must stay below */
/* 100 characters. The empty string in the last row is the sentinel   */
/* that stops the printing loop in parse_cmdline().                   */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: csrconv [options] <infile> <inf> <outfile> <outf>",
" ",
" Required parameters",
" infile, outfile",
" The name of the input/output CSR file.",
" ",
" inf/outf",
" The format of the input/output file.",
" Supported values are:",
" 1 GK_CSR_FMT_CLUTO",
" 2 GK_CSR_FMT_CSR",
" 3 GK_CSR_FMT_METIS",
" 4 GK_CSR_FMT_BINROW",
" 6 GK_CSR_FMT_IJV",
" 7 GK_CSR_FMT_BIJV",
" ",
" Optional parameters",
" -numone",
" Specifies that the numbering of the input file starts from 1. ",
" It only applies to CSR/IJV formats.",
" ",
" -nowritevals",
" Specifies that no values will be output.",
" ",
" -noreadvals",
" Specifies that the values will not be read when applicable.",
" ",
" -rshuf",
" Specifies that the rows will be randomly shuffled prior to output.",
" ",
" -cshuf",
" Specifies that the columns will be randomly shuffled prior to output.",
" ",
" -symmetric",
" Specifies that the row+column shuffling will be symmetric.",
" ",
" -mincolfreq=int",
" Used to prune infrequent columns.",
" ",
" -maxcolfreq=int",
" Used to prune frequent columns.",
" ",
" -minrowfreq=int",
" Used to prune infrequent rows.",
" ",
" -maxrowfreq=int",
" Used to prune frequent rows.",
" ",
" -rownrmfltr=float",
" The parameter to use for the row-wise low filter.",
" ",
" -compactcols",
" Specifies if empty columns will be removed and the columns renumbered.",
" ",
" -transpose",
" Specifies that the transposed matrix will be written.",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong. The trailing empty string ends the
   printing loop. */
static char shorthelpstr[][100] = {
" ",
" Usage: csrconv [options] <infile> <inf> <outfile> <outf>",
" use 'csrconv -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Command-line parser of the matrix-conversion program.

    Allocates a params_t, loads the defaults, applies every option that
    gk_getopt_long_only() reports, and finally reads the four positional
    arguments <infile> <inf> <outfile> <outf>.

    \param argc the argument count received by main().
    \param argv the argument vector received by main().
    \returns the populated params_t, allocated with gk_malloc(); this
             function never frees it.

    The process is terminated with exit(0) when -help is given, when an
    option is not recognized, or when the number of positional arguments
    is not four. A missing input file is reported through errexit(). */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int row;
  int opt, optidx;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* positional arguments: unset until the option loop is over */
  params->infile  = NULL;
  params->outfile = NULL;
  params->inf     = -1;
  params->outf    = -1;

  /* on/off switches */
  params->numbering   = 0;
  params->readvals    = 1;
  params->writevals   = 1;
  params->rshuf       = 0;
  params->cshuf       = 0;
  params->symmetric   = 0;
  params->transpose   = 0;
  params->compactcols = 0;

  /* thresholds: -1 means that the transformation was not requested */
  params->mincolfreq = -1;
  params->maxcolfreq = -1;
  params->minrowfreq = -1;
  params->maxrowfreq = -1;
  params->rownrmfltr = -1;

  /* walk over the options */
  for (;;) {
    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
    if (opt == -1)
      break;

    switch (opt) {
      /* switches without an argument */
      case CMD_NUMONE:      params->numbering   = 1; break;
      case CMD_NOREADVALS:  params->readvals    = 0; break;
      case CMD_NOWRITEVALS: params->writevals   = 0; break;
      case CMD_RSHUF:       params->rshuf       = 1; break;
      case CMD_CSHUF:       params->cshuf       = 1; break;
      case CMD_SYMMETRIC:   params->symmetric   = 1; break;
      case CMD_TRANSPOSE:   params->transpose   = 1; break;
      case CMD_COMPACTCOLS: params->compactcols = 1; break;

      /* options that carry a value; ignored when no value is attached */
      case CMD_MINCOLFREQ:
        if (gk_optarg != NULL)
          params->mincolfreq = atoi(gk_optarg);
        break;
      case CMD_MAXCOLFREQ:
        if (gk_optarg != NULL)
          params->maxcolfreq = atoi(gk_optarg);
        break;
      case CMD_MINROWFREQ:
        if (gk_optarg != NULL)
          params->minrowfreq = atoi(gk_optarg);
        break;
      case CMD_MAXROWFREQ:
        if (gk_optarg != NULL)
          params->maxrowfreq = atoi(gk_optarg);
        break;
      case CMD_ROWNRMFLTR:
        if (gk_optarg != NULL)
          params->rownrmfltr = atof(gk_optarg);
        break;

      case CMD_HELP:
        for (row=0; helpstr[row][0] != '\0'; row++)
          puts(helpstr[row]);
        exit(0);

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(0);
    }
  }

  /* exactly four positional arguments have to be left */
  if (argc-gk_optind != 4) {
    printf("Unrecognized parameters.");
    for (row=0; shorthelpstr[row][0] != '\0'; row++)
      puts(shorthelpstr[row]);
    exit(0);
  }

  params->infile  = gk_strdup(argv[gk_optind++]);
  params->inf     = atoi(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);
  params->outf    = atoi(argv[gk_optind++]);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  return params;
}
/*************************************************************************/
/*! the entry point */
/**************************************************************************/
int main(int argc, char *argv[])
{
int what;
params_t *params;
gk_csr_t *mat, *mat1, *smat;
/* get command-line options */
params = parse_cmdline(argc, argv);
/* read the data */
mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
/* deal with weird transformations */
if (params->mincolfreq != -1 || params->maxcolfreq != -1) {
params->mincolfreq = (params->mincolfreq == -1 ? 0 : params->mincolfreq);
params->maxcolfreq = (params->maxcolfreq == -1 ? mat->nrows : params->maxcolfreq);
printf("Column prune: %d %d; nnz: %zd => ",
params->mincolfreq, params->maxcolfreq, mat->rowptr[mat->nrows]);
mat1 = gk_csr_Prune(mat, GK_CSR_COL, params->mincolfreq, params->maxcolfreq);
gk_csr_Free(&mat);
mat = mat1;
mat1 = NULL;
printf("%zd\n", mat->rowptr[mat->nrows]);
}
if (params->minrowfreq != -1 || params->maxrowfreq != -1) {
params->minrowfreq = (params->minrowfreq == -1 ? 0 : params->minrowfreq);
params->maxrowfreq = (params->maxrowfreq == -1 ? mat->ncols : params->maxrowfreq);
printf("Row prune: %d %d; nnz: %zd => ",
params->minrowfreq, params->maxrowfreq, mat->rowptr[mat->nrows]);
mat1 = gk_csr_Prune(mat, GK_CSR_ROW, params->minrowfreq, params->maxrowfreq);
gk_csr_Free(&mat);
mat = mat1;
mat1 = NULL;
printf("%zd\n", mat->rowptr[mat->nrows]);
}
if (params->rownrmfltr >= 0.0) {
//gk_csr_Scale(mat, GK_CSR_LOG);
//gk_csr_Scale(mat, GK_CSR_IDF2);
printf("Row low filter: %f; nnz: %zd => ", params->rownrmfltr, mat->rowptr[mat->nrows]);
mat1 = gk_csr_LowFilter(mat, GK_CSR_ROW, 2, params->rownrmfltr);
gk_csr_Normalize(mat1, GK_CSR_ROW, 2);
gk_csr_Free(&mat);
mat = mat1;
mat1 = NULL;
printf("%zd\n", mat->rowptr[mat->nrows]);
}
if (params->compactcols) {
printf("Compacting columns: %d => ", mat->ncols);
gk_csr_CompactColumns(mat);
printf("%d\n", mat->ncols);
}
if (params->rshuf || params->cshuf) {
if (params->rshuf && params->cshuf)
what = GK_CSR_ROWCOL;
else if (params->rshuf)
what = GK_CSR_ROW;
else
what = GK_CSR_COL;
smat = gk_csr_Shuffle(mat, what, params->symmetric);
gk_csr_Free(&mat);
mat = smat;
}
if (params->writevals && mat->rowval == NULL)
mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
if (params->transpose) {
mat1 = gk_csr_Transpose(mat);
gk_csr_Free(&mat);
mat = mat1;
mat1 = NULL;
}
gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
gk_csr_Free(&mat);
}
/*!
\file
\brief A simple frequent itemset discovery program to test GKlib's routines
\date 6/12/2008
\author George
\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time state of the fis program. parse_cmdline() fills in the
    options; main() sets nitemsets and clabels; print_an_itemset() receives
    the structure through its opaque state pointer. */
/*************************************************************************/
typedef struct {
ssize_t minlen, maxlen; /* bounds on the pattern length, passed to gk_find_frequent_itemsets(); maxlen defaults to -1 */
ssize_t minfreq, maxfreq; /* bounds on the pattern frequency, passed to gk_find_frequent_itemsets(); maxfreq defaults to -1 */
char *filename; /* the input matrix file */
int silent; /* when non-zero the discovered itemsets are counted but not printed */
ssize_t nitemsets; /* number of itemsets reported so far (incremented by the callback) */
char *clabelfile; /* optional file with the column labels; NULL when not given */
char **clabels; /* one label string per matrix column */
} params_t;
/*************************************************************************/
/*! Option identifiers. Each value is stored in the last field of one
    long_options[] entry and is matched by the switch statement in
    parse_cmdline(). */
/*************************************************************************/
#define CMD_MINLEN 1
#define CMD_MAXLEN 2
#define CMD_MINFREQ 3
#define CMD_MAXFREQ 4
#define CMD_SILENT 5
#define CMD_CLABELFILE 6
#define CMD_HELP 10
/*************************************************************************/
/*! Table of the long options handed to gk_getopt_long_only(). The first
    field is the option name and the last one is the CMD_* identifier that
    parse_cmdline() switches on. The entries whose second field is 1 are
    the ones whose handlers read gk_optarg. The all-zero entry terminates
    the table. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"minlen", 1, 0, CMD_MINLEN},
{"maxlen", 1, 0, CMD_MAXLEN},
{"minfreq", 1, 0, CMD_MINFREQ},
{"maxfreq", 1, 0, CMD_MAXFREQ},
{"silent", 0, 0, CMD_SILENT},
{"clabels", 1, 0, CMD_CLABELFILE},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Full help text, printed line by line when -help is given.          */
/* Every row is a fixed 100-byte buffer. The empty string in the last */
/* row is the sentinel that stops the printing loop.                  */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: fis [options] <mat-file>",
" ",
" Required parameters",
" mat-file",
" The name of the file storing the transactions. The file is in ",
" Cluto's .mat format.",
" ",
" Optional parameters",
" -minlen=int",
" Specifies the minimum length of the patterns. [default: 1]",
" ",
" -maxlen=int",
" Specifies the maximum length of the patterns. [default: none]",
" ",
" -minfreq=int",
" Specifies the minimum frequency of the patterns. [default: 10]",
" ",
" -maxfreq=int",
" Specifies the maximum frequency of the patterns. [default: none]",
" ",
" -silent",
" Does not print the discovered itemsets.",
" ",
" -clabels=filename",
" Specifies the name of the file that stores the column labels.",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong. The trailing empty string ends the
   printing loop. */
static char shorthelpstr[][100] = {
" ",
" Usage: fis [options] <mat-file>",
" use 'fis -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes of the routines defined later in this file.
    print_an_itemset() is the callback whose address main() hands to
    gk_find_frequent_itemsets(); its definition names the two array
    parameters itemids/transids instead of itemind/tranind. */
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat);
void print_final_info(params_t *params);
params_t *parse_cmdline(int argc, char *argv[]);
void print_an_itemset(void *stateptr, int nitems, int *itemind,
int ntrans, int *tranind);
/*************************************************************************/
/*! Entry point of the fis program.

    Reads the transaction matrix in Cluto format, builds its column index,
    prepares one label per column (the column number when no label file
    was given, otherwise one trimmed line of the label file per column)
    and runs gk_find_frequent_itemsets() with print_an_itemset() as the
    callback. */
/**************************************************************************/
int main(int argc, char *argv[])
{
  ssize_t col;
  char buf[8192];
  FILE *labelfp;
  params_t *params;
  gk_csr_t *mat;

  params = parse_cmdline(argc, argv);
  params->nitemsets = 0;

  /* load the transactions */
  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* set up the column labels */
  params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
  if (params->clabelfile != NULL) {
    /* one label per line; blanks, tabs and the newline are pruned */
    labelfp = gk_fopen(params->clabelfile, "r", "main: fpin");
    for (col=0; col<mat->ncols; col++) {
      if (fgets(buf, sizeof(buf), labelfp) == NULL)
        errexit("Failed on fgets.\n");
      params->clabels[col] = gk_strdup(gk_strtprune(buf, " \n\t"));
    }
    gk_fclose(labelfp);
  }
  else {
    /* no label file: the label of a column is its number */
    for (col=0; col<mat->ncols; col++) {
      sprintf(buf, "%zd", col);
      params->clabels[col] = gk_strdup(buf);
    }
  }

  print_init_info(params, mat);

  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
      params->minfreq, params->maxfreq, params->minlen, params->maxlen,
      &print_an_itemset, (void *)params);

  printf("Total itemsets found: %zd\n", params->nitemsets);

  print_final_info(params);
}
/*************************************************************************/
/*! Prints the banner that precedes the itemset discovery: the input file
    together with the matrix dimensions (rows, columns, non-zeros) and the
    length/frequency bounds that are in effect.

    \param params the run parameters; filename and the four bounds are read.
    \param mat    the transaction matrix; nrows, ncols and rowptr are read. */
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat)
{
  printf("*******************************************************************************\n");
  printf(" fis\n\n");
  printf("Matrix Information ---------------------------------------------------------\n");
  printf(" input file=%s, [%d, %d, %zd]\n",
      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
  printf("\n");
  printf("Options --------------------------------------------------------------------\n");
  /* the label used to read "minfeq"; it now matches the -minfreq option */
  printf(" minlen=%zd, maxlen=%zd, minfreq=%zd, maxfreq=%zd\n",
      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
  printf("\n");
  printf("Finding patterns... -----------------------------------------------------\n");
}
/*************************************************************************/
/*! Prints the closing banner with the values reported by
    gk_GetMaxMemoryUsed() and gk_GetCurMemoryUsed().

    \param params the run parameters; not used by this routine. */
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t nbytes;

  printf("\n"
         "Memory Usage Information -----------------------------------------------------\n");

  nbytes = (ssize_t) gk_GetMaxMemoryUsed();
  printf(" Maximum memory used: %10zd bytes\n", nbytes);

  nbytes = (ssize_t) gk_GetCurMemoryUsed();
  printf(" Current memory used: %10zd bytes\n", nbytes);

  printf("********************************************************************************\n");
}
/*************************************************************************/
/*! Command-line parser of the fis program.

    Allocates a params_t, loads the defaults (minlen=1, minfreq=10, no
    upper bounds, not silent, no label file), applies every option that
    gk_getopt_long_only() reports and reads the single positional
    argument <mat-file>.

    \param argc the argument count received by main().
    \param argv the argument vector received by main().
    \returns the populated params_t, allocated with gk_malloc(). The
             nitemsets and clabels fields are left for main() to set.

    The process is terminated with exit(0) when -help is given, when an
    option is not recognized, or when the number of positional arguments
    is not one. A missing input file is reported through errexit(). */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int row;
  int opt, optidx;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* defaults */
  params->filename   = NULL;
  params->clabelfile = NULL;
  params->silent     = 0;
  params->minlen     = 1;
  params->maxlen     = -1;
  params->minfreq    = 10;
  params->maxfreq    = -1;

  /* walk over the options */
  for (;;) {
    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
    if (opt == -1)
      break;

    switch (opt) {
      case CMD_SILENT:
        params->silent = 1;
        break;

      /* options that carry a value; ignored when no value is attached */
      case CMD_MINLEN:
        if (gk_optarg != NULL)
          params->minlen = atoi(gk_optarg);
        break;
      case CMD_MAXLEN:
        if (gk_optarg != NULL)
          params->maxlen = atoi(gk_optarg);
        break;
      case CMD_MINFREQ:
        if (gk_optarg != NULL)
          params->minfreq = atoi(gk_optarg);
        break;
      case CMD_MAXFREQ:
        if (gk_optarg != NULL)
          params->maxfreq = atoi(gk_optarg);
        break;
      case CMD_CLABELFILE:
        if (gk_optarg != NULL)
          params->clabelfile = gk_strdup(gk_optarg);
        break;

      case CMD_HELP:
        for (row=0; helpstr[row][0] != '\0'; row++)
          puts(helpstr[row]);
        exit(0);

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(0);
    }
  }

  /* exactly one positional argument has to be left */
  if (argc-gk_optind != 1) {
    printf("Unrecognized parameters.");
    for (row=0; shorthelpstr[row][0] != '\0'; row++)
      puts(shorthelpstr[row]);
    exit(0);
  }

  params->filename = gk_strdup(argv[gk_optind++]);

  if (!gk_fexists(params->filename))
    errexit("input file %s does not exist.\n", params->filename);

  return params;
}
/*************************************************************************/
/*! This is the callback function for the itemset discovery routine */
/*************************************************************************/
void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
int *transids)
{
ssize_t i;
params_t *params;
params = (params_t *)stateptr;
params->nitemsets++;
if (!params->silent) {
printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
for (i=0; i<nitems; i++)
printf(" %s", params->clabels[itemids[i]]);
printf("\n");
for (i=0; i<ntrans; i++)
printf(" %d\n", transids[i]);
printf("\n");
}
}
/*!
\file
\brief A simple program to test GKlib's graph ordering routines (BFS and best-first orderings)
\date 6/12/2008
\author George
\version \verbatim $Id: gkgraph.c 17700 2014-09-27 18:10:02Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time options of the gkgraph program, filled in by
    parse_cmdline(). */
/*************************************************************************/
typedef struct {
int type; /* passed by main() to gk_graph_ComputeBestFOrdering() */
int niter; /* iteration count; only read by code that is commented out or guarded by XXX */
float eps; /* set by -eps; only printed by print_init_info() */
float lamda; /* scales the displacement in reorder_centroid() */
char *infile; /* the input graph file */
char *outfile; /* set by parse_cmdline(); not read by the other routines of this program */
} params_t;
/*************************************************************************/
/*! Option identifiers. Each value is stored in the last field of one
    long_options[] entry and is matched by the switch statement in
    parse_cmdline(). */
/*************************************************************************/
#define CMD_NITER 1
#define CMD_EPS 2
#define CMD_LAMDA 3
#define CMD_TYPE 4
#define CMD_HELP 10
/*************************************************************************/
/*! Table of the long options handed to gk_getopt_long_only(). The first
    field is the option name and the last one is the CMD_* identifier that
    parse_cmdline() switches on. The entries whose second field is 1 are
    the ones whose handlers read gk_optarg. The all-zero entry terminates
    the table. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"type", 1, 0, CMD_TYPE},
{"niter", 1, 0, CMD_NITER},
{"lamda", 1, 0, CMD_LAMDA},
{"eps", 1, 0, CMD_EPS},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Full help text, printed line by line when -help is given.          */
/* Every row is a fixed 100-byte buffer, so each line must stay below */
/* 100 characters. The empty string in the last row is the sentinel   */
/* that stops the printing loop. The defaults quoted here are the     */
/* values that parse_cmdline() actually installs.                     */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: gkgraph [options] <graph-file> [<out-file>]",
" ",
" Required parameters",
" graph-file",
" The name of the file storing the graph. The file is in ",
" Metis' graph format.",
" ",
" Optional parameters",
" -type=int",
" Specifies the type of the best-first ordering. [default: 1]",
" ",
" -niter=int",
" Specifies the maximum number of iterations. [default: 1]",
" ",
" -lamda=float",
" Specifies the follow-the-adjacent-links probability. [default: 0.20]",
" ",
" -eps=float",
" Specifies the error tolerance. [default: 1e-10]",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong. The trailing empty string ends the
   printing loop. */
static char shorthelpstr[][100] = {
" ",
" Usage: gkgraph [options] <graph-file> [<out-file>]",
" use 'gkgraph -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes of the routines defined later in this file.
    reorder_centroid() is only called from the part of main() that is
    guarded by XXX. */
/*************************************************************************/
double compute_compactness(params_t *params, gk_graph_t *graph, int32_t *perm);
void reorder_centroid(params_t *params, gk_graph_t *graph, int32_t *perm);
void print_init_info(params_t *params, gk_graph_t *graph);
void print_final_info(params_t *params);
params_t *parse_cmdline(int argc, char *argv[]);
/*************************************************************************/
/*! Entry point of the gkgraph program.

    Reads a graph in Metis format, reports its initial compactness, and
    then, starting from one randomly selected vertex, computes a BFS and
    a best-first ordering. For each ordering the compactness is printed
    and the reordered graph is written to "bfs.metis" / "bestf.metis".

    Each ordering routine stores a permutation array in perm that this
    function releases with gk_free(). The BFS permutation is therefore
    freed before the best-first call overwrites the pointer; it used to
    be leaked.
    NOTE(review): this assumes that the ordering routines always return
    a newly allocated array -- confirm against their implementation. */
/**************************************************************************/
int main(int argc, char *argv[])
{
  ssize_t i, j, v;
  params_t *params;
  gk_graph_t *graph, *pgraph;
  int32_t *perm;

  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);

  /* display some basic stats */
  print_init_info(params, graph);

  /* determine the initial compactness of the graph */
  printf("Initial compactness: %le\n", compute_compactness(params, graph, NULL));

  /* compute the orderings and re-order the graph */
  //for (i=0; i<params->niter; i++) {
  for (i=0; i<1; i++) {
    v = RandomInRange(graph->nvtxs);

    /* BFS ordering */
    gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
    printf("BFS from %8d. Compactness: %le\n",
        (int) v, compute_compactness(params, graph, perm));

    pgraph = gk_graph_Reorder(graph, perm, NULL);
    gk_graph_Write(pgraph, "bfs.metis", GK_GRAPH_FMT_METIS);
    gk_graph_Free(&pgraph);

    /* done with the BFS permutation; release it before perm is reused */
    gk_free((void **)&perm, LTERM);

    /* best-first ordering from the same start vertex */
    gk_graph_ComputeBestFOrdering(graph, v, params->type, &perm, NULL);
    printf("BestF from %8d. Compactness: %le\n",
        (int) v, compute_compactness(params, graph, perm));

    pgraph = gk_graph_Reorder(graph, perm, NULL);
    gk_graph_Write(pgraph, "bestf.metis", GK_GRAPH_FMT_METIS);
    gk_graph_Free(&pgraph);

#ifdef XXX
    for (j=0; j<params->niter; j++) {
      reorder_centroid(params, graph, perm);
      printf("\tAfter centroid; Compactness: %le\n",
          compute_compactness(params, graph, perm));
    }

    pgraph = gk_graph_Reorder(graph, perm, NULL);
    gk_graph_Write(pgraph, "centroid.metis", GK_GRAPH_FMT_METIS);
    gk_graph_Free(&pgraph);
#endif

    gk_free((void **)&perm, LTERM);
  }

  gk_graph_Free(&graph);
  //gk_graph_Free(&pgraph);

  print_final_info(params);

  return 0;
}
/*************************************************************************/
/*! Computes the compactness of the graph's adjacency list, i.e. the
    average of |pos(v) - pos(u)| over all stored adjacency entries, where
    pos() is the identity when perm is NULL and perm[] otherwise.

    As a side effect it prints "nsmall", the number of adjacency entries
    whose distance is 1, 2 or 3.

    \param params the run parameters; not used by this routine.
    \param graph  the graph; nvtxs, xadj and adjncy are read.
    \param perm   the position of every vertex, or NULL for the identity.
    \returns the average distance, or 0.0 when the graph stores no
             adjacency entries (the 0/0 division used to give NaN).

    Compared with the earlier version, the histogram is released before
    returning and nsmall no longer reads past the end of the histogram
    when the graph has fewer than four vertices. */
/*************************************************************************/
double compute_compactness(params_t *params, gk_graph_t *graph, int32_t *perm)
{
  int i, v, u, dist, nvtxs, nsmall;
  ssize_t j, *xadj;
  int32_t *adjncy;
  double compactness=0.0;
  int *freq;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;

  /* freq[d] counts the adjacency entries at distance d; d < nvtxs */
  freq = gk_ismalloc(nvtxs, 0, "compute_compactness: freq");

  for (i=0; i<nvtxs; i++) {
    v = (perm == NULL ? i : perm[i]);
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      u = (perm == NULL ? adjncy[j] : perm[adjncy[j]]);
      dist = abs(v-u);
      compactness += dist;
      freq[dist]++;
    }
  }

  /*
  for (i=0; i<nvtxs; i++) {
    if (freq[i] > 0)
      printf("%7d %6d\n", i, freq[i]);
  }
  */

  /* only the slots that exist are added up */
  for (nsmall=0, i=1; i<=3 && i<nvtxs; i++)
    nsmall += freq[i];
  printf("\tnsmall: %d\n", nsmall);

  gk_free((void **)&freq, LTERM);

  if (xadj[nvtxs] == 0)
    return 0.0;

  return compactness/xadj[nvtxs];
}
/*************************************************************************/
/*! Refines an ordering with a centroid-based step.

    Every vertex gets a target position equal to its current position
    plus lamda times the average signed displacement towards its
    neighbors. The vertices are then sorted by target position and perm
    is overwritten with the resulting ranks.

    \param params the run parameters; only lamda is read.
    \param graph  the graph; nvtxs, xadj and adjncy are read.
    \param perm   on input the current position of every vertex, on
                  output the refined one.

    A vertex without neighbors keeps its current position as the target.
    The earlier code divided by a degree of zero, which gave a NaN sort
    key. */
/*************************************************************************/
void reorder_centroid(params_t *params, gk_graph_t *graph, int32_t *perm)
{
  int i, v, u, nvtxs;
  ssize_t j, degree, *xadj;
  int32_t *adjncy;
  gk_fkv_t *cand;
  double displacement;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;

  cand = gk_fkvmalloc(nvtxs, "reorder_centroid: cand");

  for (i=0; i<nvtxs; i++) {
    v = perm[i];
    degree = xadj[i+1]-xadj[i];

    displacement = 0.0;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      u = perm[adjncy[j]];
      displacement += u-v;
      //displacement += sign(u-v, sqrt(abs(u-v)));
    }

    cand[i].val = i;
    cand[i].key = (degree > 0 ? v + displacement*params->lamda/degree : v);
  }

  /* sort them based on the target position in increasing order */
  gk_fkvsorti(nvtxs, cand);

  /* derive the permutation from the ordered list */
  gk_i32set(nvtxs, -1, perm);
  for (i=0; i<nvtxs; i++) {
    /* the cast makes the argument match %d whatever the width of val is */
    if (perm[cand[i].val] != -1)
      errexit("Resetting perm[%d] = %d\n", (int)cand[i].val, perm[cand[i].val]);
    perm[cand[i].val] = i;
  }

  gk_free((void **)&cand, LTERM);
}
/*************************************************************************/
/*! Prints the banner that precedes the computation: the input file with
    the number of vertices and of stored adjacency entries, followed by
    the values of the options.

    \param params the run parameters; infile, type, niter, lamda and eps
                  are read.
    \param graph  the graph; nvtxs and xadj are read. */
/*************************************************************************/
void print_init_info(params_t *params, gk_graph_t *graph)
{
  printf("*******************************************************************************\n"
         " gkgraph\n\n");

  printf("Graph Information ----------------------------------------------------------\n");
  printf(" input file=%s, [%d, %zd]\n",
      params->infile, graph->nvtxs, graph->xadj[graph->nvtxs]);
  printf("\n");

  printf("Options --------------------------------------------------------------------\n");
  printf(" type=%d, niter=%d, lamda=%f, eps=%e\n",
      params->type, params->niter, params->lamda, params->eps);
  printf("\n");

  printf("Working... -----------------------------------------------------------------\n");
}
/*************************************************************************/
/*! Prints the closing banner with the values reported by
    gk_GetMaxMemoryUsed() and gk_GetCurMemoryUsed().

    \param params the run parameters; not used by this routine. */
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t nbytes;

  printf("\n"
         "Memory Usage Information -----------------------------------------------------\n");

  nbytes = (ssize_t) gk_GetMaxMemoryUsed();
  printf(" Maximum memory used: %10zd bytes\n", nbytes);

  nbytes = (ssize_t) gk_GetCurMemoryUsed();
  printf(" Current memory used: %10zd bytes\n", nbytes);

  printf("********************************************************************************\n");
}
/*************************************************************************/
/*! Command-line parser of the gkgraph program.

    Allocates a params_t, loads the defaults (type=1, niter=1, eps=1e-10,
    lamda=0.20), applies every option that gk_getopt_long_only() reports
    and reads the positional arguments <graph-file> [<out-file>].

    \param argc the argument count received by main().
    \param argv the argument vector received by main().
    \returns the populated params_t, allocated with gk_malloc(). outfile
             is "gkgraph.out" when no <out-file> was supplied.

    The process is terminated with exit(0) when -help is given, when an
    option is not recognized, or when the number of positional arguments
    is neither one nor two. A missing input file is reported through
    errexit().

    The earlier check demanded exactly one positional argument, so the
    optional <out-file> shown in the usage line could never be given. */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i, nargs;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->type = 1;
  params->niter = 1;
  params->eps = 1e-10;
  params->lamda = 0.20;
  params->infile = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_TYPE:
        if (gk_optarg) params->type = atoi(gk_optarg);
        break;
      case CMD_NITER:
        if (gk_optarg) params->niter = atoi(gk_optarg);
        break;
      case CMD_EPS:
        if (gk_optarg) params->eps = atof(gk_optarg);
        break;
      case CMD_LAMDA:
        if (gk_optarg) params->lamda = atof(gk_optarg);
        break;

      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(0);
        break;

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(0);
    }
  }

  /* <graph-file> is required, <out-file> is optional */
  nargs = argc-gk_optind;
  if (nargs < 1 || nargs > 2) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(0);
  }

  params->infile = gk_strdup(argv[gk_optind++]);

  if (argc-gk_optind > 0)
    params->outfile = gk_strdup(argv[gk_optind++]);
  else
    params->outfile = gk_strdup("gkgraph.out");

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  return params;
}
/*!
\file gksort.c
\brief Testing module for the various sorting routines in GKlib
\date Started 4/4/2007
\author George
\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim
*/
#include <GKlib.h>
/* number of elements that every test routine of this file generates and sorts */
#define N 10000
/*************************************************************************/
/*! Exercises gk_isorti() and gk_isortd() on N random integers and prints
    one line for every adjacent pair that is out of order. */
/*************************************************************************/
void test_isort()
{
  gk_idx_t k;
  int data[N];

  /* ascending order */
  printf("Testing iisort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_isorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }

  /* descending order */
  printf("Testing disort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_isortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }
}
/*************************************************************************/
/*! Exercises gk_fsorti() and gk_fsortd() on N random floats and prints
    one line for every adjacent pair that is out of order. */
/*************************************************************************/
void test_fsort()
{
  gk_idx_t k;
  float data[N];

  /* ascending order */
  printf("Testing ifsort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));

  gk_fsorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }

  /* descending order */
  printf("Testing dfsort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));

  gk_fsortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }
}
/*************************************************************************/
/*! Exercises gk_idxsorti() and gk_idxsortd() on N random gk_idx_t values
    and prints one line for every adjacent pair that is out of order. */
/*************************************************************************/
void test_idxsort()
{
  gk_idx_t k;
  gk_idx_t data[N];

  /* ascending order */
  printf("Testing idxsorti...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_idxsorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
  }

  /* descending order */
  printf("Testing idxsortd...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_idxsortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
  }
}
/*************************************************************************/
/*! Exercises gk_ikvsorti() and gk_ikvsortd() on N key-value pairs with
    random integer keys (the value is the original slot) and prints one
    line for every adjacent pair whose keys are out of order. */
/*************************************************************************/
void test_ikvsort()
{
  gk_idx_t k;
  gk_ikv_t pairs[N];

  /* ascending order */
  printf("Testing ikvsorti...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432);
    pairs[k].val = k;
  }

  gk_ikvsorti(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key > pairs[k].key)
      printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }

  /* descending order */
  printf("Testing ikvsortd...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432);
    pairs[k].val = k;
  }

  gk_ikvsortd(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key < pairs[k].key)
      printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }
}
/*************************************************************************/
/*! Exercises gk_fkvsorti() and gk_fkvsortd() on N key-value pairs with
    random float keys (the value is the original slot) and prints one
    line for every adjacent pair whose keys are out of order. */
/*************************************************************************/
void test_fkvsort()
{
  gk_idx_t k;
  gk_fkv_t pairs[N];

  /* ascending order */
  printf("Testing fkvsorti...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[k].val = k;
  }

  gk_fkvsorti(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key > pairs[k].key)
      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }

  /* descending order */
  printf("Testing fkvsortd...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[k].val = k;
  }

  gk_fkvsortd(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key < pairs[k].key)
      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }
}
/*************************************************************************/
/*! Exercises gk_dkvsorti() and gk_dkvsortd() on N key-value pairs with
    random double keys (the value is the original slot) and prints one
    line for every adjacent pair whose keys are out of order. */
/*************************************************************************/
void test_dkvsort()
{
  gk_idx_t k;
  gk_dkv_t pairs[N];

  /* ascending order */
  printf("Testing dkvsorti...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[k].val = k;
  }

  gk_dkvsorti(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key > pairs[k].key)
      printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }

  /* descending order */
  printf("Testing dkvsortd...\n");
  for (k=0; k<N; k++) {
    pairs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[k].val = k;
  }

  gk_dkvsortd(N, pairs);

  for (k=1; k<N; k++) {
    if (pairs[k-1].key < pairs[k].key)
      printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(k-1), pairs[k-1].key, pairs[k].key, (intmax_t)pairs[k-1].val, (intmax_t)pairs[k].val);
  }
}
/*************************************************************************/
/*! Testing module for gk_?skvsort() routine */
/*************************************************************************/
void test_skvsort()
{
gk_idx_t i;
gk_skv_t array[N];
char line[256];
/* test the increasing sort */
printf("Testing skvsorti...\n");
for (i=0; i<N; i++) {
sprintf(line, "%d", RandomInRange(123432));
array[i].key = gk_strdup(line);
array[i].val = i;
}
gk_skvsorti(N, array);
for (i=0; i<N-1; i++) {
if (strcmp(array[i].key, array[i+1].key) > 0)
printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
/* test the decreasing sort */
printf("Testing skvsortd...\n");
for (i=0; i<N; i++) {
sprintf(line, "%d", RandomInRange(123432));
array[i].key = gk_strdup(line);
array[i].val = i;
}
gk_skvsortd(N, array);
for (i=0; i<N-1; i++) {
/*printf("%s\n", array[i].key);*/
if (strcmp(array[i].key, array[i+1].key) < 0)
printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
}
/*************************************************************************/
/*! Exercises gk_idxkvsorti() and gk_idxkvsortd() on N gk_idxkv_t entries
    with random integer keys, printing a diagnostic for every adjacent
    pair that is left in the wrong order. */
/*************************************************************************/
void test_idxkvsort()
{
  gk_idxkv_t array[N];
  gk_idx_t pos;

  /* ascending sort: fill, sort, then compare each entry with its predecessor */
  printf("Testing idxkvsorti...\n");
  for (pos=0; pos<N; pos++) {
    array[pos].key = RandomInRange(123432);
    array[pos].val = pos;
  }
  gk_idxkvsorti(N, array);

  for (pos=1; pos<N; pos++) {
    if (array[pos-1].key <= array[pos].key)
      continue;
    printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n",
        (ssize_t)(pos-1), (ssize_t)array[pos-1].key, (ssize_t)array[pos].key,
        (ssize_t)array[pos-1].val, (ssize_t)array[pos].val);
  }

  /* descending sort: same procedure with the comparison reversed */
  printf("Testing idxkvsortd...\n");
  for (pos=0; pos<N; pos++) {
    array[pos].key = RandomInRange(123432);
    array[pos].val = pos;
  }
  gk_idxkvsortd(N, array);

  for (pos=1; pos<N; pos++) {
    if (array[pos-1].key >= array[pos].key)
      continue;
    printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n",
        (ssize_t)(pos-1), (ssize_t)array[pos-1].key, (ssize_t)array[pos].key,
        (ssize_t)array[pos-1].val, (ssize_t)array[pos].val);
  }
}
int main()
{
test_isort();
test_fsort();
test_idxsort();
test_ikvsort();
test_fkvsort();
test_dkvsort();
test_skvsort();
test_idxkvsort();
}
/*!
\file
\brief A simple program to create multiple copies of an input matrix.
\date 5/30/2013
\author George
\version \verbatim $Id: grKx.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time options of grKx. The structure is allocated and filled in by
    parse_cmdline() and read by main(). */
/*************************************************************************/
typedef struct {
/* file-format codes taken from the <inf>/<outf> positional arguments;
   main() passes them to gk_csr_Read()/gk_csr_Write() */
int inf, outf;
int numbering; /* 1 if -numone was given (input indices start from 1), 0 otherwise */
int readvals; /* 1 to read the input values; cleared by -noreadvals */
int writevals; /* 1 to write values to the output; cleared by -nowritevals */
int rshuf, cshuf; /* set by -rshuf/-cshuf: random shuffle of rows/columns */
int symmetric; /* set by -symmetric; passed as the last argument of gk_csr_Shuffle() */
int ncopies; /* the number of copies of the matrix to create (<ncopies> argument) */
char *infile; /* input file name (heap copy made with gk_strdup) */
char *outfile; /* output file name (heap copy made with gk_strdup) */
} params_t;
/*************************************************************************/
/*! Command-line option codes. Each code is stored in the last field of
    the matching long_options[] entry and is the value that the switch in
    parse_cmdline() dispatches on. */
/*************************************************************************/
#define CMD_NUMONE 1
#define CMD_NOREADVALS 2
#define CMD_NOWRITEVALS 3
#define CMD_RSHUF 4
#define CMD_CSHUF 5
#define CMD_SYMMETRIC 6
#define CMD_HELP 100
/*************************************************************************/
/*! Table of the options accepted by grKx, handed to
    gk_getopt_long_only() in parse_cmdline(). Each entry pairs an option
    name with its CMD_* code; the all-zero entry terminates the table.
    NOTE(review): the second field is 0 for every option here, presumably
    "takes no argument" as in getopt_long -- confirm against the
    declaration of struct gk_option. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"numone", 0, 0, CMD_NUMONE},
{"noreadvals", 0, 0, CMD_NOREADVALS},
{"nowritevals", 0, 0, CMD_NOWRITEVALS},
{"rshuf", 0, 0, CMD_RSHUF},
{"cshuf", 0, 0, CMD_CSHUF},
{"symmetric", 0, 0, CMD_SYMMETRIC},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Full help text, printed line by line for -help.                   */
/* The final empty string is the terminator: parse_cmdline() prints  */
/* entries until it reaches one whose strlen() is 0, so no line      */
/* before the end may be empty (blank lines are written as " ").     */
/* Every line must fit in 100 bytes including the trailing NUL.      */
/* NOTE(review): the text does not describe the required <ncopies>   */
/* argument and spells "randomly" as "randmly".                      */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
" ",
" Required parameters",
" infile, outfile",
" The name of the input/output CSR file.",
" ",
" inf/outf",
" The format of the input/output file.",
" Supported values are:",
" 1 GK_CSR_FMT_CLUTO",
" 2 GK_CSR_FMT_CSR",
" 3 GK_CSR_FMT_METIS",
" 4 GK_CSR_FMT_BINROW",
" 6 GK_CSR_FMT_IJV",
" 7 GK_CSR_FMT_BIJV",
" ",
" Optional parameters",
" -numone",
" Specifies that the numbering of the input file starts from 1. ",
" It only applies to CSR/IJV formats.",
" ",
" -nowritevals",
" Specifies that no values will be output.",
" ",
" -noreadvals",
" Specifies that the values will not be read when applicable.",
" ",
" -rshuf",
" Specifies that the rows will be randmly shuffled prior to output.",
" ",
" -cshuf",
" Specifies that the columns will be randmly shuffled prior to output.",
" ",
" -symmetric",
" Specifies that the row+column shuffling will be symmetric.",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong. The final empty string terminates the
   list (the print loop stops at the first zero-length entry). The hint
   names this program (grKx) rather than csrconv, from which the text was
   originally copied. */
static char shorthelpstr[][100] = {
" ",
" Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
" use 'grKx -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Command-line parser of grKx.

    Processes the options listed in long_options[] and then the five
    required positional arguments <infile> <inf> <outfile> <outf> <ncopies>.

    \param argc  the argument count received by main().
    \param argv  the argument vector received by main().
    \returns a heap-allocated params_t (allocated with gk_malloc) holding
             the parsed settings; the caller owns it.

    The function does not return on error: an illegal option, a wrong
    number of positional arguments, a missing input file or a number of
    copies smaller than 1 terminates the program with a failure status.
    -help prints the full help text and exits successfully. */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->numbering = 0;
  params->readvals  = 1;
  params->writevals = 1;
  params->rshuf     = 0;
  params->cshuf     = 0;
  params->symmetric = 0;
  params->ncopies   = 0;
  params->inf       = -1;
  params->outf      = -1;
  params->infile    = NULL;
  params->outfile   = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_NUMONE:
        params->numbering = 1;
        break;
      case CMD_NOREADVALS:
        params->readvals = 0;
        break;
      case CMD_NOWRITEVALS:
        params->writevals = 0;
        break;
      case CMD_RSHUF:
        params->rshuf = 1;
        break;
      case CMD_CSHUF:
        params->cshuf = 1;
        break;
      case CMD_SYMMETRIC:
        params->symmetric = 1;
        break;

      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(EXIT_SUCCESS);
        break;

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        /* a usage error must not look like success to the calling shell */
        exit(EXIT_FAILURE);
    }
  }

  if (argc-gk_optind != 5) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(EXIT_FAILURE);
  }

  params->infile  = gk_strdup(argv[gk_optind++]);
  params->inf     = atoi(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);
  params->outf    = atoi(argv[gk_optind++]);
  params->ncopies = atoi(argv[gk_optind++]);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  /* atoi() yields 0 for non-numeric text, and a negative count would be
     turned into negative allocation sizes by main() */
  if (params->ncopies < 1)
    errexit("The number of copies must be at least 1 (got %d).\n", params->ncopies);

  return params;
}
/*************************************************************************/
/*! the entry point */
/**************************************************************************/
int main(int argc, char *argv[])
{
ssize_t i, j, k, knnz, nrows, ncols, ncopies;
int what;
params_t *params;
gk_csr_t *mat, *kmat, *smat;
/* get command-line options */
params = parse_cmdline(argc, argv);
/* read the data */
mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
/* create the copies */
ncopies = params->ncopies;
nrows = mat->nrows;
ncols = mat->ncols;
knnz = mat->rowptr[nrows]*ncopies;
kmat = gk_csr_Create();
kmat->nrows = nrows*ncopies;
kmat->ncols = ncols*ncopies;
kmat->rowptr = gk_zmalloc(kmat->nrows+1, "rowptr");
kmat->rowind = gk_imalloc(knnz, "rowind");
if (mat->rowval)
kmat->rowval = gk_fmalloc(knnz, "rowval");
kmat->rowptr[0] = knnz = 0;
for (k=0; k<ncopies; k++) {
for (i=0; i<nrows; i++) {
for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++, knnz++) {
kmat->rowind[knnz] = mat->rowind[j] + k*ncols;
if (mat->rowval)
kmat->rowval[knnz] = mat->rowval[j];
}
kmat->rowptr[k*nrows+i+1] = knnz;
}
}
gk_csr_Free(&mat);
mat = kmat;
if (params->rshuf || params->cshuf) {
if (params->rshuf && params->cshuf)
what = GK_CSR_ROWCOL;
else if (params->rshuf)
what = GK_CSR_ROW;
else
what = GK_CSR_COL;
smat = gk_csr_Shuffle(mat, what, params->symmetric);
gk_csr_Free(&mat);
mat = smat;
}
if (params->writevals && mat->rowval == NULL)
mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
gk_csr_Free(&mat);
}
/*!
\file
\brief It takes as input two CSR matrices and finds for each row of the
first matrix the most similar rows in the second matrix.
\date 9/27/2014
\author George
\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time options and timers of m2mnbrs. The structure is allocated and
    its options are filled in by parse_cmdline(); the timers are cleared
    in main() before FindNeighbors() runs. */
/*************************************************************************/
typedef struct {
int simtype; /*!< The similarity type to use (a GK_CSR_* similarity code) */
int nnbrs; /*!< The maximum number of nearest neighbors to output */
float minsim; /*!< The minimum similarity to use for keeping neighbors */
int verbosity; /*!< The reporting verbosity level */
char *qfile; /*!< The file storing the query documents */
char *cfile; /*!< The file storing the collection documents */
char *outfile; /*!< The file where the output will be stored (NULL if none was given) */
/* wall-clock timers; only timer_global and timer_1 are started in the
   code shown in this file, the others are only cleared and printed */
double timer_global;
double timer_1;
double timer_2;
double timer_3;
double timer_4;
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Version numbers, printed as major.minor.subminor in the banner of main() */
#define VER_MAJOR 0
#define VER_MINOR 1
#define VER_SUBMINOR 0
/* Command-line option codes: stored in the last field of the matching
   long_options[] entry and dispatched on by the switch in parse_cmdline() */
#define CMD_SIMTYPE 10
#define CMD_NNBRS 20
#define CMD_MINSIM 22
#define CMD_VERBOSITY 70
#define CMD_HELP 100
/* The text labels for the different simtypes. main() indexes this table
   directly with params->simtype when printing the banner.
   NOTE(review): this assumes the numeric values of the GK_CSR_* similarity
   codes line up with the positions used here (1=dotp, 2=cos, 3=jac) --
   confirm against the GKlib definitions. */
static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
/*************************************************************************/
/*! Table of the options accepted by m2mnbrs, handed to
    gk_getopt_long_only() in parse_cmdline(). Each entry pairs an option
    name with its CMD_* code; the all-zero entry terminates the table.
    NOTE(review): the second field is 1 for the options whose value
    parse_cmdline() reads from gk_optarg and 0 for -help, presumably the
    "has argument" flag of getopt_long -- confirm against struct gk_option. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"simtype", 1, 0, CMD_SIMTYPE},
{"nnbrs", 1, 0, CMD_NNBRS},
{"minsim", 1, 0, CMD_MINSIM},
{"verbosity", 1, 0, CMD_VERBOSITY},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/* Maps the textual values accepted by -simtype to their GK_CSR_* codes.
   parse_cmdline() looks a value up with gk_GetStringID() and treats a
   result of -1 as an invalid simtype. The {NULL, 0} entry ends the table.
   Note that "dotp" has a label in simtypenames[] but no entry here. */
static gk_StringMap_t simtype_options[] = {
{"cos", GK_CSR_COS},
{"jac", GK_CSR_JAC},
{NULL, 0}
};
/*-------------------------------------------------------------------
* Full help text, printed line by line for -help.
* The final empty string is the terminator: parse_cmdline() prints
* entries until it reaches one whose strlen() is 0, so blank lines are
* written as " ". Every line must fit in 100 bytes including the NUL.
* NOTE(review): the text states that the default verbosity is 0 while
* parse_cmdline() initializes it to -1; the only check visible in this
* file (verbosity > 0 in FindNeighbors) behaves the same for both.
*-------------------------------------------------------------------*/
static char helpstr[][100] =
{
" ",
"Usage: m2mnbrs [options] qfile cfile [outfile]",
" ",
" Options",
" -simtype=string",
" Specifies the type of similarity to use. Possible values are:",
" cos - Cosine similarity",
" jac - Jacquard similarity [default]",
" ",
" -nnbrs=int",
" Specifies the maximum number of nearest neighbors.",
" A value of -1 indicates that all neighbors will be considered.",
" Default value is 100.",
" ",
" -minsim=float",
" The minimum allowed similarity between neighbors. ",
" Default value is .25.",
" ",
" -verbosity=int",
" Specifies the level of debugging information to be displayed.",
" Default value is 0.",
" ",
" -help",
" Prints this message.",
""
};
/*************************************************************************/
/*! Function prototypes for the routines defined later in this file */
/*************************************************************************/
/* parses the command line into a newly allocated params_t */
params_t *parse_cmdline(int argc, char *argv[]);
/* finds, for every row of qmat, its most similar rows in cmat */
void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat);
/*************************************************************************/
/*! Command-line parser of m2mnbrs.

    Processes the options listed in long_options[] and then the positional
    arguments qfile cfile [outfile].

    \param argc  the argument count received by main().
    \param argv  the argument vector received by main().
    \returns a heap-allocated params_t (allocated with gk_malloc) holding
             the parsed settings; outfile is NULL when it was not given.

    The function does not return on error: an illegal option, an invalid
    -simtype value, fewer than two positional arguments or a missing input
    file terminates the program. -help prints the help text and exits
    successfully. */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->simtype   = GK_CSR_JAC;
  params->nnbrs     = 100;
  params->minsim    = .25;
  params->verbosity = -1;
  params->qfile     = NULL;
  params->cfile     = NULL;
  params->outfile   = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_SIMTYPE:
        if (gk_optarg) {
          if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
            errexit("Invalid simtype of %s.\n", gk_optarg);
        }
        break;

      case CMD_NNBRS:
        if (gk_optarg) params->nnbrs = atoi(gk_optarg);
        break;

      case CMD_MINSIM:
        if (gk_optarg) params->minsim = atof(gk_optarg);
        break;

      case CMD_VERBOSITY:
        if (gk_optarg) params->verbosity = atoi(gk_optarg);
        break;

      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(EXIT_SUCCESS);
        break;

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(EXIT_FAILURE);
    }
  }

  /* Get the input/output file info. Both qfile and cfile are mandatory, so
     at least two positional arguments must remain; with only one, reading
     cfile would pick up the NULL that terminates argv. */
  if (argc-gk_optind < 2) {
    printf("Missing input/output file info.\n Use %s -help for a summary of the options.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  params->qfile   = gk_strdup(argv[gk_optind++]);
  params->cfile   = gk_strdup(argv[gk_optind++]);
  params->outfile = (gk_optind < argc ? gk_strdup(argv[gk_optind++]) : NULL);

  if (!gk_fexists(params->qfile))
    errexit("input file %s does not exist.\n", params->qfile);
  if (!gk_fexists(params->cfile))
    errexit("input file %s does not exist.\n", params->cfile);

  return params;
}
/*************************************************************************/
/*! Entry point of m2mnbrs: reads the query and collection matrices,
    prints a banner with the run settings, runs FindNeighbors() under the
    global wall-clock timer and reports all timers before exiting. */
/**************************************************************************/
int main(int argc, char *argv[])
{
  gk_csr_t *qmat, *cmat;
  params_t *params;

  params = parse_cmdline(argc, argv);

  /* both inputs are read in CSR format */
  qmat = gk_csr_Read(params->qfile, GK_CSR_FMT_CSR, 1, 0);
  cmat = gk_csr_Read(params->cfile, GK_CSR_FMT_CSR, 1, 0);

  /* banner with the settings and the sizes of the two matrices */
  fputs("********************************************************************************\n", stdout);
  printf("sd (%d.%d.%d) Copyright 2014, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
  printf(" simtype=%s, nnbrs=%d, minsim=%.2f\n",
      simtypenames[params->simtype], params->nnbrs, params->minsim);
  printf(" qfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      params->qfile, qmat->nrows, qmat->ncols, qmat->rowptr[qmat->nrows]);
  printf(" cfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      params->cfile, cmat->nrows, cmat->ncols, cmat->rowptr[cmat->nrows]);

  /* reset every timer before the timed section */
  gk_clearwctimer(params->timer_global);
  gk_clearwctimer(params->timer_1);
  gk_clearwctimer(params->timer_2);
  gk_clearwctimer(params->timer_3);
  gk_clearwctimer(params->timer_4);

  /* the timed section */
  gk_startwctimer(params->timer_global);
  FindNeighbors(params, qmat, cmat);
  gk_stopwctimer(params->timer_global);

  /* timing report */
  printf(" wclock: %.2lfs\n", gk_getwctimer(params->timer_global));
  printf(" timer1: %.2lfs\n", gk_getwctimer(params->timer_1));
  printf(" timer2: %.2lfs\n", gk_getwctimer(params->timer_2));
  printf(" timer3: %.2lfs\n", gk_getwctimer(params->timer_3));
  printf(" timer4: %.2lfs\n", gk_getwctimer(params->timer_4));
  fputs("********************************************************************************\n", stdout);

  gk_csr_Free(&qmat);
  gk_csr_Free(&cmat);

  exit(EXIT_SUCCESS);
}
/*************************************************************************/
/*! For every row of the query matrix, finds its most similar rows in the
    collection matrix with gk_csr_GetSimilarRows() and, when an output
    file was given, writes one "query neighbor similarity" line per hit.

    \param params  the run settings (simtype, nnbrs, minsim, verbosity,
                   outfile) and timers; timer_1 covers the query loop.
    \param qmat    the query matrix; its rows are normalized in place when
                   the similarity is GK_CSR_COS.
    \param cmat    the collection matrix; it is normalized under the same
                   condition and gets a column index and squared row norms
                   attached. */
/**************************************************************************/
void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat)
{
  FILE *out = NULL;
  gk_fkv_t *hits, *cand;
  int32_t *marker;
  int q, h, nfound;

  GKASSERT(qmat->ncols <= cmat->ncols);

  /* cosine similarity works on unit-length rows */
  if (params->simtype == GK_CSR_COS) {
    gk_csr_Normalize(qmat, GK_CSR_ROW, 2);
    gk_csr_Normalize(cmat, GK_CSR_ROW, 2);
  }

  /* inverted (column) index and row norms of the collection */
  gk_csr_CreateIndex(cmat, GK_CSR_COL);
  gk_csr_ComputeSquaredNorms(cmat, GK_CSR_ROW);

  /* the output file is optional */
  if (params->outfile)
    out = gk_fopen(params->outfile, "w", "FindNeighbors: fpout");

  /* working arrays, one slot per collection row */
  hits   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: hits");
  marker = gk_i32smalloc(cmat->nrows, -1, "FindNeighbors: marker");
  cand   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: cand");

  /* process the queries one at a time */
  gk_startwctimer(params->timer_1);
  for (q=0; q<qmat->nrows; q++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", q);

    /* the q-th query is passed as a sparse vector: length, indices, values */
    nfound = gk_csr_GetSimilarRows(cmat,
                 qmat->rowptr[q+1]-qmat->rowptr[q],
                 qmat->rowind+qmat->rowptr[q],
                 qmat->rowval+qmat->rowptr[q],
                 params->simtype, params->nnbrs, params->minsim,
                 hits, marker, cand);

    if (out == NULL)
      continue;

    for (h=0; h<nfound; h++)
      fprintf(out, "%8d %8zd %.3f\n", q, hits[h].val, hits[h].key);
  }
  gk_stopwctimer(params->timer_1);

  /* cleanup */
  if (out != NULL)
    gk_fclose(out);
  gk_free((void **)&hits, &marker, &cand, LTERM);
}
/*!
\file
\brief A simple (personalized) random walk program to test GKlib's routines
\date 6/12/2008
\author George
\version \verbatim $Id$ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Run-time options of rw. The structure is allocated and filled in by
    parse_cmdline() and read by main() and print_init_info(). */
/*************************************************************************/
typedef struct {
/* maximum number of iterations (-niter); passed to gk_rw_PageRank() */
int niter;
/* number of random restart vectors to compute (-ntvs); -1 when unset */
int ntvs;
/* 1-based source vertex of the personalized PR (-ppr); -1 when unset */
int ppr;
/* error tolerance (-eps); passed to gk_rw_PageRank() */
float eps;
/* follow-the-adjacent-links probability (-lamda); passed to gk_rw_PageRank() */
float lamda;
/* name of the input graph file (heap copy made with gk_strdup) */
char *infile;
/* name of the output file (heap copy made with gk_strdup) */
char *outfile;
} params_t;
/*************************************************************************/
/*! Command-line option codes. Each code is stored in the last field of
    the matching long_options[] entry and is the value that the switch in
    parse_cmdline() dispatches on. */
/*************************************************************************/
#define CMD_NITER 1
#define CMD_EPS 2
#define CMD_LAMDA 3
#define CMD_PPR 4
#define CMD_NTVS 5
#define CMD_HELP 10
/*************************************************************************/
/*! Table of the options accepted by rw, handed to gk_getopt_long_only()
    in parse_cmdline(). Each entry pairs an option name with its CMD_*
    code; the all-zero entry terminates the table.
    NOTE(review): the second field is 1 for the options whose value
    parse_cmdline() reads from gk_optarg and 0 for -help, presumably the
    "has argument" flag of getopt_long -- confirm against struct gk_option. */
/*************************************************************************/
static struct gk_option long_options[] = {
{"niter", 1, 0, CMD_NITER},
{"lamda", 1, 0, CMD_LAMDA},
{"eps", 1, 0, CMD_EPS},
{"ppr", 1, 0, CMD_PPR},
{"ntvs", 1, 0, CMD_NTVS},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Full help text, printed line by line for -help.                   */
/* The final empty string is the terminator: parse_cmdline() prints  */
/* entries until it reaches one whose strlen() is 0, so blank lines  */
/* are written as " ". Every line must fit in 100 bytes including    */
/* the trailing NUL.                                                 */
/* NOTE(review): the required <out-file> argument is not described,  */
/* and "tollerance" is a misspelling of "tolerance".                 */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: rw [options] <graph-file> <out-file>",
" ",
" Required parameters",
" graph-file",
" The name of the file storing the transactions. The file is in ",
" Metis' graph format.",
" ",
" Optional parameters",
" -niter=int",
" Specifies the maximum number of iterations. [default: 100]",
" ",
" -lamda=float",
" Specifies the follow-the-adjacent-links probability. [default: 0.80]",
" ",
" -eps=float",
" Specifies the error tollerance. [default: 1e-10]",
" ",
" -ppr=int",
" Specifies the source of the personalized PR. [default: -1]",
" ",
" -ntvs=int",
" Specifies the number of test-vectors to compute. [default: -1]",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is not exactly two. The final empty string
   terminates the list (the print loop stops at the first zero-length
   entry). */
static char shorthelpstr[][100] = {
" ",
" Usage: rw [options] <graph-file> <out-file>",
" use 'rw -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes for the routines defined after main() */
/*************************************************************************/
/* prints the matrix dimensions and the run options */
void print_init_info(params_t *params, gk_csr_t *mat);
/* prints the memory-usage summary */
void print_final_info(params_t *params);
/* parses the command line into a newly allocated params_t */
params_t *parse_cmdline(int argc, char *argv[]);
/*************************************************************************/
/*! Entry point of rw. Reads a graph in Metis format and runs
    gk_rw_PageRank() in one of three modes, selected by the options:

      - -ntvs=n : n runs, each restarting from a randomly generated and
                  normalized distribution; the output has one line per
                  vertex with the n scores.
      - -ppr=v  : a single personalized run whose restart distribution is
                  concentrated on vertex v (numbered from 1); the output
                  has one score per line.
      - neither : a run starting from the uniform distribution; each
                  output line holds the score, the number of adjacent
                  entries of the vertex, the largest absolute score
                  difference to a neighbor and that neighbor (1-based).

    A -ppr value outside [1, nrows] terminates the program with an error
    instead of writing outside the score vector. */
/**************************************************************************/
int main(int argc, char *argv[])
{
  ssize_t i, j, niter;
  params_t *params;
  gk_csr_t *mat;
  FILE *fpout;

  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);

  /* display some basic stats */
  print_init_info(params, mat);

  if (params->ntvs != -1) {
    /* compute the pr for different randomly generated restart-distribution vectors */
    float **prs;

    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");

    /* generate the random restart vectors */
    for (j=0; j<params->ntvs; j++) {
      for (i=0; i<mat->nrows; i++)
        prs[j][i] = RandomInRange(931);
      /* scale the vector so that its entries sum to 1 */
      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);

      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
      printf("tvs#: %zd; niters: %zd\n", j, niter);
    }

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (j=0; j<params->ntvs; j++)
        fprintf(fpout, "%.4e ", prs[j][i]);
      fprintf(fpout, "\n");
    }
    gk_fclose(fpout);

    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
  }
  else if (params->ppr != -1) {
    /* compute the personalized pr from the specified vertex */
    float *pr;

    /* the vertex is given 1-based and used as pr[ppr-1] below, so it has
       to lie in [1, nrows] */
    if (params->ppr < 1 || params->ppr > mat->nrows)
      errexit("The -ppr vertex %d is outside the valid range [1, %d].\n",
          params->ppr, mat->nrows);

    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");
    pr[params->ppr-1] = 1.0;

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("ppr: %d; niters: %zd\n", params->ppr, niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++)
      fprintf(fpout, "%.4e\n", pr[i]);
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }
  else {
    /* compute the standard pr */
    int jmax;
    float diff, maxdiff;
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("pr; niters: %zd\n", niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      /* find the neighbor whose score differs the most from that of i;
         jmax stays at i when the vertex has no neighbors */
      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
          maxdiff = diff;
          jmax = mat->rowind[j];
        }
      }
      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i],
          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
    }
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }

  gk_csr_Free(&mat);

  /* display some final stats */
  print_final_info(params);

  return 0;
}
/*************************************************************************/
/*! Prints the opening banner: the input file with the matrix dimensions
    (rows, columns, number of stored entries) followed by the values of
    the run options.

    \param params  the parsed run options.
    \param mat     the matrix that was read from params->infile. */
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat)
{
  fputs("*******************************************************************************\n", stdout);
  fputs(" fis\n\n", stdout);

  fputs("Matrix Information ---------------------------------------------------------\n", stdout);
  printf(" input file=%s, [%d, %d, %zd]\n",
      params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
  fputs("\n", stdout);

  fputs("Options --------------------------------------------------------------------\n", stdout);
  printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n",
      params->niter, params->ntvs, params->ppr, params->lamda, params->eps);
  fputs("\n", stdout);

  fputs("Performing random walks... ----------------------------------------------\n", stdout);
}
/*************************************************************************/
/*! Prints the closing banner with the maximum and current memory usage
    reported by gk_GetMaxMemoryUsed() and gk_GetCurMemoryUsed().

    \param params  the run options; not used by this routine. */
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t peak, current;

  fputs("\n", stdout);
  fputs("Memory Usage Information -----------------------------------------------------\n", stdout);

  peak = (ssize_t) gk_GetMaxMemoryUsed();
  printf(" Maximum memory used: %10zd bytes\n", peak);

  current = (ssize_t) gk_GetCurMemoryUsed();
  printf(" Current memory used: %10zd bytes\n", current);

  fputs("********************************************************************************\n", stdout);
}
/*************************************************************************/
/*! Command-line parser of rw.

    Processes the options listed in long_options[] and then the two
    required positional arguments <graph-file> <out-file>.

    \param argc  the argument count received by main().
    \param argv  the argument vector received by main().
    \returns a heap-allocated params_t (allocated with gk_malloc) holding
             the parsed settings; the caller owns it.

    The function does not return on error: an illegal option, a wrong
    number of positional arguments, a missing input file or the combined
    use of -ppr and -ntvs terminates the program with a failure status.
    -help prints the full help text and exits successfully. */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->niter   = 100;
  params->ppr     = -1;
  params->ntvs    = -1;
  params->eps     = 1e-10;
  params->lamda   = 0.80;
  params->infile  = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_NITER:
        if (gk_optarg) params->niter = atoi(gk_optarg);
        break;
      case CMD_NTVS:
        if (gk_optarg) params->ntvs = atoi(gk_optarg);
        break;
      case CMD_PPR:
        if (gk_optarg) params->ppr = atoi(gk_optarg);
        break;
      case CMD_EPS:
        if (gk_optarg) params->eps = atof(gk_optarg);
        break;
      case CMD_LAMDA:
        if (gk_optarg) params->lamda = atof(gk_optarg);
        break;

      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(EXIT_SUCCESS);
        break;

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        /* a usage error must not look like success to the calling shell */
        exit(EXIT_FAILURE);
    }
  }

  if (argc-gk_optind != 2) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(EXIT_FAILURE);
  }

  params->infile  = gk_strdup(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  /* the two modes are mutually exclusive */
  if (params->ppr != -1 && params->ntvs != -1)
    errexit("Only one of the -ppr and -ntvs options can be specified.\n");

  return params;
}
/*!
\file
\brief A simple program to convert a tensor in coordinate format into an unfolded
matrix
\author George
*/
#include <GKlib.h>
/*! Converts a 3-way tensor in coordinate format into an unfolded matrix.

    The input file holds one "k i j value" entry per line with indices
    numbered from 1. Entry (k, i, j) becomes element (k*nI + i, j) of a
    matrix with nK*nI rows and nJ columns (0-based after conversion).
    The matrix is first assembled row-wise, then transposed into a
    column-wise structure, which is what gets written to stdout: a
    "nrows ncols nnz" header followed, for every column, by its number
    of entries and one "row value" line per entry. Progress information
    and duplicate-index warnings go to stderr.

    \param argc  must be 2.
    \param argv  argv[1] is the name of the input file.
    \returns 0 on success; errors terminate the program via errexit(). */
int main(int argc, char *argv[])
{
  size_t nnz, i, nI, nJ, nK, nrows, ncols;
  ssize_t j;   /* walks ranges bounded by the signed rowptr/colptr entries */
  int32_t *I, *J, *K, *rowind, *colind;
  ssize_t *rowptr, *colptr;
  float *V, *rowval, *colval;
  FILE *fpin;

  if (argc != 2)
    errexit("Usage %s <infile> [%d]\n", argv[0], argc);

  if (!gk_fexists(argv[1]))
    errexit("File %s does not exist.\n", argv[1]);

  /* the number of lines of the file is the number of tensor entries */
  gk_getfilestats(argv[1], &nnz, NULL, NULL, NULL);

  I = gk_i32malloc(nnz, "I");
  J = gk_i32malloc(nnz, "J");
  K = gk_i32malloc(nnz, "K");
  V = gk_fmalloc(nnz, "V");

  fprintf(stderr, "Input nnz: %zu\n", nnz);

  fpin = gk_fopen(argv[1], "r", "infile");
  for (i=0; i<nnz; i++) {
    if (4 != fscanf(fpin, "%d %d %d %f", K+i, I+i, J+i, V+i))
      errexit("Failed to read 4 values in line %zu\n", i);
    /* indices are 1-based in the file; anything smaller would turn into
       a negative subscript after the conversion below */
    if (K[i] < 1 || I[i] < 1 || J[i] < 1)
      errexit("Indices smaller than 1 in line %zu\n", i);
    K[i]--; I[i]--; J[i]--;
  }
  gk_fclose(fpin);

  nI = gk_i32max(nnz, I, 1)+1;
  nJ = gk_i32max(nnz, J, 1)+1;
  nK = gk_i32max(nnz, K, 1)+1;

  fprintf(stderr, "nI: %zu, nJ: %zu, nK: %zu\n", nI, nJ, nK);

  nrows = nK*nI;
  ncols = nJ;

  /* build the row-wise structure: count, prefix-sum, fill, shift back */
  rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
  for (i=0; i<nnz; i++)
    rowptr[K[i]*nI+I[i]]++;
  MAKECSR(i, nrows, rowptr);

  rowind = gk_i32malloc(nnz, "rowind");
  rowval = gk_fmalloc(nnz, "rowval");
  for (i=0; i<nnz; i++) {
    rowind[rowptr[K[i]*nI+I[i]]] = J[i];
    rowval[rowptr[K[i]*nI+I[i]]] = V[i];
    rowptr[K[i]*nI+I[i]]++;
  }
  SHIFTCSR(i, nrows, rowptr);

  gk_free((void **)&I, &J, &K, &V, LTERM);

  /* build the column-wise structure the same way */
  colptr = gk_zsmalloc(ncols+1, 0, "colptr");
  colind = gk_i32malloc(nnz, "colind");
  colval = gk_fmalloc(nnz, "colval");
  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++)
      colptr[rowind[j]]++;
  }
  MAKECSR(i, ncols, colptr);

  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
      colind[colptr[rowind[j]]] = i;
      colval[colptr[rowind[j]]] = rowval[j];
      colptr[rowind[j]]++;
    }
  }
  SHIFTCSR(i, ncols, colptr);

  /* sanity check: rows were visited in increasing order, so repeated
     entries of a column end up next to each other */
  for (i=0; i<ncols; i++) {
    for (j=colptr[i]+1; j<colptr[i+1]; j++) {
      if (colind[j-1] == colind[j])
        fprintf(stderr, "Duplicate row indices: %d %d %d\n", (int)i, colind[j], colind[j-1]);
    }
  }

  /* write the unfolded matrix column by column */
  printf("%zu %zu %zu\n", nrows, ncols, nnz);
  for (i=0; i<ncols; i++) {
    printf("%zd\n", colptr[i+1]-colptr[i]);
    for (j=colptr[i]; j<colptr[i+1]; j++)
      printf("%d %.3f\n", colind[j], colval[j]);
  }

  gk_free((void **)&rowptr, &rowind, &rowval, &colptr, &colind, &colval, LTERM);

  return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment