/* Copyright (c) 2013, Philipp Krähenbühl All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Stanford University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "permutohedral.h" #ifdef WIN32 inline int round(double X) { return int(X+.5); } #endif #ifdef __SSE__ // SSE Permutoheral lattice # define SSE_PERMUTOHEDRAL #endif #if defined(SSE_PERMUTOHEDRAL) # include # include # ifdef __SSE4_1__ # include # endif #endif /************************************************/ /*** Hash Table ***/ /************************************************/ class HashTable{ protected: size_t key_size_, filled_, capacity_; std::vector< short > keys_; std::vector< int > table_; void grow(){ // Create the new memory and copy the values in int old_capacity = capacity_; capacity_ *= 2; std::vector old_keys( (old_capacity+10)*key_size_ ); std::copy( keys_.begin(), keys_.end(), old_keys.begin() ); std::vector old_table( capacity_, -1 ); // Swap the memory table_.swap( old_table ); keys_.swap( old_keys ); // Reinsert each element for( int i=0; i= 0){ int e = old_table[i]; size_t h = hash( getKey(e) ) % capacity_; for(; table_[h] >= 0; h = h= capacity_) grow(); // Get the hash value size_t h = hash( k ) % capacity_; // Find the element with he right key, using linear probing while(1){ int e = table_[h]; if (e==-1){ if (create){ // Insert a new key and return the new id for( size_t i=0; i0; j-- ){ __m128 cf = f[j-1]*scale_factor[j-1]; elevated[j] = sm - _mm_set1_ps(j)*cf; sm += cf; } elevated[0] = sm; // Find the closest 0-colored simplex through rounding __m128 sum = Zero; for( int i=0; i<=d_; i++ ){ __m128 v = invdplus1 * elevated[i]; #ifdef __SSE4_1__ v = _mm_round_ps( v, _MM_FROUND_TO_NEAREST_INT ); #else v = _mm_cvtepi32_ps( _mm_cvtps_epi32( v ) ); #endif rem0[i] = v*dplus1; sum += v; } // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) for( int i=0; i<=d_; i++ ) rank[i] = Zero; for( int i=0; i0; j-- ){ float cf = f[j-1]*scale_factor[j-1]; elevated[j] = sm - j*cf; sm += cf; } elevated[0] = sm; // Find the closest 0-colored simplex through rounding float down_factor = 1.0f / (d_+1); float up_factor = (d_+1); int sum = 0; for( int i=0; i<=d_; i++ ){ //int rd1 = round( down_factor * elevated[i]); int rd2; float v = down_factor * elevated[i]; float up = ceilf(v)*up_factor; float down = floorf(v)*up_factor; if (up - elevated[i] < elevated[i] - down) rd2 = (short)up; else rd2 = (short)down; //if(rd1!=rd2) // break; rem0[i] = rd2; sum += rd2*down_factor; } // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) for( int i=0; i<=d_; i++ ) rank[i] = 0; for( int i=0; i d_ ){ rank[i] -= d_+1; rem0[i] -= d_+1; } } // Compute the barycentric coordinates (p.10 in [Adams etal 2010]) for( int i=0; i<=d_+1; i++ ) barycentric[i] = 0; for( int i=0; i<=d_; i++ ){ float v = (elevated[i] - rem0[i])*down_factor; barycentric[d_-rank[i] ] += v; barycentric[d_-rank[i]+1] -= v; } // Wrap around barycentric[0] += 1.0 + barycentric[d_+1]; // Compute all vertices and their offset for( int remainder=0; remainder<=d_; remainder++ ){ for( int i=0; i 0 (used for blurring) float * values = new float[ (M_+2)*value_size ]; float * new_values = new float[ (M_+2)*value_size ]; for( int i=0; i<(M_+2)*value_size; i++ ) values[i] = new_values[i] = 0; // Splatting for( int i=0; i=0; reverse?j--:j++ ){ for( int i=0; i 0 (used for blurring) __m128 * sse_val = (__m128*) _mm_malloc( sse_value_size*sizeof(__m128), 16 ); __m128 * values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); __m128 * new_values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); __m128 Zero = _mm_set1_ps( 0 ); for( int i=0; i<(M_+2)*sse_value_size; i++ ) values[i] = new_values[i] = Zero; for( int i=0; i=0; reverse?j--:j++ ){ for( int i=0; i 0 (used for blurring) float * values = new float[ (M_+2)*value_size ]; float * new_values = new float[ (M_+2)*value_size ]; // Set the results to 0 std::fill( df, df+N_*d_, 0.f ); // Initialize some constants std::vector scale_factor( d_ ); float inv_std_dev = sqrt(2.0 / 3.0)*(d_+1); for( int i=0; i=0; dir?j--:j++ ){ for( int i=0; i r_a( (d_+1)*value_size ), sm( value_size ); for( int i=0; id_?0:r0+1; int o0 = offset_[i*(d_+1)+r0]+1; int o1 = offset_[i*(d_+1)+r1]+1; for( int k=0; k