Added asserts and filled out the spec files.

cfd0c994 · Davis King · 5f191804 · cfd0c994 · cfd0c994 · cfd0c994
Commit cfd0c994 authored Dec 25, 2011 by Davis King
4 changed files
--- a/dlib/lsh/create_random_projection_hash.h
+++ b/dlib/lsh/create_random_projection_hash.h
@@ -14,12 +14,36 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

-    template <typename vector_type>
+    template <
+        typename vector_type
+        >
    projection_hash create_random_projection_hash (
        const vector_type& v,
        const int bits
    ) 
    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0 < bits && bits <= 32 &&
+                    v.size() > 1,
+            "\t projection_hash create_random_projection_hash()"
+            << "\n\t Invalid arguments were given to this function."
+            << "\n\t bits: " << bits
+            << "\n\t v.size(): " << v.size() 
+            );
+
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < v.size(); ++i)
+        {
+            DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]), 
+                    "\t projection_hash create_random_projection_hash()"
+                   << "\n\t Invalid arguments were given to this function."
+                   << "\n\t m(0).size(): " << v[0].size()
+                   << "\n\t m("<<i<<").size(): " << v[i].size() 
+                   << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i]) 
+                );
+        }
+#endif
+

        // compute a whitening matrix
        matrix<double> whiten = trans(chol(pinv(covariance(vector_to_matrix(v)))));

--- a/dlib/lsh/create_random_projection_hash_abstract.h
+++ b/dlib/lsh/create_random_projection_hash_abstract.h
@@ -11,11 +11,32 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

-    template <typename vector_type>
+    template <
+        typename vector_type
+        >
    projection_hash create_random_projection_hash (
        const vector_type& v,
        const int bits
    );
+    /*!
+        requires
+            - 0 < bits <= 32
+            - v.size() > 1
+            - vector_type == a std::vector containing dlib::matrix objects, each 
+              representing a column vector of the same size.
+            - for all valid i, j:
+                - is_col_vector(v[i]) == true 
+                - v[i].size() > 0
+                - v[i].size() == v[j].size() 
+                - i.e. v contains only column vectors and all the column vectors
+                  have the same non-zero length
+        ensures
+            - returns a hash function H such that:
+                - H.num_hash_bins() == pow(2,bits)
+                - H will be set up so that it hashes the contents of v such that
+                  each bin ends up with roughly the same number of elements
+                  in it.
+    !*/

 // ----------------------------------------------------------------------------------------


--- a/dlib/lsh/projection_hash.h
+++ b/dlib/lsh/projection_hash.h
@@ -23,7 +23,17 @@ namespace dlib
        projection_hash(
            const matrix_exp<EXP1>& proj_,
            const matrix_exp<EXP2>& offset_
-        ) : proj(proj_), offset(offset_) {}
+        ) : proj(proj_), offset(offset_) 
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(proj.nr() == offset.nr(),
+                "\t projection_hash::projection_hash()"
+                << "\n\t Invalid arguments were given to this function."
+                << "\n\t proj.nr():   " << proj.nr() 
+                << "\n\t offset.nr(): " << offset.nr() 
+                );
+
+        }

        const matrix<double>& get_projection_matrix (
        ) const { return proj; }
@@ -31,10 +41,10 @@ namespace dlib
        const matrix<double,0,1>& get_offset_matrix (
        ) const { return offset; }

-        unsigned long size (
+        unsigned long num_hash_bins (
        ) const
        {
-            return (unsigned long)std::pow(2, offset.size());
+            return static_cast<unsigned long>(std::pow(2, offset.size()));
        }

        template <typename EXP>
@@ -42,6 +52,17 @@ namespace dlib
            const matrix_exp<EXP>& v
        ) const
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(v) && 
+                        v.size() == get_projection_matrix().nc() &&
+                        v.size() > 0,
+                "\t unsigned long projection_hash::operator()(v)"
+                << "\n\t Invalid arguments were given to this function."
+                << "\n\t is_col_vector(v):             " << is_col_vector(v) 
+                << "\n\t get_projection_matrix().nc(): " << get_projection_matrix().nc() 
+                << "\n\t v.size():                     " << v.size() 
+                );
+
            return do_hash(proj*matrix_cast<double>(v) + offset);
        }


--- a/dlib/lsh/projection_hash_abstract.h
+++ b/dlib/lsh/projection_hash_abstract.h
@@ -14,31 +14,78 @@ namespace dlib
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
+                This is a tool for hashing elements of a vector space into the integers.  
+                It is intended to represent locality sensitive hashing functions such as 
+                the popular random projection hashing method.
+                
+                In particular, it represents hash functions of the form:
+                    hash bit 0 = sign(rowm(P*v + O,0))
+                    hash bit 1 = sign(rowm(P*v + O,1))
+                    hash bit 2 = sign(rowm(P*v + O,2))
+                    ...
+                Where v is the vector to be hashed.  The parameters of the projection
+                hash are the P and O matrices.  
        !*/
    public:

        projection_hash(
        );
+        /*!
+            ensures
+                - #get_projection_matrix().size() == 0
+                - #get_offset_matrix().size() == 0
+        !*/

        template <typename EXP1, typename EXP2>
        projection_hash(
            const matrix_exp<EXP1>& proj,
            const matrix_exp<EXP2>& offset
        ); 
+        /*!
+            requires
+                - proj.nr() == offset.nr()
+            ensures
+                - #get_projection_matrix() == proj
+                - #get_offset_matrix() == offset
+        !*/

        const matrix<double>& get_projection_matrix (
        ) const;
+        /*!
+            ensures
+                - returns the P matrix discussed above in the WHAT THIS OBJECT REPRESENTS
+                  section.
+        !*/

        const matrix<double,0,1>& get_offset_matrix (
        ) const; 
+        /*!
+            ensures
+                - returns the O matrix discussed above in the WHAT THIS OBJECT REPRESENTS
+                  section.
+        !*/

-        unsigned long size (
+        unsigned long num_hash_bins (
        ) const;
+        /*!
+            ensures
+                - returns the number of possible outputs from this hashing function.
+                - Specifically, returns: std::pow(2, get_offset_matrix().size())
+        !*/

        template <typename EXP>
        unsigned long operator() (
            const matrix_exp<EXP>& v
        ) const;
+        /*!
+            requires
+                - is_col_vector(v) == true
+                - v.size() == get_projection_matrix().nc()
+                - v.size() > 0
+            ensures
+                - returns the hash of v, a value in the range [0, num_hash_bins()), computed
+                  using the method discussed in the WHAT THIS OBJECT REPRESENTS section.
+        !*/
    };

 // ----------------------------------------------------------------------------------------
@@ -47,11 +94,17 @@ namespace dlib
        const projection_hash& item,
        std::ostream& out
    );
+    /*!
+        provides serialization support 
+    !*/

    void deserialize (
        projection_hash& item,
        std::istream& in 
    );
+    /*!
+        provides deserialization support 
+    !*/

 // ----------------------------------------------------------------------------------------