"vscode:/vscode.git/clone" did not exist on "8c71f3f86d1c555ea6c464fcdb67b3448838092f"
Unverified commit b5be4f4b, authored by Tong He and committed by GitHub
Browse files

[Dataset] Update dataset hash with deterministic function (#1919)

* update hash with deterministic function

* update docstring

* add test for determinism
parent 39ed0966
......@@ -3,7 +3,7 @@
from __future__ import absolute_import
import os, sys
import os, sys, hashlib
import abc
from .utils import download, extract_archive, get_download_dir, makedirs
from ..utils import retry_method_with_fix
......@@ -43,7 +43,7 @@ class DGLDataset(object):
A tuple of values as the input for the hash function.
Users can distinguish instances (and their caches on the disk)
from the same dataset class by comparing the hash values.
Default: (), the corresponding hash value is 3527539
Default: (), the corresponding hash value is 'f9065fa7'.
force_reload : bool
Whether to reload the dataset. Default: False
verbose : bool
......@@ -56,6 +56,7 @@ class DGLDataset(object):
self._force_reload = force_reload
self._verbose = verbose
self._hash_key = hash_key
self._hash_func = hashlib.sha1()
self._hash = self._get_hash()
# if no dir is provided, the default dgl download dir is used.
......@@ -161,11 +162,14 @@ class DGLDataset(object):
Example
-------
>>> hash_value = self._get_hash((10, False, True))
Assume `self._hash_key = (10, False, True)`
>>> hash_value = self._get_hash()
>>> hash_value
6299899980521991026
'a770b222'
"""
return abs(hash(self._hash_key))
self._hash_func.update(str(self._hash_key).encode('utf-8'))
return self._hash_func.hexdigest()[:8]
@property
def url(self):
......
......@@ -5,5 +5,19 @@ def test_minigc():
g, l = list(zip(*ds))
print(g, l)
def test_data_hash():
    """Check that a dataset's ``hash`` depends only on its ``hash_key``.

    Two datasets built with equal hash keys must expose equal ``hash``
    values, and a dataset built with a different key must expose a
    different one.
    """

    class HashTestDataset(data.DGLDataset):
        """Minimal ``DGLDataset`` subclass used only to compare hashes."""

        def __init__(self, hash_key=()):
            super(HashTestDataset, self).__init__('hashtest', hash_key=hash_key)

        def _load(self):
            # No-op: nothing needs to be loaded for a hash comparison.
            pass

    # The two keys differ only in the last element of the nested tuple.
    shared_key = (True, 0, '1', (1, 2, 3))
    changed_key = (True, 0, '1', (1, 2, 4))

    first = HashTestDataset(shared_key)
    twin = HashTestDataset(shared_key)
    other = HashTestDataset(changed_key)

    assert first.hash == twin.hash
    assert first.hash != other.hash
if __name__ == '__main__':
    # When executed as a script, run every test in declaration order.
    for run_test in (test_minigc, test_data_hash):
        run_test()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment