utils.py 481 Bytes
Newer Older
chenzk's avatar
v1.0.8  
chenzk committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import importlib
import sys
from pathlib import Path

from datasets import Dataset


def set_system_path() -> None:
    """Make the nanotron repository root importable via ``sys.path``.

    The ``nanotron`` package is located through ``importlib`` and the
    directory three levels above its ``__file__`` is appended to
    ``sys.path``. The call is idempotent: a path that is already listed is
    not appended a second time.

    Raises:
        ImportError: If the ``nanotron`` package cannot be imported.
    """
    package = importlib.import_module("nanotron")
    # NOTE:  Path(package.__file__).parent = .../nanotron/src/nanotron
    # we want .../nanotron
    package_path = str(Path(package.__file__).parent.parent.parent)
    # Skip the append when the entry exists so repeated calls do not pile up
    # duplicate sys.path entries.
    if package_path not in sys.path:
        sys.path.append(package_path)


def create_dummy_dataset(num_items: int):
    """Build a single-column ``Dataset`` to use as placeholder data.

    The only column is named ``"text"`` and holds the integers
    ``0`` through ``num_items - 1`` in ascending order, one per row.
    """
    column_values = [*range(num_items)]
    return Dataset.from_dict({"text": column_values})