train.py 1.38 KB
Newer Older
wangsen's avatar
wangsen committed
1
import datetime
import os
import pickle

from geneformer import Classifier
wangsen's avatar
wangsen committed
5
6
7
8
# Capture the launch time once so both stamps describe the same instant.
current_date = datetime.datetime.now()
# Compact YYMMDDHHMMSS stamp (two-digit year, zero-padded fields).
datestamp = current_date.strftime("%y%m%d%H%M%S")
# Date-only YYMMDD stamp.
datestamp_min = current_date.strftime("%y%m%d")

wangsen's avatar
wangsen committed
9
# Prefix passed to the classifier calls and used to build the names of the
# prepared-data files read back during validation.
output_prefix = "tf_dosage_sens_test"
# Each run writes into its own directory named after the launch timestamp.
output_dir = f"/path/to/output_dir/{datestamp}"
# exist_ok=True: do not abort with FileExistsError when the directory is
# already present (e.g. two launches within the same second). Missing parent
# directories are created as well.
os.makedirs(output_dir, exist_ok=True)
wangsen's avatar
wangsen committed
12

wangsen's avatar
wangsen committed
13
14
# Load the gene-class mapping that is passed to Classifier as gene_class_dict.
# NOTE: unpickling can execute arbitrary code; only point this at a trusted file.
gene_class_dict_path = "/path/to/Genecorpus-30M/dosage_sensitivity_TFs.pickle"
with open(gene_class_dict_path, "rb") as pickle_file:
    gene_class_dict = pickle.load(pickle_file)
wangsen's avatar
wangsen committed
15

wangsen's avatar
wangsen committed
16
17
18
19
20
# Build the classifier in "gene" mode from the loaded gene-class mapping.
# All argument values are exactly those of the original call.
# NOTE(review): meanings of max_ncells / freeze_layers / num_crossval_splits /
# forward_batch_size / nproc are defined by geneformer.Classifier — confirm
# against its documentation before tuning.
cc = Classifier(
    classifier="gene",
    gene_class_dict=gene_class_dict,
    max_ncells=10_000,
    freeze_layers=4,
    num_crossval_splits=5,
    forward_batch_size=200,
    nproc=16,
)


wangsen's avatar
wangsen committed
25
# Prepare the input dataset for classification, directing output to output_dir
# under the output_prefix name.
# NOTE(review): presumably this writes the "{output_prefix}_labeled.dataset"
# and "{output_prefix}_id_class_dict.pkl" files that cc.validate() is pointed
# at — confirm against the Classifier documentation.
cc.prepare_data(
    input_data_file="/path/to/Genecorpus-30M/dosage_sensitive_tfs",
    output_directory=output_dir,
    output_prefix=output_prefix,
)
wangsen's avatar
wangsen committed
28
29

# Run validation with the model stored at /home/Geneformer, reading the
# prepared dataset and id/class dictionary from output_dir and writing results
# back to the same directory. The return value is kept in all_metrics.
# NOTE(review): the structure of all_metrics is defined by
# Classifier.validate — confirm before consuming it.
all_metrics = cc.validate(
    model_directory="/home/Geneformer",
    prepared_input_data_file=f"{output_dir}/{output_prefix}_labeled.dataset",
    id_class_dict_file=f"{output_dir}/{output_prefix}_id_class_dict.pkl",
    output_directory=output_dir,
    output_prefix=output_prefix,
)


wangsen's avatar
wangsen committed
36
37
38