main.py 5.91 KB
Newer Older
1
2
3
4
5
6
7
from basic_function import format_parser
from basic_function import packaged_function
from basic_function import conformer_search
import time
import argparse
import os
import itertools
fanding2000's avatar
change  
fanding2000 committed
8
import sys
9
10
11
12
13
14
15
16
17
18


if __name__ == '__main__':

    # Wall-clock start; the elapsed time is printed at the very end of the script.
    time_start = time.time()

    # initiate configuration
    ##############################################################################################
    # Command-line interface. Options declared with nargs='+' take one value per
    # packing (space separated); a single value may itself be a comma-separated
    # list (per-molecule counts, space group numbers) that is split after parsing.
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', type=str, default="./", help='Path to process')
    # The literal string "None" (not the None object) marks "no SMILES supplied".
    parser.add_argument('--smiles', type=str, default="None", help='SMILES string of the molecules, split by . if multiple molecules are used')

    # Conformer handling.
    parser.add_argument('--generate_conformers', type=int, default=20, help='Number of conformers to generate. When it is <=0, only load existing conformers to generate structures')
    parser.add_argument('--use_conformers', type=int, default=4, help='Number of conformers used to generate structure. When it is <=0, no structure generation would be done')

    # Per-packing settings.
    parser.add_argument('--molecule_num_in_cell', type=str, nargs='+', default=['1'], help='number of molecules in a unit cell, split by comma for multiple molecules, and split by space for multiple packings')
    parser.add_argument('--num_generation', type=int, nargs='+', default=[100], help='number of structures to generate, split by space for multiple packings')
    parser.add_argument('--space_group_list', type=str, nargs='+', default=["2,14"], help='Space group list for structure generation, spilt by comma to add mutiple groups, split by space for multiple packings')
    parser.add_argument('--add_name', type=str, nargs='+', default=["CRYSTAL"], help='Add name for the generated structures, split by space for multiple packings')

    # Execution control.
    parser.add_argument('--max_workers', type=int, default=8, help='Maximum number of workers for parallel processing')
    parser.add_argument('--mode', type=str, default='all', choices=['all', 'conformer_only', 'structure_only'], help='choose the jobs to do')
    args = parser.parse_args()

fanding2000's avatar
change  
fanding2000 committed
30
31
32
33
34
    mode = args.mode
    # "None" is the parser's placeholder default for a missing --smiles. Only the
    # "structure_only" mode can do without it, because that mode never calls the
    # conformer search.
    if args.smiles == "None" and mode != "structure_only":
        # A missing required input is an error: report it on stderr and exit with a
        # non-zero status so that calling scripts and job schedulers can detect it.
        sys.exit("Smile is required for conformer search!")

35
36
37
38
39
40
41
42
43
44
45
    # Scalar and already-typed options are taken over unchanged.
    target_folder = args.path
    max_workers = args.max_workers
    generate_conformers = args.generate_conformers
    use_conformers = args.use_conformers
    num_generation = args.num_generation
    add_name = args.add_name

    # Molecules are separated by '.' inside the single --smiles string.
    smiles_list = args.smiles.split('.')
    num_molecules = len(smiles_list)

    # Each per-packing entry is a comma-separated list of integers.
    molecule_num_in_cell = [
        [int(token) for token in entry.split(',')]
        for entry in args.molecule_num_in_cell
    ]
    space_group_list = [
        [int(token) for token in entry.split(',')]
        for entry in args.space_group_list
    ]
fanding2000's avatar
change  
fanding2000 committed
46
47
48
49
50
51
52
53
54
    if mode == "structure_only":
        # No SMILES is used in this mode: the molecule count is inferred from the
        # consecutively numbered folders molecule_1, molecule_2, ... that already
        # exist under the target folder. Counting stops at the first missing number.
        num_molecules = 0
        while os.path.isdir(os.path.join(target_folder, f"molecule_{num_molecules + 1}")):
            num_molecules += 1
        if num_molecules == 0:
            # Without this guard the script would go on and request structure
            # generation for an empty list of molecules.
            sys.exit(f"No molecule_1 folder found in {target_folder}; nothing to generate structures from.")

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    # The number of packings is the longer of the two structural option lists.
    num_packings = max(len(molecule_num_in_cell), len(space_group_list))

    # Give every packing exactly one count per molecule: missing counts default
    # to 1 and surplus counts are dropped.
    molecule_num_in_cell = [
        (counts + [1] * (num_molecules - len(counts)))[:num_molecules]
        for counts in molecule_num_in_cell
    ]

    # Any per-packing list that is too short is padded by repeating its last entry.
    for per_packing in (molecule_num_in_cell, space_group_list, add_name, num_generation):
        shortfall = num_packings - len(per_packing)
        if shortfall > 0:
            per_packing.extend([per_packing[-1]] * shortfall)

    
    # step1: conformer search
    ##############################################################################################
    # molecule_data[i] holds the parsed conformers of molecule i+1 (at most
    # use_conformers of them), read from <path>/molecule_<i+1>/conformers/.
    molecule_data = []
    for i in range(num_molecules):
        molecule_folder = os.path.join(target_folder, f"molecule_{i+1}")
        conformer_folder = os.path.join(molecule_folder, "conformers")
        if generate_conformers > 0 and mode != "structure_only":
            # NOTE(review): the search is presumably what fills conformer_folder - confirm.
            conformer_search.conformer_search(smiles_list[i], molecule_folder, num_conformers=generate_conformers, max_attempts=10000, rms_thresh=0.1)
            # Record which SMILES the folder belongs to.
            with open(os.path.join(molecule_folder, "info.txt"), "w") as smiles_file:
                smiles_file.write(f"SMILES: {smiles_list[i]}")

        # A missing conformers folder counts as "no conformers", so it is reported
        # by the message below instead of aborting with FileNotFoundError.
        file_num = len(os.listdir(conformer_folder)) if os.path.isdir(conformer_folder) else 0

        # Load conformer_0.xyz, conformer_1.xyz, ... in order, stopping at the
        # first gap in the numbering or once use_conformers files are loaded.
        loaded = []
        for j in range(min(file_num, use_conformers)):
            temp_path = os.path.join(conformer_folder, f"conformer_{j}.xyz")
            if not os.path.exists(temp_path):
                break
            loaded.append(format_parser.read_xyz_file(temp_path))
        molecule_data.append(loaded)

        if not loaded:
            print(f"No conformer loaded for molecule_{i+1}. Check configurations!")
            # The empty entry makes the product below empty, so no structure is generated.
            break

    # Every way of picking one conformer index per molecule.
    idx_data = [list(range(len(item))) for item in molecule_data]
    combinations = list(itertools.product(*idx_data))


    # step2: structure generation
    ##############################################################################################
    if mode != "conformer_only":
        # The conformer indices of a combination are embedded in the structure name.
        # Plain concatenation is only unambiguous while every index is a single
        # digit or there is a single molecule; otherwise e.g. (1, 10) and (11, 0)
        # would both become "110". Use a separator in exactly that case, so the
        # names of all previously unambiguous runs stay unchanged.
        ambiguous = num_molecules > 1 and any(len(item) > 10 for item in molecule_data)
        index_sep = "_" if ambiguous else ""

        for i in range(num_packings):
            for combination in combinations:
                # Repeat the chosen conformer of each molecule as many times as
                # that molecule occurs in the unit cell for this packing.
                molecule_list = []
                for j in range(num_molecules):
                    molecule_list.extend([molecule_data[j][combination[j]]] * molecule_num_in_cell[i][j])
                c_name = index_sep.join(map(str, combination))
                packaged_function.CSP_generater_parallel(molecule_list, target_folder, need_structure=num_generation[i], space_group_list=space_group_list[i],add_name=f"{add_name[i]}_C{c_name}", max_workers=max_workers,start_seed=1)

    # Report the total wall-clock duration of the run in seconds.
    time_end = time.time()
    elapsed_seconds = time_end - time_start
    print('time cost', elapsed_seconds, 's')