generation.py 6.01 KB
Newer Older
1
2
from pathlib import Path; import sys; sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

3
4
5
6
7
8
9
from basic_function import format_parser
from basic_function import packaged_function
from basic_function import conformer_search
import time
import argparse
import os
import itertools
fanding2000's avatar
change  
fanding2000 committed
10
import sys
11
12
13
14
15
16
17
18
19
20


if __name__ == '__main__':

    # Wall-clock reference used for the "time cost" report at the end of the run.
    time_start = time.time()

    # initiate configuration
    ##############################################################################################
    # Command-line options, declared once as (flag, add_argument keyword arguments)
    # and registered in this order.
    cli_options = [
        ('--path', dict(type=str, default="./", help='Path to process')),
        ('--smiles', dict(type=str, default="None", help='SMILES string of the molecules, split by . if multiple molecules are used')),
        ('--generate_conformers', dict(type=int, default=20, help='Number of conformers to generate. When it is <=0, only load existing conformers to generate structures')),
        ('--use_conformers', dict(type=int, default=4, help='Number of conformers used to generate structure. When it is <=0, no structure generation would be done')),
        ('--molecule_num_in_cell', dict(type=str, nargs='+', default=['1'], help='number of molecules in a unit cell, split by comma for multiple molecules, and split by space for multiple packings')),
        ('--num_generation', dict(type=int, nargs='+', default=[100], help='number of structures to generate, split by space for multiple packings')),
        ('--space_group_list', dict(type=str, nargs='+', default=["2,14"], help='Space group list for structure generation, spilt by comma to add mutiple groups, split by space for multiple packings')),
        ('--add_name', dict(type=str, nargs='+', default=["CRYSTAL"], help='Add name for the generated structures, split by space for multiple packings')),
        ('--max_workers', dict(type=int, default=8, help='Maximum number of workers for parallel processing')),
        ('--mode', dict(type=str, default='all', choices=['all', 'conformer_only', 'structure_only'], help='choose the jobs to do')),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

fanding2000's avatar
change  
fanding2000 committed
32
33
34
35
36
    mode = args.mode
    # "None" is the string default of --smiles; a real SMILES is only optional
    # when mode is "structure_only".
    if args.smiles == "None" and mode != "structure_only":
        # Passing a string to sys.exit() prints it to stderr and exits with
        # status 1, so calling shells and schedulers see the failure
        # (the previous sys.exit(0) reported success).
        sys.exit("Smile is required for conformer search!")

37
38
39
40
41
42
43
44
45
46
47
    # Copy the parsed options into plain local variables.
    target_folder = args.path
    smiles_list = args.smiles.split('.')
    generate_conformers = args.generate_conformers
    use_conformers = args.use_conformers

    # Each packing is given as one comma-separated string of integers.
    molecule_num_in_cell = [
        [int(token) for token in entry.split(',')]
        for entry in args.molecule_num_in_cell
    ]
    num_generation = args.num_generation
    space_group_list = [
        [int(token) for token in entry.split(',')]
        for entry in args.space_group_list
    ]
    add_name = args.add_name
    max_workers = args.max_workers

    # One molecule per '.'-separated SMILES fragment ...
    num_molecules = len(smiles_list)
    if mode == "structure_only":
        # ... unless only structures are built: then count the consecutive
        # molecule_1, molecule_2, ... directories found under target_folder.
        num_molecules = 0
        while os.path.isdir(os.path.join(target_folder, f"molecule_{num_molecules+1}")):
            num_molecules += 1

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
    # The number of packings is set by the longer of the two per-packing inputs.
    num_packings = max(len(molecule_num_in_cell), len(space_group_list))

    # Give every packing exactly one count per molecule: pad with 1, or drop
    # the surplus entries.
    for idx, counts in enumerate(molecule_num_in_cell):
        missing = num_molecules - len(counts)
        if missing > 0:
            counts.extend([1] * missing)
        elif missing < 0:
            molecule_num_in_cell[idx] = counts[:num_molecules]

    # Any per-packing list shorter than num_packings repeats its last entry.
    for per_packing in (molecule_num_in_cell, space_group_list, add_name, num_generation):
        shortfall = num_packings - len(per_packing)
        if shortfall > 0:
            per_packing.extend([per_packing[-1]] * shortfall)


    
    # step1: conformer search
    ##############################################################################################
    # molecule_data[i] collects the parsed conformers of molecule_{i+1}, in index order.
    molecule_data = []
    for i in range(num_molecules):
        molecule_folder = os.path.join(target_folder, f"molecule_{i+1}")
        conformer_folder = os.path.join(molecule_folder, "conformers")
        molecule_data.append([])
        if generate_conformers > 0 and mode != "structure_only":
            # NOTE(review): presumably this writes conformer_<k>.xyz files into
            # <molecule_folder>/conformers -- confirm against conformer_search.
            conformer_search.conformer_search(smiles_list[i], molecule_folder, num_conformers=generate_conformers, max_attempts=10000, rms_thresh=0.1)
            with open(os.path.join(molecule_folder, "info.txt"), "w") as smiles_file:
                smiles_file.write(f"SMILES: {smiles_list[i]}")

        # A missing "conformers" directory is treated as "no conformers", so the
        # run ends with the message below instead of an uncaught exception.
        try:
            file_num = len(os.listdir(conformer_folder))
        except (FileNotFoundError, NotADirectoryError):
            file_num = 0

        # Load conformer_0.xyz, conformer_1.xyz, ... up to use_conformers files,
        # stopping at the first gap in the numbering.
        for j in range(min(file_num, use_conformers)):
            temp_path = os.path.join(conformer_folder, f"conformer_{j}.xyz")
            if not os.path.exists(temp_path):
                break
            molecule_data[i].append(format_parser.read_xyz_file(temp_path))

        if not molecule_data[i]:
            # The empty entry makes the product below empty, so step 2 has
            # nothing to iterate over.
            print(f"No conformer loaded for molecule_{i+1}. Check configurations!")
            break

    if molecule_data:
        # Every way of picking one conformer index per molecule.
        idx_data = [list(range(len(item))) for item in molecule_data]
        combinations = list(itertools.product(*idx_data))
    else:
        # itertools.product() with no iterables yields a single empty tuple,
        # which would trigger structure generation with no molecules at all.
        print(f"No molecule folder found in {target_folder}. Check configurations!")
        idx_data = []
        combinations = []



    # step2: structure generation
    ##############################################################################################
    if mode != "conformer_only":
        # Conformer indices are zero-padded to a common width before being
        # concatenated into the structure name. Without padding, (1, 10) and
        # (11, 0) would both become "110". When every index is below 10 the
        # width is 1 and the names are the same as before.
        largest_index = max([len(item) for item in molecule_data] + [1]) - 1
        index_width = len(str(largest_index))

        for i in range(num_packings):
            for combination in combinations:
                # Repeat each molecule's chosen conformer as many times as that
                # molecule occurs in the unit cell for this packing.
                molecule_list = []
                for j, conformer_index in enumerate(combination):
                    molecule_list.extend([molecule_data[j][conformer_index]] * molecule_num_in_cell[i][j])
                c_name = "".join(f"{conformer_index:0{index_width}d}" for conformer_index in combination)
                packaged_function.CSP_generater_parallel(molecule_list, target_folder, need_structure=num_generation[i], space_group_list=space_group_list[i], add_name=f"{add_name[i]}_C{c_name}", max_workers=max_workers, start_seed=1)

    # Report the total wall-clock time of the run.
    time_end = time.time()
    print('time cost', time_end - time_start, 's')