format_parser.py 15.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
import re
from basic_function import operation
from basic_function import data_classes
from basic_function import chemical_knowledge
import copy


def read_xyz_file(file_path):
    input_file = open(file_path, 'r')
    lines = input_file.readlines()
    number_of_atoms = int(lines[0])
    name = str(lines[1][:-1])
    atoms = []
    for index,line in enumerate(lines):
        split_line = list(filter(lambda x: x != '', re.split("\\s+", line)))
        if len(split_line)==4 and operation.is_number(split_line[1]) and \
                operation.is_number(split_line[2]) and operation.is_number(split_line[3]):
            atoms.append(data_classes.Atom(element=split_line[0],
                                            cart_xyz=[float(split_line[1]), float(split_line[2]), float(split_line[3])],
                                           atom_id=index-2))

    if number_of_atoms!=len(atoms):
        print("Warning! The length of atoms don't match the number of atoms given")

    molecule = data_classes.Molecule(atoms=atoms, name=name, system_name=name)

    return molecule

def write_cif_file(crystal, sym=False, name="zcx"):
    """
    Accept crystal class, give the cif file out
    :param crystal: crystal class
    :param coordinates: frac or cart
    :param sym: False:give all atoms out; True:with symmetry
    :param name: file name
    :return: cif_out
    cif file in list format should be print using the following function:
                                        target=open("D:\\zcx.cif",'w')
                                        target.writelines(cif_out)
                                        target.close()
    """
    
    if crystal.system_name!="unknown":
        name = crystal.system_name
    cif_file = []

    cif_file.append("data_"+str(name)+"\n")
    if sym==False:
        if crystal.space_group==1:
            crystal_temp = crystal
        else:
            crystal_temp = copy.deepcopy(crystal)
            crystal_temp.make_p1()
        cif_file.append("_symmetry_space_group_name_H-M    \'P1\'"+"\n")
        cif_file.append("_symmetry_Int_Tables_number       1"+"\n")

        cif_file.append("loop_"+"\n")
        cif_file.append("_symmetry_equiv_pos_site_id"+"\n")
        cif_file.append("_symmetry_equiv_pos_as_xyz"+"\n")
        cif_file.append("1 x,y,z"+"\n")
        cif_file.append("_cell_length_a                   "+str(crystal_temp.cell_para[0][0])+"\n")
        cif_file.append("_cell_length_b                   "+str(crystal_temp.cell_para[0][1])+"\n")
        cif_file.append("_cell_length_c                   "+str(crystal_temp.cell_para[0][2])+"\n")
        cif_file.append("_cell_angle_alpha                "+str(crystal_temp.cell_para[1][0])+"\n")
        cif_file.append("_cell_angle_beta                 "+str(crystal_temp.cell_para[1][1])+"\n")
        cif_file.append("_cell_angle_gamma                "+str(crystal_temp.cell_para[1][2])+"\n")

        cif_file.append("loop_"+"\n")
        cif_file.append("_atom_site_label"+"\n")
        cif_file.append("_atom_site_type_symbol"+"\n")
        cif_file.append("_atom_site_fract_x"+"\n")
        cif_file.append("_atom_site_fract_y"+"\n")
        cif_file.append("_atom_site_fract_z"+"\n")
        for i in range(0,len(crystal_temp.atoms)):
            cif_file.append("{:6}  {:4} {:16.8f} {:16.8f} {:16.8f}\n"
                             .format(i+1,crystal_temp.atoms[i].element,crystal_temp.atoms[i].frac_xyz[0],
                                     crystal_temp.atoms[i].frac_xyz[1],crystal_temp.atoms[i].frac_xyz[2]))

        return cif_file

    elif sym==True:
        cif_file.append("_symmetry_space_group_name_H-M    \'{}\'".format(chemical_knowledge.space_group[crystal.space_group][1])+"\n")
        cif_file.append("_symmetry_Int_Tables_number       {}".format(crystal.space_group)+"\n")

        cif_file.append("loop_"+"\n")
        cif_file.append("_symmetry_equiv_pos_site_id"+"\n")
        cif_file.append("_symmetry_equiv_pos_as_xyz"+"\n")
        for idx, SYMM in enumerate(crystal.SYMM):
            cif_file.append("{} {}".format(idx+1,SYMM)+"\n")
        cif_file.append("_cell_length_a                   "+str(crystal.cell_para[0][0])+"\n")
        cif_file.append("_cell_length_b                   "+str(crystal.cell_para[0][1])+"\n")
        cif_file.append("_cell_length_c                   "+str(crystal.cell_para[0][2])+"\n")
        cif_file.append("_cell_angle_alpha                "+str(crystal.cell_para[1][0])+"\n")
        cif_file.append("_cell_angle_beta                 "+str(crystal.cell_para[1][1])+"\n")
        cif_file.append("_cell_angle_gamma                "+str(crystal.cell_para[1][2])+"\n")

        cif_file.append("loop_"+"\n")
        cif_file.append("_atom_site_label"+"\n")
        cif_file.append("_atom_site_type_symbol"+"\n")
        cif_file.append("_atom_site_fract_x"+"\n")
        cif_file.append("_atom_site_fract_y"+"\n")
        cif_file.append("_atom_site_fract_z"+"\n")
        for i in range(0,len(crystal.atoms)):
            cif_file.append("{:6}  {:4} {:16.8f} {:16.8f} {:16.8f}\n"
                             .format(i+1,crystal.atoms[i].element,crystal.atoms[i].frac_xyz[0],
                                     crystal.atoms[i].frac_xyz[1],crystal.atoms[i].frac_xyz[2]))

        return cif_file


def write_cifs_file(crystals, sym=False, name="zcx"):
    cifs_file = []
    for crystal in crystals:
        single_cif = write_cif_file(crystal,sym=sym, name=name)
        cifs_file.extend(single_cif)
    return cifs_file


def read_cif_file(file_path,on_sym_check=False,shut_up=False,system_name="unknown",comment_name="unknown"):
    input_file = open(file_path, 'r')
    lines = input_file.readlines()
    step_pickle = []
    crystal_all = []
    if system_name=="unknown":
        no_name = True
    else:
        no_name = False
    # first time scan
    for index,line in enumerate(lines):
        # find out all the step pickle
        if line.startswith("data_"):
            step_pickle.append(index)
    step_pickle.append(len(lines))

    # treat every step and return a crystal
    for m in range(0,len(step_pickle)-1):
        atoms = []
        atoms_P1 = []
        SYMM = []
        cell_para = [["unknown","unknown","unknown"],["unknown","unknown","unknown"]]

        for index, line in enumerate(lines[step_pickle[m]:step_pickle[m+1]]):
            split_line = list(filter(lambda x: x != '', re.split("\\s+", line)))
            if line.startswith("#"):
                pass
            elif len(split_line)==0:
                pass
            # read the loop of symmetry
            # elif split_line[0]=="loop_" and lines[step_pickle[m]+index+1]=="_symmetry_equiv_pos_as_xyz\n":
            elif split_line[0] == "loop_" and lines[step_pickle[m] + index + 1] == "_symmetry_equiv_pos_as_xyz\n":
                temp_number = 1
                while "_" not in lines[step_pickle[m]+index+1+temp_number]:
                    split_line_temp = list(filter(lambda x: x != '', re.split("\\s+", lines[step_pickle[m]+index+1+temp_number])))
                    temp_number+=1
                    if not operation.is_number(split_line_temp[0]):
                        SYMM.append(split_line_temp[0])
                    else:
                        SYMM.append(split_line_temp[1])
            elif split_line[0] == "loop_" and lines[step_pickle[m]+index+2].strip(" ")=="_symmetry_equiv_pos_as_xyz\n":
                temp_number = 1
                while "_" not in lines[step_pickle[m]+index+2+temp_number]:
                    split_line_temp = list(filter(lambda x: x != '', re.split("\\s+", lines[step_pickle[m]+index+2+temp_number])))
                    temp_number+=1
                    SYMM.append("".join(split_line_temp[1:]))
            elif split_line[0] == "loop_" and "_space_group_symop_operation_xyz\n" in lines[step_pickle[m] + index + 1]:
                # ase format
                temp_number = 1
                while "_" not in lines[step_pickle[m]+index+1+temp_number]:
                    if lines[step_pickle[m]+index+1+temp_number]=="\n":
                        temp_number += 1
                        continue
                    split_line_temp = list(filter(lambda x: x != '', re.split("\\s+", lines[step_pickle[m]+index+1+temp_number])))
                    temp_number+=1
                    if not operation.is_number(split_line_temp[0]):
                        SYMM.append("".join(split_line_temp))

            # read the loop of atoms:
            elif (split_line[0] == "loop_" and lines[step_pickle[m] + index + 1].strip(" ") == "_atom_site_label\n") or \
                    (split_line[0] == "loop_" and lines[step_pickle[m] + index + 2].strip(" ") == "_atom_site_label\n"):
                temp_number = 0
                while "_" in lines[step_pickle[m]+index+1+temp_number]:
                    if lines[step_pickle[m] + index + 1 + temp_number].strip(" ") == "_atom_site_type_symbol\n":
                        ele_pos = temp_number
                    elif lines[step_pickle[m] + index + 1 + temp_number].strip(" ") == "_atom_site_fract_x\n":
                        x_pos = temp_number
                    elif lines[step_pickle[m] + index + 1 + temp_number].strip(" ") == "_atom_site_fract_y\n":
                        y_pos = temp_number
                    elif lines[step_pickle[m] + index + 1 + temp_number].strip(" ") == "_atom_site_fract_z\n":
                        z_pos = temp_number
                    temp_number+=1
                how_long = temp_number

                while len(list(filter(lambda x: x != '', re.split("\\s+", lines[step_pickle[m]+index+1+temp_number])))) == how_long:
                    split_line_temp = list(filter(lambda x: x != '', re.split("\\s+", lines[step_pickle[m]+index+1+temp_number])))
                    atoms.append(data_classes.Atom(element=split_line_temp[ele_pos],
                                           frac_xyz=[float(split_line_temp[x_pos]),float(split_line_temp[y_pos]),
                                                     float(split_line_temp[z_pos])]))
                    temp_number += 1
                    if step_pickle[m]+index+1+temp_number==len(lines):
                        break

            elif split_line[0] == "_cell_length_a":
                cell_para[0][0] = float(split_line[1])
            elif split_line[0] == "_cell_length_b":
                cell_para[0][1] = float(split_line[1])
            elif split_line[0] == "_cell_length_c":
                cell_para[0][2] = float(split_line[1])
            elif split_line[0] == "_cell_angle_alpha":
                cell_para[1][0] = float(split_line[1])
            elif split_line[0] == "_cell_angle_beta":
                cell_para[1][1] = float(split_line[1])
            elif split_line[0] == "_cell_angle_gamma":
                cell_para[1][2] = float(split_line[1])
            elif "data_" in line:
                if no_name == True:
                    system_name = line[5:]
                    system_name = system_name.replace(" ","_")
                    system_name = system_name.replace("\\", "_")
                    system_name = system_name.replace("\n", "")
        for atom in atoms:
            all_reflect_position = operation.space_group_transfer_for_single_atom(atom.frac_xyz, SYMM)
            for new_position in all_reflect_position:
                atoms_P1.append(data_classes.Atom(element=atom.element,
                                            frac_xyz=[new_position[0], new_position[1], new_position[2]]))
        crystal_all.append(data_classes.Crystal(cell_para=cell_para, atoms=atoms_P1, comment=comment_name, system_name=system_name))
        if on_sym_check == True:
            raise Exception("Not finished part, TODO in code")
        if shut_up==False:
            if m%100 == 0:
                print("{} structures have been treated".format(m))

    return crystal_all


def write_poscar_file(crystal, coordinates = 'frac', name = "parser_zcx_create"):

    vasp_file = []

    vasp_file.append('{}\n'.format(name))
    vasp_file.append('1.0\n')
    cell_vect = crystal.cell_vect
    for vect in cell_vect:
        vasp_file.append("{:16.8f} {:16.8f} {:16.8f}\n".format(vect[0],vect[1],vect[2]))
    crystal.sort_by_element()
    vasp_file.append("".join("{:>6s}".format(x) for x in crystal.get_element()) + "\n")
    vasp_file.append("".join("{:>6.0f}".format(x) for x in crystal.get_element_amount()) + "\n")
    if coordinates == 'frac':
        vasp_file.append('Direct\n')
        for ELEMENT in crystal.get_element():
            for ATOM in crystal.atoms:
                if ATOM.element == ELEMENT:
                    vasp_file.append(
                        "{:16.8f} {:16.8f} {:16.8f}\n".format(ATOM.frac_xyz[0], ATOM.frac_xyz[1], ATOM.frac_xyz[2]))
    elif coordinates == 'cart':
        vasp_file.append('Cartesian\n')
        for ELEMENT in crystal.get_element():
            for ATOM in crystal.atoms:
                if ATOM.element == ELEMENT:
                    vasp_file.append(
                        "{:16.8f} {:16.8f} {:16.8f}\n".format(ATOM.cart_xyz[0], ATOM.cart_xyz[1], ATOM.cart_xyz[2]))
    else:
        raise Exception("Wrong coordinates type: {}".format(coordinates))

    return vasp_file
    
    
def read_ase_pbc_file(file_path,shut_up=False):
    input_file = open(file_path, 'r')
    lines = input_file.readlines()[2:]
    step_pickle = []
    crystal_all = []

    # first time scan
    for index,line in enumerate(lines):
        # find out all the step pickle
        if line.startswith("Step "):
            step_pickle.append(index)
    step_pickle.append(len(lines))

    # treat every step and return a crystal
    for m in range(0,len(step_pickle)-1):
        atoms_P1 = []
        force_matrix = []
        position_matrix = []
        in_forces = False
        in_positions = False


        for index, line in enumerate(lines[step_pickle[m]:step_pickle[m+1]]):
            # split_line = list(filter(lambda x: x != '', re.split("\\s+", line)))
            line = line.strip()
            # check Forces part
            if line.startswith("Forces:"):
                in_forces = True
                in_positions = False
                continue

            # check Positions part
            if line.startswith("Positions:"):
                in_positions = True
                in_forces = False
                continue

            if in_forces and line.startswith("[") and line.endswith("]"):
                line = line.replace("[", "").replace("]", "")
                force_matrix.append([float(x) for x in line.split()])

            # analyse Positions part
            if in_positions and line.startswith("[") and line.endswith("]"):
                line = line.replace("[", "").replace("]", "")
                position_matrix.append([float(x) for x in line.split()])

            if line.startswith("Elements:"):
                elements_string = line.strip().split(":", 1)[-1].strip()
                elements_string = elements_string[1:-1]  

                elements = [elem.strip().strip("'") for elem in elements_string.split(",")]

            if line.startswith("cell:"):
                matrix_string = line[len("cell: Cell("):-1]
                rows = matrix_string.split("], [")
                cell_vect = [
                    [float(value) for value in row.replace('[', '').replace(']', '').replace(')', '').split(", ")]
                    for row in rows
                ]

        for i in range(0,len(elements)):
            atoms_P1.append(data_classes.Atom(element=elements[i], cart_xyz=[position_matrix[i][0], position_matrix[i][1], position_matrix[i][2]]))

        crystal_all.append(data_classes.Crystal(cell_vect=cell_vect, atoms=atoms_P1))

    return crystal_all    
zcxzcx1's avatar
zcxzcx1 committed
333