Commit 2409a22f authored by fanding2000's avatar fanding2000
Browse files

Format fix. More options in readme

parent ce29afea
# Van der Waals radii keyed by element symbol, hydrogen through xenon.
# Values are in Angstrom and come from
# https://pubchem.ncbi.nlm.nih.gov/ptable/atomic-radius/
# A value of None marks an element for which no radius is tabulated in this
# table; callers must handle None before doing arithmetic with a looked-up
# radius.
element_vdw_radii = {
    # First Period
    'H': 1.20,
    'He': 1.40,
    # Second Period
    'Li': 1.82, 'Be': 1.53, 'B': 1.92, 'C': 1.70, 'N': 1.55, 'O': 1.52, 'F': 1.35, 'Ne': 1.54,
    # Third Period
    'Na': 2.27, 'Mg': 1.73, 'Al': 1.84, 'Si': 2.10, 'P': 1.80, 'S': 1.80, 'Cl': 1.75, 'Ar': 1.88,
    # Fourth Period
    'K': 2.75, 'Ca': 2.31, 'Sc': 2.11, 'Ti': 1.87, 'V': 1.79, 'Cr': 1.89, 'Mn': 1.97, 'Fe': 1.94, 'Co': 1.92,
    'Ni': 1.63, 'Cu': 1.40, 'Zn': 1.39, 'Ga': 1.87, 'Ge': 2.11, 'As': 1.85, 'Se': 1.90, 'Br': 1.83, 'Kr': 2.02,
    # Fifth Period
    'Rb': None, 'Sr': None, 'Y': None, 'Zr': None, 'Nb': None, 'Mo': None, 'Tc': None, 'Ru': None, 'Rh': None,
    'Pd': None, 'Ag': None, 'Cd': None, 'In': None, 'Sn': None, 'Sb': None, 'Te': 2.06, 'I': 1.98, 'Xe': 2.16,
}
# Covalent radii keyed by element symbol, hydrogen through xenon.
# Values are in Angstrom and come from DOI: 10.1039/b801115j
element_covalent_radii = {
    # First Period
    'H': 0.31,
    'He': 0.28,
    # Second Period
    'Li': 1.28, 'Be': 0.96, 'B': 0.84, 'C': 0.76, 'N': 0.71, 'O': 0.66, 'F': 0.57, 'Ne': 0.58,
    # Third Period
    'Na': 1.66, 'Mg': 1.41, 'Al': 1.21, 'Si': 1.11, 'P': 1.07, 'S': 1.05, 'Cl': 1.02, 'Ar': 1.06,
    # Fourth Period
    'K': 2.03, 'Ca': 1.76, 'Sc': 1.70, 'Ti': 1.60, 'V': 1.53, 'Cr': 1.39, 'Mn': 1.61, 'Fe': 1.52, 'Co': 1.50,
    'Ni': 1.24, 'Cu': 1.32, 'Zn': 1.22, 'Ga': 1.22, 'Ge': 1.20, 'As': 1.19, 'Se': 1.20, 'Br': 1.20, 'Kr': 1.16,
    # Fifth Period
    'Rb': 2.20, 'Sr': 1.95, 'Y': 1.90, 'Zr': 1.75, 'Nb': 1.64, 'Mo': 1.54, 'Tc': 1.47, 'Ru': 1.46, 'Rh': 1.42,
    'Pd': 1.39, 'Ag': 1.45, 'Cd': 1.44, 'In': 1.42, 'Sn': 1.39, 'Sb': 1.39, 'Te': 1.38, 'I': 1.39, 'Xe': 1.40,
}
# Atomic masses keyed by element symbol, hydrogen (Z=1) through
# meitnerium (Z=109). Elements without a stable isotope carry an integer
# mass number instead of a fractional standard atomic weight.
element_masses = {
    # First Period
    'H': 1.0079, 'He': 4.0026,
    # Second Period
    'Li': 6.941, 'Be': 9.0122, 'B': 10.811, 'C': 12.0107, 'N': 14.0067, 'O': 15.9994, 'F': 18.9984, 'Ne': 20.1797,
    # Third Period
    'Na': 22.9897, 'Mg': 24.305, 'Al': 26.9815, 'Si': 28.0855, 'P': 30.9738, 'S': 32.065, 'Cl': 35.453, 'Ar': 39.948,
    # Fourth Period
    'K': 39.0983, 'Ca': 40.078, 'Sc': 44.9559, 'Ti': 47.867, 'V': 50.9415, 'Cr': 51.9961, 'Mn': 54.938, 'Fe': 55.845,
    'Co': 58.9332, 'Ni': 58.6934, 'Cu': 63.546, 'Zn': 65.39, 'Ga': 69.723, 'Ge': 72.64, 'As': 74.9216, 'Se': 78.96,
    'Br': 79.904, 'Kr': 83.8,
    # Fifth Period
    'Rb': 85.4678, 'Sr': 87.62, 'Y': 88.9059, 'Zr': 91.224, 'Nb': 92.9064, 'Mo': 95.94, 'Tc': 98, 'Ru': 101.07,
    'Rh': 102.9055, 'Pd': 106.42, 'Ag': 107.8682, 'Cd': 112.411, 'In': 114.818, 'Sn': 118.71, 'Sb': 121.76, 'Te': 127.6,
    'I': 126.9045, 'Xe': 131.293,
    # Sixth Period
    'Cs': 132.9055, 'Ba': 137.327, 'La': 138.9055, 'Ce': 140.116, 'Pr': 140.9077, 'Nd': 144.24, 'Pm': 145, 'Sm': 150.36,
    'Eu': 151.964, 'Gd': 157.25, 'Tb': 158.9253, 'Dy': 162.5, 'Ho': 164.9303, 'Er': 167.259, 'Tm': 168.9342,
    'Yb': 173.04, 'Lu': 174.967, 'Hf': 178.49, 'Ta': 180.9479, 'W': 183.84, 'Re': 186.207, 'Os': 190.23, 'Ir': 192.217,
    'Pt': 195.078, 'Au': 196.9665, 'Hg': 200.59, 'Tl': 204.3833, 'Pb': 207.2, 'Bi': 208.9804, 'Po': 209, 'At': 210,
    'Rn': 222,
    # Seventh Period
    'Fr': 223, 'Ra': 226, 'Ac': 227, 'Th': 232.0381, 'Pa': 231.0359, 'U': 238.0289, 'Np': 237, 'Pu': 244, 'Am': 243,
    'Cm': 247, 'Bk': 247, 'Cf': 251, 'Es': 252, 'Fm': 257, 'Md': 258, 'No': 259, 'Lr': 262, 'Rf': 261, 'Db': 262,
    'Sg': 266, 'Bh': 264, 'Hs': 277, 'Mt': 268,
}
# Element symbol -> atomic number, hydrogen (1) through meitnerium (109).
# Despite the name this is a dict, not a list; the name is kept because other
# code looks it up under this identifier.
periodic_table_list = {
    # First Period
    'H': 1, 'He': 2,
    # Second Period
    'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10,
    # Third Period
    'Na': 11, 'Mg': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18,
    # Fourth Period
    'K': 19, 'Ca': 20, 'Sc': 21, 'Ti': 22, 'V': 23, 'Cr': 24, 'Mn': 25, 'Fe': 26, 'Co': 27,
    'Ni': 28, 'Cu': 29, 'Zn': 30, 'Ga': 31, 'Ge': 32, 'As': 33, 'Se': 34, 'Br': 35, 'Kr': 36,
    # Fifth Period
    'Rb': 37, 'Sr': 38, 'Y': 39, 'Zr': 40, 'Nb': 41, 'Mo': 42, 'Tc': 43, 'Ru': 44, 'Rh': 45,
    'Pd': 46, 'Ag': 47, 'Cd': 48, 'In': 49, 'Sn': 50, 'Sb': 51, 'Te': 52, 'I': 53, 'Xe': 54,
    # Sixth Period
    'Cs': 55, 'Ba': 56, 'La': 57, 'Ce': 58, 'Pr': 59, 'Nd': 60, 'Pm': 61, 'Sm': 62, 'Eu': 63,
    'Gd': 64, 'Tb': 65, 'Dy': 66, 'Ho': 67, 'Er': 68, 'Tm': 69, 'Yb': 70, 'Lu': 71, 'Hf': 72,
    'Ta': 73, 'W': 74, 'Re': 75, 'Os': 76, 'Ir': 77, 'Pt': 78, 'Au': 79, 'Hg': 80, 'Tl': 81,
    'Pb': 82, 'Bi': 83, 'Po': 84, 'At': 85, 'Rn': 86,
    # Seventh Period
    'Fr': 87, 'Ra': 88, 'Ac': 89, 'Th': 90, 'Pa': 91, 'U': 92, 'Np': 93, 'Pu': 94, 'Am': 95,
    'Cm': 96, 'Bk': 97, 'Cf': 98, 'Es': 99, 'Fm': 100, 'Md': 101, 'No': 102, 'Lr': 103, 'Rf': 104,
    'Db': 105, 'Sg': 106, 'Bh': 107, 'Hs': 108, 'Mt': 109,
}
def sort_by_atomic_number(all_element_type):
    """
    Return the given element symbols ordered by increasing atomic number.

    Parameters:
    all_element_type (iterable of str): Element symbols, e.g. ['O', 'H', 'C'].
        Any iterable is accepted (including a generator); it is consumed once.

    Returns:
    list: A new list holding the same symbols sorted by their atomic number
        as given in ``periodic_table_list``. Duplicates are kept.

    Raises:
    AssertionError: If a symbol is not a key of ``periodic_table_list``.
    """
    # Materialise once so that one-shot iterables are not exhausted by the
    # validation pass before the sort sees them.
    elements = list(all_element_type)
    for element in elements:
        # Raised explicitly (rather than via `assert`) so the validation
        # still runs under `python -O`; type and message are unchanged.
        if element not in periodic_table_list:
            raise AssertionError('{} element is not exist'.format(element))
    # Equal atomic numbers imply equal symbols, so sorting on the atomic
    # number alone yields the same order as sorting (number, symbol) pairs.
    return sorted(elements, key=periodic_table_list.__getitem__)
# Inverse of periodic_table_list: atomic number -> element symbol.
periodic_table_std = {number: symbol for symbol, number in periodic_table_list.items()}
# Space-group lookup table, keyed by space-group number.
# Each value is a three-item list:
#   [list of symmetry-operation strings, space-group symbol, crystal-system name]
# The crystal-system name is one of the keys used by the lattice-constraint
# table defined alongside this one ("Triclinic", "Monoclinic", ...).
# Only the space groups listed here are available; this is not a complete table.
#
# References used to compile the operations:
#   http://img.chem.ucl.ac.uk/sgp/LARGE/sgp.htm
#   https://en.wikipedia.org/wiki/Space_group
#
# NOTE: earlier revisions of this literal listed keys 12 and 13 twice. Python
# keeps only the last value for a repeated key, so the first copies were dead.
# They have been removed; the entries below are the ones that were in effect.
space_group = {
    1: [["x,y,z"], 'P1', "Triclinic"],
    2: [["x,y,z", "-x,-y,-z"], 'P-1', "Triclinic"],
    4: [["x,y,z", "-x,y+1/2,-z"], 'P21', "Monoclinic"],
    5: [["x,y,z", "-x,y,-z", "x+1/2,y+1/2,z", "-x+1/2,y+1/2,-z"], 'C2', "Monoclinic"],
    7: [["x,y,z", "x,-y,1/2+z"], "Pc", "Monoclinic"],
    9: [["x,y,z", "x,-y,1/2+z", "1/2+x,1/2+y,z", "1/2+x,1/2-y,1/2+z"], "CC", "Monoclinic"],
    11: [["x,y,z", "-x,y+1/2,-z", "-x,-y,-z", "x,1/2-y,z"], "P21/m", "Monoclinic"],
    12: [["x,y,z", "x,-y,z", "-x,y,-z", "-x,-y,-z",
          "1/2+x,1/2+y,z", "1/2+x,1/2-y,z", "1/2-x,1/2+y,-z", "1/2-x,1/2-y,-z"],
         "C2/m", "Monoclinic"],
    13: [["x,y,z", "-x,y,-z+1/2", "-x,-y,-z", "x,-y,z+1/2"], 'P2/c', "Monoclinic"],
    14: [["x,y,z", "-x,y+1/2,-z+1/2", "-x,-y,-z", "x,-y+1/2,z+1/2"], 'P21/C', "Monoclinic"],
    15: [["x,y,z", "-x,y,1/2-z", "1/2+x,1/2+y,z", "1/2-x,1/2+y,1/2-z",
          "-x,-y,-z", "x,-y,1/2+z", "1/2-x,1/2-y,-z", "1/2+x,1/2-y,1/2+z"],
         "C2/C", "Monoclinic"],
    18: [["x,y,z", "1/2+x,1/2-y,-z", "1/2-x,1/2+y,-z", "-x,-y,z"], "P21212", "Orthorhombic"],
    19: [["x,y,z", "1/2+x,1/2-y,-z", "-x,1/2+y,1/2-z", "1/2-x,-y,1/2+z"], "P212121", "Orthorhombic"],
    29: [["x,y,z", "1/2-x,y,1/2+z", "1/2+x,-y,z", "-x,-y,1/2+z"], "PCA21", "Orthorhombic"],
    33: [["x,y,z", "1/2-x,1/2+y,1/2+z", "1/2+x,1/2-y,z", "-x,-y,1/2+z"], "PNA21", "Orthorhombic"],
    43: [["x,y,z", "1/4-x,1/4+y,1/4+z", "1/4+x,1/4-y,1/4+z", "-x,-y,z",
          "x,y+1/2,z+1/2", "1/4-x,3/4+y,3/4+z", "1/4+x,3/4-y,3/4+z", "-x,-y+1/2,z+1/2",
          "x+1/2,y,z+1/2", "3/4-x,1/4+y,3/4+z", "3/4+x,1/4-y,3/4+z", "-x+1/2,-y,z+1/2",
          "x+1/2,y+1/2,z", "3/4-x,3/4+y,1/4+z", "3/4+x,3/4-y,1/4+z", "-x+1/2,-y+1/2,z"],
         "Fdd2", "Orthorhombic"],
    56: [["x,y,z", "1/2-x,y,1/2+z", "x,1/2-y,1/2+z", "1/2+x,1/2+y,-z",
          "-x,-y,-z", "1/2+x,-y,1/2-z", "-x,1/2+y,1/2-z", "1/2-x,1/2-y,z"],
         "Pccn", "Orthorhombic"],
    60: [["x,y,z", "1/2-x,1/2+y,z", "x,-y,1/2+z", "1/2+x,1/2+y,1/2-z",
          "-x,-y,-z", "1/2+x,1/2-y,-z", "-x,y,1/2-z", "1/2-x,1/2-y,1/2+z"],
         "Pbcn", "Orthorhombic"],
    61: [["x,y,z", "1/2-x,1/2+y,z", "x,1/2-y,1/2+z", "1/2+x,y,1/2-z",
          "-x,-y,-z", "1/2+x,1/2-y,-z", "-x,1/2+y,1/2-z", "1/2-x,-y,1/2+z"],
         "PBCA", "Orthorhombic"],
    62: [["x,y,z", "x+1/2,-y+1/2,-z+1/2", "-x,y+1/2,-z", "-x+1/2,-y,z+1/2",
          "-x,-y,-z", "-x+1/2,y+1/2,z+1/2", "x,-y+1/2,z", "x+1/2,y,-z+1/2"],
         "Pnma", "Orthorhombic"],
    77: [["x,y,z", "-x,-y,z", "-y,x,1/2+z", "y,-x,1/2+z"], "P42", "Tetragonal"],
    88: [["x,y,z", "-x,-y,z", "-y,1/2+x,1/4+z", "y,1/2-x,1/4+z",
          "-x,1/2-y,1/4-z", "x,1/2+y,1/4-z", "y,-x,-z", "-y,x,-z",
          "1/2+x,1/2+y,1/2+z", "1/2-x,1/2-y,1/2+z", "1/2-y,x,3/4+z", "1/2+y,-x,3/4+z",
          "1/2-x,-y,3/4-z", "1/2+x,y,3/4-z", "1/2+y,1/2-x,1/2-z", "1/2-y,1/2+x,1/2-z"],
         "I41/a", "Tetragonal"],
    96: [["x,y,z", "-x,-y,1/2+z", "1/2-y,1/2+x,3/4+z", "1/2+y,1/2-x,1/4+z",
          "1/2+x,1/2-y,1/4-z", "1/2-x,1/2+y,3/4-z", "-y,-x,1/2-z", "y,x,-z"],
         "P43212", "Tetragonal"],
    143: [["x,y,z", "-y,x-y,z", "-x+y,-x,z"], "P3", "Hexagonal"],
    147: [["x,y,z", "-y,x-y,z", "-x+y,-x,z", "-x,-y,-z", "y,-x+y,-z", "x-y,x,-z"], "P-3", "Hexagonal"],
    148: [["x,y,z", "z,x,y", "y,z,x", "-x,-y,-z", "-z,-x,-y", "-y,-z,-x"], "R-3", "Trigonal"],
    169: [["x,y,z", "-y,x-y,1/3+z", "-x+y,-x,2/3+z", "-x,-y,1/2+z", "x-y,x,1/6+z", "y,-x+y,5/6+z"],
          "P61", "Hexagonal"],
}
# Lattice-parameter constraints per crystal system.
# Each value is [[three cell lengths], [three cell angles]]. A string entry
# ("a", "c", "alpha", ...) names a free parameter, and repeating the same
# string within a row means those parameters are tied to each other; a number
# is an angle fixed to that value.
# The keys match the crystal-system names stored as the third item of each
# space-group entry. The variable is called `point_group` for historical
# reasons even though it describes crystal systems.
point_group = {
    "Triclinic": [["a", "b", "c"], ["alpha", "beta", "gamma"]],
    "Monoclinic": [["a", "b", "c"], [90, "beta", 90]],
    "Orthorhombic": [["a", "b", "c"], [90, 90, 90]],
    "Tetragonal": [["a", "a", "c"], [90, 90, 90]],
    "Trigonal": [["a", "a", "a"], ["alpha", "alpha", "alpha"]],
    "Hexagonal": [["a", "a", "c"], [90, 90, 120]],
    "Cubic": [["a", "a", "a"], [90, 90, 90]],
}
from rdkit import Chem from rdkit import Chem
from rdkit.Chem import AllChem from rdkit.Chem import AllChem
import os import os
def generate_conformers(molecule, num_conformers=10, max_attempts=1000, rms_thresh=0.2, optimize_all=False):
    """
    Embed 3D conformers into a molecule with ETKDG and run a UFF optimization.

    The conformers are stored on ``molecule`` itself (it is modified in place).

    Parameters:
    molecule (RDKit Mol object): The input molecule.
    num_conformers (int): Number of conformers to request from the embedder.
    max_attempts (int): Maximum number of attempts.
    rms_thresh (float): RMSD threshold for considering conformers as duplicates.
    optimize_all (bool): When False (default, the historical behaviour) only
        the molecule's default conformer is UFF-optimized. When True every
        embedded conformer is optimized.

    Returns:
    tuple: ``(conformer_ids, results)`` where ``conformer_ids`` is the sequence
        of conformer IDs returned by ``EmbedMultipleConfs`` and ``results`` is
        the return value of the UFF call: a single status code when
        ``optimize_all`` is False, or one ``(not_converged, energy)`` pair per
        conformer when it is True.
    """
    params = AllChem.ETKDG()
    # 0 asks RDKit to use as many threads as the system supports.
    params.numThreads = 0
    # NOTE(review): confirm that the installed RDKit exposes `maxAttempts` on
    # the embed-parameters object; current documentation lists the attempt
    # limit under `maxIterations`, in which case this assignment has no effect.
    params.maxAttempts = max_attempts
    params.pruneRmsThresh = rms_thresh
    conformer_ids = AllChem.EmbedMultipleConfs(molecule, numConfs=num_conformers, params=params)
    if optimize_all:
        results = AllChem.UFFOptimizeMoleculeConfs(molecule, numThreads=0)
    else:
        # Without an explicit conformer ID this optimizes only the default
        # conformer; the remaining embedded conformers are left as embedded.
        results = AllChem.UFFOptimizeMolecule(molecule)
    return conformer_ids, results
def conformer_search(smiles, out_path, num_conformers=1000, max_attempts=10000, rms_thresh=0.2):
    """
    Generate conformers for a SMILES string and write each one as an XYZ file.

    Files are written to ``<out_path>/conformers/conformer_<i>.xyz`` where
    ``i`` counts the generated conformers from 0. Existing files with the same
    name are overwritten.

    Parameters:
    smiles (str): SMILES string of the molecule.
    out_path (str): Directory under which the ``conformers`` folder is created.
    num_conformers (int): Number of conformers to request.
    max_attempts (int): Maximum number of attempts, forwarded to generate_conformers.
    rms_thresh (float): RMSD pruning threshold, forwarded to generate_conformers.

    Raises:
    ValueError: If RDKit cannot parse ``smiles``.
    OSError: If the output directory cannot be created or a file cannot be written.
    """
    # Validate the input before touching the filesystem. MolFromSmiles
    # returns None for an unparsable string, which would otherwise surface
    # as an obscure error inside AddHs.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("Could not parse SMILES string: {!r}".format(smiles))
    mol = Chem.AddHs(mol)  # add H atoms

    conformer_dir = "{}/conformers".format(out_path)
    try:
        os.makedirs(conformer_dir)
    except FileExistsError:
        # Only "already exists" is tolerated; permission and other OS errors
        # propagate instead of being hidden until the first file write.
        print("Warning, these is already an structures folder in this path, skip mkdir")

    # conformer generate
    conformer_ids, results = generate_conformers(
        mol, num_conformers=num_conformers, max_attempts=max_attempts, rms_thresh=rms_thresh)

    # Atom count and symbols are the same for every conformer.
    num_atoms = mol.GetNumAtoms()
    symbols = [mol.GetAtomWithIdx(j).GetSymbol() for j in range(num_atoms)]

    for i, conf in enumerate(conformer_ids):
        print(f'Conformer {i}:')
        conformer = mol.GetConformer(conf)
        # XYZ layout: atom count, comment line, then one line per atom.
        xyz_file = ["{}\n".format(num_atoms), "conformer_{}\n".format(i)]
        for j, symbol in enumerate(symbols):
            pos = conformer.GetAtomPosition(j)
            xyz_file.append("{:6} {:16.8f} {:16.8f} {:16.8f}\n".format(symbol, pos.x, pos.y, pos.z))
        with open("{}/conformers/conformer_{}.xyz".format(out_path, i), 'w') as target:
            target.writelines(xyz_file)
"""
This module defines the core data structures for representing atomic structures:
Atom, Crystal, and Molecule.

These classes store information about atomic coordinates, lattice parameters,
and other physical properties, providing a foundational toolkit for geometric
and structural analysis in materials science simulations.
"""
import copy import copy
from typing import List, Tuple, Union, Any, Optional, Dict from typing import List, Tuple, Union, Any, Optional, Dict
import numpy as np import numpy as np
import fractions import fractions
import re import re
from scipy.spatial.distance import cdist from scipy.spatial.distance import cdist
from tqdm import tqdm from tqdm import tqdm
from basic_function import unit_cell_parser from basic_function import unit_cell_parser
from basic_function import chemical_knowledge from basic_function import chemical_knowledge
from basic_function import operation from basic_function import operation
class Atom:
    """
    Represents a single atom in a chemical structure.

    Any attribute that is not supplied at construction time holds the sentinel
    string "unknown" (or an empty list / dict for ``bonded_atom`` / ``comment``).

    Attributes:
        element (str): The chemical symbol of the atom (e.g., 'H', 'C', 'O').
        cart_xyz (np.ndarray): Cartesian coordinates [x, y, z] in Angstroms.
        frac_xyz (np.ndarray): Fractional coordinates [u, v, w] with respect to a lattice.
        atom_id (int): A unique identifier for the atom within a larger structure.
        force (np.ndarray): Force vector [fx, fy, fz] acting on the atom.
        atom_charge (float): Partial charge of the atom.
        atom_energy (float): Site potential energy of the atom.
        molecule (int): Identifier for the molecule this atom belongs to.
        bonded_atom (list): A list of IDs of atoms bonded to this one.
        descriptor (any): A placeholder for feature vectors or other descriptors.
        comment (dict): A dictionary for storing arbitrary metadata.
    """

    def __init__(self, **kwargs: Any):
        """
        Initializes an Atom object.

        Args:
            **kwargs: Keyword arguments to set atom attributes.
                Required: 'element' and one of 'cart_xyz' or 'frac_xyz'.
                Optional: 'atom_id', 'force_xyz' (or 'force'), 'atom_charge',
                    'atom_energy', 'molecule', 'bonded_atom', 'descriptor',
                    'comment'.
                Unrecognised keywords are ignored.
        """
        self.element: str = kwargs.get("element", "unknown")
        self.cart_xyz: Union[str, np.ndarray] = kwargs.get('cart_xyz', "unknown")
        self.frac_xyz: Union[str, np.ndarray] = kwargs.get('frac_xyz', "unknown")
        self.atom_id: Union[str, int] = kwargs.get("atom_id", "unknown")
        # The attribute is named `force` while the historical keyword is
        # `force_xyz`. Accept both (with `force_xyz` taking precedence) so that
        # a force passed under the attribute's own name is not silently dropped.
        self.force: Union[str, np.ndarray] = kwargs.get('force_xyz', kwargs.get('force', 'unknown'))
        self.atom_charge: Union[str, float] = kwargs.get('atom_charge', 'unknown')
        self.atom_energy: Union[str, float] = kwargs.get('atom_energy', 'unknown')
        self.molecule: Union[str, int] = kwargs.get('molecule', 'unknown')
        # A fresh list/dict is created per call, so instances never share them.
        self.bonded_atom: list = kwargs.get('bonded_atom', [])
        self.descriptor: Any = kwargs.get("descriptor", "unknown")
        self.comment: dict = kwargs.get("comment", {})

    def info(self) -> None:
        """Prints all attributes of the atom to the console."""
        for key, value in self.__dict__.items():
            print(f"{key}: {value}")

    def check(self) -> None:
        """
        Performs basic sanity checks on the atom's attributes.

        Raises:
            AssertionError: If element is not defined, or if neither cartesian
                nor fractional coordinates are provided.
        """
        # Raised explicitly instead of via `assert` so the checks are not
        # stripped when Python runs with -O; the exception type is unchanged.
        if self.element == "unknown":
            raise AssertionError("Atom must have an element type.")
        has_cart = isinstance(self.cart_xyz, (np.ndarray, list))
        has_frac = isinstance(self.frac_xyz, (np.ndarray, list))
        if not (has_cart or has_frac):
            raise AssertionError("Atom needs either cart_xyz or frac_xyz.")
class Crystal: class Crystal:
""" """
Represents a periodic crystal structure. Represents a periodic crystal structure.
Contains lattice information (cell parameters or vectors) and a list of atoms Contains lattice information (cell parameters or vectors) and a list of atoms
that constitute the structure within the unit cell. that constitute the structure within the unit cell.
""" """
def __init__(self, **kwargs: Any):
    """
    Initializes a Crystal object.

    The constructor requires lattice and atom information. It stores the
    supplied values and then calls ``lattice_and_atom_complete()``, which
    fills in the missing lattice/coordinate representation and computes
    derived properties like volume and density.

    Args:
        **kwargs: Keyword arguments to set crystal attributes.
            Required: 'atoms' and one of 'cell_vect' or 'cell_para'.
                - cell_vect (list): 3x3 list or array of lattice vectors.
                - cell_para (list): [[a, b, c], [alpha, beta, gamma]].
                - atoms (List[Atom]): A list of Atom objects.
            Optional: 'energy', 'comment', 'descriptor', 'molecule_number',
                'system_name', 'virial', 'SYMM', 'space_group'.
            Unrecognised keywords are ignored.

    Raises:
        ValueError: Propagated from ``lattice_and_atom_complete()`` when
            neither 'cell_vect' nor 'cell_para' is given.
    """
    # Anything not supplied is stored as the sentinel string "unknown".
    self.cell_vect: Union[str, np.ndarray] = kwargs.get("cell_vect", "unknown")
    self.cell_para: Union[str, list] = kwargs.get("cell_para", "unknown")
    self.atoms: Union[str, List[Atom]] = kwargs.get("atoms", "unknown")
    self.energy: Union[str, float] = kwargs.get("energy", "unknown")
    self.comment: Any = kwargs.get("comment", "unknown")
    self.descriptor: Any = kwargs.get("descriptor", "unknown")
    self.molecule_number: Union[str, int] = kwargs.get("molecule_number", "unknown")
    self.system_name: str = kwargs.get("system_name", "unknown")
    self.virial: Any = kwargs.get("virial", "unknown")
    # Symmetry defaults to the identity operation only, with space group
    # number 1.
    self.SYMM: list = kwargs.get("SYMM", ["x,y,z"])
    self.space_group: int = kwargs.get("space_group", 1)
    self.other_properties: dict = {}
    # This method completes the initialization.
    self.lattice_and_atom_complete()
def lattice_and_atom_complete(self) -> None:
    """
    Completes the initialization by ensuring consistency between cell representations,
    atom coordinates, and calculating derived properties.

    Steps:
        1. Derive whichever of ``cell_vect`` / ``cell_para`` is missing, or
           verify that they agree when both were supplied.
        2. For every atom, derive the missing one of ``cart_xyz`` /
           ``frac_xyz`` from the other using ``cell_vect``.
        3. Set ``self.volume`` and ``self.density``.

    Raises:
        ValueError: If no lattice is defined, or an atom has neither
            cartesian nor fractional coordinates.
        AssertionError: If both ``cell_vect`` and ``cell_para`` are given
            but disagree (absolute tolerance 1e-3).
        KeyError: If an atom's element has no entry in
            ``chemical_knowledge.element_masses``.
    """
    # --- 1. Finalize Lattice Representation ---
    has_vect = isinstance(self.cell_vect, (np.ndarray, list))
    has_para = isinstance(self.cell_para, (np.ndarray, list))
    if not (has_vect or has_para):
        raise ValueError("Crystal lattice is not defined. Provide 'cell_vect' or 'cell_para'.")
    if has_vect and has_para:
        # If both are provided, check for consistency
        derived_para = np.array(unit_cell_parser.cell_vect_to_para(self.cell_vect)).flatten()
        provided_para = np.array(self.cell_para).flatten()
        # Raised explicitly instead of via `assert` so the check survives
        # `python -O`; the exception type and message are unchanged.
        if not np.allclose(derived_para, provided_para, atol=1e-3):
            raise AssertionError("Provided cell_para and cell_vect are inconsistent.")
    elif has_vect:
        self.cell_para = unit_cell_parser.cell_vect_to_para(self.cell_vect)
    else:
        self.cell_vect = unit_cell_parser.cell_para_to_vect(self.cell_para)

    # --- 2. Finalize Atom Coordinates ---
    if self.atoms == "unknown" or not self.atoms:
        print("Warning: Crystal initialized with no atoms.")
        self.atoms = []
    else:
        for atom in self.atoms:
            has_cart = isinstance(atom.cart_xyz, (np.ndarray, list))
            has_frac = isinstance(atom.frac_xyz, (np.ndarray, list))
            if has_cart and has_frac:
                # Both present: left untouched here.
                continue
            if has_frac:
                atom.cart_xyz = unit_cell_parser.atom_frac_to_cart_by_cell_vect(atom.frac_xyz, self.cell_vect)
            elif has_cart:
                atom.frac_xyz = unit_cell_parser.atom_cart_to_frac_by_cell_vect(atom.cart_xyz, self.cell_vect)
            else:
                raise ValueError(f"Atom {atom.element} {atom.atom_id} has no coordinate information.")

    # --- 3. Calculate Derived Properties ---
    self.volume = unit_cell_parser.calculate_volume(self.cell_para)
    if self.atoms:
        avogadro = 6.022140857e23
        total_mass = sum(chemical_knowledge.element_masses[atom.element] for atom in self.atoms)
        # Density in g/cm^3. The 1e-24 factor converts cubic Angstrom to
        # cm^3 (assumes calculate_volume returns cubic Angstrom - confirm
        # against unit_cell_parser).
        self.density = total_mass / (self.volume * 1e-24) / avogadro
    else:
        self.density = 0.0
def update_cart_by_frac(self) -> None:
    """
    Refresh every atom's cartesian position from its fractional position.

    For each atom in ``self.atoms`` the ``cart_xyz`` attribute is overwritten
    with the value returned by
    ``unit_cell_parser.atom_frac_to_cart_by_cell_vect`` for the atom's
    ``frac_xyz`` and the crystal's ``cell_vect``.
    """
    for site in self.atoms:
        converted = unit_cell_parser.atom_frac_to_cart_by_cell_vect(
            site.frac_xyz,
            self.cell_vect,
        )
        site.cart_xyz = converted
def update_frac_by_cart(self) -> None:
    """
    Refresh every atom's fractional position from its cartesian position.

    For each atom in ``self.atoms`` the ``frac_xyz`` attribute is overwritten
    with the value returned by
    ``unit_cell_parser.atom_cart_to_frac_by_cell_vect`` for the atom's
    ``cart_xyz`` and the crystal's ``cell_vect``.
    """
    for site in self.atoms:
        converted = unit_cell_parser.atom_cart_to_frac_by_cell_vect(
            site.cart_xyz,
            self.cell_vect,
        )
        site.frac_xyz = converted
def check(self) -> None:
    """
    Performs consistency checks on the crystal structure.

    The checks run in this order:

    1. ``self.lattice_and_atom_complete()`` is called to (re)validate the
       lattice and atom data.
    2. For every atom, cartesian coordinates are re-derived from
       ``frac_xyz`` and ``self.cell_vect`` and compared with the stored
       ``cart_xyz`` using an absolute tolerance of 1e-3.
    3. A message is printed stating whether every atom has an ``atom_id``
       different from the placeholder ``"unknown"``.

    Raises:
        AssertionError: If an atom's stored cartesian coordinates do not
            match the ones derived from its fractional coordinates.
    """
    print("Performing consistency checks...")
    # Check lattice consistency
    self.lattice_and_atom_complete()
    # Check atom coordinate consistency
    for atom in self.atoms:
        derived_cart = unit_cell_parser.atom_frac_to_cart_by_cell_vect(atom.frac_xyz, self.cell_vect)
        if not np.allclose(atom.cart_xyz, derived_cart, atol=1e-3):
            # Raised explicitly instead of via `assert` so the validation
            # still runs when Python is started with -O (which strips
            # assert statements). The exception type is unchanged.
            raise AssertionError(
                f"Atom {atom.atom_id} cartesian and fractional coordinates do not match."
            )
    # Check atom IDs
    if all(atom.atom_id != "unknown" for atom in self.atoms):
        print("All atoms have IDs.")
    else:
        print("Warning: Not all atoms have IDs. Use .give_atom_id_forced() to assign them.")
    print("Checks passed.")
def give_atom_id_forced(self) -> None:
    """
    Renumber all atoms and wipe their bonding lists.

    After printing a warning, each atom receives its position in
    ``self.atoms`` (0 .. N-1) as ``atom_id`` and a fresh empty list as
    ``bonded_atom``.
    """
    print("Warning: Resetting all atom IDs and bonding information!")
    for position, site in enumerate(self.atoms):
        site.atom_id, site.bonded_atom = position, []
def move_atom_into_cell(self) -> None:
    """
    Wrap all atoms back into the primary unit cell.

    Each atom's fractional coordinates are reduced modulo 1.0, so every
    component ends up in the half-open range [0, 1). Cartesian coordinates
    are then regenerated through ``self.update_cart_by_frac()``.
    """
    for site in self.atoms:
        # np.remainder is the function np.mod aliases; the result has the
        # sign of the divisor, so negative components wrap to [0, 1).
        wrapped = np.remainder(site.frac_xyz, 1.0)
        site.frac_xyz = wrapped
    self.update_cart_by_frac()
def find_molecule(self, tolerance: float = 1.15) -> None:
    """
    Label each atom with the molecule it belongs to.

    Atoms are first wrapped into the unit cell via
    ``self.move_atom_into_cell()``. Starting from the lowest-index atom that
    has not yet been assigned, a breadth-first search follows every pair that
    ``operation.is_bonding_crystal`` reports as bonded. Every atom reached
    gets the current molecule number written to ``atom.molecule``; the total
    number of molecules found is stored in ``self.molecule_number``.

    Args:
        tolerance: Forwarded unchanged to ``operation.is_bonding_crystal``
            as its ``tolerance`` keyword (a scaling factor applied to the
            bonding distance criterion).
    """
    self.move_atom_into_cell()
    atom_count = len(self.atoms)
    pending = list(range(atom_count))
    label = 0
    while pending:
        label += 1
        seed = pending[0]
        # `frontier` doubles as the BFS queue; `cursor` is its read position.
        frontier = [seed]
        members = {seed}
        cursor = 0
        while cursor < len(frontier):
            here = frontier[cursor]
            cursor += 1
            self.atoms[here].molecule = label
            for there in range(atom_count):
                if there == here:
                    continue
                # update_atom2=False: coordinates must not be modified while
                # the search is running.
                bonded, _ = operation.is_bonding_crystal(
                    self.atoms[here],
                    self.atoms[there],
                    self.cell_vect,
                    tolerance=tolerance,
                    update_atom2=False
                )
                if not bonded or there in members:
                    continue
                members.add(there)
                frontier.append(there)
        # Drop everything that was just labelled from the work list.
        pending = [index for index in pending if index not in members]
    self.molecule_number = label
def get_element(self) -> List[str]:
    """
    Return the distinct element symbols present in the crystal.

    The unique symbols are collected into a set and handed to
    ``chemical_knowledge.sort_by_atomic_number``, whose result is returned.
    """
    unique_symbols = set(site.element for site in self.atoms)
    return chemical_knowledge.sort_by_atomic_number(unique_symbols)
def get_element_amount(self) -> List[int]:
    """
    Return how many atoms of each element the crystal contains.

    The counts are listed in the same order as the symbols returned by
    ``self.get_element()``.
    """
    tally = {}
    for site in self.atoms:
        tally[site.element] = tally.get(site.element, 0) + 1
    return [tally.get(symbol, 0) for symbol in self.get_element()]
def make_p1(self) -> None:
    """
    Replace the atom list with all symmetry-generated copies (P1 expansion).

    Each entry of ``self.SYMM`` (a comma-separated operator such as
    ``"-x, y+1/2, z"``) is parsed into a 3x3 matrix and a 1x3 translation.
    The fractional coordinates of all current atoms are multiplied by every
    matrix and shifted by the matching translation; the resulting positions
    are flattened to an (N, 3) array and turned into new ``Atom`` objects
    numbered from 0. Only ``element``, ``frac_xyz`` and ``atom_id`` are
    passed to the new atoms. Finally ``space_group`` is set to 1, ``SYMM``
    is reset, and cartesian coordinates are rebuilt.

    Raises:
        Exception: If a fragment of a symmetry operator is neither a signed
            axis letter nor accepted by ``operation.is_number``.

    NOTE(review): ``SYMM`` is reset to the *string* "[x,y,z]" rather than a
    list of operators; iterating it here again would go character by
    character. Confirm this is what the rest of the code expects.
    """
    symbols, base_frac = self.get_ele_and_frac()

    # fragment -> (matrix row, value written into that row at the column of
    # the operator component being parsed)
    axis_terms = {
        'x': (0, 1), '+x': (0, 1), '-x': (0, -1),
        'y': (1, 1), '+y': (1, 1), '-y': (1, -1),
        'z': (2, 1), '+z': (2, 1), '-z': (2, -1),
    }

    rotations = []
    translations = []
    for operator_text in self.SYMM:
        components = operator_text.lower().replace(" ", "").split(",")
        rotation = np.zeros((3, 3))
        shift = np.zeros((1, 3))
        for column, component in enumerate(components):
            for term in re.findall(r".*?([+-]*[xyz0-9\/\.]+)", component):
                axis_entry = axis_terms.get(term)
                if axis_entry is not None:
                    row, sign = axis_entry
                    rotation[row][column] = sign
                elif operation.is_number(term) is True:
                    # Translations may be written as fractions, e.g. "1/2".
                    shift[0][column] = float(fractions.Fraction(term))
                else:
                    raise Exception("wrong sym opt of" + term)
        rotations.append(rotation)
        translations.append(shift)

    images = []
    for rotation, shift in zip(rotations, translations):
        moved = np.dot(np.array([base_frac]), rotation) + shift
        images.append(moved.squeeze())

    repeated_symbols = symbols * len(self.SYMM)
    # reshape(-1, 3) guarantees one row per atom even when squeeze() above
    # collapsed a single-atom result to a 1-D vector.
    stacked = np.array(images).reshape(-1, 3)
    expanded = [
        Atom(element=symbol, frac_xyz=position, atom_id=serial)
        for serial, (symbol, position) in enumerate(zip(repeated_symbols, stacked))
    ]

    self.SYMM = "[x,y,z]"
    self.space_group = 1
    self.atoms = expanded
    self.update_cart_by_frac()
def sort_by_element(self) -> None:
    """
    Sort ``self.atoms`` in place by element.

    The sort key of an atom is the entry stored for its element symbol in
    ``chemical_knowledge.periodic_table_list`` (presumably the atomic
    number -- verify against that table).
    """
    def element_rank(site):
        return chemical_knowledge.periodic_table_list[site.element]

    self.atoms.sort(key=element_rank)
def get_ele_and_cart(self) -> Tuple[List[str], np.ndarray]:
    """
    Collect element symbols and cartesian coordinates of all atoms.

    Returns:
        A tuple ``(symbols, coords)``: the list of ``atom.element`` values and
        an array built from the ``atom.cart_xyz`` values, in atom order.
        Without atoms, an empty list and an empty array of shape (0, 3).
    """
    if self.atoms:
        symbols = []
        positions = []
        for site in self.atoms:
            symbols.append(site.element)
            positions.append(site.cart_xyz)
        return symbols, np.array(positions)
    return [], np.empty((0, 3))
def get_ele_and_frac(self) -> Tuple[List[str], np.ndarray]:
    """
    Collect element symbols and fractional coordinates of all atoms.

    Returns:
        A tuple ``(symbols, coords)``: the list of ``atom.element`` values and
        an array built from the ``atom.frac_xyz`` values, in atom order.
        Without atoms, an empty list and an empty array of shape (0, 3).
    """
    if self.atoms:
        symbols = []
        positions = []
        for site in self.atoms:
            symbols.append(site.element)
            positions.append(site.frac_xyz)
        return symbols, np.array(positions)
    return [], np.empty((0, 3))
def info(self, all_info: bool = False) -> None:
    """
    Print a human-readable report of the crystal to standard output.

    The report lists the system name, lattice vectors, lattice parameters,
    volume and density, then one table row per atom, and finally the
    energy, comment and virial attributes.

    Args:
        all_info: When False (default) the atom table shows element and
            cartesian coordinates only. When True it shows atom ID, element,
            fractional coordinates and cartesian coordinates.
    """
    print("--- Crystal System ---")
    print(f"Name: {self.system_name}")

    print("Lattice Vectors (Angstrom):")
    for row in self.cell_vect:
        print(f"{row[0]:16.8f} {row[1]:16.8f} {row[2]:16.8f}")

    print("Lattice Parameters:")
    lengths = self.cell_para[0]
    print(f"a, b, c (A): {lengths[0]:.4f}, {lengths[1]:.4f}, {lengths[2]:.4f}")
    angles = self.cell_para[1]
    print(f"alpha, beta, gamma (deg): {angles[0]:.4f}, {angles[1]:.4f}, {angles[2]:.4f}")
    print(f"Volume (A^3): {self.volume:.4f} | Density (g/cm^3): {self.density:.4f}")

    print(f"\n--- Atomic Coordinates (Total: {len(self.atoms)}) ---")
    if all_info:
        header = (
            f"{'ID':<5} {'Elem':<6} "
            f"{'Frac X':>10} {'Frac Y':>10} {'Frac Z':>10} | "
            f"{'Cart X':>12} {'Cart Y':>12} {'Cart Z':>12}"
        )
        print(header)
        print("-" * len(header))
        for site in self.atoms:
            # Atoms without an assigned ID are shown with a dash.
            shown_id = '-' if site.atom_id == 'unknown' else str(site.atom_id)
            frac = site.frac_xyz
            cart = site.cart_xyz
            print(
                f"{shown_id:<5} {site.element:<6} "
                f"{frac[0]:10.6f} {frac[1]:10.6f} {frac[2]:10.6f} | "
                f"{cart[0]:12.6f} {cart[1]:12.6f} {cart[2]:12.6f}"
            )
    else:
        print(f"{'Element':<10} {'Cartesian X':>16} {'Cartesian Y':>16} {'Cartesian Z':>16}")
        print("-" * 58)
        for site in self.atoms:
            cart = site.cart_xyz
            print(f"{site.element:<10} {cart[0]:16.8f} {cart[1]:16.8f} {cart[2]:16.8f}")

    print("\n--- Other Properties ---")
    print(f"Energy: {self.energy}")
    print(f"Comment: {self.comment}")
    print(f"Virial: {self.virial}")
class Molecule:
    """Represents a non-periodic molecule (a collection of atoms)."""

    def __init__(self, **kwargs: Any):
        """
        Initializes a Molecule object.

        Args:
            **kwargs: Keyword arguments to set molecule attributes.
                Required: 'atoms' (List[Atom]).
                Optional: 'energy', 'comment', 'descriptor', 'name',
                'system_name'.

        Every attribute that is not supplied is set to the placeholder string
        "unknown"; a missing 'atoms' is replaced by an empty list after a
        warning has been printed.
        """
        self.atoms: Union[str, List[Atom]] = kwargs.get("atoms", "unknown")
        self.energy: Union[str, float] = kwargs.get("energy", "unknown")
        self.comment: Any = kwargs.get("comment", "unknown")
        self.descriptor: Any = kwargs.get("descriptor", "unknown")
        self.name: str = kwargs.get("name", "unknown")
        self.system_name: str = kwargs.get("system_name", "unknown")
        if self.atoms == "unknown":
            print("Warning: Molecule initialized with no atoms.")
            self.atoms = []

    def give_atom_id_forced(self) -> None:
        """Assigns or resets atom IDs from 0 to N-1 and clears bonding info."""
        print("Warning: Resetting all atom IDs and bonding information!")
        for i, atom in enumerate(self.atoms):
            atom.atom_id = i
            atom.bonded_atom = []

    def get_element(self) -> List[str]:
        """Returns the unique element symbols, ordered by ``chemical_knowledge.sort_by_atomic_number``."""
        if not self.atoms:
            return []
        return chemical_knowledge.sort_by_atomic_number(set(atom.element for atom in self.atoms))

    def get_element_amount(self) -> List[int]:
        """Returns the count of each element, in the order given by ``get_element()``."""
        if not self.atoms:
            return []
        all_elements = [atom.element for atom in self.atoms]
        return [all_elements.count(element) for element in self.get_element()]

    def get_ele_and_cart(self) -> Tuple[List[str], np.ndarray]:
        """
        Returns all element symbols and their cartesian coordinates.

        Returns:
            ``(elements, coords)`` in atom order; for a molecule without
            atoms an empty list and an empty array of shape (0, 3).
        """
        if not self.atoms:
            return [], np.empty((0, 3))
        all_ele = [atom.element for atom in self.atoms]
        all_carts = np.array([atom.cart_xyz for atom in self.atoms])
        return all_ele, all_carts

    def put_ele_cart_back(self, all_ele: List[str], all_carts: np.ndarray) -> None:
        """
        Updates the existing atoms from lists of elements and coordinates.

        Atom ``i`` receives ``all_ele[i]`` and ``all_carts[i]``; the number of
        atoms is not changed by this method.
        """
        for i, atom in enumerate(self.atoms):
            atom.element = all_ele[i]
            atom.cart_xyz = all_carts[i]

    def build_molecules_by_ele_cart(self, all_ele: List[str], all_carts: np.ndarray) -> None:
        """
        Rebuilds the molecule's atoms list from elements and coordinates.

        The new atoms are numbered 0..N-1 in input order.

        Raises:
            AssertionError: If the two inputs differ in length.
        """
        if len(all_ele) != len(all_carts):
            # Explicit raise so the check is not stripped under `python -O`;
            # the exception type matches the former assert statement.
            raise AssertionError("Element and coordinate lists must have the same length.")
        self.atoms = [
            Atom(element=ele, cart_xyz=cart, atom_id=i)
            for i, (ele, cart) in enumerate(zip(all_ele, all_carts))
        ]

    def get_mass(self) -> float:
        """Returns the sum of ``chemical_knowledge.element_masses`` over all atoms (0.0 if empty)."""
        if not self.atoms:
            return 0.0
        return sum(chemical_knowledge.element_masses[atom.element] for atom in self.atoms)

    def get_center_of_mass(self) -> np.ndarray:
        """
        Calculates the mass-weighted mean of the cartesian coordinates.

        Returns:
            The center of mass, or a zero vector of length 3 when the
            molecule has no atoms or the total mass is zero.
        """
        if not self.atoms:
            return np.zeros(3)
        all_ele, all_carts = self.get_ele_and_cart()
        masses = np.array([chemical_knowledge.element_masses[x] for x in all_ele])
        total_mass = np.sum(masses)
        if total_mass == 0:
            return np.zeros(3)
        return np.sum(all_carts * masses[:, np.newaxis], axis=0) / total_mass

    def sort_by_element(self) -> None:
        """Sorts the atoms in place by their entry in ``chemical_knowledge.periodic_table_list``."""
        self.atoms.sort(key=lambda atom: chemical_knowledge.periodic_table_list[atom.element])

    def sort_by_id(self) -> None:
        """Sorts the atoms list in place based on their atom_id."""
        self.atoms.sort(key=lambda atom: atom.atom_id)

    def info(self) -> None:
        """Prints a formatted summary of the molecule."""
        print("--- Molecule ---")
        print(f"Name: {self.name} | System: {self.system_name}")
        print(f"Number of atoms: {len(self.atoms)}")
        print(f"Total Mass (amu): {self.get_mass():.4f}")
        print(f"Energy: {self.energy}")
        print(f"Comment: {self.comment}")
        print(f"\n{'Element':<10} {'Cartesian X':>16} {'Cartesian Y':>16} {'Cartesian Z':>16}")
        print("-" * 58)
        for atom in self.atoms:
            print(f"{atom.element:<10} {atom.cart_xyz[0]:16.8f} {atom.cart_xyz[1]:16.8f} {atom.cart_xyz[2]:16.8f}")

    def find_fragment(self, tolerance: float = 1.15) -> Dict[int, List[int]]:
        """
        Identifies covalently bonded fragments within the molecule.

        This is useful for molecules that are actually composed of several
        disconnected components (e.g., salts, solvent shells). Two atoms are
        treated as bonded when their distance is smaller than the sum of
        their covalent radii multiplied by ``tolerance``.

        Args:
            tolerance: Scaling factor for covalent radii to determine bonding.

        Returns:
            A dictionary mapping a fragment ID (starting from 1) to a list of
            atom indices (plain Python ints) belonging to that fragment.
        """
        if not self.atoms:
            return {}
        num_atoms = len(self.atoms)
        cart_matrix = np.array([atom.cart_xyz for atom in self.atoms])
        radii = np.array([chemical_knowledge.element_covalent_radii[atom.element] for atom in self.atoms])
        # Matrix of bond thresholds (r_i + r_j) * tolerance
        bond_threshold_matrix = (radii[:, np.newaxis] + radii) * tolerance
        # True where the distance is below the bond threshold
        dist_matrix = cdist(cart_matrix, cart_matrix)
        adj_matrix = dist_matrix < bond_threshold_matrix
        np.fill_diagonal(adj_matrix, False)
        # Depth-first traversal to find connected components
        visited = [False] * num_atoms
        groups: Dict[int, List[int]] = {}
        group_id = 0
        for i in range(num_atoms):
            if visited[i]:
                continue
            group_id += 1
            groups[group_id] = []
            stack = [i]
            while stack:
                atom_idx = stack.pop()
                if visited[atom_idx]:
                    continue
                visited[atom_idx] = True
                groups[group_id].append(atom_idx)
                # Convert to built-in int so the result does not mix int and
                # numpy.int64; already-visited neighbours would be skipped
                # when popped anyway, so they are not pushed.
                stack.extend(
                    int(neighbor)
                    for neighbor in np.flatnonzero(adj_matrix[atom_idx])
                    if not visited[neighbor]
                )
        return groups

    def give_molecule_id(self, tolerance: float = 1.15) -> None:
        """Assigns the fragment ID from ``find_fragment`` to each atom's ``molecule`` attribute."""
        fragments = self.find_fragment(tolerance=tolerance)
        for group_id, atom_indices in fragments.items():
            for atom_idx in atom_indices:
                self.atoms[atom_idx].molecule = group_id

    def take_out_fragment(self, tolerance: float = 1.15) -> List['Molecule']:
        """
        Splits the current molecule into a list of new Molecule objects,
        one for each disconnected fragment.

        Note that this first calls ``give_atom_id_forced()`` on this
        molecule, which renumbers its atoms and clears their bonding lists.
        The atoms of the returned molecules are deep copies.
        """
        if not self.atoms:
            return []
        self.give_atom_id_forced()  # Ensure IDs are set for lookup
        fragments = self.find_fragment(tolerance=tolerance)
        new_molecules = []
        for i, atom_indices in fragments.items():
            fragment_atoms = [self.atoms[j] for j in atom_indices]
            new_mol = Molecule(
                atoms=copy.deepcopy(fragment_atoms),
                name=f"{self.name}_frag{i}",
                system_name=f"{self.system_name}_frag{i}"
            )
            new_molecules.append(new_mol)
        return new_molecules

    def calculate_frac_xyz_by_cell_para(self, cell_para: list) -> None:
        """Calculates fractional coordinates for all atoms given cell parameters."""
        for atom in self.atoms:
            atom.frac_xyz = unit_cell_parser.atom_cart_to_frac_by_cell_para(atom.cart_xyz, cell_para)

    def molecule_volume(self, num_samples: int = 100000) -> float:
        """
        Calculates the van der Waals volume using a Monte Carlo integration method.

        Random points are drawn uniformly in a bounding box around the
        molecule (padded by the largest vdW radius); the fraction of points
        lying inside at least one atomic sphere, multiplied by the box
        volume, is the volume estimate.

        Args:
            num_samples: The number of random points to sample. More points
                yield a more accurate volume at the cost of performance.

        Returns:
            The estimated van der Waals volume in cubic Angstroms
            (0.0 for a molecule without atoms).

        Raises:
            ValueError: If ``num_samples`` is not positive, or if the vdW
                radius table holds ``None`` for one of the elements.
        """
        if not self.atoms:
            return 0.0
        if num_samples <= 0:
            raise ValueError("num_samples must be a positive integer.")
        elements, coords = self.get_ele_and_cart()
        raw_radii = [chemical_knowledge.element_vdw_radii[el] for el in elements]
        # The radius table contains None for elements without a tabulated
        # value; fail with a clear message instead of a numpy TypeError.
        missing = sorted({el for el, radius in zip(elements, raw_radii) if radius is None})
        if missing:
            raise ValueError(
                "No van der Waals radius available for element(s): " + ", ".join(missing)
            )
        radii = np.array(raw_radii, dtype=float)
        # Determine bounding box for sampling
        padding = np.max(radii)
        min_bounds = np.min(coords, axis=0) - padding
        max_bounds = np.max(coords, axis=0) + padding
        bounding_box_volume = np.prod(max_bounds - min_bounds)
        # Generate random sample points within the bounding box
        random_points = np.random.uniform(min_bounds, max_bounds, (num_samples, 3))
        radii_sq = radii ** 2
        # Evaluate the points in chunks: each chunk needs a
        # (chunk_size x n_atoms) distance matrix, which keeps memory bounded
        # while avoiding one Python-level iteration per sample point.
        chunk_size = 1024
        count_inside = 0
        for start in tqdm(range(0, num_samples, chunk_size), desc="Monte Carlo Volume", leave=False):
            chunk = random_points[start:start + chunk_size]
            dist_sq = cdist(chunk, coords, metric="sqeuclidean")
            # A point is inside the molecule if it lies within ANY sphere
            count_inside += int(np.count_nonzero(np.any(dist_sq <= radii_sq, axis=1)))
        return (count_inside / num_samples) * bounding_box_volume
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment