from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.Fraggle import FraggleSim
from rdkit import DataStructs,Chem
from rdkit.Avalon import pyAvalonTools
import ase
from ase import Atoms
from ase.geometry.analysis import Analysis
from ase.neighborlist import build_neighbor_list,natural_cutoffs
import numpy as np
import pandas as pd
from functools import lru_cache
from jinja2 import Template, Environment, FileSystemLoader
import os
from matlanticgrrm.geometry.geometry import _Geometry, _calc_max_distance, atoms2smiles, atoms2mol,_renumbering_target,_atoms2mol
from matlanticgrrm.geometry.mol2png_binary import mol2png_binary
import matlanticgrrm.atomslist as ma
from copy import deepcopy
[ドキュメント]class Geometries():
"""同一構造の判定や類似構造の判定などを行なえるクラス.
target0,target1,target2に指定した原子の結合を見て同一構造か判断する.
Parameters:
atoms_list: list of Atoms
Atomsオブジェクトのリスト
target0: list of integer
target0,target1,target2に指定した原子との結合状態を見る
target1: list of integer
target0,target1に指定した原子との結合状態を見る
target2: list of integer
target0に指定した原子との結合状態を見る
mult: float
| 結合判定を行なう時の係数
| 大きい程,遠く離れていても結合しているとみなす
kargs:
| 共有結合半径を変更する
| ex) Al = 0.25
Property:
smileses: list of string
atoms_listのSMILES表記のリスト(全て単結合として判断する)
mols:
RDkitのMolオブジェクトのリスト
group: list of integer
| atoms_list中の同じ構造を推定し,グループ番号を振って返す.
| ex)[0,0,1,1,0,0,2,2] の場合,EQ0,EQ1,EQ4,EQ5が同じグループに属している.
| propertyであるが,呼び出し時にgroup化の計算を行なう. 1度行えばキャッシュを残すので次回の呼び出しには時間はかからない.
| atoms_list,mult,target012を変更した際には再計算される.
cluster: 2D list of integer
| atoms_list中の同じ構造を推定し,同じグループごとにまとめる(index番号でまとめる)
| ex)[[0,1,4,5],[2,3],[6,7]] の場合,EQ0,EQ1,EQ4,EQ5が同じグループに属している.
| propertyであるが,呼び出し時にgroup化の計算を行なう. 1度行えばキャッシュを残すので次回の呼び出しには時間はかからない.
| atoms_list,mult,target012を変更した際には再計算される.
atoms_list: list of Atoms
| 引数に指定したAtomsリスト
| 上書きする場合はset_mult()を用いる. 上書きするとsmileses,molsも自動で変更される.
mult: float
| 引数に指定したmult.
| 上書きする場合はset_mult()を用いる. 上書きするとsmileses,molsも自動で変更される.
target0: list of integer
| 引数に指定したtarget0
| 上書きする場合はset_target0()を用いる. 上書きするとsmileses,molsも自動で変更される.
target1: list of integer
| 引数に指定したtarget1
| 上書きする場合はset_target1()を用いる. 上書きするとsmileses,molsも自動で変更される.
target2: list of integer
| 引数に指定したtarget2
| 上書きする場合はset_target2()を用いる. 上書きするとsmileses,molsも自動で変更される.
Examples:
.. code-block:: python
from matlanticgrrm.grrmdata import log2atoms
from matlanticgrrm.geometry import Geometries
eq_list1 = log2atoms("sample1_EQ_list.log")
eq_list2 = log2atoms("sample2_EQ_list.log") #eq_list1とeq_list2は同じ要素数
eq_geometries1 = Geometries(eq_list1,[20,21,22,23,24,25,26,27,28])
eq_geometries2 = Geometries(eq_list2,[20,21,22,23,24,25,26,27,28])
eq_geometries1[0] == eq_geometries1[2]
>>> [True] #EQ0とEQ2は同じ構造
eq_geometries1[[0,1,2]] == eq_geometries1[[2,4,5]]
>>> [True,False,False] #EQ0-EQ2は同じ構造,EQ1-eQ4とEQ2-EQ5は異なる構造
eq_geometries1 == eq_geometries2
>>> [True,False,True,True,....] # eq_list1のEQ0とeq_list2のEQ0の構造は同じ
"""
def __init__(self,atoms_list,target0=None,target1=[],target2=[],mult=1,mols=None,smileses=None,geometry_list=None,**kwargs):
"""2種類のインスタンス方法がある"""
self.__atoms_list = atoms_list
self.__target0 = target0
self.__target1 = target1
self.__target2 = target2
self.__mult = mult
if mols and smileses and geometry_list: # __getitem__の時に使う
self.__mols = mols
self.__smileses = smileses
self._geometry_list = geometry_list
else: #通常のインスタンス化
self.__mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs)
self.__smileses = [Chem.MolToSmiles(mol) if mol else None for mol in self.mols]
self._geometry_list = [_Geometry(atoms,target0,target1,target2,mult,mol,smiles)
for atoms,mol,smiles in zip(self.atoms_list,self.mols,self.smileses)]
self.__group, self.__cluster = self._assign_group_and_cluster(self.smileses)
def initialize(self,atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs):
super().__init__(atoms_list,target0,target1,target2,mult)
self.__mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs)
self.__smileses = [Chem.MolToSmiles(mol) if mol else None for mol in self.mols]
self._geometry_list = [_Geometry(atoms,target0,target1,target2,mult,mol,smiles)
for atoms,mol,smiles in zip(self.atoms_list,self.mols,self.smileses)]
self.__group, self.__cluster = self._assign_group_and_cluster(self.smileses)
@property
def atoms_list(self):
return self.__atoms_list
def set_atoms_list(self, atoms_list):
self.__atoms_list = atoms_list
self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult)
@property
def mult(self):
return self.__mult
def set_mult(self, mult):
self.__mult = mult
self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult)
@property
def target0(self):
return self.__target0
def set_target0(self, target0):
self.__target0 = target0
self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult)
@property
def target1(self):
return self.__target1
def set_target1(self, target1):
self.__target1 = target1
self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult)
@property
def target2(self):
return self.__target2
def set_target2(self, target2):
self.__target2 = target2
self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult)
@property
def smileses(self):
return list(self.__smileses)
@property
def mols(self):
return self.__mols
@property
def group(self):
return self.__group
@property
def cluster(self):
return self.__cluster
def __len__(self):
return len(self._geometry_list)
def __eq__(self,other):
if len(self) != len(other):
raise ValueError("要素数が異なります")
return [smiles1==smiles2 for smiles1,smiles2 in zip(self.smileses,other.smileses)]
def __ne__(self, other):
return [smiles1!=smiles2 for smiles1,smiles2 in zip(self.smileses,other.smileses)]
def __add__(self,other):
"""targetは最初のobjectが採用される"""
atoms_list= ma.copy(self.atoms_list) + ma.copy(other.atoms_list)
target0 = self.target0
target1 = self.target1
target2 = self.target2
mult = self.mult
mols = deepcopy(self.mols) + deepcopy(other.mols)
smileses = deepcopy(self.smileses) + deepcopy(other.smileses)
geometry_list = deepcopy(self._geometry_list) + deepcopy(other._geometry_list)
return Geometries(atoms_list,target0,target1,target2,mult,mols,smileses,geometry_list)
def __getitem__(self,index):
target0 = self.target0
target1 = self.target1
target2 = self.target2
mult = self.mult
if type(index) == list:
if len(index)==len(self) and type(index[0])==bool:
"""要素数と同じ数のbool[True,False,False, ...]のようにマスクで与えた場合"""
index = [i for i,b in enumerate(index) if b]
print(index)
else:
"""[[1,2,3]]のようにindex番号のリストで入れた場合"""
pass
atoms_list = [atoms for i,atoms in enumerate(self.atoms_list) if i in index]
mols = [mol for i,mol in enumerate(self.mols) if i in index]
smileses = [smiles for i,smiles in enumerate(self.smileses) if i in index]
geometry_list = [geometry for i,geometry in enumerate(self._geometry_list) if i in index]
elif type(index) == slice:
"""[2:5]のようにスライスで入れた場合"""
atoms_list = self.atoms_list[index]
mols = self.mols[index]
smileses = self.smileses[index]
geometry_list = self._geometry_list[index]
elif type(index) == int:
"""[1]のようにindex番号を入れた場合"""
atoms_list = [self.atoms_list[index]]
mols = [self.mols[index]]
smileses = [self.smileses[index]]
geometry_list = [self._geometry_list[index]]
return Geometries(atoms_list,target0,target1,target2,mult,mols,smileses,geometry_list)
def __str__(self):
return str([smiles for smiles in self.smileses])
[ドキュメント] def same(self,idx,other=None):
"""同じ構造を検索する
| idxと同じ構造を検索する
| otherにGeometriesを設定した場合はidxで指定した構造がotherにないか検索する
| other=Noneの場合, 自身の構造の中からidxと同じ構造を検索する
| 同じ構造のindex番号のリストを返す
Parameters:
idx: int
対象の構造のindex番号
other: Geometries
Geometriesオブジェクト
Exsamples:
.. code-block:: python
from matlanticgrrm.grrmdata import log2atoms
from matlanticgrrm.geometry import Geometries
eq_list = log2atoms("*_EQ_list.log")
eq_geometries = Geometries(eq_list,[20,21,22,23,24,25,26,27,28])
eq_geometries.same(6) # --> [6,7,21]
#EQ6はEQ,6,21と同じ構造である
"""
if other is None:
group_name = self.group[idx]
return [i for i,name in enumerate(self.group) if group_name==name]
else:
smiles = self.smileses[idx]
other_smileses = other.smileses
return [i for i,other_smiles in enumerate(other_smileses) if other_smiles==smiles]
[ドキュメント] def similar(self,idx,method="maccs"):
"""類似構造を検索する
| idxで指定した構造と類似の構造を検索する
| 類似度の高い順に[(index番号,類似度)]のリストで返す
| 類似度は1に近い程,類似性が高い
Parameters:
idx: int
index番号
method:
"maccs","tanimoto","avalon"の三種類がある.
Examples:
.. code-block:: python
eq_list = log2atoms("*_EQ_list.log")
eq_geometries = Geometries(eq_list,[20,21,22,23,24,25,26,27,28])
eq_geometries.similar(6)
#EQ6はEQ,6,21と同じ構造である
>>> [(6, 1.0),
>>> (7, 1.0),
>>> (21, 1.0),
>>> (2, 0.92),
>>> (5, 0.92),
>>> (8, 0.92),
>>> (9, 0.92),
>>> (10, 0.92),
>>> (11, 0.92),
>>> (12, 0.92),
>>> ...
>>> ...
# EQ6と構造似ている順で出力される
"""
mols = deepcopy(self.mols)
for mol in mols:
mol.UpdatePropertyCache(strict=False)
Chem.SanitizeMol(
mol,
Chem.SanitizeFlags.SANITIZE_FINDRADICALS|\
Chem.SanitizeFlags.SANITIZE_KEKULIZE|\
Chem.SanitizeFlags.SANITIZE_SETAROMATICITY|\
Chem.SanitizeFlags.SANITIZE_SETCONJUGATION|\
Chem.SanitizeFlags.SANITIZE_SETHYBRIDIZATION|\
Chem.SanitizeFlags.SANITIZE_SYMMRINGS,
catchErrors=True) #長いので改行
if method == "maccs":
return self.macss(idx,mols)
elif method == "tanimoto":
return self.tanimoto(idx,mols)
elif method == "avalon":
return self.avalon(idx,mols)
def macss(self,idx,mols):
maccs_fps = [AllChem.GetMACCSKeysFingerprint(mol) for mol in mols]
maccs = DataStructs.BulkTanimotoSimilarity(maccs_fps[idx], maccs_fps)
maccs_dict = {i:val for i,val in enumerate(maccs)}
maccs_dict = sorted(maccs_dict.items(),key=lambda x: x[1], reverse=True)
return maccs_dict
def tanimoto(self,idx,mols):
morgan_fp = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in mols]
tanimoto = DataStructs.BulkTanimotoSimilarity(morgan_fp[idx], morgan_fp)
tanimoto_dict = {i:val for i,val in enumerate(tanimoto)}
tanimoto_dict = sorted(tanimoto_dict.items(),key=lambda x: x[1], reverse=True)
return tanimoto_dict
def avalon(self,idx,mols):
avalon_fps = [pyAvalonTools.GetAvalonFP(mol) for mol in mols]
avalon = DataStructs.BulkTanimotoSimilarity(avalon_fps[idx], avalon_fps)
avalon_dict = {i:val for i,val in enumerate(avalon)}
avalon_dict = sorted(avalon_dict.items(),key=lambda x: x[1], reverse=True)
return avalon_dict
def _assign_group_and_cluster(self,smileses):
df = pd.DataFrame({'similes': smileses})
unique = df['similes'].unique().tolist()
group = [unique.index(smile) for smile in smileses]
culster = [[] for _ in range(len(unique))]
for i,smiles in enumerate(smileses):
culster[unique.index(smiles)].append(i)
return group, culster
def get_reaction_summary(self,connections,html="Reactions_Summary.html"):
self.get_valid_reaction_and_summary(connections,html)
def get_valid_reaction_and_summary(self,connections,html="Reactions_Summary.html"):
atoms_list = self.atoms_list
group = self.group
smileses = self.smileses
if self.target2 == []:
mols = self.mols # Pngを作成するためのMolオブジェクト
else:
mols = atomslist2mols(self.atoms_list,self.target0,self.target1,mult=self.mult)
def _add_invalid_reactions(ts_idx, ini_idx, fin_idx):
ini_group = group[ini_idx] if type(ini_idx)!=str else ""
fin_group = group[fin_idx] if type(fin_idx)!=str else ""
append_invaid([ts_idx,ini_idx,ini_group,fin_idx,fin_group])
unique_react = {}
unique_title_img = []
conformational_react = {}
conformational_title_img = []
invalid_react = []
append_invaid = invalid_react.append #計算を高速化するためオブジェクト化しておく
append_unique_img = unique_title_img.append #計算を高速化するためオブジェクト化しておく
append_conformational_img = conformational_title_img.append #計算を高速化するためオブジェクト化しておく
for ts_idx,(ini_idx,fin_idx) in enumerate(connections):
if type(ini_idx)==str or type(fin_idx)==str:
_add_invalid_reactions(ts_idx, ini_idx, fin_idx)
break
ini_atoms = atoms_list[ini_idx]
fin_atoms = atoms_list[fin_idx]
ini_smiles = smileses[ini_idx]
fin_smiles = smileses[fin_idx]
ini_group = group[ini_idx]
fin_group = group[fin_idx]
max_dist = round(_calc_max_distance(ini_atoms,fin_atoms,self.target0+self.target1),2)
if ini_smiles == fin_smiles:
reaction = [ts_idx,ini_idx,fin_idx,max_dist]
if ini_smiles in conformational_react.keys():
conformational_react[ini_smiles].append(reaction)
else:
conformational_react[ini_smiles] = [reaction]
append_conformational_img([group[ini_idx],mol2png_binary(mols[ini_idx])])
else:
ini_fin = (ini_smiles,fin_smiles) in unique_react.keys() # ini,finの順で存在する時
fin_ini = (fin_smiles,ini_smiles) in unique_react.keys() # fin,iniの順で存在する時
reaction = [ts_idx,ini_idx,ini_group,fin_idx,fin_group,max_dist]
if ini_fin or fin_ini:
if ini_fin:
unique_react[(ini_smiles,fin_smiles)].append(reaction)
elif fin_ini:
unique_react[(fin_smiles,ini_smiles)].append(reaction)
else:
unique_react[(ini_smiles,fin_smiles)] = [reaction]
append_unique_img([ini_group,fin_group,mol2png_binary(mols[ini_idx]),mol2png_binary(mols[fin_idx])])
unique_reaction = [sorted(table,key=lambda x:x[5]) for table in unique_react.values()] # max_distで表をソート
unique_ts_idx_max_dist= {table[0][0]:table[0][5] for table in unique_reaction} # {TS番号:max_dist}
unique_reaction = sorted(unique_reaction,key=lambda x:x[0][0]) # TS番号でソート
unique_reaction = map(lambda x:enumerate(x),unique_reaction) # tableの行番号を付ける
conformational_reaction = [sorted(table,key=lambda x:x[3]) for table in conformational_react.values()]
conformational_reaction = sorted(conformational_reaction,key=lambda x:x[0][0])
invalid_reaction = sorted(invalid_react,key=lambda x:x[0])
###書き込む####
env = Environment(loader=FileSystemLoader(os.path.dirname(__file__)))
template = env.get_template("template.html")
size = (170, 170)
data = {"ts_link":[i for i in range(len(connections))],
"unique_react_data": zip(unique_title_img, unique_reaction),
"conformational_react_data":zip(conformational_title_img, conformational_reaction),
"invalid_reactions_data":invalid_reaction}
rendered = template.render(data)
with open(html,"w") as f:
f.write(rendered)
return unique_ts_idx_max_dist
def atomslist2mols(atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs):
for atoms in atoms_list:
if atoms:
n_atoms = len(atoms)
break
target0,target1,target2,target3 = _renumbering_target(n_atoms,target0,target1,target2)
mols = [_atoms2mol(atoms,target0,target1,target2,target3,mult,**kwargs) if atoms else None for atoms in atoms_list]
return mols
def atomslist2smileses(atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs):
mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs)
return [Chem.MolToSmiles(mol) if mol else None for mol in mols]