matlanticgrrm.geometry.geometries のソースコード

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.Fraggle import FraggleSim
from rdkit import DataStructs,Chem
from rdkit.Avalon import pyAvalonTools
import ase
from ase import Atoms
from ase.geometry.analysis import Analysis
from ase.neighborlist import build_neighbor_list,natural_cutoffs
import numpy as np
import pandas as pd
from functools import lru_cache
from jinja2 import Template, Environment, FileSystemLoader
import os
from matlanticgrrm.geometry.geometry import _Geometry, _calc_max_distance, atoms2smiles, atoms2mol,_renumbering_target,_atoms2mol
from matlanticgrrm.geometry.mol2png_binary import mol2png_binary
import matlanticgrrm.atomslist as ma
from copy import deepcopy

[ドキュメント]class Geometries(): """同一構造の判定や類似構造の判定などを行なえるクラス. target0,target1,target2に指定した原子の結合を見て同一構造か判断する. Parameters: atoms_list: list of Atoms Atomsオブジェクトのリスト target0: list of integer target0,target1,target2に指定した原子との結合状態を見る target1: list of integer target0,target1に指定した原子との結合状態を見る target2: list of integer target0に指定した原子との結合状態を見る mult: float | 結合判定を行なう時の係数 | 大きい程,遠く離れていても結合しているとみなす kargs: | 共有結合半径を変更する | ex) Al = 0.25 Property: smileses: list of string atoms_listのSMILES表記のリスト(全て単結合として判断する) mols: RDkitのMolオブジェクトのリスト group: list of integer | atoms_list中の同じ構造を推定し,グループ番号を振って返す. | ex)[0,0,1,1,0,0,2,2] の場合,EQ0,EQ1,EQ4,EQ5が同じグループに属している. | propertyであるが,呼び出し時にgroup化の計算を行なう. 1度行えばキャッシュを残すので次回の呼び出しには時間はかからない. | atoms_list,mult,target012を変更した際には再計算される. cluster: 2D list of integer | atoms_list中の同じ構造を推定し,同じグループごとにまとめる(index番号でまとめる) | ex)[[0,1,4,5],[2,3],[6,7]] の場合,EQ0,EQ1,EQ4,EQ5が同じグループに属している. | propertyであるが,呼び出し時にgroup化の計算を行なう. 1度行えばキャッシュを残すので次回の呼び出しには時間はかからない. | atoms_list,mult,target012を変更した際には再計算される. atoms_list: list of Atoms | 引数に指定したAtomsリスト | 上書きする場合はset_mult()を用いる. 上書きするとsmileses,molsも自動で変更される. mult: float | 引数に指定したmult. | 上書きする場合はset_mult()を用いる. 上書きするとsmileses,molsも自動で変更される. target0: list of integer | 引数に指定したtarget0 | 上書きする場合はset_target0()を用いる. 上書きするとsmileses,molsも自動で変更される. target1: list of integer | 引数に指定したtarget1 | 上書きする場合はset_target1()を用いる. 上書きするとsmileses,molsも自動で変更される. target2: list of integer | 引数に指定したtarget2 | 上書きする場合はset_target2()を用いる. 上書きするとsmileses,molsも自動で変更される. Examples: .. code-block:: python from matlanticgrrm.grrmdata import log2atoms from matlanticgrrm.geometry import Geometries eq_list1 = log2atoms("sample1_EQ_list.log") eq_list2 = log2atoms("sample2_EQ_list.log") #eq_list1とeq_list2は同じ要素数 eq_geometries1 = Geometries(eq_list1,[20,21,22,23,24,25,26,27,28]) eq_geometries2 = Geometries(eq_list2,[20,21,22,23,24,25,26,27,28]) eq_geometries1[0] == eq_geometries1[2] >>> [True] #EQ0とEQ2は同じ構造 eq_geometries1[[0,1,2]] == eq_geometries1[[2,4,5]] >>> [True,False,False] #EQ0-EQ2は同じ構造,EQ1-eQ4とEQ2-EQ5は異なる構造 eq_geometries1 == eq_geometries2 >>> [True,False,True,True,....] # eq_list1のEQ0とeq_list2のEQ0の構造は同じ """ def __init__(self,atoms_list,target0=None,target1=[],target2=[],mult=1,mols=None,smileses=None,geometry_list=None,**kwargs): """2種類のインスタンス方法がある""" self.__atoms_list = atoms_list self.__target0 = target0 self.__target1 = target1 self.__target2 = target2 self.__mult = mult if mols and smileses and geometry_list: # __getitem__の時に使う self.__mols = mols self.__smileses = smileses self._geometry_list = geometry_list else: #通常のインスタンス化 self.__mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs) self.__smileses = [Chem.MolToSmiles(mol) if mol else None for mol in self.mols] self._geometry_list = [_Geometry(atoms,target0,target1,target2,mult,mol,smiles) for atoms,mol,smiles in zip(self.atoms_list,self.mols,self.smileses)] self.__group, self.__cluster = self._assign_group_and_cluster(self.smileses) def initialize(self,atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs): super().__init__(atoms_list,target0,target1,target2,mult) self.__mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs) self.__smileses = [Chem.MolToSmiles(mol) if mol else None for mol in self.mols] self._geometry_list = [_Geometry(atoms,target0,target1,target2,mult,mol,smiles) for atoms,mol,smiles in zip(self.atoms_list,self.mols,self.smileses)] self.__group, self.__cluster = self._assign_group_and_cluster(self.smileses) @property def atoms_list(self): return self.__atoms_list def set_atoms_list(self, atoms_list): self.__atoms_list = atoms_list self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult) @property def mult(self): return self.__mult def set_mult(self, mult): self.__mult = mult self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult) @property def target0(self): return self.__target0 def set_target0(self, target0): self.__target0 = target0 self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult) @property def target1(self): return self.__target1 def set_target1(self, target1): self.__target1 = target1 self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult) @property def target2(self): return self.__target2 def set_target2(self, target2): self.__target2 = target2 self.initialize(self.atoms_list,self.target0,self.target1,self.target2,self.mult) @property def smileses(self): return list(self.__smileses) @property def mols(self): return self.__mols @property def group(self): return self.__group @property def cluster(self): return self.__cluster def __len__(self): return len(self._geometry_list) def __eq__(self,other): if len(self) != len(other): raise ValueError("要素数が異なります") return [smiles1==smiles2 for smiles1,smiles2 in zip(self.smileses,other.smileses)] def __ne__(self, other): return [smiles1!=smiles2 for smiles1,smiles2 in zip(self.smileses,other.smileses)] def __add__(self,other): """targetは最初のobjectが採用される""" atoms_list= ma.copy(self.atoms_list) + ma.copy(other.atoms_list) target0 = self.target0 target1 = self.target1 target2 = self.target2 mult = self.mult mols = deepcopy(self.mols) + deepcopy(other.mols) smileses = deepcopy(self.smileses) + deepcopy(other.smileses) geometry_list = deepcopy(self._geometry_list) + deepcopy(other._geometry_list) return Geometries(atoms_list,target0,target1,target2,mult,mols,smileses,geometry_list) def __getitem__(self,index): target0 = self.target0 target1 = self.target1 target2 = self.target2 mult = self.mult if type(index) == list: if len(index)==len(self) and type(index[0])==bool: """要素数と同じ数のbool[True,False,False, ...]のようにマスクで与えた場合""" index = [i for i,b in enumerate(index) if b] print(index) else: """[[1,2,3]]のようにindex番号のリストで入れた場合""" pass atoms_list = [atoms for i,atoms in enumerate(self.atoms_list) if i in index] mols = [mol for i,mol in enumerate(self.mols) if i in index] smileses = [smiles for i,smiles in enumerate(self.smileses) if i in index] geometry_list = [geometry for i,geometry in enumerate(self._geometry_list) if i in index] elif type(index) == slice: """[2:5]のようにスライスで入れた場合""" atoms_list = self.atoms_list[index] mols = self.mols[index] smileses = self.smileses[index] geometry_list = self._geometry_list[index] elif type(index) == int: """[1]のようにindex番号を入れた場合""" atoms_list = [self.atoms_list[index]] mols = [self.mols[index]] smileses = [self.smileses[index]] geometry_list = [self._geometry_list[index]] return Geometries(atoms_list,target0,target1,target2,mult,mols,smileses,geometry_list) def __str__(self): return str([smiles for smiles in self.smileses])
[ドキュメント] def same(self,idx,other=None): """同じ構造を検索する | idxと同じ構造を検索する | otherにGeometriesを設定した場合はidxで指定した構造がotherにないか検索する | other=Noneの場合, 自身の構造の中からidxと同じ構造を検索する | 同じ構造のindex番号のリストを返す Parameters: idx: int 対象の構造のindex番号 other: Geometries Geometriesオブジェクト Exsamples: .. code-block:: python from matlanticgrrm.grrmdata import log2atoms from matlanticgrrm.geometry import Geometries eq_list = log2atoms("*_EQ_list.log") eq_geometries = Geometries(eq_list,[20,21,22,23,24,25,26,27,28]) eq_geometries.same(6) # --> [6,7,21] #EQ6はEQ,6,21と同じ構造である """ if other is None: group_name = self.group[idx] return [i for i,name in enumerate(self.group) if group_name==name] else: smiles = self.smileses[idx] other_smileses = other.smileses return [i for i,other_smiles in enumerate(other_smileses) if other_smiles==smiles]
[ドキュメント] def similar(self,idx,method="maccs"): """類似構造を検索する | idxで指定した構造と類似の構造を検索する | 類似度の高い順に[(index番号,類似度)]のリストで返す | 類似度は1に近い程,類似性が高い Parameters: idx: int index番号 method: "maccs","tanimoto","avalon"の三種類がある. Examples: .. code-block:: python eq_list = log2atoms("*_EQ_list.log") eq_geometries = Geometries(eq_list,[20,21,22,23,24,25,26,27,28]) eq_geometries.similar(6) #EQ6はEQ,6,21と同じ構造である >>> [(6, 1.0), >>> (7, 1.0), >>> (21, 1.0), >>> (2, 0.92), >>> (5, 0.92), >>> (8, 0.92), >>> (9, 0.92), >>> (10, 0.92), >>> (11, 0.92), >>> (12, 0.92), >>> ... >>> ... # EQ6と構造似ている順で出力される """ mols = deepcopy(self.mols) for mol in mols: mol.UpdatePropertyCache(strict=False) Chem.SanitizeMol( mol, Chem.SanitizeFlags.SANITIZE_FINDRADICALS|\ Chem.SanitizeFlags.SANITIZE_KEKULIZE|\ Chem.SanitizeFlags.SANITIZE_SETAROMATICITY|\ Chem.SanitizeFlags.SANITIZE_SETCONJUGATION|\ Chem.SanitizeFlags.SANITIZE_SETHYBRIDIZATION|\ Chem.SanitizeFlags.SANITIZE_SYMMRINGS, catchErrors=True) #長いので改行 if method == "maccs": return self.macss(idx,mols) elif method == "tanimoto": return self.tanimoto(idx,mols) elif method == "avalon": return self.avalon(idx,mols)
def macss(self,idx,mols): maccs_fps = [AllChem.GetMACCSKeysFingerprint(mol) for mol in mols] maccs = DataStructs.BulkTanimotoSimilarity(maccs_fps[idx], maccs_fps) maccs_dict = {i:val for i,val in enumerate(maccs)} maccs_dict = sorted(maccs_dict.items(),key=lambda x: x[1], reverse=True) return maccs_dict def tanimoto(self,idx,mols): morgan_fp = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in mols] tanimoto = DataStructs.BulkTanimotoSimilarity(morgan_fp[idx], morgan_fp) tanimoto_dict = {i:val for i,val in enumerate(tanimoto)} tanimoto_dict = sorted(tanimoto_dict.items(),key=lambda x: x[1], reverse=True) return tanimoto_dict def avalon(self,idx,mols): avalon_fps = [pyAvalonTools.GetAvalonFP(mol) for mol in mols] avalon = DataStructs.BulkTanimotoSimilarity(avalon_fps[idx], avalon_fps) avalon_dict = {i:val for i,val in enumerate(avalon)} avalon_dict = sorted(avalon_dict.items(),key=lambda x: x[1], reverse=True) return avalon_dict def _assign_group_and_cluster(self,smileses): df = pd.DataFrame({'similes': smileses}) unique = df['similes'].unique().tolist() group = [unique.index(smile) for smile in smileses] culster = [[] for _ in range(len(unique))] for i,smiles in enumerate(smileses): culster[unique.index(smiles)].append(i) return group, culster def get_reaction_summary(self,connections,html="Reactions_Summary.html"): self.get_valid_reaction_and_summary(connections,html) def get_valid_reaction_and_summary(self,connections,html="Reactions_Summary.html"): atoms_list = self.atoms_list group = self.group smileses = self.smileses if self.target2 == []: mols = self.mols # Pngを作成するためのMolオブジェクト else: mols = atomslist2mols(self.atoms_list,self.target0,self.target1,mult=self.mult) def _add_invalid_reactions(ts_idx, ini_idx, fin_idx): ini_group = group[ini_idx] if type(ini_idx)!=str else "" fin_group = group[fin_idx] if type(fin_idx)!=str else "" append_invaid([ts_idx,ini_idx,ini_group,fin_idx,fin_group]) unique_react = {} unique_title_img = [] conformational_react = {} conformational_title_img = [] invalid_react = [] append_invaid = invalid_react.append #計算を高速化するためオブジェクト化しておく append_unique_img = unique_title_img.append #計算を高速化するためオブジェクト化しておく append_conformational_img = conformational_title_img.append #計算を高速化するためオブジェクト化しておく for ts_idx,(ini_idx,fin_idx) in enumerate(connections): if type(ini_idx)==str or type(fin_idx)==str: _add_invalid_reactions(ts_idx, ini_idx, fin_idx) break ini_atoms = atoms_list[ini_idx] fin_atoms = atoms_list[fin_idx] ini_smiles = smileses[ini_idx] fin_smiles = smileses[fin_idx] ini_group = group[ini_idx] fin_group = group[fin_idx] max_dist = round(_calc_max_distance(ini_atoms,fin_atoms,self.target0+self.target1),2) if ini_smiles == fin_smiles: reaction = [ts_idx,ini_idx,fin_idx,max_dist] if ini_smiles in conformational_react.keys(): conformational_react[ini_smiles].append(reaction) else: conformational_react[ini_smiles] = [reaction] append_conformational_img([group[ini_idx],mol2png_binary(mols[ini_idx])]) else: ini_fin = (ini_smiles,fin_smiles) in unique_react.keys() # ini,finの順で存在する時 fin_ini = (fin_smiles,ini_smiles) in unique_react.keys() # fin,iniの順で存在する時 reaction = [ts_idx,ini_idx,ini_group,fin_idx,fin_group,max_dist] if ini_fin or fin_ini: if ini_fin: unique_react[(ini_smiles,fin_smiles)].append(reaction) elif fin_ini: unique_react[(fin_smiles,ini_smiles)].append(reaction) else: unique_react[(ini_smiles,fin_smiles)] = [reaction] append_unique_img([ini_group,fin_group,mol2png_binary(mols[ini_idx]),mol2png_binary(mols[fin_idx])]) unique_reaction = [sorted(table,key=lambda x:x[5]) for table in unique_react.values()] # max_distで表をソート unique_ts_idx_max_dist= {table[0][0]:table[0][5] for table in unique_reaction} # {TS番号:max_dist} unique_reaction = sorted(unique_reaction,key=lambda x:x[0][0]) # TS番号でソート unique_reaction = map(lambda x:enumerate(x),unique_reaction) # tableの行番号を付ける conformational_reaction = [sorted(table,key=lambda x:x[3]) for table in conformational_react.values()] conformational_reaction = sorted(conformational_reaction,key=lambda x:x[0][0]) invalid_reaction = sorted(invalid_react,key=lambda x:x[0]) ###書き込む#### env = Environment(loader=FileSystemLoader(os.path.dirname(__file__))) template = env.get_template("template.html") size = (170, 170) data = {"ts_link":[i for i in range(len(connections))], "unique_react_data": zip(unique_title_img, unique_reaction), "conformational_react_data":zip(conformational_title_img, conformational_reaction), "invalid_reactions_data":invalid_reaction} rendered = template.render(data) with open(html,"w") as f: f.write(rendered) return unique_ts_idx_max_dist
def atomslist2mols(atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs): for atoms in atoms_list: if atoms: n_atoms = len(atoms) break target0,target1,target2,target3 = _renumbering_target(n_atoms,target0,target1,target2) mols = [_atoms2mol(atoms,target0,target1,target2,target3,mult,**kwargs) if atoms else None for atoms in atoms_list] return mols def atomslist2smileses(atoms_list,target0=None,target1=[],target2=[],mult=1,**kwargs): mols = atomslist2mols(atoms_list,target0,target1,target2,mult,**kwargs) return [Chem.MolToSmiles(mol) if mol else None for mol in mols]