xtbf
1from collections import defaultdict 2from contextlib import redirect_stderr, redirect_stdout 3import copy 4import io 5import multiprocessing 6import uuid 7from joblib import Parallel, delayed, Memory 8import tqdm 9import random 10import re 11import numpy as np 12import pandas as pd 13import sqlite3 14import shutil 15import os 16from pathlib import Path 17 18 19# TODO: copy all functionality to remove dependency to smal package 20# from smal.io import obj_load, obj_store 21# from smal.mol_edit import remove_atom_at_index, mol_to_xyzstring 22from rdkit import Chem 23from rdkit.Chem import AllChem, rdDistGeom 24 25import tempfile 26 27# Path to the xtb binary 28 29BIN_XTB = os.getenv("XTBF__BIN_XTB") or "xtb" 30 31import subprocess 32import time 33import psutil 34 35class Silencer: 36 """ 37 A useful tool for silencing stdout and stderr. 38 Usage: 39 >>> with Silencer() as s: 40 ... print("kasldjf") 41 42 >>> print("I catched:",s.out.getvalue()) 43 I catched: kasldjf 44 <BLANKLINE> 45 46 Note that nothing was printed and that we can later 47 access the stdout via the out field. Similarly, 48 stderr will be redirected to the err field. 49 """ 50 51 def __init__(self): 52 self.out = io.StringIO() 53 self.err = io.StringIO() 54 55 def __enter__(self): 56 self.rs = redirect_stdout(self.out) 57 self.re = redirect_stderr(self.err) 58 self.rs.__enter__() 59 self.re.__enter__() 60 return self 61 62 def __exit__(self, exctype, excinst, exctb): 63 self.rs.__exit__(exctype, excinst, exctb) 64 self.re.__exit__(exctype, excinst, exctb) 65 66 67 68try: 69 with Silencer() as s: 70 xtb_version = subprocess.check_output( 71 [f"{BIN_XTB}", "--version"], 72 stderr=subprocess.DEVNULL, 73 ) 74 if not isinstance(xtb_version, str): 75 xtb_version = xtb_version.decode("utf-8") 76 xtb_version = re.findall(r"(version \d+.\d+.\d+)", xtb_version) 77except: 78 print("could not determine xtb version.") 79 print( 80 "most likely no xtb binary is installed. See: https://xtb-docs.readthedocs.io/en/latest/setup.html" 81 ) 82 raise 83 84if xtb_version: 85 xtb_version = xtb_version[0] 86 if xtb_version >= "6.5.1": 87 print("xtb version:", xtb_version) 88 else: 89 assert ( 90 f"detected outdated xtb version: '{xtb_version}'. Please install version >= 6.5.1." 91 "see https://xtb-docs.readthedocs.io/en/latest/setup.html" 92 ) 93else: 94 print("could not determine xtb version.") 95 print( 96 "most likely no xtb binary is installed. See: https://xtb-docs.readthedocs.io/en/latest/setup.html" 97 ) 98 exit(1) 99 100 101# Try to prioritize memory mapped file system 102# to improve speed and reduce strain, 103# fallback to potentially memory mapped, or 104# non-mem mapped file system otherwise... 105TMP_ROOT = Path("/dev/shm/") 106if not TMP_ROOT.exists(): 107 print("Warning: could not find /dev/shm/ mem-mapped io not possible") 108 TMP_ROOT = Path("/tmp") 109if not TMP_ROOT.exists(): 110 TMP_ROOT = tempfile.gettempdir() 111 112XTB_TMP_DIR = TMP_ROOT / Path("xtbf") 113 114XTB_OUTER_JOBS = min(32,multiprocessing.cpu_count()-1) 115XTB_INNER_JOBS = 1 116XTB_TIMEOUT_SECONDS = 1200 # 20 minutes 117 118 119 120def temp_dir(): 121 """ 122 Creates a path to a temporary directory 123 """ 124 if Path("/tmp").exists(): 125 tmp_dir = Path("/tmp") / "smal" 126 else: 127 tmp_dir = Path(tempfile.gettempdir()) / "smal" 128 tmp_dir.mkdir( 129 exist_ok=True, 130 ) 131 return tmp_dir 132 133 134def random_fle( 135 suf: str, 136): 137 """ 138 Creates a random file with the given suffix suf 139 """ 140 fle_name = f"{uuid.uuid4().hex}.{suf}" 141 return temp_dir() / fle_name 142 143 144def mol_to_xyzstring( 145 mol: "Chem.Mol", 146 conf_id=0, 147) -> tuple[Chem.Mol, str]: 148 """ 149 Embeds the given molecule in 3D and returns the 150 resulting molecule with 3D coordinates and the 151 corresponding xyz string. 152 153 >>> from smal.all import * 154 >>> mol = embed_molecule(from_smi('CCCO')) 155 >>> mol, xyz = mol_to_xyzstring(mol) 156 >>> print(xyz) # doctest: +SKIP 157 12 158 <BLANKLINE> 159 C -1.285327 -0.056776 0.434662 160 C -0.175447 0.695786 -0.299881 161 C 0.918409 -0.342619 -0.555572 162 O 1.309356 -0.801512 0.717050 163 H -1.923389 0.626223 0.994971 164 H -0.850209 -0.851665 1.084577 165 H -1.831209 -0.618285 -0.370858 166 H -0.509218 1.121896 -1.245390 167 H 0.216108 1.472826 0.409811 168 H 0.439516 -1.203877 -1.086785 169 H 1.744456 0.056257 -1.140213 170 H 1.946955 -0.098256 1.057627 171 <BLANKLINE> 172 """ 173 rand_fle = random_fle("xyz") 174 Chem.MolToXYZFile(mol, str(rand_fle), confId=conf_id) 175 if rand_fle.exists(): 176 xyz_string = rand_fle.read_text() 177 rand_fle.unlink() 178 return mol, xyz_string 179 else: 180 return None, None 181 182 183 184def run_parallel( 185 n_jobs:int, 186 fun:"callable", elts:list, 187 do_tqdm:bool, 188) -> None: 189 if n_jobs > 1: 190 rslt = Parallel(n_jobs=n_jobs)( 191 delayed(fun)(elt) 192 for elt in (tqdm.tqdm(elts) if do_tqdm else elts) 193 ) 194 else: 195 rslt = [fun(elt) for elt in (tqdm.tqdm(elts) if do_tqdm else elts)] 196 return rslt 197 198def embed_molecule(mol: "Chem.Mol", enforceChirality:int, maxAttempts=100) -> "Chem.Mol": 199 mol = Chem.AddHs(mol) 200 ps = rdDistGeom.ETKDGv3() 201 ps.enforceChirality = bool(enforceChirality) 202 AllChem.EmbedMolecule( 203 mol, 204 enforceChirality=enforceChirality, 205 maxAttempts=maxAttempts, 206 ) 207 return mol 208 209def embed_multi( 210 mol: "Chem.Mol", 211 n_confs: int, 212) -> "Chem.Mol": 213 mol = Chem.AddHs(mol) 214 ps = rdDistGeom.ETKDGv3() 215 cids = rdDistGeom.EmbedMultipleConfs(mol, n_confs, ps) 216 217 return cids 218 219 220def run_xtb(xtb_command:str, mol:Chem.Mol, multiplicity:int, conf_id:int=0,): 221 """ 222 223 >>> from smal.all import * 224 >>> mol = from_smi("CCCOCCC") 225 >>> mol = embed_molecule(mol) 226 >>> success,rslt = run_xtb("--opt", mol, 1) 227 >>> success 228 True 229 230 231 """ 232 mol, xyz_string = mol_to_xyzstring(mol,conf_id=conf_id) 233 return run_xtb_xyz(xtb_command=xtb_command,xyz_string=xyz_string, multiplicity=multiplicity, ) 234 235def run_xtb_xyz(xtb_command:str, xyz_string:str, multiplicity:int,): 236 """ 237 Runs the given xtb job as identified by 238 the following components: 239 - The command <xtb>, 240 - the xyz string of the molecule <xyz_string> 241 242 """ 243 assert XTB_TMP_DIR.parent.exists() 244 XTB_TMP_DIR.mkdir(exist_ok=True) 245 assert XTB_TMP_DIR.exists() 246 job_dir = XTB_TMP_DIR / f"job_{random.randint(1,10**9)}" 247 assert not job_dir.exists(), "internal logic error" 248 cwd_before = os.getcwd() 249 250 if multiplicity != 1: 251 uhf_arg = f"--uhf {multiplicity} " 252 else: 253 uhf_arg = "" 254 try: 255 os.mkdir(job_dir) 256 os.chdir(job_dir) 257 (job_dir / "input.xyz").write_text(xyz_string) 258 output_fle = job_dir / "output.out" 259 assert not output_fle.exists() 260 with Silencer() as s: 261 # alpb_str = "" 262 cmd = f"{BIN_XTB} input.xyz --parallel {XTB_INNER_JOBS} {xtb_command} {uhf_arg}> output.out 2> err.out" 263 264 # normal approach: 265 # subprocess.check_output(cmd,shell=True,timeout=XTB_TIMEOUT_SECONDS) 266 # ^ 267 # \__ sadly, this approach does not work because the timeout isnt applied 268 # 269 # As suggested in https://stackoverflow.com/questions/48763362/python-subprocess-kill-with-timeout 270 # we apply the following workaround: 271 parent = subprocess.Popen(cmd, shell=True) 272 for _ in range(XTB_TIMEOUT_SECONDS): 273 if parent.poll() is not None: # process just ended 274 break 275 time.sleep(1) 276 else: 277 # the for loop ended without break: timeout 278 parent = psutil.Process(parent.pid) 279 for child in parent.children( 280 recursive=True 281 ): # or parent.children() for recursive=False 282 child.kill() 283 parent.kill() 284 285 if output_fle.exists(): 286 rslt = (True, output_fle.read_text()) 287 return rslt 288 else: 289 return False, "" 290 finally: 291 os.chdir(cwd_before) 292 # Garbage Collection 293 if job_dir.exists(): 294 shutil.rmtree(job_dir)
BIN_XTB =
'xtb'
class
Silencer:
36class Silencer: 37 """ 38 A useful tool for silencing stdout and stderr. 39 Usage: 40 >>> with Silencer() as s: 41 ... print("kasldjf") 42 43 >>> print("I catched:",s.out.getvalue()) 44 I catched: kasldjf 45 <BLANKLINE> 46 47 Note that nothing was printed and that we can later 48 access the stdout via the out field. Similarly, 49 stderr will be redirected to the err field. 50 """ 51 52 def __init__(self): 53 self.out = io.StringIO() 54 self.err = io.StringIO() 55 56 def __enter__(self): 57 self.rs = redirect_stdout(self.out) 58 self.re = redirect_stderr(self.err) 59 self.rs.__enter__() 60 self.re.__enter__() 61 return self 62 63 def __exit__(self, exctype, excinst, exctb): 64 self.rs.__exit__(exctype, excinst, exctb) 65 self.re.__exit__(exctype, excinst, exctb)
A useful tool for silencing stdout and stderr. Usage:
>>> with Silencer() as s:
... print("kasldjf")
>>> print("I catched:",s.out.getvalue())
I catched: kasldjf
<BLANKLINE>
Note that nothing was printed and that we can later access the stdout via the out field. Similarly, stderr will be redirected to the err field.
TMP_ROOT =
PosixPath('/tmp')
XTB_TMP_DIR =
PosixPath('/tmp/xtbf')
XTB_OUTER_JOBS =
7
XTB_INNER_JOBS =
1
XTB_TIMEOUT_SECONDS =
1200
def
temp_dir():
121def temp_dir(): 122 """ 123 Creates a path to a temporary directory 124 """ 125 if Path("/tmp").exists(): 126 tmp_dir = Path("/tmp") / "smal" 127 else: 128 tmp_dir = Path(tempfile.gettempdir()) / "smal" 129 tmp_dir.mkdir( 130 exist_ok=True, 131 ) 132 return tmp_dir
Creates a path to a temporary directory
def
random_fle(suf: str):
135def random_fle( 136 suf: str, 137): 138 """ 139 Creates a random file with the given suffix suf 140 """ 141 fle_name = f"{uuid.uuid4().hex}.{suf}" 142 return temp_dir() / fle_name
Creates a random file with the given suffix suf
def
mol_to_xyzstring( mol: rdkit.Chem.rdchem.Mol, conf_id=0) -> tuple[rdkit.Chem.rdchem.Mol, str]:
145def mol_to_xyzstring( 146 mol: "Chem.Mol", 147 conf_id=0, 148) -> tuple[Chem.Mol, str]: 149 """ 150 Embeds the given molecule in 3D and returns the 151 resulting molecule with 3D coordinates and the 152 corresponding xyz string. 153 154 >>> from smal.all import * 155 >>> mol = embed_molecule(from_smi('CCCO')) 156 >>> mol, xyz = mol_to_xyzstring(mol) 157 >>> print(xyz) # doctest: +SKIP 158 12 159 <BLANKLINE> 160 C -1.285327 -0.056776 0.434662 161 C -0.175447 0.695786 -0.299881 162 C 0.918409 -0.342619 -0.555572 163 O 1.309356 -0.801512 0.717050 164 H -1.923389 0.626223 0.994971 165 H -0.850209 -0.851665 1.084577 166 H -1.831209 -0.618285 -0.370858 167 H -0.509218 1.121896 -1.245390 168 H 0.216108 1.472826 0.409811 169 H 0.439516 -1.203877 -1.086785 170 H 1.744456 0.056257 -1.140213 171 H 1.946955 -0.098256 1.057627 172 <BLANKLINE> 173 """ 174 rand_fle = random_fle("xyz") 175 Chem.MolToXYZFile(mol, str(rand_fle), confId=conf_id) 176 if rand_fle.exists(): 177 xyz_string = rand_fle.read_text() 178 rand_fle.unlink() 179 return mol, xyz_string 180 else: 181 return None, None
Embeds the given molecule in 3D and returns the resulting molecule with 3D coordinates and the corresponding xyz string.
>>> from smal.all import *
>>> mol = embed_molecule(from_smi('CCCO'))
>>> mol, xyz = mol_to_xyzstring(mol)
>>> print(xyz) # doctest: +SKIP
12
<BLANKLINE>
C -1.285327 -0.056776 0.434662
C -0.175447 0.695786 -0.299881
C 0.918409 -0.342619 -0.555572
O 1.309356 -0.801512 0.717050
H -1.923389 0.626223 0.994971
H -0.850209 -0.851665 1.084577
H -1.831209 -0.618285 -0.370858
H -0.509218 1.121896 -1.245390
H 0.216108 1.472826 0.409811
H 0.439516 -1.203877 -1.086785
H 1.744456 0.056257 -1.140213
H 1.946955 -0.098256 1.057627
<BLANKLINE>
def
run_parallel( n_jobs: int, fun: <built-in function callable>, elts: list, do_tqdm: bool) -> None:
185def run_parallel( 186 n_jobs:int, 187 fun:"callable", elts:list, 188 do_tqdm:bool, 189) -> None: 190 if n_jobs > 1: 191 rslt = Parallel(n_jobs=n_jobs)( 192 delayed(fun)(elt) 193 for elt in (tqdm.tqdm(elts) if do_tqdm else elts) 194 ) 195 else: 196 rslt = [fun(elt) for elt in (tqdm.tqdm(elts) if do_tqdm else elts)] 197 return rslt
def
embed_molecule( mol: rdkit.Chem.rdchem.Mol, enforceChirality: int, maxAttempts=100) -> rdkit.Chem.rdchem.Mol:
199def embed_molecule(mol: "Chem.Mol", enforceChirality:int, maxAttempts=100) -> "Chem.Mol": 200 mol = Chem.AddHs(mol) 201 ps = rdDistGeom.ETKDGv3() 202 ps.enforceChirality = bool(enforceChirality) 203 AllChem.EmbedMolecule( 204 mol, 205 enforceChirality=enforceChirality, 206 maxAttempts=maxAttempts, 207 ) 208 return mol
def
embed_multi(mol: rdkit.Chem.rdchem.Mol, n_confs: int) -> rdkit.Chem.rdchem.Mol:
def
run_xtb( xtb_command: str, mol: rdkit.Chem.rdchem.Mol, multiplicity: int, conf_id: int = 0):
221def run_xtb(xtb_command:str, mol:Chem.Mol, multiplicity:int, conf_id:int=0,): 222 """ 223 224 >>> from smal.all import * 225 >>> mol = from_smi("CCCOCCC") 226 >>> mol = embed_molecule(mol) 227 >>> success,rslt = run_xtb("--opt", mol, 1) 228 >>> success 229 True 230 231 232 """ 233 mol, xyz_string = mol_to_xyzstring(mol,conf_id=conf_id) 234 return run_xtb_xyz(xtb_command=xtb_command,xyz_string=xyz_string, multiplicity=multiplicity, )
>>> from smal.all import *
>>> mol = from_smi("CCCOCCC")
>>> mol = embed_molecule(mol)
>>> success,rslt = run_xtb("--opt", mol, 1)
>>> success
True
def
run_xtb_xyz(xtb_command: str, xyz_string: str, multiplicity: int):
236def run_xtb_xyz(xtb_command:str, xyz_string:str, multiplicity:int,): 237 """ 238 Runs the given xtb job as identified by 239 the following components: 240 - The command <xtb>, 241 - the xyz string of the molecule <xyz_string> 242 243 """ 244 assert XTB_TMP_DIR.parent.exists() 245 XTB_TMP_DIR.mkdir(exist_ok=True) 246 assert XTB_TMP_DIR.exists() 247 job_dir = XTB_TMP_DIR / f"job_{random.randint(1,10**9)}" 248 assert not job_dir.exists(), "internal logic error" 249 cwd_before = os.getcwd() 250 251 if multiplicity != 1: 252 uhf_arg = f"--uhf {multiplicity} " 253 else: 254 uhf_arg = "" 255 try: 256 os.mkdir(job_dir) 257 os.chdir(job_dir) 258 (job_dir / "input.xyz").write_text(xyz_string) 259 output_fle = job_dir / "output.out" 260 assert not output_fle.exists() 261 with Silencer() as s: 262 # alpb_str = "" 263 cmd = f"{BIN_XTB} input.xyz --parallel {XTB_INNER_JOBS} {xtb_command} {uhf_arg}> output.out 2> err.out" 264 265 # normal approach: 266 # subprocess.check_output(cmd,shell=True,timeout=XTB_TIMEOUT_SECONDS) 267 # ^ 268 # \__ sadly, this approach does not work because the timeout isnt applied 269 # 270 # As suggested in https://stackoverflow.com/questions/48763362/python-subprocess-kill-with-timeout 271 # we apply the following workaround: 272 parent = subprocess.Popen(cmd, shell=True) 273 for _ in range(XTB_TIMEOUT_SECONDS): 274 if parent.poll() is not None: # process just ended 275 break 276 time.sleep(1) 277 else: 278 # the for loop ended without break: timeout 279 parent = psutil.Process(parent.pid) 280 for child in parent.children( 281 recursive=True 282 ): # or parent.children() for recursive=False 283 child.kill() 284 parent.kill() 285 286 if output_fle.exists(): 287 rslt = (True, output_fle.read_text()) 288 return rslt 289 else: 290 return False, "" 291 finally: 292 os.chdir(cwd_before) 293 # Garbage Collection 294 if job_dir.exists(): 295 shutil.rmtree(job_dir)
Runs the given xtb job as identified by the following components:
- The command
, - the xyz string of the molecule
s =
<xtbf.Silencer object>