In [6]:
import sqlite3


import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools
In [2]:
# Open the PubChem lookup database read-only. A plain sqlite3.connect(path)
# silently creates an empty database file when the path is wrong, and the
# mistake only shows up later as "no such table"; with mode=ro a missing file
# raises sqlite3.OperationalError right here. This notebook only runs SELECTs,
# so write access is not needed.
connection = sqlite3.connect("file:Pubchem.sqlite?mode=ro", uri=True)
In [3]:
# Cursor on the open connection; the query cells below call cursor.execute() on it.
cursor = connection.cursor()
In [4]:
# InChIKey of the single compound to look up.
targetinchikey = "WPQAOGZTDKTBHI-UHFFFAOYSA-N"

# The key is bound through a "?" placeholder rather than formatted into the
# SQL text; the parameters argument must be a sequence, hence the 1-tuple.
single_key_query = "SELECT * FROM ID_DATA  WHERE INCHIKEY == ?"
cursor.execute(single_key_query, (targetinchikey,))
rows = cursor.fetchall()
 
In [5]:
# Show the raw result: a list with one tuple per matching table row.
print(rows)
[('OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C', '500061', 'WPQAOGZTDKTBHI-UHFFFAOYSA-N')]
In [10]:
# Name the columns at construction time. Assigning `.columns` after
# pd.DataFrame(rows) raises a length-mismatch ValueError when the query matched
# nothing, because an empty row list produces a frame with zero columns.
# The names are applied positionally to the SELECT * result tuples.
pubchemdf = pd.DataFrame(rows, columns=['SMILES', 'ID', 'INCHIKEY'])
pubchemdf
Out[10]:
SMILES ID INCHIKEY
0 OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C 500061 WPQAOGZTDKTBHI-UHFFFAOYSA-N
In [11]:
# Parse the 'SMILES' column into RDKit molecules stored in a new 'Molecule'
# column. The frame is modified in place, so nothing is assigned here.
PandasTools.AddMoleculeColumnToFrame(
    pubchemdf,
    smilesCol='SMILES',
    molCol='Molecule',
    includeFingerprints=True,
)
In [12]:
# Display the frame again now that it carries the 'Molecule' column.
pubchemdf
Out[12]:
SMILES ID INCHIKEY Molecule
0 OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C 500061 WPQAOGZTDKTBHI-UHFFFAOYSA-N
Mol
In [ ]:
 
In [17]:
# InChIKeys of the compounds to fetch in a single query.
targetinchikey = [
    "WPQAOGZTDKTBHI-UHFFFAOYSA-N",
    "NEYCGDYQBQONFC-UHFFFAOYSA-N",
    "FESQCMADYYULGP-UHFFFAOYSA-N",
]

# One "?" placeholder per key: only the placeholder list is formatted into the
# SQL text, while the key values themselves are still bound as parameters.
placeholders = ", ".join("?" for _ in targetinchikey)
cursor.execute(f'SELECT * FROM ID_DATA  WHERE INCHIKEY IN ({placeholders})', targetinchikey)
rows = cursor.fetchall()
print(rows)
[('OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C', '500061', 'WPQAOGZTDKTBHI-UHFFFAOYSA-N'), ('c1occ(c1)C1C(=O)C2C3(C1(C)CCC1C3(C)C(OC(=O)C)CC3C1(C=CC(=O)C3(C)C)C)O2', '500067', 'NEYCGDYQBQONFC-UHFFFAOYSA-N'), ('OC(=O)C(CP(=O)(O)O)NC(=O)c1cccc2c1n(c(n2)C(C)c1nc2c([nH]1)cccc2)C', '500996', 'FESQCMADYYULGP-UHFFFAOYSA-N')]
In [21]:
# Build the frame with its column names in one step. Assigning `.columns`
# after pd.DataFrame(rows) raises a length-mismatch ValueError when no key
# matched, because an empty row list produces a frame with zero columns.
# The names are applied positionally to the SELECT * result tuples.
pubchemdf = pd.DataFrame(rows, columns=['SMILES', 'ID', 'INCHIKEY'])

# Parse the 'SMILES' column into RDKit molecules stored in a new 'Molecule'
# column; the frame is modified in place.
PandasTools.AddMoleculeColumnToFrame(pubchemdf, 'SMILES', 'Molecule', includeFingerprints=True)
In [22]:
# Display the frame built from the multi-key query, including the 'Molecule' column.
pubchemdf
Out[22]:
SMILES ID INCHIKEY Molecule
0 OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C 500061 WPQAOGZTDKTBHI-UHFFFAOYSA-N
Mol
1 c1occ(c1)C1C(=O)C2C3(C1(C)CCC1C3(C)C(OC(=O)C)C... 500067 NEYCGDYQBQONFC-UHFFFAOYSA-N
Mol
2 OC(=O)C(CP(=O)(O)O)NC(=O)c1cccc2c1n(c(n2)C(C)c... 500996 FESQCMADYYULGP-UHFFFAOYSA-N
Mol
In [ ]: