import sqlite3
import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools
# Open the local PubChem SQLite database and look up a single compound by
# its InChIKey in the ID_DATA table.
connection = sqlite3.connect("Pubchem.sqlite")
cursor = connection.cursor()

targetinchikey = "WPQAOGZTDKTBHI-UHFFFAOYSA-N"

# The key is bound through the "?" placeholder instead of being formatted
# into the SQL text, so sqlite3 handles the quoting.
single_key_query = "SELECT * FROM ID_DATA WHERE INCHIKEY == ?"
single_key_params = (targetinchikey,)
rows = cursor.execute(single_key_query, single_key_params).fetchall()

# Each row is a tuple; the output below shows (SMILES, ID, INCHIKEY).
print(rows)
[('OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C', '500061', 'WPQAOGZTDKTBHI-UHFFFAOYSA-N')]
# Load the query result into a DataFrame with named columns.
# The names are passed to the constructor rather than assigned afterwards:
# when the query matches nothing, pd.DataFrame([]) has zero columns and
# assigning three column names raises "ValueError: Length mismatch".
# NOTE(review): the column order relies on "SELECT *" returning
# (SMILES, ID, INCHIKEY), as the printed rows suggest -- confirm against the
# ID_DATA schema.
pubchemdf = pd.DataFrame(rows, columns=['SMILES', 'ID', 'INCHIKEY'])
# Bare expression so the notebook renders the frame.
pubchemdf
| | SMILES | ID | INCHIKEY |
|---|---|---|---|
| 0 | OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C | 500061 | WPQAOGZTDKTBHI-UHFFFAOYSA-N |
# Add a 'Molecule' column built from the 'SMILES' column; the frame is
# modified in place (the call's return value is not used).
PandasTools.AddMoleculeColumnToFrame(
    pubchemdf,
    smilesCol='SMILES',
    molCol='Molecule',
    includeFingerprints=True,
)
# Bare expression so the notebook renders the updated frame.
pubchemdf
| | SMILES | ID | INCHIKEY | Molecule |
|---|---|---|---|---|
| 0 | OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C | 500061 | WPQAOGZTDKTBHI-UHFFFAOYSA-N | |
# Look up several compounds at once: one "?" placeholder is generated per
# key so every value is still bound as a parameter.
targetinchikey = [
    "WPQAOGZTDKTBHI-UHFFFAOYSA-N",
    "NEYCGDYQBQONFC-UHFFFAOYSA-N",
    "FESQCMADYYULGP-UHFFFAOYSA-N",
]

# For three keys this yields "?, ?, ?".
placeholders = ", ".join("?" for _ in targetinchikey)
multi_key_query = f'SELECT * FROM ID_DATA WHERE INCHIKEY IN ({placeholders})'

rows = cursor.execute(multi_key_query, targetinchikey).fetchall()
print(rows)
[('OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C', '500061', 'WPQAOGZTDKTBHI-UHFFFAOYSA-N'), ('c1occ(c1)C1C(=O)C2C3(C1(C)CCC1C3(C)C(OC(=O)C)CC3C1(C=CC(=O)C3(C)C)C)O2', '500067', 'NEYCGDYQBQONFC-UHFFFAOYSA-N'), ('OC(=O)C(CP(=O)(O)O)NC(=O)c1cccc2c1n(c(n2)C(C)c1nc2c([nH]1)cccc2)C', '500996', 'FESQCMADYYULGP-UHFFFAOYSA-N')]
# Load the multi-key query result into a DataFrame with named columns.
# The names are passed to the constructor rather than assigned afterwards:
# when none of the keys match, pd.DataFrame([]) has zero columns and
# assigning three column names raises "ValueError: Length mismatch".
# NOTE(review): the column order relies on "SELECT *" returning
# (SMILES, ID, INCHIKEY), as the printed rows suggest -- confirm against the
# ID_DATA schema.
pubchemdf = pd.DataFrame(rows, columns=['SMILES', 'ID', 'INCHIKEY'])
# Add a 'Molecule' column built from the 'SMILES' column (frame is modified
# in place).
PandasTools.AddMoleculeColumnToFrame(pubchemdf, 'SMILES', 'Molecule', includeFingerprints=True)
# Bare expression so the notebook renders the frame.
pubchemdf
| | SMILES | ID | INCHIKEY | Molecule |
|---|---|---|---|---|
| 0 | OC1(C(=CCC(C1)C(=C)C)C)C1(O)C(=CCC(C1)C(=C)C)C | 500061 | WPQAOGZTDKTBHI-UHFFFAOYSA-N | |
| 1 | c1occ(c1)C1C(=O)C2C3(C1(C)CCC1C3(C)C(OC(=O)C)C... | 500067 | NEYCGDYQBQONFC-UHFFFAOYSA-N | |
| 2 | OC(=O)C(CP(=O)(O)O)NC(=O)c1cccc2c1n(c(n2)C(C)c... | 500996 | FESQCMADYYULGP-UHFFFAOYSA-N | |