# To download the structures (PubChem Compound SDF archives), run:
#   wget "ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/CURRENT-Full/SDF/*.sdf.gz"
import os
import subprocess
from os import listdir
from os.path import isfile, join
from pathlib import Path
# Folder that holds the downloaded archives; taken to be wherever this is run.
current_working_directory = os.getcwd()
# Keep only regular files named *.sdf.gz (the later suffix stripping assumes
# exactly that double extension), sorted so every run processes the archives
# in the same order -- listdir() returns entries in arbitrary order.
onlysdfFiles = sorted(
    f
    for f in listdir(current_working_directory)
    if f.endswith(".sdf.gz") and isfile(join(current_working_directory, f))
)
onlysdfFiles  # shows the list when last in a notebook cell; no effect in a script
# Each input file is named like Compound_049500001_050000000.sdf.gz.
# Planned outputs, one folder per product:
#   Structures with errors --> Errors/filename_errors.sdf
#   Correct structures     --> Valid/filename.sdf
#   Correct structures     --> SMILES/filename.smi (SMILES)
#   Correct structures     --> InChiKey/filename.smi (SMILES with the InChIKey appended)
# Create the four output folders, relative to the process's working directory.
# This is a best-effort setup step: every failure is reported and the script
# carries on, so re-running against existing folders is harmless.
for directory_name in ("Errors", "Valid", "SMILES", "InChiKey"):
    try:
        os.mkdir(directory_name)
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_name}'.")
    except Exception as e:
        print(f"An error occurred: {e}")
    else:
        # Only reached when mkdir() raised nothing.
        print(f"Directory '{directory_name}' created successfully.")
#Generate SMILES only
# Runs Open Babel once per archive in onlysdfFiles and writes canonical SMILES
# to SMILES/<name>.smi under current_working_directory.
# NOTE: the obabel location below is specific to one machine; adjust as needed.
obabel = "/Users/chrisswain/miniconda3/bin/obabel"
for sdfFile in onlysdfFiles:  # for testing, iterate over onlysdfFiles[:3]
    # Strip both suffixes: Compound_x_y.sdf.gz -> Compound_x_y
    fileName = Path(Path(sdfFile).stem).stem
    # The archives were listed from current_working_directory, so resolve the
    # input and output against that same folder instead of a fixed path.
    smiFile = join(current_working_directory, "SMILES", fileName + ".smi")
    sdfPath = join(current_working_directory, sdfFile)
    try:
        # Argument list (no shell): file names need no quoting or escaping.
        result = subprocess.run(
            [obabel, sdfPath, "-osmi", "-O", smiFile, "--canonical"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            errors="replace",
        )
    except OSError as e:
        # The executable could not be started; no later file would fare better.
        print(f"Unable to run '{obabel}': {e}")
        break
    # Echo whatever obabel reported so it shows up in a notebook or terminal.
    print(result.stdout, end="")
    if result.returncode != 0:
        print(f"obabel exited with status {result.returncode} for '{sdfFile}'.")
#Generate SMILES and InChiKey
# Runs Open Babel once per archive in onlysdfFiles and writes canonical SMILES
# with the PUBCHEM_IUPAC_INCHIKEY property appended to InChiKey/<name>.smi
# under current_working_directory.
# NOTE: the obabel location below is specific to one machine; adjust as needed.
obabel = "/Users/chrisswain/miniconda3/bin/obabel"
for sdfFile in onlysdfFiles:  # for testing, iterate over onlysdfFiles[:3]
    # Strip both suffixes: Compound_x_y.sdf.gz -> Compound_x_y
    fileName = Path(Path(sdfFile).stem).stem
    # The archives were listed from current_working_directory, so resolve the
    # input and output against that same folder instead of a fixed path.
    smiFile = join(current_working_directory, "InChiKey", fileName + ".smi")
    sdfPath = join(current_working_directory, sdfFile)
    try:
        # Argument list (no shell): file names need no quoting or escaping.
        result = subprocess.run(
            [
                obabel,
                sdfPath,
                "-osmi",
                "-O",
                smiFile,
                "--canonical",
                "--append",
                "PUBCHEM_IUPAC_INCHIKEY",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            errors="replace",
        )
    except OSError as e:
        # The executable could not be started; no later file would fare better.
        print(f"Unable to run '{obabel}': {e}")
        break
    # Echo whatever obabel reported so it shows up in a notebook or terminal.
    print(result.stdout, end="")
    if result.returncode != 0:
        print(f"obabel exited with status {result.returncode} for '{sdfFile}'.")