# Paste-site metadata captured with the script (not part of the program):
# Untitled
# unknown
# plain_text
# 3 years ago
# 5.6 kB
# 15
# Indexable
import argparse
import os
from logging import DEBUG, INFO
from urllib.request import urlopen

import mtranslate as gtrans
import pandas as pd
import pubchempy as pcp
import py2opsin as opsin
import pymysql
import requests
from loguru import logger
# --- Command line ----------------------------------------------------------
# No options are defined; parsing still provides -h/--help and rejects
# unexpected arguments.
parser = argparse.ArgumentParser()
args = parser.parse_args()

# --- Logging ---------------------------------------------------------------
# Add a file sink ("file_<timestamp>.log") that rotates at 100 MB.
debug_level = "DEBUG"
logger.add("file_{time}.log", level=debug_level, rotation="100 MB")

# --- Database --------------------------------------------------------------
# SECURITY: credentials used to be fixed in the source with no way to override
# them. Each setting can now be supplied through an environment variable; the
# previous literals remain as fallbacks so existing deployments keep working.
# NOTE(review): the fallback password should be rotated and removed from the
# source once every deployment sets CHEMINFO_DB_PASSWORD.
_db_settings = {
    "user": os.environ.get("CHEMINFO_DB_USER", "cheminfo"),
    "password": os.environ.get("CHEMINFO_DB_PASSWORD", "dMA4HLPv55PjoE"),
    "host": os.environ.get("CHEMINFO_DB_HOST", "localhost"),
    "database": os.environ.get("CHEMINFO_DB_NAME", "cheminfo"),
}
try:
    connection = pymysql.connect(**_db_settings)
    mycursor = connection.cursor()  # cursor shared by the rest of the script
except pymysql.err.OperationalError as e:
    logger.exception(e)
    # Chain the original error so the cause stays visible in tracebacks.
    raise SystemExit("Unable to connect to database") from e

# URL template for PubChem's 800x800 PNG rendering of a compound; the
# placeholder is filled with the compound's CID.
molimage = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/PNG?image_size=800x800"

# Run statistics: names processed from list.txt, and how many of them were
# resolved on PubChem.
lines = 0
pubchem_pass = 0
def translate_name(translation):
    """Translate a Swedish chemical name to English and normalise it.

    The text is sent through ``gtrans.translate`` (target ``"en"``, source
    ``"sv"``); afterwards every space is removed and ``"ethylamine"`` is
    rewritten to ``"ethylamino"``. Both the input and the result are logged
    at DEBUG level.

    Args:
        translation: The Swedish name to translate.

    Returns:
        The normalised English name.
    """
    logger.debug("Swedish: " + translation)
    english = gtrans.translate(translation, "en", "sv")
    # Drop every space from the translated name.
    compact = "".join(english.split(" "))
    # NOTE(review): presumably rewrites the amine to its substituent form so
    # the downstream name parser accepts it -- confirm the intent.
    normalised = compact.replace("ethylamine", "ethylamino")
    logger.debug("English: " + normalised)
    return normalised
# Properties requested from PubChem for every match, as one comma-separated
# string passed to pcp.get_properties.
_PUBCHEM_PROPERTIES = 'Title,MolecularFormula,CanonicalSMILES,IsomericSMILES,InChI,InChIKey,IUPACName,ExactMass'

# Property columns logged for every hit (after the title), in output order.
_LOGGED_COLUMNS = (
    'IUPACName',
    'CanonicalSMILES',
    'IsomericSMILES',
    'MolecularFormula',
    'ExactMass',
    'InChI',
    'InChIKey',
)

_INSERT_SQL = """ INSERT INTO cheminfo (p_cid, title, iupac_name, canonical_smiles, isomeric_smiles, molecularformula, exactmass, inchi, inchikey, swedish_name, mol_image) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""


def _lookup_pubchem(english_name, opsin_smiles):
    """Return a one-row DataFrame of PubChem properties, or an empty one.

    The English name is tried first. The SMILES string is only consulted when
    the name search yields no compounds; a name that matches compounds but
    returns no properties does not fall back to SMILES.

    Args:
        english_name: Translated compound name used for the name search.
        opsin_smiles: SMILES string for the fallback search; may be empty.

    Returns:
        The first row of the property table, or an empty DataFrame when
        nothing was found.
    """
    # Empty identifiers are never sent to PubChem.
    if english_name and pcp.get_compounds(english_name, 'name'):
        identifier, namespace = english_name, 'name'
        found_message = "--- FOUND WITH NAME ---"
    elif opsin_smiles and pcp.get_compounds(opsin_smiles, 'smiles'):
        identifier, namespace = opsin_smiles, 'smiles'
        found_message = "--- FOUND WITH SMILES ---"
    else:
        return pd.DataFrame()

    hits = pcp.get_properties(_PUBCHEM_PROPERTIES, identifier, namespace, as_dataframe=True)
    if hits.empty:
        return hits
    logger.debug(found_message)
    return hits.head(1)


def _title_of(row):
    """Return the compound title, or the string "None" when PubChem sent none."""
    return row.Title.item() if 'Title' in row else "None"


def _log_compound(row):
    """Log the title and every column in _LOGGED_COLUMNS at INFO level."""
    # Values are passed as format arguments so that non-string values
    # (numbers, NaN) cannot break the log call.
    logger.info("PubChem Title: {}", _title_of(row))
    for column in _LOGGED_COLUMNS:
        logger.info("PubChem {}: {}", column, row[column].item())


def _store_compound(row, swedish_name, image):
    """Insert one compound into the cheminfo table and commit.

    An IntegrityError from the insert is logged and otherwise ignored.

    Args:
        row: One-row DataFrame of PubChem properties whose index is the CID.
        swedish_name: The original Swedish name from the input list.
        image: Raw bytes downloaded from the PubChem image URL.
    """
    params = (
        int(row.first_valid_index()),
        _title_of(row),
        row.IUPACName.item(),
        row.CanonicalSMILES.item(),
        row.IsomericSMILES.item(),
        row.MolecularFormula.item(),
        row.ExactMass.item(),
        row.InChI.item(),
        row.InChIKey.item(),
        swedish_name,
        image,
    )
    try:
        mycursor.execute(_INSERT_SQL, params)
        connection.commit()  # save to database
    except pymysql.err.IntegrityError:
        logger.debug("--- ALREADY IN DB ---")


# Main loop: translate every Swedish name in list.txt, resolve it on PubChem
# and store the result. The try/finally closes the database connection even
# if an iteration raises; the with-statement does the same for the three
# files, which are opened in the original order.
try:
    with open('failed.txt', 'a') as failed_file, \
            open('list2.txt', 'a') as list2, \
            open('list.txt') as name_file:
        for swedish_name in name_file:
            swedish_name = swedish_name.rstrip("\n")
            if not swedish_name.strip():
                # Blank lines carry no name; they are neither looked up nor
                # counted.
                continue
            lines += 1

            english_name = translate_name(swedish_name)
            # NOTE(review): py2opsin appears to return a falsy value when it
            # cannot convert the name -- confirm against its documentation.
            # Any falsy result is treated as "no SMILES".
            opsin_smiles = opsin.py2opsin(english_name) or ""
            logger.debug("Smiles: {}", opsin_smiles)

            pubchem = _lookup_pubchem(english_name, opsin_smiles)
            if pubchem.empty:
                # NOTE(review): the source had lost its indentation; these
                # three writes are assumed to belong to the not-found branch.
                logger.info("--- NO INFO FOUND ---")
                failed_file.write("pubchem failed swedish_name: " + swedish_name + "\n")
                failed_file.write("pubchem failed english_name: " + english_name + "\n")
                list2.write(english_name + "\n")
                continue

            pubchem_pass += 1
            cid = pubchem.first_valid_index()
            print("PubChem CID: " + str(cid))

            # A timeout keeps one stalled download from hanging the run.
            # The HTTP status is not checked, as before.
            molimg = requests.get(molimage.format(str(cid)), timeout=30).content

            _log_compound(pubchem)
            _store_compound(pubchem, swedish_name, molimg)
            logger.info("--- INFO END ---")
finally:
    connection.close()

logger.debug("pubchem found: {} / {}", pubchem_pass, lines)