Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release version 2024.06 #260

Merged
merged 19 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
104 changes: 59 additions & 45 deletions .scripts/fix_smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import pubchempy as pcp


def query_yes_no(question, default="yes"):
"""Ask a yes/no question via raw_input() and return their answer.

Expand All @@ -12,9 +13,9 @@ def query_yes_no(question, default="yes"):

The "answer" return value is one of "yes" or "no".
"""
valid = {"yes":"yes", "y":"yes", "ye":"yes",
"no":"no", "n":"no"}
if default == None:
valid = {"yes": "yes", "y": "yes", "ye": "yes",
"no": "no", "n": "no"}
if default is None:
prompt = " [y/n] "
elif default == "yes":
prompt = " [Y/n] "
Expand All @@ -25,80 +26,93 @@ def query_yes_no(question, default="yes"):

while 1:
sys.stdout.write(question + prompt)
choice = raw_input().lower()
choice = input().lower()
if default is not None and choice == '':
return default
elif choice in valid.keys():
return valid[choice]
else:
sys.stdout.write("Please respond with 'yes' or 'no' "\
"(or 'y' or 'n').\n")
sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")


def inplace_change(filename, old_string, new_string):
    """Replace every occurrence of ``old_string`` in a file with ``new_string``.

    The file is read in full. When ``old_string`` does not occur in it, a
    notice is printed and the file is left untouched. Otherwise the file is
    rewritten with all occurrences substituted and a notice is printed.

    Args:
        filename: Path of the text file to modify in place.
        old_string: Text to search for.
        new_string: Text that replaces each occurrence of ``old_string``.

    Returns:
        None.
    """
    # Read the whole file first; 'with' closes the handle even on error.
    with open(filename) as f:
        content = f.read()
    if old_string not in content:
        print(f'"{old_string}" not found in {filename}.')
        return

    # Build the new content before reopening the file: mode 'w' truncates it
    # immediately, so nothing that can fail should happen after that point
    # except the write itself.
    updated = content.replace(old_string, new_string)
    print(f'Changing "{old_string}" to "{new_string}" in {filename}')
    with open(filename, 'w') as f:
        f.write(updated)

# --- Script body -----------------------------------------------------------
# Reads the MassBank record given as the first command-line argument, looks
# its InChI up on PubChem and, when PubChem's isomeric SMILES differs from the
# record's CH$SMILES, replaces it in the file. The replacement is automatic
# when the formula and at least one name agree with PubChem; otherwise the
# user is asked for confirmation.

# Fields collected from the record.
names = []
formula = ""
inchi = ""
smiles = ""

# 'with' guarantees the record is closed once it has been parsed.
with open(sys.argv[1]) as record:
    for line in record:
        # Everything after the first space is the field value. Inner spaces
        # are kept so that multi-word names can still match PubChem synonyms.
        value = line.partition(" ")[2].strip()
        if line.startswith("CH$NAME"):
            names.append(value.lower())
        elif line.startswith("CH$FORMULA"):
            formula = value
        elif line.startswith("CH$IUPAC"):
            inchi = value
        elif line.startswith("CH$SMILES"):
            smiles = value

# Without both fields there is nothing to look up or nothing to replace.
if not inchi or not smiles:
    print("CH$IUPAC or CH$SMILES missing; exiting")
    sys.exit(1)

results = pcp.get_compounds(inchi, namespace='inchi')

# Guard the empty case before results[0] is touched below.
if not results:
    print("#results != 1; exiting")
    sys.exit(1)

compound = results[0]

if smiles == compound.isomeric_smiles:
    print('Nothing to do! Exiting.')
    sys.exit(0)

# clear screen
print(chr(27) + "[2J")
if len(results) != 1:
    print("#results != 1; exiting")
    sys.exit(1)

if compound.molecular_formula == formula:
    print('Formula matches ' + '\033[92m[OK]\033[0m')
    print(formula)
else:
    print(f'Formula from pubchem: {compound.molecular_formula}')
    print(f'Formula from file: {formula}')
    print('Formulas different ' + '\033[93m[!!]\033[0m')

print(f'InChI : {inchi}')

# Keep synonyms as str: the names read from the record are str, and on
# Python 3 a str never compares equal to bytes, so encoding the synonyms
# would make every membership test below fail.
synonyms = []
pubchem_synonyms = compound.synonyms
if pubchem_synonyms is not None:
    synonyms = [x.lower() for x in pubchem_synonyms]

common_names = []

print()
for name in names:
    if name in synonyms:
        print('Name in synonyms ' + '\033[92m[OK]\033[0m')
        common_names.append(name)

print(f'Names from pubchem: {synonyms}')
print(f'Names from file: {names}')
if common_names:
    print('\033[92m')
    print(common_names)
    print('\033[0m')
print()
print()
if (compound.molecular_formula == formula) and common_names:
    # Formula and at least one name agree with PubChem: substitute silently.
    inplace_change(sys.argv[1], 'CH$SMILES: ' + smiles, 'CH$SMILES: ' + compound.isomeric_smiles)
else:
    # Evidence is incomplete, so let the user decide (default answer: no).
    sub = query_yes_no(
        'Substitute \033[91m' + smiles + '\033[0m by \033[92m ' + compound.isomeric_smiles + '\033[0m?', default="no")
    if sub == "yes":
        inplace_change(sys.argv[1], 'CH$SMILES: ' + smiles, 'CH$SMILES: ' + compound.isomeric_smiles)

print()
print()
49 changes: 49 additions & 0 deletions Eawag/MSBNK-Eawag-EQ00273051.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
ACCESSION: MSBNK-Eawag-EQ00273051
RECORD_TITLE: N-MeFOSA; LC-ESI-QFT; MS2; CE: 15%; R=17500; [M-H]-
DATE: 2024.05.15
AUTHORS: B. Beck [dtc,com], J. Hollender [dtc]
LICENSE: CC BY-SA
COPYRIGHT: Copyright (C) Eawag 2023
COMMENT: CONFIDENCE standard compound
COMMENT: UCHEM_ID 2730
CH$NAME: N-MeFOSA
CH$NAME: Heptadecafluoro-N-methyloctanesulphonamide
CH$NAME: 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro-N-methyloctane-1-sulfonamide
CH$COMPOUND_CLASS: N/A; Environmental Standard
CH$FORMULA: C9H4F17NO2S
CH$EXACT_MASS: 512.969129108
CH$SMILES: CNS(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F
CH$IUPAC: InChI=1S/C9H4F17NO2S/c1-27-30(28,29)9(25,26)7(20,21)5(16,17)3(12,13)2(10,11)4(14,15)6(18,19)8(22,23)24/h27H,1H3
CH$LINK: PUBCHEM CID:3034468
CH$LINK: INCHIKEY SRMWNTGHXHOWBT-UHFFFAOYSA-N
CH$LINK: CHEMSPIDER 2298910
AC$INSTRUMENT: Exploris 240 Orbitrap Thermo Scientific
AC$INSTRUMENT_TYPE: LC-ESI-QFT
AC$MASS_SPECTROMETRY: MS_TYPE MS2
AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
AC$MASS_SPECTROMETRY: IONIZATION ESI
AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
AC$MASS_SPECTROMETRY: COLLISION_ENERGY 15 % (nominal)
AC$MASS_SPECTROMETRY: RESOLUTION 17500
AC$MASS_SPECTROMETRY: MASS_RANGE_M/Z 54-543
AC$CHROMATOGRAPHY: COLUMN_NAME XBridge C18 3.5um, 2.1x50mm, Waters
AC$CHROMATOGRAPHY: FLOW_GRADIENT 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
AC$CHROMATOGRAPHY: FLOW_RATE 200 uL/min
AC$CHROMATOGRAPHY: RETENTION_TIME 14.446 min
MS$FOCUSED_ION: BASE_PEAK 601.9938
MS$FOCUSED_ION: PRECURSOR_M/Z 511.9619
MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
MS$DATA_PROCESSING: WHOLE RMassBank 3.15.1.1
PK$SPLASH: splash10-03di-0000090000-f58beb82ae83963dfa05
PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
218.9871 C4F9- 3 218.9862 4.35
387.9821 C9F14N- 3 387.9813 2.07
511.9621 C9H3F17NO2S- 1 511.9619 0.53
PK$NUM_PEAK: 3
PK$PEAK: m/z int. rel.int.
218.9871 56016.6 3
387.9821 25188.9 1
511.9621 17799186 999
//
61 changes: 61 additions & 0 deletions Eawag/MSBNK-Eawag-EQ00273052.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
ACCESSION: MSBNK-Eawag-EQ00273052
RECORD_TITLE: N-MeFOSA; LC-ESI-QFT; MS2; CE: 30%; R=17500; [M-H]-
DATE: 2024.05.15
AUTHORS: B. Beck [dtc,com], J. Hollender [dtc]
LICENSE: CC BY-SA
COPYRIGHT: Copyright (C) Eawag 2023
COMMENT: CONFIDENCE standard compound
COMMENT: UCHEM_ID 2730
CH$NAME: N-MeFOSA
CH$NAME: Heptadecafluoro-N-methyloctanesulphonamide
CH$NAME: 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro-N-methyloctane-1-sulfonamide
CH$COMPOUND_CLASS: N/A; Environmental Standard
CH$FORMULA: C9H4F17NO2S
CH$EXACT_MASS: 512.969129108
CH$SMILES: CNS(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F
CH$IUPAC: InChI=1S/C9H4F17NO2S/c1-27-30(28,29)9(25,26)7(20,21)5(16,17)3(12,13)2(10,11)4(14,15)6(18,19)8(22,23)24/h27H,1H3
CH$LINK: PUBCHEM CID:3034468
CH$LINK: INCHIKEY SRMWNTGHXHOWBT-UHFFFAOYSA-N
CH$LINK: CHEMSPIDER 2298910
AC$INSTRUMENT: Exploris 240 Orbitrap Thermo Scientific
AC$INSTRUMENT_TYPE: LC-ESI-QFT
AC$MASS_SPECTROMETRY: MS_TYPE MS2
AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
AC$MASS_SPECTROMETRY: IONIZATION ESI
AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
AC$MASS_SPECTROMETRY: COLLISION_ENERGY 30 % (nominal)
AC$MASS_SPECTROMETRY: RESOLUTION 17500
AC$MASS_SPECTROMETRY: MASS_RANGE_M/Z 54-543
AC$CHROMATOGRAPHY: COLUMN_NAME XBridge C18 3.5um, 2.1x50mm, Waters
AC$CHROMATOGRAPHY: FLOW_GRADIENT 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
AC$CHROMATOGRAPHY: FLOW_RATE 200 uL/min
AC$CHROMATOGRAPHY: RETENTION_TIME 14.446 min
MS$FOCUSED_ION: BASE_PEAK 601.9938
MS$FOCUSED_ION: PRECURSOR_M/Z 511.9619
MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
MS$DATA_PROCESSING: WHOLE RMassBank 3.15.1.1
PK$SPLASH: splash10-03xr-1530090000-b039868553c7d505dcd7
PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
63.9624 O2S- 1 63.9624 -1.19
64.9703 HO2S- 1 64.9703 0.15
82.9608 FO2S- 1 82.9609 -0.09
111.9874 CH3FNO2S- 1 111.9874 -0.12
118.9927 C2F5- 1 118.9926 0.81
168.9894 C3F7- 1 168.9894 0.23
218.9861 C4F9- 1 218.9862 -0.32
387.9801 C9F14N- 1 387.9813 -3.04
511.9619 C9H3F17NO2S- 1 511.9619 0.11
PK$NUM_PEAK: 9
PK$PEAK: m/z int. rel.int.
63.9624 31033.2 8
64.9703 380172.8 105
82.9608 191858.3 53
111.9874 379871.1 105
118.9927 169509.9 46
168.9894 1676803.9 463
218.9861 1243892.6 344
387.9801 139576.5 38
511.9619 3610915.2 999
//
57 changes: 57 additions & 0 deletions Eawag/MSBNK-Eawag-EQ00273053.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
ACCESSION: MSBNK-Eawag-EQ00273053
RECORD_TITLE: N-MeFOSA; LC-ESI-QFT; MS2; CE: 45%; R=17500; [M-H]-
DATE: 2024.05.15
AUTHORS: B. Beck [dtc,com], J. Hollender [dtc]
LICENSE: CC BY-SA
COPYRIGHT: Copyright (C) Eawag 2023
COMMENT: CONFIDENCE standard compound
COMMENT: UCHEM_ID 2730
CH$NAME: N-MeFOSA
CH$NAME: Heptadecafluoro-N-methyloctanesulphonamide
CH$NAME: 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro-N-methyloctane-1-sulfonamide
CH$COMPOUND_CLASS: N/A; Environmental Standard
CH$FORMULA: C9H4F17NO2S
CH$EXACT_MASS: 512.969129108
CH$SMILES: CNS(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F
CH$IUPAC: InChI=1S/C9H4F17NO2S/c1-27-30(28,29)9(25,26)7(20,21)5(16,17)3(12,13)2(10,11)4(14,15)6(18,19)8(22,23)24/h27H,1H3
CH$LINK: PUBCHEM CID:3034468
CH$LINK: INCHIKEY SRMWNTGHXHOWBT-UHFFFAOYSA-N
CH$LINK: CHEMSPIDER 2298910
AC$INSTRUMENT: Exploris 240 Orbitrap Thermo Scientific
AC$INSTRUMENT_TYPE: LC-ESI-QFT
AC$MASS_SPECTROMETRY: MS_TYPE MS2
AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
AC$MASS_SPECTROMETRY: IONIZATION ESI
AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
AC$MASS_SPECTROMETRY: COLLISION_ENERGY 45 % (nominal)
AC$MASS_SPECTROMETRY: RESOLUTION 17500
AC$MASS_SPECTROMETRY: MASS_RANGE_M/Z 54-543
AC$CHROMATOGRAPHY: COLUMN_NAME XBridge C18 3.5um, 2.1x50mm, Waters
AC$CHROMATOGRAPHY: FLOW_GRADIENT 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
AC$CHROMATOGRAPHY: FLOW_RATE 200 uL/min
AC$CHROMATOGRAPHY: RETENTION_TIME 14.446 min
MS$FOCUSED_ION: BASE_PEAK 601.9938
MS$FOCUSED_ION: PRECURSOR_M/Z 511.9619
MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
MS$DATA_PROCESSING: WHOLE RMassBank 3.15.1.1
PK$SPLASH: splash10-02t9-5900000000-afffac53fb15d6f6ffde
PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
63.9626 O2S- 1 63.9624 2.92
64.9703 HO2S- 1 64.9703 0.85
82.9607 FO2S- 1 82.9609 -1.28
111.9872 CH3FNO2S- 1 111.9874 -1.62
118.9927 C2F5- 1 118.9926 1
168.9894 C3F7- 1 168.9894 0.05
218.9853 C4F9- 1 218.9862 -4.09
PK$NUM_PEAK: 7
PK$PEAK: m/z int. rel.int.
63.9626 56192.8 83
64.9703 451233.1 673
82.9607 116880.3 174
111.9872 136455.4 203
118.9927 235802 351
168.9894 669386.4 999
218.9853 109504.1 163
//
55 changes: 55 additions & 0 deletions Eawag/MSBNK-Eawag-EQ00273054.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
ACCESSION: MSBNK-Eawag-EQ00273054
RECORD_TITLE: N-MeFOSA; LC-ESI-QFT; MS2; CE: 60%; R=17500; [M-H]-
DATE: 2024.05.15
AUTHORS: B. Beck [dtc,com], J. Hollender [dtc]
LICENSE: CC BY-SA
COPYRIGHT: Copyright (C) Eawag 2023
COMMENT: CONFIDENCE standard compound
COMMENT: UCHEM_ID 2730
CH$NAME: N-MeFOSA
CH$NAME: Heptadecafluoro-N-methyloctanesulphonamide
CH$NAME: 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro-N-methyloctane-1-sulfonamide
CH$COMPOUND_CLASS: N/A; Environmental Standard
CH$FORMULA: C9H4F17NO2S
CH$EXACT_MASS: 512.969129108
CH$SMILES: CNS(=O)(=O)C(C(C(C(C(C(C(C(F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F)(F)F
CH$IUPAC: InChI=1S/C9H4F17NO2S/c1-27-30(28,29)9(25,26)7(20,21)5(16,17)3(12,13)2(10,11)4(14,15)6(18,19)8(22,23)24/h27H,1H3
CH$LINK: PUBCHEM CID:3034468
CH$LINK: INCHIKEY SRMWNTGHXHOWBT-UHFFFAOYSA-N
CH$LINK: CHEMSPIDER 2298910
AC$INSTRUMENT: Exploris 240 Orbitrap Thermo Scientific
AC$INSTRUMENT_TYPE: LC-ESI-QFT
AC$MASS_SPECTROMETRY: MS_TYPE MS2
AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
AC$MASS_SPECTROMETRY: IONIZATION ESI
AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
AC$MASS_SPECTROMETRY: COLLISION_ENERGY 60 % (nominal)
AC$MASS_SPECTROMETRY: RESOLUTION 17500
AC$MASS_SPECTROMETRY: MASS_RANGE_M/Z 54-543
AC$CHROMATOGRAPHY: COLUMN_NAME XBridge C18 3.5um, 2.1x50mm, Waters
AC$CHROMATOGRAPHY: FLOW_GRADIENT 90/10 at 0 min, 50/50 at 4 min, 5/95 at 17 min, 5/95 at 25 min, 90/10 at 25.1 min, 90/10 at 30 min
AC$CHROMATOGRAPHY: FLOW_RATE 200 uL/min
AC$CHROMATOGRAPHY: RETENTION_TIME 14.446 min
MS$FOCUSED_ION: BASE_PEAK 601.9938
MS$FOCUSED_ION: PRECURSOR_M/Z 511.9619
MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
MS$DATA_PROCESSING: WHOLE RMassBank 3.15.1.1
PK$SPLASH: splash10-03di-9400000000-d63e8066a239a1a875b1
PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
63.9625 O2S- 1 63.9624 1.25
64.9703 HO2S- 1 64.9703 0.5
82.9608 FO2S- 1 82.9609 -0.27
111.9874 CH3FNO2S- 1 111.9874 -0.33
118.9926 C2F5- 1 118.9926 0.68
168.9899 C3F7- 3 168.9894 3.39
PK$NUM_PEAK: 6
PK$PEAK: m/z int. rel.int.
63.9625 114908.7 266
64.9703 430633.7 999
82.9608 33711 78
111.9874 42490.1 98
118.9926 147210.9 341
168.9899 68993.2 160
//
Loading
Loading