rdkit.Chem.SimpleEnum.Enumerator module

rdkit.Chem.SimpleEnum.Enumerator.EnumerateReaction(reaction, bbLists, uniqueProductsOnly=False, funcGroupFilename='/scratch/RDKit_git/Data/Functional_Group_Hierarchy.txt', propName='molFileValue')
>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'boronic1.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> reacts1 = ['Brc1ccccc1', 'Brc1ncccc1', 'Brc1cnccc1']
>>> reacts1 = [Chem.MolFromSmiles(x) for x in reacts1]
>>> reacts2 = ['CCB(O)O', 'CCCB(O)O']
>>> reacts2 = [Chem.MolFromSmiles(x) for x in reacts2]
>>> prods = EnumerateReaction(rxn, (reacts1, reacts2))
>>> prods = list(prods)

This is a bit nasty because of the symmetry of the boronic acid:

>>> len(prods)
12
>>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
>>> smis.sort()
>>> len(smis)
6
>>> print(smis)
['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

The nastiness can be avoided at the cost of some memory by asking for only unique products:

>>> prods = EnumerateReaction(rxn, (reacts1, reacts2), uniqueProductsOnly=True)
>>> prods = list(prods)
>>> len(prods)
6
>>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']
rdkit.Chem.SimpleEnum.Enumerator.PreprocessReaction(reaction, funcGroupFilename=None, propName='molFileValue')
>>> from rdkit.Chem import AllChem
>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'boronic1.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> nWarn, nError, nReacts, nProds, reactantLabels = PreprocessReaction(rxn)
>>> nWarn
0
>>> nError
0
>>> nReacts
2
>>> nProds
1
>>> reactantLabels
(((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

If there are functional group labels in the input reaction (via atoms with molFileValue properties), the corresponding atoms will have queries added to them so that they only match such things. We can see this here:

>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> r1 = rxn.GetReactantTemplate(0)
>>> m1 = Chem.MolFromSmiles('CCBr')
>>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

These both match because the reaction file itself just has R1-Br:

>>> m1.HasSubstructMatch(r1)
True
>>> m2.HasSubstructMatch(r1)
True

After preprocessing, we only match the aromatic Br:

>>> d = PreprocessReaction(rxn)
>>> m1.HasSubstructMatch(r1)
False
>>> m2.HasSubstructMatch(r1)
True

We also support or queries in the values field (separated by commas):

>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'azide_reaction.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> reactantLabels = PreprocessReaction(rxn)[-1]
>>> reactantLabels
(((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
>>> m1 = Chem.MolFromSmiles('CC(=O)O')
>>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
>>> m3 = Chem.MolFromSmiles('CC(=O)N')
>>> r2 = rxn.GetReactantTemplate(1)
>>> m1.HasSubstructMatch(r2)
True
>>> m2.HasSubstructMatch(r2)
True
>>> m3.HasSubstructMatch(r2)
False

unrecognized final group types are returned as None:

>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'bad_value1.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
Traceback (most recent call last):
  File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
    compileflags, 1) in test.globs
  File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
    nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
  File "Enumerator.py", line 105, in PreprocessReaction
    reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
KeyError: 'boromicacid'

One unrecognized group type in a comma-separated list makes the whole thing fail:

>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'bad_value2.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
Traceback (most recent call last):
  File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
    compileflags, 1) in test.globs
  File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
    nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
  File "Enumerator.py", line 105, in PreprocessReaction
    reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
KeyError: 'carboxylicacid,acidchlroide'
>>> testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'bad_value3.rxn')
>>> rxn = AllChem.ReactionFromRxnFile(testFile)
>>> rxn.Initialize()
>>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
Traceback (most recent call last):
  File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
    compileflags, 1) in test.globs
  File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
    nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
  File "Enumerator.py", line 105, in PreprocessReaction
    reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
KeyError: 'carboxyliccaid,acidchloride'
>>> rxn = rdChemReactions.ChemicalReaction()
>>> rxn.Initialize()
>>> nWarn, nError, nReacts, nProds, reactantLabels = PreprocessReaction(rxn)
>>> reactantLabels
()
>>> reactantLabels == ()
True