RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2019 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
22 #include <RDGeneral/types.h>
23 
24 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
25 namespace RDKit {
26 class ROMol;
27 class RWMol;
28 class Atom;
29 class Bond;
30 class Conformer;
31 typedef std::vector<double> INVAR_VECT;  ///< a vector of double values
32 typedef INVAR_VECT::iterator INVAR_VECT_I;  ///< mutable iterator over an INVAR_VECT
33 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  ///< read-only iterator over an INVAR_VECT
34 
35 //! \brief Groups a variety of molecular query and transformation operations.
36 namespace MolOps {
37 
38 //! return the number of electrons available on an atom to donate for
39 //! aromaticity
40 /*!
41  The result is determined using the default valency, number of lone pairs,
42  number of bonds and the formal charge. Note that the atom may not donate
43  all of these electrons to a ring for aromaticity (also used in Conjugation
44  and hybridization code).
45 
46  \param at the atom of interest
47 
48  \return the number of electrons
49 */
51 
52 //! sums up all atomic formal charges and returns the result
54 
55 //! returns whether or not the given Atom is involved in a conjugated bond
57 
58 //! find fragments (disconnected components of the molecular graph)
59 /*!
60 
61  \param mol the molecule of interest
62  \param mapping used to return the mapping of Atoms->fragments.
63  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
64  and will contain the fragment assignment for each Atom
65 
66  \return the number of fragments found.
67 
68 */
69 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
70  std::vector<int> &mapping);
71 //! find fragments (disconnected components of the molecular graph)
72 /*!
73 
74  \param mol the molecule of interest
75  \param frags used to return the Atoms in each fragment
76  On return \c frags will be \c numFrags long, and each entry
77  will contain the indices of the Atoms in that fragment.
78 
79  \return the number of fragments found.
80 
81 */
83  const ROMol &mol, std::vector<std::vector<int>> &frags);
84 
85 //! splits a molecule into its component fragments
86 //! (disconnected components of the molecular graph)
87 /*!
88 
89  \param mol the molecule of interest
90  \param sanitizeFrags toggles sanitization of the fragments after
91  they are built
92  \param frags used to return the mapping of Atoms->fragments.
93  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
94  on return and will contain the fragment assignment for each Atom
95  \param fragsMolAtomMapping used to return the Atoms in each fragment
96  If provided, \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
97  will contain the indices of the Atoms in that fragment.
98  \param copyConformers toggles copying conformers of the fragments after
99  they are built
100  \return a vector of the fragments as smart pointers to ROMols
101 
102 */
103 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
104  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
105  std::vector<std::vector<int>> *fragsMolAtomMapping = 0,
106  bool copyConformers = true);
107 
108 //! splits a molecule into pieces based on labels assigned using a query
109 /*!
110 
111  \param mol the molecule of interest
112  \param query the function (taking the molecule and an atom, returning a label of type \c T) used to "label" the molecule for fragmentation
113  \param sanitizeFrags toggles sanitization of the fragments after
114  they are built
115  \param whiteList if provided, only labels in the list will be kept
116  \param negateList if true, the white list logic will be inverted: only labels
117  not in the list will be kept
118 
119  \return a map from each label (type \c T) to the corresponding fragment
120 
121 */
122 template <typename T>
123 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
124 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
125  bool sanitizeFrags = true,
126  const std::vector<T> *whiteList = 0,
127  bool negateList = false);
128 
129 #if 0
130  //! finds a molecule's minimum spanning tree (MST)
131  /*!
132  \param mol the molecule of interest
133  \param mst used to return the MST as a vector of bond indices
134  */
135  RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
136 #endif
137 
138 //! calculates Balaban's J index for the molecule
139 /*!
140  \param mol the molecule of interest
141  \param useBO toggles inclusion of the bond order in the calculation
142  (when false, we're not really calculating the J value)
143  \param force forces the calculation (instead of using cached results)
144  \param bondPath when included, only paths using bonds whose indices occur
145  in this vector will be included in the calculation
146  \param cacheIt If this is true, the calculated value will be cached
147  as a property on the molecule
148  \return the J index
149 
150 */
152  const ROMol &mol, bool useBO = true, bool force = false,
153  const std::vector<int> *bondPath = 0, bool cacheIt = true);
154 //! \overload
155 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts);
156 
157 //! \name Dealing with hydrogens
158 //@{
159 
160 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
161 /*!
162  \param mol the molecule to add Hs to
163  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
164  added
165  \param addCoords (optional) If this is true, estimates for the atomic
166  coordinates
167  of the added Hs will be used.
168  \param onlyOnAtoms (optional) if provided, this should be a vector of
169  IDs of the atoms that will be considered for H addition.
170  \param addResidueInfo (optional) if this is true, add residue info to
171  hydrogen atoms (useful for PDB files).
172 
173  \return the new molecule
174 
175  <b>Notes:</b>
176  - it makes no sense to use the \c addCoords option if the molecule's
177  heavy
178  atoms don't already have coordinates.
179  - the caller is responsible for <tt>delete</tt>ing the pointer this
180  returns.
181  */
182 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
183  bool addCoords = false,
184  const UINT_VECT *onlyOnAtoms = NULL,
185  bool addResidueInfo = false);
186 //! \overload
187 //! modifies the molecule in place
188 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
189  bool addCoords = false,
190  const UINT_VECT *onlyOnAtoms = NULL,
191  bool addResidueInfo = false);
192 
193 //! returns a copy of a molecule with hydrogens removed
194 /*!
195  \param mol the molecule to remove Hs from
196  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
197  removed
198  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
199  are removed
200  from the graph, the heavy atom to which they are bound will have its
201  counter of
202  explicit Hs increased.
203  \param sanitize (optional) If this is \c true, the final molecule will be
204  sanitized
205 
206  \return the new molecule
207 
208  <b>Notes:</b>
209  - Hydrogens which aren't connected to a heavy atom will not be
210  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
211  all atoms removed.
212  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
213  will not be removed.
214  - two coordinate Hs, like the central H in C[H-]C, will not be removed
215  - Hs connected to dummy atoms will not be removed
216  - Hs that are part of the definition of double bond Stereochemistry
217  will not be removed
218  - Hs that are not connected to anything else will not be removed
219  - Hs that have a query defined (i.e. hasQuery() returns true) will not
220  be removed
221 
222  - the caller is responsible for <tt>delete</tt>ing the pointer this
223  returns.
224 */
226  bool implicitOnly = false,
227  bool updateExplicitCount = false,
228  bool sanitize = true);
229 //! \overload
230 //! modifies the molecule in place
231 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
232  bool updateExplicitCount = false,
233  bool sanitize = true);
234 
235 //! returns a copy of a molecule with hydrogens removed and added as queries
236 //! to the heavy atoms to which they are bound.
237 /*!
238  This is really intended to be used with molecules that contain QueryAtoms
239 
240  \param mol the molecule to remove Hs from
241 
242  \return the new molecule
243 
244  <b>Notes:</b>
245  - Atoms that do not already have hydrogen count queries will have one
246  added, other H-related queries will not be touched. Examples:
247  - C[H] -> [C;!H0]
248  - [C;H1][H] -> [C;H1]
249  - [C;H2][H] -> [C;H2]
250  - Hydrogens which aren't connected to a heavy atom will not be
251  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
252  all atoms removed.
253  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
254  - By default all hydrogens are removed, however if
255  mergeUnmappedOnly is true, any hydrogen participating
256  in an atom map will be retained
257 
258 */
260  bool mergeUnmappedOnly = false);
261 //! \overload
262 //! modifies the molecule in place
264  bool mergeUnmappedOnly = false);
265 
266 typedef enum {
273  ADJUST_IGNOREALL = 0xFFFFFFF
275 
277  bool adjustDegree; /**< add degree queries */
278  std::uint32_t adjustDegreeFlags;
279  bool adjustRingCount; /**< add ring-count queries */
280  std::uint32_t adjustRingCountFlags;
281 
282  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
283  any-atom queries */
285  bool makeBondsGeneric; /**< convert bonds to generic queries (any bonds) */
286  std::uint32_t makeBondsGenericFlags;
287  bool makeAtomsGeneric; /**< convert atoms to generic queries (any atoms) */
288  std::uint32_t makeAtomsGenericFlags;
289  bool adjustHeavyDegree; /**< adjust the heavy-atom degree instead of overall
290  degree */
291  std::uint32_t adjustHeavyDegreeFlags;
292  bool adjustRingChain; /**< add ring-chain queries */
293  std::uint32_t adjustRingChainFlags;
294 
296  : adjustDegree(true),
297  adjustDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
298  adjustRingCount(false),
299  adjustRingCountFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
300  makeDummiesQueries(true),
301  aromatizeIfPossible(true),
302  makeBondsGeneric(false),
303  makeBondsGenericFlags(ADJUST_IGNORENONE),
304  makeAtomsGeneric(false),
305  makeAtomsGenericFlags(ADJUST_IGNORENONE),
306  adjustHeavyDegree(false),
307  adjustHeavyDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
308  adjustRingChain(false),
309  adjustRingChainFlags(ADJUST_IGNORENONE) {}
310 };
311 //! returns a copy of a molecule with query properties adjusted
312 /*!
313  \param mol the molecule to adjust
314  \param params controls the adjustments made
315 
316  \return the new molecule
317 */
319  const ROMol &mol, const AdjustQueryParameters *params = NULL);
320 //! \overload
321 //! modifies the molecule in place
323  RWMol &mol, const AdjustQueryParameters *params = NULL);
324 
325 //! returns a copy of a molecule with the atoms renumbered
326 /*!
327 
328  \param mol the molecule to work with
329  \param newOrder the new ordering of the atoms (should be numAtoms long)
330  for example: if newOrder is [3,2,0,1], then atom 3 in the original
331  molecule will be atom 0 in the new one
332 
333  \return the new molecule
334 
335  <b>Notes:</b>
336  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
337 
338 */
340  const ROMol &mol, const std::vector<unsigned int> &newOrder);
341 
342 //@}
343 
344 //! \name Sanitization
345 //@{
346 
347 typedef enum {
359  SANITIZE_ALL = 0xFFFFFFF
360 } SanitizeFlags;
361 
362 //! \brief carries out a collection of tasks for cleaning up a molecule and
363 //! ensuring
364 //! that it makes "chemical sense"
365 /*!
366  This function calls the following in sequence:
367  -# MolOps::cleanUp()
368  -# mol.updatePropertyCache()
369  -# MolOps::symmetrizeSSSR()
370  -# MolOps::Kekulize()
371  -# MolOps::assignRadicals()
372  -# MolOps::setAromaticity()
373  -# MolOps::setConjugation()
374  -# MolOps::setHybridization()
375  -# MolOps::cleanupChirality()
376  -# MolOps::adjustHs()
377 
378  \param mol : the RWMol to be cleaned
379 
380  \param operationThatFailed : the first (if any) sanitization operation that
381  fails is set here.
382  The values are taken from the \c SanitizeFlags
383  enum.
384  On success, the value is \c
385  SanitizeFlags::SANITIZE_NONE
386 
387  \param sanitizeOps : the bits here are used to set which sanitization
388  operations are carried
389  out. The elements of the \c SanitizeFlags enum define
390  the operations.
391 
392  <b>Notes:</b>
393  - If there is a failure in the sanitization, a \c SanitException
394  will be thrown.
395  - in general the user of this function should cast the molecule following
396  this
397  function to a ROMol, so that new atoms and bonds cannot be added to the
398  molecule and screw up the sanitizing that has been done here
399 */
401  unsigned int &operationThatFailed,
402  unsigned int sanitizeOps = SANITIZE_ALL);
403 //! \overload
405 
406 //! Possible aromaticity models
407 /*!
408 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
409 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
410 Book)
411 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
412 does not consider the outer envelope of fused rings)
413 - \c AROMATICITY_MDL
414 - \c AROMATICITY_CUSTOM uses a caller-provided function
415 */
416 typedef enum {
417  AROMATICITY_DEFAULT = 0x0, ///< future proofing
421  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
423 
424 //! Sets up the aromaticity for a molecule
425 /*!
426 
427  This is what happens here:
428  -# find all the simple rings by calling the findSSSR function
429  -# loop over all the Atoms in each ring and mark them if they are
430  candidates
431  for aromaticity. A ring atom is a candidate if it can spare electrons
432  to the ring and if it's from the first two rows of the periodic table.
433  -# based on the candidate atoms, mark the rings to be either candidates
434  or non-candidates. A ring is a candidate only if all its atoms are
435  candidates
436  -# apply Hueckel rule to each of the candidate rings to check if the ring
437  can be
438  aromatic
439 
440  \param mol the RWMol of interest
441  \param model the aromaticity model to use
442  \param func a custom function for assigning aromaticity (only used when
443  model=\c AROMATICITY_CUSTOM)
444 
445  \return >0 on success, <= 0 otherwise
446 
447  <b>Assumptions:</b>
448  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
449  been called)
450 
451 */
454  int (*func)(RWMol &) = NULL);
455 
456 //! Designed to be called by the sanitizer to handle special cases before
457 //! anything is done.
458 /*!
459 
460  Currently this:
461  - modifies nitro groups, so that the nitrogen does not have an unreasonable
462  valence of 5, as follows:
463  - the nitrogen gets a positive charge
464  - one of the oxygens gets a negative charge and the double bond to this
465  oxygen is changed to a single bond
466  The net result is that nitro groups can be counted on to be:
467  \c "[N+](=O)[O-]"
468  - modifies halogen-oxygen containing species as follows:
469  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
470  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
471  \c [Cl,Br,I](=O)O -> [X+]([O-])O
472  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
473 
474  \param mol the molecule of interest
475 
476 */
478 
479 //! Called by the sanitizer to assign radical counts to atoms
481 
482 //! adjust the number of implicit and explicit Hs for special cases
483 /*!
484 
485  Currently this:
486  - modifies aromatic nitrogens so that, when appropriate, they have an
487  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
488 
489  \param mol the molecule of interest
490 
491  <b>Assumptions</b>
492  - this is called after the molecule has been sanitized,
493  aromaticity has been perceived, and the implicit valence of
494  everything has been calculated.
495 
496 */
498 
499 //! Kekulizes the molecule
500 /*!
501 
502  \param mol the molecule of interest
503  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
504  on both the Bonds and Atoms are turned to false
505  following
506  the Kekulization, otherwise they are left alone in
507  their
508  original state.
509  \param maxBackTracks the maximum number of attempts at back-tracking. The
510  algorithm
511  uses a back-tracking procedure to revisit a previous
512  setting of
513  a double bond if we hit a wall in the kekulization
514  process
515 
516  <b>Notes:</b>
517  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
518  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
519  RDKit::Bond::SINGLE
520  or RDKit::Bond::DOUBLE during Kekulization.
521 
522 */
523 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
524  unsigned int maxBackTracks = 100);
525 
526 //! flags the molecule's conjugated bonds
528 
529 //! calculates and sets the hybridization of all of a molecule's atoms
531 
532 // @}
533 
534 //! \name Ring finding and SSSR
535 //@{
536 
537 //! finds a molecule's Smallest Set of Smallest Rings
538 /*!
539  Currently this implements a modified form of Figueras algorithm
540  (JCICS - Vol. 36, No. 5, 1996, 986-991)
541 
542  \param mol the molecule of interest
543  \param res used to return the vector of rings. Each entry is a vector with
544  atom indices. This information is also stored in the molecule's
545  RingInfo structure, so this argument is optional (see overload)
546 
547  \return number of smallest rings found
548 
549  Base algorithm:
550  - The original algorithm starts by finding representative degree 2
551  nodes.
552  - Representative because if a series of deg 2 nodes are found only
553  one of them is picked.
554  - The smallest ring around each of them is found.
555  - The bonds that connect to this degree 2 node are then chopped off,
556  yielding
557  new deg two nodes
558  - The process is repeated on the new deg 2 nodes.
559  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
560  with it is found. A bond from this is "carefully" (look in the paper)
561  selected and chopped, yielding deg 2 nodes. The process is same as
562  above once this is done.
563 
564  Our Modifications:
565  - If available, more than one smallest ring around a representative deg 2
566  node will be computed and stored
567  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
568  available)
569  and all the bonds to that node are chopped.
570  - The extra rings that were found in this process are removed after all the
571  nodes
572  have been covered.
573 
574  These changes were motivated by several factors:
575  - We believe the original algorithm fails to find the correct SSSR
576  (finds the correct number of them but the wrong ones) on some sample mols
577  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
578  The extra rings this process adds can be quite useful.
579 */
580 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol,
581  std::vector<std::vector<int>> &res);
582 //! \overload
583 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol,
584  std::vector<std::vector<int>> *res = 0);
585 
586 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
587 /*!
588  \b NOTE: though the RingInfo structure is populated by this function,
589  the only really reliable calls that can be made are to check if
590  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
591  return values >0
592 */
593 RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol);
594 
595 //! symmetrize the molecule's Smallest Set of Smallest Rings
596 /*!
597  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
598  For example, cubane has five SSSR rings, not six as one would hope.
599 
600  This function adds additional rings to the SSSR list if necessary
601  to make the list symmetric, e.g. all atoms in cubane will be part of the same
602  number
603  of SSSRs. This function chooses these extra rings from the extra rings
604  computed
605  and discarded during findSSSR. The new rings are chosen such that:
606  - replacing a same sized ring in the SSSR list with an extra ring yields
607  the same union of bond IDs as the original SSSR list
608 
609  \param mol - the molecule of interest
610  \param res used to return the vector of rings. Each entry is a vector with
611  atom indices. This information is also stored in the molecule's
612  RingInfo structure, so this argument is optional (see overload)
613 
614  \return the total number of rings = (new rings + old SSSRs)
615 
616  <b>Notes:</b>
617  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
618  first
619 */
621  std::vector<std::vector<int>> &res);
622 //! \overload
624 
625 //@}
626 
627 //! \name Shortest paths and other matrices
628 //@{
629 
630 //! returns a molecule's adjacency matrix
631 /*!
632  \param mol the molecule of interest
633  \param useBO toggles use of bond orders in the matrix
634  \param emptyVal sets the empty value (for non-adjacent atoms)
635  \param force forces calculation of the matrix, even if already
636  computed
637  \param propNamePrefix used to set the cached property name
638 
639  \return the adjacency matrix.
640 
641  <b>Notes</b>
642  - The result of this is cached in the molecule's local property dictionary,
643  which will handle deallocation. The caller should <b>not</b> \c delete
644  this pointer.
645 
646 */
648  const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
649  const char *propNamePrefix = 0,
650  const boost::dynamic_bitset<> *bondsToUse = 0);
651 
652 //! Computes the molecule's topological distance matrix
653 /*!
654  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
655 
656  \param mol the molecule of interest
657  \param useBO toggles use of bond orders in the matrix
658  \param useAtomWts sets the diagonal elements of the result to
659  6.0/(atomic number) so that the matrix can be used to calculate
660  Balaban J values. This does not affect the bond weights.
661  \param force forces calculation of the matrix, even if already
662  computed
663  \param propNamePrefix used to set the cached property name
664 
665  \return the distance matrix.
666 
667  <b>Notes</b>
668  - The result of this is cached in the molecule's local property dictionary,
669  which will handle deallocation. The caller should <b>not</b> \c delete
670  this pointer.
671 
672 
673 */
674 RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(const ROMol &mol,
675  bool useBO = false,
676  bool useAtomWts = false,
677  bool force = false,
678  const char *propNamePrefix = 0);
679 
680 //! Computes the molecule's topological distance matrix
681 /*!
682  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
683 
684  \param mol the molecule of interest
685  \param activeAtoms only elements corresponding to these atom indices
686  will be included in the calculation
687  \param bonds only bonds found in this list will be included in the
688  calculation
689  \param useBO toggles use of bond orders in the matrix
690  \param useAtomWts sets the diagonal elements of the result to
691  6.0/(atomic number) so that the matrix can be used to calculate
692  Balaban J values. This does not affect the bond weights.
693 
694  \return the distance matrix.
695 
696  <b>Notes</b>
697  - The results of this call are not cached, the caller <b>should</b> \c
698  delete
699  this pointer.
700 
701 
702 */
704  const ROMol &mol, const std::vector<int> &activeAtoms,
705  const std::vector<const Bond *> &bonds, bool useBO = false,
706  bool useAtomWts = false);
707 
708 //! Computes the molecule's 3D distance matrix
709 /*!
710 
711  \param mol the molecule of interest
712  \param confId the conformer to use
713  \param useAtomWts sets the diagonal elements of the result to
714  6.0/(atomic number)
715  \param force forces calculation of the matrix, even if already
716  computed
717  \param propNamePrefix used to set the cached property name
718  (if set to an empty string, the matrix will not be
719  cached). NOTE(review): the declared default is a null pointer rather than an empty string -- confirm how that case is handled.
720 
721  \return the distance matrix.
722 
723  <b>Notes</b>
724  - If propNamePrefix is not empty the result of this is cached in the
725  molecule's local property dictionary, which will handle deallocation.
726  In other cases the caller is responsible for freeing the memory.
727 
728 */
729 RDKIT_GRAPHMOL_EXPORT double *get3DDistanceMat(const ROMol &mol,
730  int confId = -1,
731  bool useAtomWts = false,
732  bool force = false,
733  const char *propNamePrefix = 0);
734 //! Find the shortest path between two atoms
735 /*!
736  Uses the Bellman-Ford algorithm
737 
738  \param mol molecule of interest
739  \param aid1 index of the first atom
740  \param aid2 index of the second atom
741 
742  \return a std::list with the indices of the atoms along the shortest
743  path
744 
745  <b>Notes:</b>
746  - the starting and end atoms are included in the path
747  - if no path is found, an empty path is returned
748 
749 */
750 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
751  int aid2);
752 
753 //@}
754 
755 #if 0
756  //! \name Canonicalization
757  //@{
758 
759  //! assign a canonical ordering to a molecule's atoms
760  /*!
761  The algorithm used here is a modification of the published Daylight canonical
762  smiles algorithm (i.e. it uses atom invariants and products of primes).
763 
764  \param mol the molecule of interest
765  \param ranks used to return the ranks
766  \param breakTies toggles breaking of ties (see below)
767  \param includeChirality toggles inclusion of chirality in the invariants
768  \param includeIsotopes toggles inclusion of isotopes in the invariants
769  \param rankHistory used to return the rank history (see below)
770 
771  <b>Notes:</b>
772  - Tie breaking should be done when it's important to have a full ordering
773  of the atoms (e.g. when generating canonical traversal trees). If it's
774  acceptable to have ties between symmetry-equivalent atoms (e.g. when
775  generating CIP codes), tie breaking can/should be skipped.
776  - if the \c rankHistory argument is provided, the evolution of the ranks of
777  individual atoms will be tracked. The \c rankHistory pointer should be
778  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
779  */
780  RDKIT_GRAPHMOL_EXPORT void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
781  bool breakTies=true,
782  bool includeChirality=true,
783  bool includeIsotopes=true,
784  std::vector<std::vector<int> > *rankHistory=0);
785  //! assign a canonical ordering to a sub-molecule's atoms
786  /*!
787  The algorithm used here is a modification of the published Daylight canonical
788  smiles algorithm (i.e. it uses atom invariants and products of primes).
789 
790  \param mol the molecule of interest
791  \param atomsToUse atoms to be included
792  \param bondsToUse bonds to be included
793  \param atomSymbols symbols to use for the atoms in the output (these are
794  used in place of atomic number and isotope information)
795  \param ranks used to return the ranks
796  \param breakTies toggles breaking of ties (see below)
797  \param rankHistory used to return the rank history (see below)
798 
799  <b>Notes:</b>
800  - Tie breaking should be done when it's important to have a full ordering
801  of the atoms (e.g. when generating canonical traversal trees). If it's
802  acceptable to have ties between symmetry-equivalent atoms (e.g. when
803  generating CIP codes), tie breaking can/should be skipped.
804  - if the \c rankHistory argument is provided, the evolution of the ranks of
805  individual atoms will be tracked. The \c rankHistory pointer should be
806  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
807  */
808  RDKIT_GRAPHMOL_EXPORT void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
809  const boost::dynamic_bitset<> &atomsToUse,
810  const boost::dynamic_bitset<> &bondsToUse,
811  const std::vector<std::string> *atomSymbols=0,
812  const std::vector<std::string> *bondSymbols=0,
813  bool breakTies=true,
814  std::vector<std::vector<int> > *rankHistory=0);
815 
816  // @}
817 #endif
818 //! \name Stereochemistry
819 //@{
820 
821 //! removes bogus chirality markers (those on non-sp3 centers):
823 
824 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
825 /*!
826  \param mol the molecule of interest
827  \param confId the conformer to use
828  \param replaceExistingTags if this flag is true, any existing atomic chiral
829  tags will be replaced
830 
831  If the conformer provided is not a 3D conformer, nothing will be done.
832 */
834  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
835 
836 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
837 //! stereo flags to its bonds
838 /*!
839 
840  \param mol the molecule of interest
841  \param confId the conformer to use
842  \param replaceExistingTags if this flag is true, any existing info about
843  stereochemistry will be replaced
844 
845  If the conformer provided is not a 3D conformer, nothing will be done.
846 */
848  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
849 
850 //! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
851 //! stereo flags to its bonds
852 /*!
853 
854  \param mol the molecule of interest
855  \param confId the conformer to use
856  \param replaceExistingTags if this flag is true, any existing info about
857  stereochemistry will be replaced
858 */
860  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
861 
862 //! \brief Uses a conformer to assign directionality to the single bonds
863 //! around double bonds
864 /*!
865 
866  \param mol the molecule of interest
867  \param confId the conformer to use
868 */
870  int confId = -1);
872  ROMol &mol, const Conformer *conf = NULL);
873 
874 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
875 /*!
876  Does the CIP stereochemistry assignment for the molecule's atoms
877  (R/S) and double bond (Z/E). Chiral atoms will have a property
878  '_CIPCode' indicating their chiral code.
879 
880  \param mol the molecule to use
881  \param cleanIt if true, atoms with a chiral specifier that aren't
882  actually chiral (e.g. atoms with duplicate
883  substituents or only 2 substituents, etc.) will have
884  their chiral code set to CHI_UNSPECIFIED. Bonds with
885  STEREOCIS/STEREOTRANS specified that have duplicate
886  substituents based upon the CIP atom ranks will be
887  marked STEREONONE.
888  \param force causes the calculation to be repeated even if it has
889  already been done
890  \param flagPossibleStereoCenters set the _ChiralityPossible property on
891  atoms that are possible stereocenters
892 
893  <b>Notes:</b>
894  - Throughout we assume that we're working with a hydrogen-suppressed
895  graph.
896 
897 */
899  ROMol &mol, bool cleanIt = false, bool force = false,
900  bool flagPossibleStereoCenters = false);
901 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
902 //! (i.e. Z/E)
903 /*!
904 
905  \param mol the molecule of interest
906 */
908 
909 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
910 //! Bond::STEREOANY.
911 /*!
912  \param mol the molecule of interest
913  \param cleanIt toggles removal of stereo flags from double bonds that can
914  not have stereochemistry
915 
916  This function finds any double bonds that can potentially be part of
917  a cis/trans system. No attempt is made here to mark them cis or
918  trans. No attempt is made to detect double bond stereo in ring systems.
919 
920  This function is useful in the following situations:
921  - when parsing a mol file; for the bonds marked here, coordinate
922  information on the neighbors can be used to identify cis or trans states
923  - when writing a mol file; bonds that can be cis/trans but not marked as
924  either need to be specially marked in the mol file
925  - finding double bonds with unspecified stereochemistry so they
926  can be enumerated for downstream 3D tools
927 
928  The CIPranks on the neighboring atoms are checked in this function. The
929  _CIPCode property if set to any on the double bond.
930 */
932  bool cleanIt = false);
933 //@}
934 
935 //! returns the number of atoms which have a particular property set
937  const ROMol &mol, std::string prop);
938 
939 }; // end of namespace MolOps
940 }; // end of namespace RDKit
941 
942 #endif
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all of a molecule's atoms
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=NULL)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:416
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
Uses a conformer to assign directionality to the single bonds around double bonds.
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:32
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds...
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=NULL)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:307
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int >> &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:33
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
Std stuff.
Definition: Atom.h:30
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int >> &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds...
RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:42
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds.
std::uint32_t adjustHeavyDegreeFlags
Definition: MolOps.h:291
std::vector< UINT > UINT_VECT
Definition: types.h:272
std::vector< double > INVAR_VECT
Definition: MolOps.h:31
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sense"
AdjustQueryWhichFlags
Definition: MolOps.h:266
The class for representing atoms.
Definition: Atom.h:69
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms