RDKit
Open-source cheminformatics and machine learning.
SubstanceGroup.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (C) 2002-2018 Greg Landrum and T5 Informatics GmbH
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 /*! \file SubstanceGroup.h
12 
13  \brief Defines the SubstanceGroup class
14 
15 */
16 #include <RDGeneral/export.h>
17 #ifndef _RD_SGROUP_H
18 #define _RD_SGROUP_H
19 
20 #include <unordered_map>
21 
22 #include <Geometry/point.h>
23 #include <RDGeneral/types.h>
24 #include <RDGeneral/RDProps.h>
25 #include <boost/smart_ptr.hpp>
26 
27 namespace RDKit {
28 class ROMol;
29 class Bond;
30 class Atom;
31 
32 //! used to indicate errors from incorrect sgroup access
34  : public std::runtime_error {
35  public:
36  //! construct with an error message
37  SubstanceGroupException(const char *msg) : std::runtime_error(msg){};
38  //! construct with an error message
39  SubstanceGroupException(const std::string &msg) : std::runtime_error(msg){};
40 };
41 
42 //! The class for representing SubstanceGroups
43 /*!
44  <b>Notes:</b>
45  - These are inspired by the SGroups in the MDL formats
46  - Implementation is based on 2010 MDL SD specification:
47  http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
48  - See SGroups.md for further, more comprehensive notes.
49 
50 */
51 
53  public:
54  //! Bond type (see V3000 spec); this is the type returned by getBondType()
55  enum class BondType {
56  XBOND, // External/Crossing bond
57  CBOND, // Internal/Contained bond
58  };
59 
60  typedef std::array<RDGeom::Point3D, 3> Bracket;
61 
62  //! Data structure for SAP lines (see V3000 spec)
63  //! lvIdx may not be set; this is signaled with the value -1
64  struct AttachPoint {
65  unsigned int aIdx;
66  int lvIdx;  // -1 when not set
67  std::string id;
68  bool operator==(const AttachPoint &other) const {  // memberwise: all three fields must match
69  return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
70  }
71  };
72 
73  //! See specification for V3000 CSTATE
74  //! vector may or may not be considered, depending on TYPE
75  struct CState {
76  unsigned int bondIdx;
78  bool operator==(const CState &other) const {
79  // note that we ignore coordinates for this
80  return bondIdx == other.bondIdx;
81  }
82  };
83 
84  //! No default constructor
85  SubstanceGroup() = delete;
86 
87  //! Main Constructor. Ownership is only set on this side of the relationship:
88  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
89  //! on the other side.
90  SubstanceGroup(ROMol *owning_mol, const std::string &type);
91 
92  SubstanceGroup(const SubstanceGroup &other) = default;
93  SubstanceGroup(SubstanceGroup &&other) = default;
94 
95  SubstanceGroup &operator=(const SubstanceGroup &other) = default;
96  SubstanceGroup &operator=(SubstanceGroup &&other) = default;
97 
98  //! Destructor
100 
101  //! Get the molecule that owns this SubstanceGroup (dp_mol is dereferenced without a null check)
102  ROMol &getOwningMol() const { return *dp_mol; }
103 
104  //! get the index of this sgroup in dp_mol's sgroups vector
105  //! (do not mistake this for the ID!)
106  unsigned int getIndexInMol() const;
107 
108  /* Atom and Bond methods */
109  void addAtomWithIdx(unsigned int idx);
110  void addParentAtomWithIdx(unsigned int idx);
111  void addBondWithIdx(unsigned int idx);
112  void addAtomWithBookmark(int mark);
113  void addParentAtomWithBookmark(int mark);
114  void addBondWithBookmark(int mark);
115 
116  void addBracket(const Bracket &bracket);
117  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
118  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
119 
120  BondType getBondType(unsigned int bondIdx) const;
121 
122  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }  // const reference to d_atoms, no copy
123  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }  // const reference to d_patoms, no copy
124  const std::vector<unsigned int> &getBonds() const { return d_bonds; }  // const reference to d_bonds, no copy
125 
126  const std::vector<Bracket> &getBrackets() const { return d_brackets; }  // const reference to d_brackets, no copy
127  const std::vector<CState> &getCStates() const { return d_cstates; }  // const reference to d_cstates, no copy
128  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }  // const reference to d_saps, no copy
129 
130  //! Set owning molecule
131  //! This only updates atoms and bonds; parent sgroup has to be updated
132  //! independently, since parent might not exist at the time this is called.
133  void setOwningMol(ROMol *mol);
134 
135  bool operator==(const SubstanceGroup &other) const {
136  // we ignore brackets and cstates, which involve coordinates; dp_mol is compared as a pointer, so both groups must refer to the same ROMol object
137  return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
138  d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
139  d_saps == other.d_saps;
140  }
141 
142  private:
143  ROMol *dp_mol = nullptr; // owning molecule
144 
145  std::vector<unsigned int> d_atoms;
146  std::vector<unsigned int> d_patoms;
147  std::vector<unsigned int> d_bonds;
148 
149  std::vector<Bracket> d_brackets;
150  std::vector<CState> d_cstates;
151  std::vector<AttachPoint> d_saps;
152 };
153 
154 namespace SubstanceGroupChecks {
155 
156 const std::vector<std::string> sGroupTypes = {
157  // polymer sgroups:
158  "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
159  // formulations/mixtures:
160  "COM", "MIX", "FOR",
161  // other
162  "SUP", "MUL", "DAT", "GEN"};
163 
164 const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
165 const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
166 
167 RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
168 
169 RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
170 
171 RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
172 
174  unsigned int id);
175 
176 } // namespace SubstanceGroupChecks
177 
178 //! \name SubstanceGroups and molecules
179 //@{
180 
181 RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
182  ROMol &mol);
183 RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
184  const ROMol &mol);
185 
186 //! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
187 //! references to the SubstanceGroup exist.
188 /*!
189  \param sgroup - SubstanceGroup to be added to the molecule.
190 */
192  SubstanceGroup sgroup);
193 //@}
194 
195 } // namespace RDKit
196 
197 //! allows SubstanceGroup objects to be dumped to streams
198 RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
199  const RDKit::SubstanceGroup &sg);
200 #endif
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< unsigned int > & getBonds() const
const std::vector< unsigned int > & getParentAtoms() const
The class for representing SubstanceGroups.
SubstanceGroupException(const char *msg)
construct with an error message
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
bool operator==(const AttachPoint &other) const
const std::vector< Bracket > & getBrackets() const
STL namespace.
SubstanceGroupException(const std::string &msg)
construct with an error message
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< CState > & getCStates() const
const std::vector< std::string > sGroupSubtypes
const std::vector< unsigned int > & getAtoms() const
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
BondType
Bond type (see V3000 spec)
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:307
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
bool operator==(const CState &other) const
bool operator==(const SubstanceGroup &other) const
used to indicate errors from incorrect sgroup access
Std stuff.
Definition: Atom.h:30
ROMol & getOwningMol() const
Get the molecule that owns this SubstanceGroup.
~SubstanceGroup()
Destructor.
const std::vector< AttachPoint > & getAttachPoints() const
const std::vector< std::string > sGroupConnectTypes
const std::vector< std::string > sGroupTypes
std::array< RDGeom::Point3D, 3 > Bracket
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)