RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
//! Exception type that carries a caller-supplied error message.
/*!
  The message is available both through message() and through the standard
  std::exception::what() interface, so code that catches
  `const std::exception &` sees the same text.
*/
class MolFileUnhandledFeatureException : public std::exception {
 public:
  //! construct with an error message (a null pointer yields an empty message)
  explicit MolFileUnhandledFeatureException(const char *msg)
      : _msg(msg ? msg : "") {}
  //! construct with an error message
  explicit MolFileUnhandledFeatureException(const std::string &msg)
      : _msg(msg) {}
  //! get the error message
  const char *message() const { return _msg.c_str(); }
  //! get the error message via the std::exception interface
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolFileUnhandledFeatureException() noexcept override {}

 private:
  std::string _msg;  //!< stored copy of the message passed to the constructor
};
42 
43 //-----
44 // mol files
45 //-----
46 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
47 // \brief construct a molecule from MDL mol data in a stream
48 /*!
49  * \param inStream - stream containing the data
50  * \param line - current line number (used for error reporting)
51  * \param sanitize - toggles sanitization and stereochemistry
52  * perception of the molecule
53  * \param removeHs - toggles removal of Hs from the molecule. H removal
54  * is only done if the molecule is sanitized
55  * \param line - current line number (used for error reporting)
56  * \param strictParsing - if not set, the parser is more lax about correctness
57  * of the contents.
58  *
59  */
60 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream *inStream,
61  unsigned int &line,
62  bool sanitize = true,
63  bool removeHs = true,
64  bool strictParsing = true);
65 // \overload
66 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream &inStream,
67  unsigned int &line,
68  bool sanitize = true,
69  bool removeHs = true,
70  bool strictParsing = true);
71 // \brief construct a molecule from an MDL mol block
72 /*!
73  * \param molBlock - string containing the mol block
74  * \param sanitize - toggles sanitization and stereochemistry
75  * perception of the molecule
76  * \param removeHs - toggles removal of Hs from the molecule. H removal
77  * is only done if the molecule is sanitized
78  * \param strictParsing - if not set, the parser is more lax about correctness
79  * of the contents.
80  */
81 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
82  bool sanitize = true,
83  bool removeHs = true,
84  bool strictParsing = true);
85 
86 // \brief construct a molecule from an MDL mol file
87 /*!
88  * \param fName - string containing the file name
89  * \param sanitize - toggles sanitization and stereochemistry
90  * perception of the molecule
91  * \param removeHs - toggles removal of Hs from the molecule. H removal
92  * is only done if the molecule is sanitized
93  * \param strictParsing - if not set, the parser is more lax about correctness
94  * of the contents.
95  */
96 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
97  bool sanitize = true,
98  bool removeHs = true,
99  bool strictParsing = true);
100 
101 // \brief generates an MDL mol block for a molecule
102 /*!
103  * \param mol - the molecule in question
104  * \param includeStereo - toggles inclusion of stereochemistry information
105  * \param confId - selects the conformer to be used
106  * \param kekulize - triggers kekulization of the molecule before it is
107  * written
108  * \param forceV3000 - force generation of a V3000 mol block (happens
109  * automatically with
110  * more than 999 atoms or bonds)
111  */
112 RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
113  bool includeStereo = true,
114  int confId = -1,
115  bool kekulize = true,
116  bool forceV3000 = false);
117 // \brief Writes a molecule to an MDL mol file
118 /*!
119  * \param mol - the molecule in question
120  * \param fName - the name of the file to use
121  * \param includeStereo - toggles inclusion of stereochemistry information
122  * \param confId - selects the conformer to be used
123  * \param kekulize - triggers kekulization of the molecule before it is
124  * written
125  * \param forceV3000 - force generation of a V3000 mol block (happens
126  * automatically with
127  * more than 999 atoms or bonds)
128  */
130  const ROMol &mol, const std::string &fName, bool includeStereo = true,
131  int confId = -1, bool kekulize = true, bool forceV3000 = false);
132 
133 //-----
134 // TPL handling:
135 //-----
136 
137 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
138 /*!
139  \param inStream: the stream from which to read
140  \param line: used to track the line number of errors
141  \param sanitize: toggles sanitization and stereochemistry
142  perception of the molecule
143  \param skipFirstConf: according to the TPL format description, the atomic
144  coords in the atom-information block describe the first
145  conformation and the first conf block describes second
146  conformation. The CombiCode, on the other hand, writes
147  the first conformation data both to the atom-information
148  block and to the first conf block. We want to be able to
149  read CombiCode-style tpls, so we'll allow this
150  mis-feature
151  to be parsed when this flag is set.
152 */
153 RDKIT_FILEPARSERS_EXPORT RWMol *TPLDataStreamToMol(std::istream *inStream,
154  unsigned int &line,
155  bool sanitize = true,
156  bool skipFirstConf = false);
157 
158 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
159 /*!
160  \param fName: the name of the file from which to read
161  \param sanitize: toggles sanitization and stereochemistry
162  perception of the molecule
163  \param skipFirstConf: according to the TPL format description, the atomic
164  coords in the atom-information block describe the first
165  conformation and the first conf block describes second
166  conformation. The CombiCode, on the other hand, writes
167  the first conformation data both to the atom-information
168  block and to the first conf block. We want to be able to
169  read CombiCode-style tpls, so we'll allow this
170  mis-feature
171  to be parsed when this flag is set.
172 */
173 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
174  bool sanitize = true,
175  bool skipFirstConf = false);
176 
178  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
179  bool writeFirstConfTwice = false);
181  const ROMol &mol, const std::string &fName,
182  const std::string &partialChargeProp = "_GasteigerCharge",
183  bool writeFirstConfTwice = false);
184 
185 //-----
186 // MOL2 handling
187 //-----
188 
189 typedef enum {
190  CORINA = 0 //! supports output from Corina and some dbtranslate output
191 } Mol2Type;
192 
193 // \brief construct a molecule from a Tripos mol2 file
194 /*!
195  *
196  * \param fName - string containing the file name
197  * \param sanitize - toggles sanitization of the molecule
198  * \param removeHs - toggles removal of Hs from the molecule. H removal
199  * is only done if the molecule is sanitized
200  * \param variant - the atom type definitions to use
201  * \param cleanupSubstructures - toggles recognition and cleanup of common
202  * substructures
203  */
204 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
205  bool sanitize = true,
206  bool removeHs = true,
207  Mol2Type variant = CORINA,
208  bool cleanupSubstructures = true);
209 
210 // \brief construct a molecule from Tripos mol2 data in a stream
211 /*!
212  * \param inStream - stream containing the data
213  * \param sanitize - toggles sanitization of the molecule
214  * \param removeHs - toggles removal of Hs from the molecule. H removal
215  * is only done if the molecule is sanitized
216  * \param variant - the atom type definitions to use
217  * \param cleanupSubstructures - toggles recognition and cleanup of common
218  * substructures
219  */
221  std::istream *inStream, bool sanitize = true, bool removeHs = true,
222  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
223 // \overload
225  std::istream &inStream, bool sanitize = true, bool removeHs = true,
226  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
227 
228 // \brief construct a molecule from a Tripos mol2 block
229 /*!
230  * \param molBlock - string containing the mol block
231  * \param sanitize - toggles sanitization of the molecule
232  * \param removeHs - toggles removal of Hs from the molecule. H removal
233  * is only done if the molecule is sanitized
234  * \param variant - the atom type definitions to use
235  * \param cleanupSubstructures - toggles recognition and cleanup of common
236  * substructures
237  */
239  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
240  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
241 
243  bool sanitize = true,
244  bool removeHs = true,
245  unsigned int flavor = 0,
246  bool proximityBonding = true);
247 
248 RDKIT_FILEPARSERS_EXPORT RWMol *PDBBlockToMol(const std::string &str,
249  bool sanitize = true,
250  bool removeHs = true,
251  unsigned int flavor = 0,
252  bool proximityBonding = true);
254  std::istream *inStream, bool sanitize = true, bool removeHs = true,
255  unsigned int flavor = 0, bool proximityBonding = true);
257  std::istream &inStream, bool sanitize = true, bool removeHs = true,
258  unsigned int flavor = 0, bool proximityBonding = true);
259 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
260  bool sanitize = true,
261  bool removeHs = true,
262  unsigned int flavor = 0,
263  bool proximityBonding = true);
264 
265 // \brief generates a PDB block for a molecule
266 /*!
267  * \param mol - the molecule in question
268  * \param confId - selects the conformer to be used
269  * \param flavor - controls what gets written:
270  * flavor & 1 : Write MODEL/ENDMDL lines around each record
271  * flavor & 2 : Don't write any CONECT records
272  * flavor & 4 : Write CONECT records in both directions
273  * flavor & 8 : Don't use multiple CONECTs to encode bond order
274  * flavor & 16 : Write MASTER record
275  * flavor & 32 : Write TER record
276  */
277 RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol,
278  int confId = -1,
279  unsigned int flavor = 0);
280 // \brief Writes a molecule to a PDB file
281 /*!
282  * \param mol - the molecule in question
283  * \param fName - the name of the file to use
284  * \param confId - selects the conformer to be used
285  * \param flavor - controls what gets written:
286  * flavor & 1 : Write MODEL/ENDMDL lines around each record
287  * flavor & 2 : Don't write any CONECT records
288  * flavor & 4 : Write CONECT records in both directions
289  * flavor & 8 : Don't use multiple CONECTs to encode bond order
290  * flavor & 16 : Write MASTER record
291  * flavor & 32 : Write TER record
292  */
294  const std::string &fname,
295  int confId = -1,
296  unsigned int flavor = 0);
297 
298 // \brief reads a molecule from the metadata in an RDKit-generated SVG file
299 /*!
300  * \param svg - string containing the SVG
301  * \param sanitize - toggles sanitization of the molecule
302  * \param removeHs - toggles removal of Hs from the molecule. H removal
303  * is only done if the molecule is sanitized
304  *
305  * **NOTE** This functionality should be considered beta.
306  */
307 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(const std::string &svg,
308  bool sanitize = true,
309  bool removeHs = true);
310 /*! \overload
311  */
312 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(std::istream *instream,
313  bool sanitize = true,
314  bool removeHs = true);
315 
316 } // namespace RDKit
317 
318 #endif
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:216
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:46
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:31
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
pulls in the core RDKit functionality
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
~MolFileUnhandledFeatureException() noexcept override
Definition: FileParsers.h:37
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
Std stuff.
Definition: Atom.h:30
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
const char * message() const
get the error message
Definition: FileParsers.h:36
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:33
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)