RDKit
Open-source cheminformatics and machine learning.
SubstructLibrary.h
Go to the documentation of this file.
1 // Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDK_SUBSTRUCT_LIBRARY
32 #define RDK_SUBSTRUCT_LIBRARY
33 #include <RDGeneral/export.h>
34 #include <GraphMol/RDKitBase.h>
35 #include <GraphMol/MolPickler.h>
40 #include <DataStructs/BitOps.h>
41 
42 namespace RDKit {
43 
45 
46 //! Base class API for holding molecules to substructure search.
47 /*!
48  This is an API that hides the implementation details used for
49  indexing molecules for substructure searching. It simply
50  provides an API for adding and getting molecules from a set.
51  */
53  public:
54  virtual ~MolHolderBase() {}
55 
56  //! Add a new molecule to the substructure search library
57  //! Returns the molecules index in the library
58  virtual unsigned int addMol(const ROMol &m) = 0;
59 
60  // implementations should throw IndexError on out of range
61  virtual boost::shared_ptr<ROMol> getMol(unsigned int) const = 0;
62 
63  //! Get the current library size
64  virtual unsigned int size() const = 0;
65 };
66 
67 //! Concrete class that holds molecules in memory
68 /*!
69  This is currently one of the faster implementations.
70  However it is very memory intensive.
71 */
73  std::vector<boost::shared_ptr<ROMol>> mols;
74 
75  public:
76  MolHolder() : MolHolderBase(), mols() {}
77 
78  virtual unsigned int addMol(const ROMol &m) {
79  mols.push_back(boost::make_shared<ROMol>(m));
80  return size() - 1;
81  }
82 
83  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
84  if (idx >= mols.size()) throw IndexErrorException(idx);
85  return mols[idx];
86  }
87 
88  virtual unsigned int size() const {
89  return rdcast<unsigned int>(mols.size());
90  }
91 
92  std::vector<boost::shared_ptr<ROMol>> & getMols() { return mols; }
93  const std::vector<boost::shared_ptr<ROMol>> & getMols() const { return mols; }
94 };
95 
96 //! Concrete class that holds binary cached molecules in memory
97 /*!
98  This implementation uses quite a bit less memory than the
99  non cached implementation. However, due to the reduced speed
100  it should be used in conjunction with a pattern fingerprinter.
101 
102  See RDKit::FPHolder
103 */
105  std::vector<std::string> mols;
106 
107  public:
109 
110  virtual unsigned int addMol(const ROMol &m) {
111  mols.push_back(std::string());
112  MolPickler::pickleMol(m, mols.back());
113  return size() - 1;
114  }
115 
116  //! Adds a pickled binary molecule, no validity checking of the input
117  //! is done.
118  unsigned int addBinary(const std::string &pickle) {
119  mols.push_back(pickle);
120  return size() - 1;
121  }
122 
123  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
124  if (idx >= mols.size()) throw IndexErrorException(idx);
125  boost::shared_ptr<ROMol> mol(new ROMol);
126  MolPickler::molFromPickle(mols[idx], mol.get());
127  return mol;
128  }
129 
130  virtual unsigned int size() const {
131  return rdcast<unsigned int>(mols.size());
132  }
133 
134  std::vector<std::string> &getMols() { return mols; }
135  const std::vector<std::string> &getMols() const { return mols; }
136 };
137 
138 //! Concrete class that holds smiles strings in memory
139 /*!
140  This implementation uses quite a bit less memory than the
141  cached binary or uncached implementation. However, due to the
142  reduced speed it should be used in conjunction with a pattern
143  fingerprinter.
144 
145  See RDKit::FPHolder
146 */
148  std::vector<std::string> mols;
149 
150  public:
152 
153  virtual unsigned int addMol(const ROMol &m) {
154  bool doIsomericSmiles = true;
155  mols.push_back(MolToSmiles(m, doIsomericSmiles));
156  return size() - 1;
157  }
158 
159  //! Add a smiles to the dataset, no validation is done
160  //! to the inputs.
161  unsigned int addSmiles(const std::string &smiles) {
162  mols.push_back(smiles);
163  return size() - 1;
164  }
165 
166  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
167  if (idx >= mols.size()) throw IndexErrorException(idx);
168 
169  boost::shared_ptr<ROMol> mol(SmilesToMol(mols[idx]));
170  return mol;
171  }
172 
173  virtual unsigned int size() const {
174  return rdcast<unsigned int>(mols.size());
175  }
176 
177  std::vector<std::string> &getMols() { return mols; }
178  const std::vector<std::string> &getMols() const { return mols; }
179 };
180 
181 //! Concrete class that holds trusted smiles strings in memory
182 /*!
183  A trusted smiles is essentially a smiles string that
184  RDKit has generated. This indicates that fewer
185  sanitization steps are required. See
186  http://rdkit.blogspot.com/2016/09/avoiding-unnecessary-work-and.html
187 
188  This implementation uses quite a bit less memory than the
189  cached binary or uncached implementation. However, due to the
190  reduced speed it should be used in conjunction with a pattern
191  fingerprinter.
192 
193  See RDKit::FPHolder
194 */
196  std::vector<std::string> mols;
197 
198  public:
200 
201  virtual unsigned int addMol(const ROMol &m) {
202  bool doIsomericSmiles = true;
203  mols.push_back(MolToSmiles(m, doIsomericSmiles));
204  return size() - 1;
205  }
206 
207  //! Add a smiles to the dataset, no validation is done
208  //! to the inputs.
209  unsigned int addSmiles(const std::string &smiles) {
210  mols.push_back(smiles);
211  return size() - 1;
212  }
213 
214  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
215  if (idx >= mols.size()) throw IndexErrorException(idx);
216 
217  RWMol *m = SmilesToMol(mols[idx], 0, false);
218  m->updatePropertyCache();
219  return boost::shared_ptr<ROMol>(m);
220  }
221 
222  virtual unsigned int size() const {
223  return rdcast<unsigned int>(mols.size());
224  }
225 
226  std::vector<std::string> &getMols() { return mols; }
227  const std::vector<std::string> &getMols() const { return mols; }
228 };
229 
230 //! Base FPI for the fingerprinter used to rule out impossible matches
232  std::vector<ExplicitBitVect *> fps;
233 
234  public:
235  virtual ~FPHolderBase() {
236  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
237  }
238 
239  //! Adds a molecule to the fingerprinter
240  unsigned int addMol(const ROMol &m) {
241  fps.push_back(makeFingerprint(m));
242  return rdcast<unsigned int>(fps.size() - 1);
243  }
244 
245  //! Adds a raw bit vector to the fingerprinter
246  unsigned int addFingerprint(const ExplicitBitVect &v) {
247  fps.push_back(new ExplicitBitVect(v));
248  return rdcast<unsigned int>(fps.size() - 1);
249  }
250 
251  //! Return false if a substructure search can never match the molecule
252  bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const {
253  if (idx >= fps.size()) throw IndexErrorException(idx);
254 
255  return AllProbeBitsMatch(query, *fps[idx]);
256  }
257 
258  //! Get the bit vector at the specified index (throws IndexError if out of
259  //! range)
260  const ExplicitBitVect &getFingerprint(unsigned int idx) const {
261  if (idx >= fps.size()) throw IndexErrorException(idx);
262  return *fps[idx];
263  }
264 
265  //! make the query vector
266  //! Caller owns the vector!
267  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const = 0;
268 
269  std::vector<ExplicitBitVect *> &getFingerprints() { return fps; }
270  const std::vector<ExplicitBitVect *> &getFingerprints() const { return fps; }
271 };
272 
273 //! Uses the pattern fingerprinter to rule out matches
275  public:
276  //! Caller owns the vector!
277  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const {
278  return PatternFingerprintMol(m, 2048);
279  }
280 };
281 
282 //! Substructure Search a library of molecules
283 /*! This class allows for multithreaded substructure searches of
284  large datasets.
285 
286  The implementations can use fingerprints to speed up searches
287  and have molecules cached as binary forms to reduce memory
288  usage.
289 
290  basic usage:
291  \code
292  SubstructLibrary lib;
293  lib.addMol(mol);
294  std::vector<unsigned int> results = lib.getMatches(query);
295  for(std::vector<unsigned int>::const_iterator matchIndex=results.begin();
296  matchIndex != results.end();
297  ++matchIndex) {
298  boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
299  }
300  \endcode
301 
302  Using different mol holders and pattern fingerprints.
303 
304  \code
305  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
306  boost::make_shared<CachedTrustedSmilesMolHolder>();
307  boost::shared_ptr<PatternHolder> patternHolder = \
308  boost::make_shared<PatternHolder>();
309 
310  SubstructLibrary lib(molHolder, patternHolder);
311  lib.addMol(mol);
312  \endcode
313 
314  Cached molecule holders create molecules on demand. There are currently
315  three styles of cached molecules.
316 
317  CachedMolHolder: stores molecules in the rdkit binary format.
318  CachedSmilesMolHolder: stores molecules in smiles format.
319  CachedTrustedSmilesMolHolder: stores molecules in smiles format.
320 
321  The CachedTrustedSmilesMolHolder is made to add molecules from
322  a trusted source. This makes the basic assumption that RDKit was
323  used to sanitize and canonicalize the smiles string. In practice
324  this is considerably faster than using arbitrary smiles strings since
325  certain assumptions can be made.
326 
327  When loading from external data, as opposed to using the "addMol" API,
328  care must be taken to ensure that the pattern fingerprints and smiles
329  are synchronized.
330 
331  Each pattern holder has an API point for making its fingerprint. This
332  is useful to ensure that the pattern stored in the database will be
333  compatible with the patterns made when analyzing queries.
334 
335  \code
336  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
337  boost::make_shared<CachedTrustedSmilesMolHolder>();
338  boost::shared_ptr<PatternHolder> patternHolder = \
339  boost::make_shared<PatternHolder>();
340 
341  // the PatternHolder instance is able to make fingerprints.
342  // These, of course, can be read from a file. For demonstration
343  // purposes we construct them here.
344  const std::string trustedSmiles = "c1ccccc1";
345  ROMol *m = SmilesToMol(trustedSmiles);
346  const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
347 
348  // The trusted smiles and bitVector can be read from any source.
349  // This is the fastest way to load a substruct library.
350  molHolder->addSmiles( trustedSmiles );
351  patternHolder->addFingerprint( *bitVector );
352  SubstructLibrary lib(molHolder, patternHolder);
353  delete m;
354  delete bitVector;
355  \endcode
356 
357 */
359  boost::shared_ptr<MolHolderBase> molholder;
360  boost::shared_ptr<FPHolderBase> fpholder;
361  MolHolderBase *mols; // used for a small optimization
362  FPHolderBase *fps;
363 
364  public:
366  : molholder(new MolHolder),
367  fpholder(),
368  mols(molholder.get()),
369  fps(nullptr) {}
370 
371  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules)
372  : molholder(molecules), fpholder(), mols(molholder.get()), fps(0) {}
373 
374  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules,
375  boost::shared_ptr<FPHolderBase> fingerprints)
376  : molholder(molecules),
377  fpholder(fingerprints),
378  mols(molholder.get()),
379  fps(fpholder.get()) {}
380 
381  SubstructLibrary(const std::string &pickle)
382  : molholder(new MolHolder), fpholder(), mols(molholder.get()),
383  fps(nullptr) {
384  initFromString(pickle);
385  }
386 
387  //! Get the underlying molecule holder implementation
388  boost::shared_ptr<MolHolderBase> &getMolHolder() {
389  return molholder;
390  }
391 
392  const boost::shared_ptr<MolHolderBase> &getMolHolder() const {
393  return molholder;
394  }
395 
396  //! Get the underlying molecule holder implementation
397  boost::shared_ptr<FPHolderBase> &getFpHolder() {
398  return fpholder;
399  }
400 
401  //! Get the underlying molecule holder implementation
402  const boost::shared_ptr<FPHolderBase> &getFpHolder() const {
403  return fpholder;
404  }
405 
406  const MolHolderBase &getMolecules() const {
407  PRECONDITION(mols, "Molecule holder NULL in SubstructLibrary");
408  return *mols;
409  }
410 
411  //! Get the underlying fingerprint implementation.
412  /*! Throws a value error if no fingerprints have been set */
414  if (!fps)
415  throw ValueErrorException("Substruct Library does not have fingerprints");
416  return *fps;
417  }
418 
419  const FPHolderBase &getFingerprints() const {
420  if (!fps)
421  throw ValueErrorException("Substruct Library does not have fingerprints");
422  return *fps;
423  }
424 
425  //! Add a molecule to the library
426  /*!
427  \param mol Molecule to add
428 
429  returns index for the molecule in the library
430  */
431  unsigned int addMol(const ROMol &mol);
432 
433  //! Get the matching indices for the query
434  /*!
435  \param query Query to match against molecules
436  \param recursionPossible flags whether or not recursive matches are allowed
437  [ default true ]
438  \param useChirality use atomic CIP codes as part of the comparison [
439  default true ]
440 \param useQueryQueryMatches if set, the contents of atom and bond queries
441 will be used as part of the matching
442 [ default false ]
443  \param numThreads If -1 use all available processors [default -1]
444  \param maxResults Maximum results to return, -1 means return all [default
445  -1]
446  */
447  std::vector<unsigned int> getMatches(const ROMol &query,
448  bool recursionPossible = true,
449  bool useChirality = true,
450  bool useQueryQueryMatches = false,
451  int numThreads = -1,
452  int maxResults = -1);
453  //! Get the matching indices for the query between the given indices
454  /*!
455  \param query Query to match against molecules
456  \param startIdx Start index of the search
457  \param endIdx Ending idx (non-inclusive) of the search.
458  \param recursionPossible flags whether or not recursive matches are allowed
459  [ default true ]
460  \param useChirality use atomic CIP codes as part of the comparison [
461  default true ]
462 \param useQueryQueryMatches if set, the contents of atom and bond queries
463 will be used as part of the matching
464 [ default false ]
465  \param numThreads If -1 use all available processors [default -1]
466  \param maxResults Maximum results to return, -1 means return all [default
467  -1]
468  */
469  std::vector<unsigned int> getMatches(
470  const ROMol &query, unsigned int startIdx, unsigned int endIdx,
471  bool recursionPossible = true, bool useChirality = true,
472  bool useQueryQueryMatches = false, int numThreads = -1,
473  int maxResults = -1);
474 
475  //! Return the number of matches for the query
476  /*!
477  \param query Query to match against molecules
478  \param recursionPossible flags whether or not recursive matches are allowed
479  [ default true ]
480  \param useChirality use atomic CIP codes as part of the comparison [
481  default true ]
482 \param useQueryQueryMatches if set, the contents of atom and bond queries
483 will be used as part of the matching
484 [ default false ]
485  \param numThreads If -1 use all available processors [default -1]
486  */
487  unsigned int countMatches(const ROMol &query, bool recursionPossible = true,
488  bool useChirality = true,
489  bool useQueryQueryMatches = false,
490  int numThreads = -1);
491  //! Return the number of matches for the query between the given indices
492  /*!
493  \param query Query to match against molecules
494  \param startIdx Start index of the search
495  \param endIdx Ending idx (non-inclusive) of the search.
496  \param recursionPossible flags whether or not recursive matches are allowed
497  [ default true ]
498  \param useChirality use atomic CIP codes as part of the comparison [
499  default true ]
500 \param useQueryQueryMatches if set, the contents of atom and bond queries
501 will be used as part of the matching
502 [ default false ]
503  \param numThreads If -1 use all available processors [default -1]
504  */
505  unsigned int countMatches(const ROMol &query, unsigned int startIdx,
506  unsigned int endIdx, bool recursionPossible = true,
507  bool useChirality = true,
508  bool useQueryQueryMatches = false,
509  int numThreads = -1);
510 
511  //! Returns true if any match exists for the query
512  /*!
513  \param query Query to match against molecules
514  \param recursionPossible flags whether or not recursive matches are allowed
515  [ default true ]
516  \param useChirality use atomic CIP codes as part of the comparison [
517  default true ]
518 \param useQueryQueryMatches if set, the contents of atom and bond queries
519 will be used as part of the matching
520 [ default false ]
521  \param numThreads If -1 use all available processors [default -1]
522  */
523  bool hasMatch(const ROMol &query, bool recursionPossible = true,
524  bool useChirality = true, bool useQueryQueryMatches = false,
525  int numThreads = -1);
526  //! Returns true if any match exists for the query between the specified
527  //! indices
528  /*!
529  \param query Query to match against molecules
530  \param startIdx Start index of the search
531 \param endIdx Ending idx (non-inclusive) of the search.
532  \param recursionPossible flags whether or not recursive matches are allowed
533  [ default true ]
534  \param useChirality use atomic CIP codes as part of the comparison [
535  default true ]
536 \param useQueryQueryMatches if set, the contents of atom and bond queries
537 will be used as part of the matching
538 [ default false ]
539  \param numThreads If -1 use all available processors [default -1]
540  */
541  bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx,
542  bool recursionPossible = true, bool useChirality = true,
543  bool useQueryQueryMatches = false, int numThreads = -1);
544 
545  //! Returns the molecule at the given index
546  /*!
547  \param idx Index of the molecule in the library
548  */
549  boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
550  // expects implementation to throw IndexError if out of range
551  PRECONDITION(mols, "molholder is null in SubstructLibrary");
552  return mols->getMol(idx);
553  }
554 
555  //! Returns the molecule at the given index
556  /*!
557  \param idx Index of the molecule in the library
558  */
559  boost::shared_ptr<ROMol> operator[](unsigned int idx) {
560  // expects implementation to throw IndexError if out of range
561  PRECONDITION(mols, "molholder is null in SubstructLibrary");
562  return mols->getMol(idx);
563  }
564 
565  //! return the number of molecules in the library
566  unsigned int size() const {
567  PRECONDITION(mols, "molholder is null in SubstructLibrary");
568  return rdcast<unsigned int>(molholder->size());
569  }
570 
571 
572  //! access required for serialization
573  void resetHolders() {
574  mols = molholder.get();
575  fps = fpholder.get();
576  }
577 
578  //! serializes (pickles) to a stream
579  void toStream(std::ostream &ss) const;
580  //! returns a string with a serialized (pickled) representation
581  std::string Serialize() const;
582  //! initializes from a stream pickle
583  void initFromStream(std::istream &ss);
584  //! initializes from a string pickle
585  void initFromString(const std::string &text);
586 };
587 }
588 
590 #endif
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns...
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying molecule holder implementation.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
const ExplicitBitVect & getFingerprint(unsigned int idx) const
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
SubstructLibrary(const std::string &pickle)
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
virtual unsigned int size() const
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
unsigned int size() const
return the number of molecules in the library
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
Definition: export.h:658
Concrete class that holds molecules in memory.
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
virtual unsigned int addMol(const ROMol &m)
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
const std::vector< std::string > & getMols() const
Concrete class that holds trusted smiles strings in memory.
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
virtual unsigned int size() const
Get the current library size.
std::vector< std::string > & getMols()
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
pulls in the core RDKit functionality
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
virtual unsigned int addMol(const ROMol &m)
const std::vector< std::string > & getMols() const
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
Base FPI for the fingerprinter used to rule out impossible matches.
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
virtual unsigned int addMol(const ROMol &m)
virtual unsigned int size() const
Get the current library size.
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
const MolHolderBase & getMolecules() const
Std stuff.
Definition: Atom.h:30
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying molecule holder implementation.
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Definition: Exceptions.h:19
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
const FPHolderBase & getFingerprints() const
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
Base class API for holding molecules to substructure search.
std::vector< ExplicitBitVect * > & getFingerprints()
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
std::vector< std::string > & getMols()
std::vector< boost::shared_ptr< ROMol > > & getMols()
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const
Caller owns the vector!
unsigned int addSmiles(const std::string &smiles)
const std::vector< ExplicitBitVect * > & getFingerprints() const
Contains general bit-comparison and similarity operations.
unsigned int addSmiles(const std::string &smiles)
static void molFromPickle(const std::string &pickle, ROMol *mol)
constructs a molecule from a pickle stored in a string
unsigned int addBinary(const std::string &pickle)
Concrete class that holds binary cached molecules in memory.
#define PRECONDITION(expr, mess)
Definition: Invariant.h:108
Uses the pattern fingerprinter to rule out matches.
virtual unsigned int addMol(const ROMol &m)
unsigned int addFingerprint(const ExplicitBitVect &v)
Adds a raw bit vector to the fingerprinter.
RDKIT_RDGENERAL_EXPORT std::ostream & toStream(std::ostream &)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:33
a class for bit vectors that are densely occupied
std::vector< std::string > & getMols()
void resetHolders()
access required for serialization
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
Concrete class that holds smiles strings in memory.
virtual unsigned int size() const
Get the current library size.
Substructure Search a library of molecules.
const std::vector< std::string > & getMols() const