11 #ifndef RD_FILEPARSERUTILS_H 12 #define RD_FILEPARSERUTILS_H 17 #include <boost/lexical_cast.hpp> 18 #include <boost/algorithm/string.hpp> 19 #include <boost/format.hpp> 26 namespace FileParserUtils {
29 std::string trimmed = boost::trim_copy(input);
30 if (acceptSpaces && trimmed ==
"") {
33 return boost::lexical_cast<T>(trimmed);
37 bool acceptSpaces =
false);
39 bool acceptSpaces =
true);
47 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
48 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
49 bool strictParsing =
true,
bool expectMEND =
true);
53 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
54 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
55 bool strictParsing =
true);
62 const std::string &prefix,
63 const std::string &missingValueMarker =
"n/a") {
64 std::string atompn = pn.substr(prefix.size());
65 std::string strVect = mol.
getProp<std::string>(pn);
66 std::vector<std::string> tokens;
67 boost::split(tokens, strVect, boost::is_any_of(
" \t\n"),
68 boost::token_compress_on);
71 <<
"Property list " << pn <<
" too short, only " << tokens.size()
72 <<
" elements found. Ignoring it." << std::endl;
75 std::string mv = missingValueMarker;
76 size_t first_token = 0;
77 if (tokens.size() == mol.
getNumAtoms() + 1 && tokens[0].front() ==
'[' &&
78 tokens[0].back() ==
']') {
79 mv = std::string(tokens[0].begin() + 1, tokens[0].end() - 1);
84 <<
" is empty." << std::endl;
86 for (
size_t i = first_token; i < tokens.size(); ++i) {
87 if (tokens[i] != mv) {
88 unsigned int atomid = i - first_token;
90 T apv = boost::lexical_cast<T>(tokens[i]);
92 }
catch (
const boost::bad_lexical_cast &) {
94 <<
"Value " << tokens[i] <<
" for property " << pn <<
" of atom " 95 << atomid <<
" can not be parsed. Ignoring it." << std::endl;
102 template <
typename T>
104 const std::string missingValueMarker =
"n/a") {
106 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
107 applyMolListPropToAtoms<T>(mol, pn, prefix, missingValueMarker);
115 ROMol &mol,
const std::string pn,
116 const std::string &missingValueMarker =
"n/a") {
117 if (pn.find(atomPropPrefix) == 0 && pn.length() > atomPropPrefix.length()) {
118 std::string prefix = atomPropPrefix +
"prop.";
119 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
120 applyMolListPropToAtoms<std::string>(mol, pn, prefix, missingValueMarker);
122 prefix = atomPropPrefix +
"iprop.";
123 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
124 applyMolListPropToAtoms<std::int64_t>(mol, pn, prefix,
127 prefix = atomPropPrefix +
"dprop.";
128 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
129 applyMolListPropToAtoms<double>(mol, pn, prefix, missingValueMarker);
131 prefix = atomPropPrefix +
"bprop.";
132 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
133 applyMolListPropToAtoms<bool>(mol, pn, prefix, missingValueMarker);
143 ROMol &mol,
const std::string &missingValueMarker =
"n/a") {
148 template <
typename T>
150 std::string missingValueMarker =
"",
151 unsigned int lineSize = 190) {
154 if (!missingValueMarker.empty()) {
155 propVal += boost::str(boost::format(
"[%s] ") % missingValueMarker);
157 missingValueMarker =
"n/a";
159 for (
const auto &atom : mol.
atoms()) {
160 std::string apVal = missingValueMarker;
161 if (atom->
hasProp(atomPropName)) {
162 T tVal = atom->
getProp<T>(atomPropName);
163 apVal = boost::lexical_cast<std::string>(tVal);
167 if (propVal.length() + apVal.length() + 1 >= lineSize) {
170 res += propVal +
"\n";
173 propVal += apVal +
" ";
175 if (!propVal.empty()) {
183 ROMol &mol,
const std::string &atomPropName,
184 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
185 std::string molPropName =
"atom.iprop." + atomPropName;
187 getAtomPropertyList<boost::int64_t>(
188 mol, atomPropName, missingValueMarker, lineSize));
191 ROMol &mol,
const std::string &atomPropName,
192 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
193 std::string molPropName =
"atom.dprop." + atomPropName;
195 getAtomPropertyList<double>(mol, atomPropName, missingValueMarker,
199 ROMol &mol,
const std::string &atomPropName,
200 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
201 std::string molPropName =
"atom.bprop." + atomPropName;
203 getAtomPropertyList<bool>(mol, atomPropName, missingValueMarker,
207 ROMol &mol,
const std::string &atomPropName,
208 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
209 std::string molPropName =
"atom.prop." + atomPropName;
211 getAtomPropertyList<std::string>(mol, atomPropName,
212 missingValueMarker, lineSize));
void processMolPropertyLists(ROMol &mol, const std::string &missingValueMarker="n/a")
RDKIT_FILEPARSERS_EXPORT int toInt(const std::string &input, bool acceptSpaces=false)
static const std::string atomPropPrefix
#define RDKIT_FILEPARSERS_EXPORT
RDKIT_FILEPARSERS_EXPORT std::string getV3000Line(std::istream *inStream, unsigned int &line)
#define BOOST_LOG(__arg__)
void createAtomBoolPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
RWMol is a molecule class that is intended to be edited.
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
void createAtomDoublePropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
RDKIT_RDGENERAL_EXPORT std::shared_ptr< boost::logging::rdLogger > rdWarningLog
RDKIT_FILEPARSERS_EXPORT bool ParseV2000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true)
void applyMolListPropsToAtoms(ROMol &mol, const std::string &prefix, const std::string missingValueMarker="n/a")
applies all properties matching a particular prefix as an atom property list
RDKIT_FILEPARSERS_EXPORT double toDouble(const std::string &input, bool acceptSpaces=true)
std::string getAtomPropertyList(ROMol &mol, const std::string &atomPropName, std::string missingValueMarker="", unsigned int lineSize=190)
void createAtomStringPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
void processMolPropertyList(ROMol &mol, const std::string pn, const std::string &missingValueMarker="n/a")
T stripSpacesAndCast(const std::string &input, bool acceptSpaces=false)
bool hasProp(const std::string &key) const
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
RDKIT_FILEPARSERS_EXPORT bool ParseV3000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true, bool expectMEND=true)
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Atom * getAtomWithIdx(unsigned int idx)
returns a pointer to a particular Atom
RDKIT_FILEPARSERS_EXPORT Atom * replaceAtomWithQueryAtom(RWMol *mol, Atom *atom)
void applyMolListPropToAtoms(ROMol &mol, const std::string &pn, const std::string &prefix, const std::string &missingValueMarker="n/a")
applies a particular property to the atoms as an atom property list
CXXAtomIterator< MolGraph, Atom * > atoms()
C++11 Range iterator.
The class for representing atoms.
void createAtomIntPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)