RDKit
Open-source cheminformatics and machine learning.
StructChecker.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2016 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file StructChecker.h
12 
13 \brief Contains the public API of the StructChecker
14 
15 \b Note that this should be considered beta and that the API may change in
16 future
17 releases.
18 
19 */
20 #include <RDGeneral/export.h>
21 #pragma once
22 #ifndef RD_STRUCTCHECKER_H_Oct2016
23 #define RD_STRUCTCHECKER_H_Oct2016
24 
25 #include <string>
26 #include <vector>
27 #include "../RDKitBase.h"
28 
29 namespace RDKit {
30 namespace StructureCheck {
31 
32 // Flags for the return values of the StructureChecker
33 
34 // TypeDefs for translating augmented atom pairs
35 static const int ANY_CHARGE = 8; // "any charge" marker; used as the default Charge in the constructor initializer lists below
37  RT_NONE = 0,
38  SINGLET = 1,
39  DOUBLET = 2,
40  TRIPLET = 3,
41  ANY_RADICAL = 0xFF
42 };
43 
44 enum AABondType { // MDL CTFile bond types plus extensions
45  BT_NONE = 0, // means REMOVE Bond
46  SINGLE = 1,
47  DOUBLE = 2,
48  TRIPLE = 3,
49  AROMATIC = 4, // NOTE(review): listing lines 50-52 are not shown in this rendering; further enumerators may exist - confirm against the header
53  ANY_BOND = 8, // "any bond" marker; used as the default BondType in a constructor initializer list in this file
55 };
56 
57 enum AATopology { // Topology value carried by an AugmentedAtom (see its constructor's `topology` parameter)
58  TP_NONE = 0, // Don't care; the default Topology in AugmentedAtom's default initializer list
59  RING = 1, // Ring (presumably: the atom must be in a ring - confirm against the matcher)
60  CHAIN = 2 // Chain (presumably: the atom must not be in a ring - confirm against the matcher)
61 };
62 
64  std::string AtomSymbol;
65  int Charge;
67  unsigned SubstitutionCount; // substitution count 0 = don't care
70  : Charge(ANY_CHARGE),
71  Radical(ANY_RADICAL),
72  SubstitutionCount(0),
73  BondType(ANY_BOND) {}
74 };
75 
77  std::string AtomSymbol;
78  std::string ShortName;
79  int Charge;
82  std::vector<Ligand> Ligands;
83 
85  : Charge(ANY_CHARGE), Radical(ANY_RADICAL), Topology(TP_NONE) {}
86 
87  AugmentedAtom(const std::string &symbol, const std::string &name, int charge, // copies each argument into the same-named member
88  RadicalType radical, AATopology topology)
89  : AtomSymbol(symbol),
90  ShortName(name),
91  Charge(charge),
92  Radical(radical),
93  Topology(topology) {} // Ligands is not listed here, so it is default-constructed (empty vector)
94 };
95 
97  std::string AtomSymbol;
98  double LocalInc;
99  double AlphaInc;
100  double BetaInc;
101  double MultInc;
102 
103  // Used for logging
108 };
109 
112  double Cond;
113  // Used for logging
115 };
116 //-------------
117 
118 //! Structure Check Options
119 // Holds all the user options for the StructureChecking.
120 // Can be initialized from factory functions, perhaps serialized
122  double AcidityLimit;
127  unsigned MaxMolSize;
134  bool Verbose;
135 
136  // Internal data for struchk
137  std::vector<std::pair<AugmentedAtom, AugmentedAtom>> AugmentedAtomPairs; // presumably filled by load/setAugmentedAtomTranslations - confirm
138  std::vector<AugmentedAtom> AcidicAtoms; // presumably filled by load/setAcidicAugmentedAtoms - confirm
139  std::vector<AugmentedAtom> GoodAtoms; // presumably filled by load/setGoodAugmentedAtoms - confirm
140  std::vector<ROMOL_SPTR> Patterns; // presumably the "clean" patterns (see loadPatterns) - confirm
141  std::vector<ROMOL_SPTR> RotatePatterns;
142  std::vector<ROMOL_SPTR> StereoPatterns;
143  std::vector<ROMOL_SPTR> FromTautomer; // presumably paired by index with ToTautomer (see setTautomerData(from, to)) - confirm
144  std::vector<ROMOL_SPTR> ToTautomer;
145 
146  double Elneg0; // elneg_table[0].value;
147  std::map<unsigned, double> ElnegTable; // AtomicNumber -> elneg (electronegativity value)
148  std::vector<IncEntry> AtomAcidity; // atom_acidity_table[]
149  std::vector<IncEntry> ChargeIncTable;
150  // std::map AtomSymbol(or AtomicNumber) -> IncEntry
151  /* [ReadTransformation() ]
152  * The alpha, beta coefficients of the transformation function used
153  * to stretch the preliminary pKa values to the actual predictions.
154  * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
155  */
156 
157  double Alpha, Beta; // the alpha/beta coefficients of the pKa transformation function
158  std::vector<PathEntry> AlphaPathTable, BetaPathTable; // NOTE(review): relationship to Alpha/Beta is not visible in this header - confirm in the implementation
159 
160  public:
162 
163  void clear() { *this = StructCheckerOptions(); } // resets every member by assigning a default-constructed StructCheckerOptions
164 
165  bool loadAugmentedAtomTranslations(const std::string &path); // path: input file; the bool result presumably reports success - confirm in the implementation
166  void setAugmentedAtomTranslations( // takes the (AugmentedAtom, AugmentedAtom) pairs directly instead of reading a file
167  const std::vector<std::pair<AugmentedAtom, AugmentedAtom>> &aaPairs);
168 
169  bool loadAcidicAugmentedAtoms(const std::string &path); // path: input file; the bool result presumably reports success - confirm
170  void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms); // takes the acidic augmented atoms directly
171 
172  bool loadGoodAugmentedAtoms(const std::string &path); // path: input file; the bool result presumably reports success - confirm
173  void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms); // NOTE(review): parameter is named acidicAtoms although this setter is for the "good" atoms - looks like a copy/paste name; confirm
174 
175  bool loadPatterns(const std::string &path); // file with clean patterns
176  void parsePatterns(
177  const std::vector<std::string> &smarts); // can throw RDKit exceptions
178  void setPatterns(const std::vector<ROMOL_SPTR> &p); // takes pattern molecules (ROMOL_SPTR) directly rather than strings
179 
180  bool loadRotatePatterns(
181  const std::string &path); // file with rotate patterns
182  void parseRotatePatterns(
183  const std::vector<std::string> &smarts); // can throw RDKit exceptions
184  void setRotatePatterns(const std::vector<ROMOL_SPTR> &p); // takes pattern molecules (ROMOL_SPTR) directly rather than strings
185 
186  bool loadStereoPatterns(
187  const std::string &path); // file with stereo patterns
188  void parseStereoPatterns(
189  const std::vector<std::string> &smarts); // can throw RDKit exceptions
190  void setStereoPatterns(const std::vector<ROMOL_SPTR> &p); // takes pattern molecules (ROMOL_SPTR) directly rather than strings
191 
192  bool loadTautomerData(const std::string &path); // file path
193  void parseTautomerData(const std::vector<std::string> &smartsFrom, // two string lists: "from" and "to" patterns
194  const std::vector<std::string> &smartsTo); // NOTE(review): presumably smartsFrom[i] pairs with smartsTo[i] - confirm
195  void setTautomerData(const std::vector<ROMOL_SPTR> &from, // same data as molecules (ROMOL_SPTR) instead of strings
196  const std::vector<ROMOL_SPTR> &to);
197  bool loadChargeDataTables(const std::string &path); // file path
198 };
199 
200 RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json,
202 
205  const std::string &augmentedAtomTranslationsFile = "",
206  // ?? AcidicAtoms;
207  // ?? GoodAtoms;
208  const std::string &patternFile = "", // file with clean patterns
209  const std::string &rotatePatternFile = "", // file with rotate patterns
210  const std::string &stereoPatternFile = "", // file with stereo patterns
211  const std::string &tautomerFile = "");
212 
213 //! \brief Class for performing structure validation and cleanup
214 /*! \b NOTE: This class should be considered beta. The API may change in future
215 releases.
216 
217 Examples of Usage
218 
219 \code
220  StructChecker chk;
221  unsigned flags = chk.checkMolStructure( mol ); // use defaults
222 \endcode
223 
224 or
225 
226 \code
227  StructureCheck::StructCheckerOptions options; // use defaults
228  // To use external data
229  StructureCheck::loadOptionsFromFiles(options, file1, file2);
230  StructChecker chk(options);
231 
232  for( mol in mols ) {
233  unsigned flags = chk.checkMolStructure( mol );
234  if (0!=(flags & StructureCheck::StructChecker::BAD_SET)) {
235  // write to error file
236  } else if (0!=(flags & StructureCheck::StructChecker::TRANSFORMED_SET))
237 {
238  // input molecule was transformed
239  } else { // flags == NO_CHANGE
240  // no change
241  }
242  }
243 \endcode
244 */
246  public:
247  typedef enum StructureFlags { // bit flags; values are combined with | and tested with &
248  NO_CHANGE = 0, // no bit set
249  BAD_MOLECULE = 0x0001,
250  ALIAS_CONVERSION_FAILED = 0x0002,
251  STEREO_ERROR = 0x0004,
252  STEREO_FORCED_BAD = 0x0008,
253  ATOM_CLASH = 0x0010,
254  ATOM_CHECK_FAILED = 0x0020,
255  SIZE_CHECK_FAILED = 0x0040,
256  // reserved error = 0x0080,
257  TRANSFORMED = 0x0100,
258  FRAGMENTS_FOUND = 0x0200,
259  EITHER_WARNING = 0x0400,
260  DUBIOUS_STEREO_REMOVED = 0x0800,
261  RECHARGED = 0x1000,
262  STEREO_TRANSFORMED = 0x2000,
263  TEMPLATE_TRANSFORMED = 0x4000,
264  TAUTOMER_TRANSFORMED = 0x8000,
265  // mask: union of all error bits 0x0001-0x0040 (the reserved 0x0080 is not included)
266  BAD_SET = (BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
267  STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
268  SIZE_CHECK_FAILED),
269 
270  TRANSFORMED_SET = (TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING | // mask: union of all bits 0x0100-0x8000
271  DUBIOUS_STEREO_REMOVED | STEREO_TRANSFORMED |
272  TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED | RECHARGED),
273  } StructureFlags;
274  // attributes:
275  private:
276  StructCheckerOptions Options;
277 
278  public:
279  inline StructChecker() {} // Options is default-constructed
280  inline StructChecker(const StructCheckerOptions &options) // stores a copy of `options`
281  : Options(options) {}
282 
283  const StructCheckerOptions &GetOptions() const { return Options; } // read-only reference to the stored options
284  void SetOptions(const StructCheckerOptions &options) { Options = options; } // replaces the stored options with a copy of `options`
285 
286  // Checks the molecule structure, fixes it if needed, and returns a
287  // bitwise OR of StructureFlags
288  // describing what has been done (NO_CHANGE, i.e. 0, has no bit set)
289  unsigned checkMolStructure(RWMol &mol) const;
290 
291  // instance-independent helper methods:
292  // Converts structure property flags to a comma-separated string
293  static std::string StructureFlagsToString(unsigned flags);
294  // Converts a comma-separated string to a StructureFlags unsigned integer
295  static unsigned StringToStructureFlags(const std::string &str);
296  // internal implementation:
297  private:
298 };
299 } // namespace StructureCheck
300 } // namespace RDKit
301 #endif
Class for performing structure validation and cleanup.
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
std::vector< std::pair< AugmentedAtom, AugmentedAtom > > AugmentedAtomPairs
RDKIT_STRUCTCHECKER_EXPORT bool loadOptionsFromFiles(StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile="", const std::string &patternFile="", const std::string &rotatePatternFile="", const std::string &stereoPatternFile="", const std::string &tautomerFile="")
AugmentedAtom(const std::string &symbol, const std::string &name, int charge, RadicalType radical, AATopology topology)
Definition: StructChecker.h:87
RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op)
StructChecker(const StructCheckerOptions &options)
std::vector< AugmentedAtom > GoodAtoms
static const int ANY_CHARGE
Definition: StructChecker.h:35
Std stuff.
Definition: Atom.h:30
static const char * symbol[119]
Definition: mf.h:259
std::vector< ROMOL_SPTR > StereoPatterns
std::map< unsigned, double > ElnegTable
#define RDKIT_STRUCTCHECKER_EXPORT
Definition: export.h:645
const StructCheckerOptions & GetOptions() const
void SetOptions(const StructCheckerOptions &options)
std::vector< ROMOL_SPTR > RotatePatterns
std::vector< AugmentedAtom > AcidicAtoms