wibble 0.1.28
|
00001 #ifndef WIBBLE_REGEXP_H 00002 #define WIBBLE_REGEXP_H 00003 00004 /* 00005 * OO wrapper for regular expression functions 00006 * 00007 * Copyright (C) 2003--2006 Enrico Zini <enrico@debian.org> 00008 * 00009 * This library is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * This library is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with this library; if not, write to the Free Software 00021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00022 */ 00023 00024 #include <wibble/exception.h> 00025 #include <sys/types.h> 00026 #include <regex.h> 00027 00028 namespace wibble { 00029 namespace exception { 00030 00032 00033 class Regexp : public wibble::exception::Generic 00034 { 00035 protected: 00036 int m_code; 00037 std::string m_message; 00038 00039 public: 00040 Regexp(const regex_t& re, int code, const std::string& context) 00041 throw (); 00042 ~Regexp() throw () {} 00043 00045 virtual int code() const throw () { return m_code; } 00046 00047 virtual const char* type() const throw () { return "Regexp"; } 00048 virtual std::string desc() const throw () { return m_message; } 00049 }; 00050 00051 } 00052 00053 class Regexp 00054 { 00055 protected: 00056 regex_t re; 00057 regmatch_t* pmatch; 00058 int nmatch; 00059 std::string lastMatch; 00060 00061 public: 00062 /* Note that match_count is required to be >1 to enable 00063 sub-regexp capture. The maximum *INCLUDES* the whole-regexp 00064 match (indexed 0). [TODO we may want to fix this to be more 00065 friendly?] */ 00066 Regexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp); 00067 ~Regexp() throw (); 00068 00069 bool match(const std::string& str, int flags = 0) throw (wibble::exception::Regexp); 00070 00071 /* Indexing is from 1 for capture matches, like perl's $0, 00072 $1... 0 is whole-regexp match, not a capture. TODO 00073 the range is miscalculated (an off-by-one, wrt. the 00074 counterintuitive match counting). */ 00075 std::string operator[](int idx) throw (wibble::exception::OutOfRange); 00076 00077 size_t matchStart(int idx) throw (wibble::exception::OutOfRange); 00078 size_t matchEnd(int idx) throw (wibble::exception::OutOfRange); 00079 size_t matchLength(int idx) throw (wibble::exception::OutOfRange); 00080 }; 00081 00082 class ERegexp : public Regexp 00083 { 00084 public: 00085 ERegexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp) 00086 : Regexp(expr, match_count, flags | REG_EXTENDED) {} 00087 }; 00088 00089 class Tokenizer 00090 { 00091 const std::string& str; 00092 wibble::Regexp re; 00093 00094 public: 00095 class const_iterator 00096 { 00097 Tokenizer& tok; 00098 size_t beg, end; 00099 public: 00100 typedef std::string value_type; 00101 typedef ptrdiff_t difference_type; 00102 typedef value_type *pointer; 00103 typedef value_type &reference; 00104 typedef std::forward_iterator_tag iterator_category; 00105 00106 const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); } 00107 const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {} 00108 00109 const_iterator& operator++(); 00110 00111 std::string operator*() const 00112 { 00113 return tok.str.substr(beg, end-beg); 00114 } 00115 bool operator==(const const_iterator& ti) const 00116 { 00117 return beg == ti.beg && end == ti.end; 00118 } 00119 bool operator!=(const const_iterator& ti) const 00120 { 00121 return beg != ti.beg || end != ti.end; 00122 } 00123 }; 00124 00125 Tokenizer(const std::string& str, const std::string& re, int flags) 00126 : str(str), re(re, 1, flags) {} 00127 00128 const_iterator begin() { return const_iterator(*this); } 00129 const_iterator end() { return const_iterator(*this, false); } 00130 }; 00131 00145 class Splitter 00146 { 00147 wibble::Regexp re; 00148 00149 public: 00154 // TODO: add iterator_traits 00155 class const_iterator 00156 { 00157 wibble::Regexp& re; 00158 std::string cur; 00159 std::string next; 00160 00161 public: 00162 typedef std::string value_type; 00163 typedef ptrdiff_t difference_type; 00164 typedef value_type *pointer; 00165 typedef value_type &reference; 00166 typedef std::forward_iterator_tag iterator_category; 00167 00168 const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; } 00169 const_iterator(wibble::Regexp& re) : re(re) {} 00170 00171 const_iterator& operator++(); 00172 00173 const std::string& operator*() const 00174 { 00175 return cur; 00176 } 00177 const std::string* operator->() const 00178 { 00179 return &cur; 00180 } 00181 bool operator==(const const_iterator& ti) const 00182 { 00183 return cur == ti.cur && next == ti.next; 00184 } 00185 bool operator!=(const const_iterator& ti) const 00186 { 00187 return cur != ti.cur || next != ti.next; 00188 } 00189 }; 00190 00194 Splitter(const std::string& re, int flags) 00195 : re(re, 1, flags) {} 00196 00200 const_iterator begin(const std::string& str) { return const_iterator(re, str); } 00201 const_iterator end() { return const_iterator(re); } 00202 }; 00203 00204 } 00205 00206 // vim:set ts=4 sw=4: 00207 #endif