Drizzled Public API Documentation

unchecked.h
00001 // Copyright 2006 Nemanja Trifunovic
00002 
00003 /*
00004 Permission is hereby granted, free of charge, to any person or organization
00005 obtaining a copy of the software and accompanying documentation covered by
00006 this license (the "Software") to use, reproduce, display, distribute,
00007 execute, and transmit the Software, and to prepare derivative works of the
00008 Software, and to permit third-parties to whom the Software is furnished to
00009 do so, all subject to the following:
00010 
00011 The copyright notices in the Software and this entire statement, including
00012 the above license grant, this restriction and the following disclaimer,
00013 must be included in all copies of the Software, in whole or in part, and
00014 all derivative works of the Software, unless such copies or derivative
00015 works are solely in the form of machine-executable object code generated by
00016 a source language processor.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
00021 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
00022 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00024 DEALINGS IN THE SOFTWARE.
00025 */
00026 
00027 
00028 #pragma once
00029 
00030 #include <drizzled/utf8/core.h>
00031 
00032 namespace drizzled
00033 {
00034 namespace utf8
00035 {
00036     namespace unchecked 
00037     {
// Encodes the code point `cp` as UTF-8 and writes the resulting one to four
// octets through `result`. `cp` is not validated in any way.
// Returns `result` advanced past the last octet written.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    if (cp < 0x80) {
        // One octet: the value is stored unchanged.
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }

    // Multi-octet forms: write the lead octet and any middle continuation
    // octets here; the final continuation octet is shared below.
    if (cp < 0x800) {
        // Two octets in total.
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
    }
    else if (cp < 0x10000) {
        // Three octets in total.
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    }
    else {
        // Four octets in total.
        *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
        *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    }

    // Every multi-octet form ends with the low six bits of `cp`.
    *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
00060 
00061         template <typename octet_iterator>
00062         uint32_t next(octet_iterator& it)
00063         {
00064             uint32_t cp = internal::mask8(*it);
00065             typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
00066             switch (length) {
00067                 case 1:
00068                     break;
00069                 case 2:
00070                     it++;
00071                     cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
00072                     break;
00073                 case 3:
00074                     ++it; 
00075                     cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff);
00076                     ++it;
00077                     cp += (*it) & 0x3f;
00078                     break;
00079                 case 4:
00080                     ++it;
00081                     cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff);                
00082                     ++it;
00083                     cp += (internal::mask8(*it) << 6) & 0xfff;
00084                     ++it;
00085                     cp += (*it) & 0x3f; 
00086                     break;
00087             }
00088             ++it;
00089             return cp;        
00090         }
00091 
// Returns the code point encoded at `it` without moving the caller's
// iterator: `it` is received by value, so next() only advances the copy.
// NOTE(review): the call to next() is unqualified; confirm it cannot collide
// with std::next through argument-dependent lookup when octet_iterator is a
// standard-library iterator type.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
    const uint32_t code_point = next(it);
    return code_point;
}
00097 
00098         template <typename octet_iterator>
00099         uint32_t prior(octet_iterator& it)
00100         {
00101             while (internal::is_trail(*(--it))) ;
00102             octet_iterator temp = it;
00103             return next(temp);
00104         }
00105 
// Same operation as prior(). Deprecated in versions that include prior, and
// kept only for the sake of consistency (see utf8::previous).
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t code_point = prior(it);
    return code_point;
}
00112 
// Advances `it` forward over `n` encoded code points by calling next()
// `n` times. Nothing happens when `n` is not greater than zero.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{
    distance_type consumed = 0;
    while (consumed < n) {
        next(it);
        ++consumed;
    }
}
00119 
// Counts the code points encoded in [first, last) by decoding with next()
// until `first` is no longer less than `last`.
// The iterators are compared with `<`, so the iterator type must support it.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typedef typename std::iterator_traits<octet_iterator>::difference_type count_type;

    count_type code_points = 0;
    while (first < last) {
        next(first);
        ++code_points;
    }
    return code_points;
}
00129 
00130         template <typename u16bit_iterator, typename octet_iterator>
00131         octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
00132         {       
00133             while (start != end) {
00134                 uint32_t cp = internal::mask16(*start++);
00135             // Take care of surrogate pairs first
00136                 if (internal::is_lead_surrogate(cp)) {
00137                     uint32_t trail_surrogate = internal::mask16(*start++);
00138                     cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
00139                 }
00140                 result = append(cp, result);
00141             }
00142             return result;         
00143         }
00144 
00145         template <typename u16bit_iterator, typename octet_iterator>
00146         u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
00147         {
00148             while (start < end) {
00149                 uint32_t cp = next(start);
00150                 if (cp > 0xffff) { //make a surrogate pair
00151                     *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
00152                     *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
00153                 }
00154                 else
00155                     *result++ = static_cast<uint16_t>(cp);
00156             }
00157             return result;
00158         }
00159 
// Converts the code points in [start, end) to UTF-8 by passing each one to
// append(). Returns `result` advanced past the last octet written.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start)
        result = append(*start, result);

    return result;
}
00168 
// Decodes the UTF-8 octets in [start, end) with next() and writes each code
// point through `result`. Returns `result` advanced past the last value
// written. Decoding continues while `start` compares less than `end`.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        const uint32_t code_point = next(start);
        *result++ = code_point;
    }

    return result;
}
00177 
00178         // The iterator class
00179         template <typename octet_iterator>
00180           class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { 
00181             octet_iterator it;
00182             public:
00183             iterator () {};
00184             explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
00185             // the default "big three" are OK
00186             octet_iterator base () const { return it; }
00187             uint32_t operator * () const
00188             {
00189                 octet_iterator temp = it;
00190                 return next(temp);
00191             }
00192             bool operator == (const iterator& rhs) const 
00193             { 
00194                 return (it == rhs.it);
00195             }
00196             bool operator != (const iterator& rhs) const
00197             {
00198                 return !(operator == (rhs));
00199             }
00200             iterator& operator ++ () 
00201             {
00202                 std::advance(it, internal::sequence_length(it));
00203                 return *this;
00204             }
00205             iterator operator ++ (int)
00206             {
00207                 iterator temp = *this;
00208                 std::advance(it, internal::sequence_length(it));
00209                 return temp;
00210             }  
00211             iterator& operator -- ()
00212             {
00213                 prior(it);
00214                 return *this;
00215             }
00216             iterator operator -- (int)
00217             {
00218                 iterator temp = *this;
00219                 prior(it);
00220                 return temp;
00221             }
00222           }; // class iterator
00223 
00224     } // namespace utf8::unchecked
00225 } // namespace utf8 
00226 } // namespace drizzled
00227 
00228 
00229