Blame src/utf8/unchecked.h

Packit bfcc33
// Copyright 2006 Nemanja Trifunovic
Packit bfcc33
Packit bfcc33
/*
Packit bfcc33
Permission is hereby granted, free of charge, to any person or organization
Packit bfcc33
obtaining a copy of the software and accompanying documentation covered by
Packit bfcc33
this license (the "Software") to use, reproduce, display, distribute,
Packit bfcc33
execute, and transmit the Software, and to prepare derivative works of the
Packit bfcc33
Software, and to permit third-parties to whom the Software is furnished to
Packit bfcc33
do so, all subject to the following:
Packit bfcc33
Packit bfcc33
The copyright notices in the Software and this entire statement, including
Packit bfcc33
the above license grant, this restriction and the following disclaimer,
Packit bfcc33
must be included in all copies of the Software, in whole or in part, and
Packit bfcc33
all derivative works of the Software, unless such copies or derivative
Packit bfcc33
works are solely in the form of machine-executable object code generated by
Packit bfcc33
a source language processor.
Packit bfcc33
Packit bfcc33
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
Packit bfcc33
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
Packit bfcc33
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
Packit bfcc33
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
Packit bfcc33
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
Packit bfcc33
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
Packit bfcc33
DEALINGS IN THE SOFTWARE.
Packit bfcc33
*/
Packit bfcc33
Packit bfcc33
Packit bfcc33
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
Packit bfcc33
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
Packit bfcc33
Packit bfcc33
#include <cstddef>

#include "core.h"
Packit bfcc33
Packit bfcc33
namespace utf8
Packit bfcc33
{
Packit bfcc33
    namespace unchecked
Packit bfcc33
    {
Packit bfcc33
        // Encodes the code point cp as UTF-8 and writes the resulting octets
        // through result, returning the iterator advanced past the last octet
        // written. No validation is performed: every value of 0x10000 or more is
        // emitted in the four-octet form.
        template <typename octet_iterator>
        octet_iterator append(uint32_t cp, octet_iterator result)
        {
            // One octet: the value is stored as-is.
            if (cp < 0x80) {
                *(result++) = static_cast<uint8_t>(cp);
                return result;
            }

            // Two octets: 110xxxxx 10xxxxxx
            if (cp < 0x800) {
                *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
                *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
                return result;
            }

            // Three octets: 1110xxxx 10xxxxxx 10xxxxxx
            if (cp < 0x10000) {
                *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
                *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
                return result;
            }

            // Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
            *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
            return result;
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
        uint32_t next(octet_iterator& it)
Packit bfcc33
        {
Packit bfcc33
            uint32_t cp = utf8::internal::mask8(*it);
Packit bfcc33
            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
Packit bfcc33
            switch (length) {
Packit bfcc33
                case 1:
Packit bfcc33
                    break;
Packit bfcc33
                case 2:
Packit bfcc33
                    it++;
Packit bfcc33
                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
Packit bfcc33
                    break;
Packit bfcc33
                case 3:
Packit bfcc33
                    ++it;
Packit bfcc33
                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
Packit bfcc33
                    ++it;
Packit bfcc33
                    cp += (*it) & 0x3f;
Packit bfcc33
                    break;
Packit bfcc33
                case 4:
Packit bfcc33
                    ++it;
Packit bfcc33
                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
Packit bfcc33
                    ++it;
Packit bfcc33
                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
Packit bfcc33
                    ++it;
Packit bfcc33
                    cp += (*it) & 0x3f;
Packit bfcc33
                    break;
Packit bfcc33
            }
Packit bfcc33
            ++it;
Packit bfcc33
            return cp;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
        uint32_t peek_next(octet_iterator it)
Packit bfcc33
        {
Packit bfcc33
            return utf8::unchecked::next(it);
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
        uint32_t prior(octet_iterator& it)
Packit bfcc33
        {
Packit bfcc33
            while (utf8::internal::is_trail(*(--it))) ;
Packit bfcc33
            octet_iterator temp = it;
Packit bfcc33
            return utf8::unchecked::next(temp);
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
        inline uint32_t previous(octet_iterator& it)
Packit bfcc33
        {
Packit bfcc33
            return utf8::unchecked::prior(it);
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator, typename distance_type>
Packit bfcc33
        void advance (octet_iterator& it, distance_type n)
Packit bfcc33
        {
Packit bfcc33
            for (distance_type i = 0; i < n; ++i)
Packit bfcc33
                utf8::unchecked::next(it);
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
        typename std::iterator_traits<octet_iterator>::difference_type
Packit bfcc33
        distance (octet_iterator first, octet_iterator last)
Packit bfcc33
        {
Packit bfcc33
            typename std::iterator_traits<octet_iterator>::difference_type dist;
Packit bfcc33
            for (dist = 0; first < last; ++dist)
Packit bfcc33
                utf8::unchecked::next(first);
Packit bfcc33
            return dist;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename u16bit_iterator, typename octet_iterator>
Packit bfcc33
        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
Packit bfcc33
        {
Packit bfcc33
            while (start != end) {
Packit bfcc33
                uint32_t cp = utf8::internal::mask16(*start++);
Packit bfcc33
            // Take care of surrogate pairs first
Packit bfcc33
                if (utf8::internal::is_lead_surrogate(cp)) {
Packit bfcc33
                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
Packit bfcc33
                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
Packit bfcc33
                }
Packit bfcc33
                result = utf8::unchecked::append(cp, result);
Packit bfcc33
            }
Packit bfcc33
            return result;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename u16bit_iterator, typename octet_iterator>
Packit bfcc33
        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
Packit bfcc33
        {
Packit bfcc33
            while (start < end) {
Packit bfcc33
                uint32_t cp = utf8::unchecked::next(start);
Packit bfcc33
                if (cp > 0xffff) { //make a surrogate pair
Packit bfcc33
                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
Packit bfcc33
                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
Packit bfcc33
                }
Packit bfcc33
                else
Packit bfcc33
                    *result++ = static_cast<uint16_t>(cp);
Packit bfcc33
            }
Packit bfcc33
            return result;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator, typename u32bit_iterator>
Packit bfcc33
        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
Packit bfcc33
        {
Packit bfcc33
            while (start != end)
Packit bfcc33
                result = utf8::unchecked::append(*(start++), result);
Packit bfcc33
Packit bfcc33
            return result;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        template <typename octet_iterator, typename u32bit_iterator>
Packit bfcc33
        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
Packit bfcc33
        {
Packit bfcc33
            while (start < end)
Packit bfcc33
                (*result++) = utf8::unchecked::next(start);
Packit bfcc33
Packit bfcc33
            return result;
Packit bfcc33
        }
Packit bfcc33
Packit bfcc33
        // The iterator class
Packit bfcc33
        template <typename octet_iterator>
Packit bfcc33
          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
Packit bfcc33
            octet_iterator it;
Packit bfcc33
            public:
Packit bfcc33
            iterator () {}
Packit bfcc33
            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
Packit bfcc33
            // the default "big three" are OK
Packit bfcc33
            octet_iterator base () const { return it; }
Packit bfcc33
            uint32_t operator * () const
Packit bfcc33
            {
Packit bfcc33
                octet_iterator temp = it;
Packit bfcc33
                return utf8::unchecked::next(temp);
Packit bfcc33
            }
Packit bfcc33
            bool operator == (const iterator& rhs) const
Packit bfcc33
            {
Packit bfcc33
                return (it == rhs.it);
Packit bfcc33
            }
Packit bfcc33
            bool operator != (const iterator& rhs) const
Packit bfcc33
            {
Packit bfcc33
                return !(operator == (rhs));
Packit bfcc33
            }
Packit bfcc33
            iterator& operator ++ ()
Packit bfcc33
            {
Packit bfcc33
                ::std::advance(it, utf8::internal::sequence_length(it));
Packit bfcc33
                return *this;
Packit bfcc33
            }
Packit bfcc33
            iterator operator ++ (int)
Packit bfcc33
            {
Packit bfcc33
                iterator temp = *this;
Packit bfcc33
                ::std::advance(it, utf8::internal::sequence_length(it));
Packit bfcc33
                return temp;
Packit bfcc33
            }
Packit bfcc33
            iterator& operator -- ()
Packit bfcc33
            {
Packit bfcc33
                utf8::unchecked::prior(it);
Packit bfcc33
                return *this;
Packit bfcc33
            }
Packit bfcc33
            iterator operator -- (int)
Packit bfcc33
            {
Packit bfcc33
                iterator temp = *this;
Packit bfcc33
                utf8::unchecked::prior(it);
Packit bfcc33
                return temp;
Packit bfcc33
            }
Packit bfcc33
          }; // class iterator
Packit bfcc33
Packit bfcc33
    } // namespace utf8::unchecked
Packit bfcc33
} // namespace utf8
Packit bfcc33
Packit bfcc33
Packit bfcc33
#endif // header guard
Packit bfcc33