Blame src/utf8_string.cpp

Packit Service 7770af
#include "sass.hpp"
Packit Service 7770af
#include <string>
Packit Service 7770af
#include <vector>
Packit Service 7770af
#include <cstdlib>
Packit Service 7770af
#include <cmath>
Packit Service 7770af
Packit Service 7770af
#include "utf8.h"
Packit Service 7770af
Packit Service 7770af
namespace Sass {
Packit Service 7770af
  namespace UTF_8 {
Packit Service 7770af
    using std::string;
Packit Service 7770af
Packit Service 7770af
    // naming conventions:
Packit Service 7770af
    // offset: raw byte offset (0 based)
Packit Service 7770af
    // position: code point offset (0 based)
Packit Service 7770af
    // index: code point offset (1 based or negative)
Packit Service 7770af
Packit Service 7770af
    // function that will count the number of code points (utf-8 characters) from the given beginning to the given end
Packit Service 7770af
    size_t code_point_count(const string& str, size_t start, size_t end) {
Packit Service 7770af
      return utf8::distance(str.begin() + start, str.begin() + end);
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    size_t code_point_count(const string& str) {
Packit Service 7770af
      return utf8::distance(str.begin(), str.end());
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    // function that will return the byte offset at a code point position
Packit Service 7770af
    size_t offset_at_position(const string& str, size_t position) {
Packit Service 7770af
      string::const_iterator it = str.begin();
Packit Service 7770af
      utf8::advance(it, position, str.end());
Packit Service 7770af
      return distance(str.begin(), it);
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    // function that returns number of bytes in a character at offset
Packit Service 7770af
    size_t code_point_size_at_offset(const string& str, size_t offset) {
Packit Service 7770af
      // get iterator from string and forward by offset
Packit Service 7770af
      string::const_iterator stop = str.begin() + offset;
Packit Service 7770af
      // check if beyond boundary
Packit Service 7770af
      if (stop == str.end()) return 0;
Packit Service 7770af
      // advance by one code point
Packit Service 7770af
      utf8::advance(stop, 1, str.end());
Packit Service 7770af
      // calculate offset for code point
Packit Service 7770af
      return  stop - str.begin() - offset;
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    // Normalizes a 1-based (or negative, counted from the end) index into a
    // 0-based position for a sequence of `len` elements:
    //   - index in [1, len]   -> index - 1
    //   - index > len         -> len (clamped to one past the last element)
    //   - index == 0          -> 0
    //   - index in [-len, -1] -> len + index (counted from the end)
    //   - index < -len        -> 0 (clamped to the start)
    size_t normalize_index(int index, size_t len) {
      if (index > 0) {
        // compare in the unsigned domain so `len` is never narrowed
        const size_t one_based = static_cast<size_t>(index);
        return one_based <= len ? one_based - 1 : len;
      }
      if (index == 0) {
        return 0;
      }
      // Negative index: take the magnitude in a wider type so that even the
      // most negative int can be negated without overflow.
      const unsigned long long magnitude =
        static_cast<unsigned long long>(-static_cast<long long>(index));
      if (magnitude <= len) {
        // negative and within string length
        return len - static_cast<size_t>(magnitude);
      }
      // negative and past string length
      return 0;
    }
Packit Service 7770af
Packit Service 7770af
    #ifdef _WIN32
Packit Service 7770af
Packit Service 7770af
    // utf16 functions
Packit Service 7770af
    using std::wstring;
Packit Service 7770af
Packit Service 7770af
    // convert from utf16/wide string to utf8 string
Packit Service 7770af
    string convert_from_utf16(const wstring& utf16)
Packit Service 7770af
    {
Packit Service 7770af
      string utf8;
Packit Service 7770af
      // pre-allocate expected memory
Packit Service 7770af
      utf8.reserve(sizeof(utf16)/2);
Packit Service 7770af
      utf8::utf16to8(utf16.begin(), utf16.end(),
Packit Service 7770af
                     back_inserter(utf8));
Packit Service 7770af
      return utf8;
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    // convert from utf8 string to utf16/wide string
Packit Service 7770af
    wstring convert_to_utf16(const string& utf8)
Packit Service 7770af
    {
Packit Service 7770af
      wstring utf16;
Packit Service 7770af
      // pre-allocate expected memory
Packit Service 7770af
      utf16.reserve(code_point_count(utf8)*2);
Packit Service 7770af
      utf8::utf8to16(utf8.begin(), utf8.end(),
Packit Service 7770af
                     back_inserter(utf16));
Packit Service 7770af
      return utf16;
Packit Service 7770af
    }
Packit Service 7770af
Packit Service 7770af
    #endif
Packit Service 7770af
Packit Service 7770af
  }
Packit Service 7770af
}