Blame sample/encode.c

Packit b89d10
/*
Packit b89d10
 * encode.c
Packit b89d10
 */
Packit b89d10
#include <stdio.h>
Packit b89d10
#include "oniguruma.h"
Packit b89d10
Packit b89d10
static int
Packit b89d10
search(regex_t* reg, unsigned char* str, unsigned char* end)
Packit b89d10
{
Packit b89d10
  int r;
Packit b89d10
  unsigned char *start, *range;
Packit b89d10
  OnigRegion *region;
Packit b89d10
Packit b89d10
  region = onig_region_new();
Packit b89d10
Packit b89d10
  start = str;
Packit b89d10
  range = end;
Packit b89d10
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
Packit b89d10
  if (r >= 0) {
Packit b89d10
    int i;
Packit b89d10
Packit b89d10
    fprintf(stderr, "match at %d  (%s)\n", r,
Packit b89d10
            ONIGENC_NAME(onig_get_encoding(reg)));
Packit b89d10
    for (i = 0; i < region->num_regs; i++) {
Packit b89d10
      fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
Packit b89d10
    }
Packit b89d10
  }
Packit b89d10
  else if (r == ONIG_MISMATCH) {
Packit b89d10
    fprintf(stderr, "search fail (%s)\n",
Packit b89d10
            ONIGENC_NAME(onig_get_encoding(reg)));
Packit b89d10
  }
Packit b89d10
  else { /* error */
Packit b89d10
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
Packit b89d10
    onig_error_code_to_str((UChar* )s, r);
Packit b89d10
    fprintf(stderr, "ERROR: %s\n", s);
Packit b89d10
    fprintf(stderr, "  (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
Packit b89d10
    return -1;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
static int
Packit b89d10
exec(OnigEncoding enc, OnigOptionType options,
Packit b89d10
     char* apattern, char* astr)
Packit b89d10
{
Packit b89d10
  int r;
Packit b89d10
  unsigned char *end;
Packit b89d10
  regex_t* reg;
Packit b89d10
  OnigErrorInfo einfo;
Packit b89d10
  UChar* pattern = (UChar* )apattern;
Packit b89d10
  UChar* str     = (UChar* )astr;
Packit b89d10
Packit b89d10
  onig_initialize(&enc, 1);
Packit b89d10
Packit b89d10
  r = onig_new(&reg, pattern,
Packit b89d10
               pattern + onigenc_str_bytelen_null(enc, pattern),
Packit b89d10
               options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
Packit b89d10
  if (r != ONIG_NORMAL) {
Packit b89d10
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
Packit b89d10
    onig_error_code_to_str((UChar* )s, r, &einfo);
Packit b89d10
    fprintf(stderr, "ERROR: %s\n", s);
Packit b89d10
    return -1;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  end   = str + onigenc_str_bytelen_null(enc, str);
Packit b89d10
  r = search(reg, str, end);
Packit b89d10
Packit b89d10
  onig_free(reg);
Packit b89d10
  onig_end();
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
/* Case-fold flag that exec_deluxe() copies into its OnigCompileInfo. */
static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;

#if 0
/* Disabled helper: replace the case-fold flag used by exec_deluxe(). */
static void
set_case_fold(OnigCaseFoldType fold)
{
  CF = fold;
}
#endif
Packit b89d10
Packit b89d10
static int
Packit b89d10
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
Packit b89d10
            OnigOptionType options, char* apattern, char* astr)
Packit b89d10
{
Packit b89d10
  int r;
Packit b89d10
  unsigned char *end;
Packit b89d10
  regex_t* reg;
Packit b89d10
  OnigCompileInfo ci;
Packit b89d10
  OnigErrorInfo einfo;
Packit b89d10
  UChar* pattern = (UChar* )apattern;
Packit b89d10
  UChar* str     = (UChar* )astr;
Packit b89d10
Packit b89d10
  onig_initialize(&str_enc, 1);
Packit b89d10
Packit b89d10
  ci.num_of_elements = 5;
Packit b89d10
  ci.pattern_enc = pattern_enc;
Packit b89d10
  ci.target_enc  = str_enc;
Packit b89d10
  ci.syntax      = ONIG_SYNTAX_DEFAULT;
Packit b89d10
  ci.option      = options;
Packit b89d10
  ci.case_fold_flag  = CF;
Packit b89d10
Packit b89d10
  r = onig_new_deluxe(&reg, pattern,
Packit b89d10
                      pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
Packit b89d10
                      &ci, &einfo);
Packit b89d10
  if (r != ONIG_NORMAL) {
Packit b89d10
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
Packit b89d10
    onig_error_code_to_str((UChar* )s, r, &einfo);
Packit b89d10
    fprintf(stderr, "ERROR: %s\n", s);
Packit b89d10
    return -1;
Packit b89d10
  }
Packit b89d10
Packit b89d10
  end = str + onigenc_str_bytelen_null(str_enc, str);
Packit b89d10
  r = search(reg, str, end);
Packit b89d10
Packit b89d10
  onig_free(reg);
Packit b89d10
  onig_end();
Packit b89d10
  return 0;
Packit b89d10
}
Packit b89d10
Packit b89d10
extern int main(int argc, char* argv[])
Packit b89d10
{
Packit b89d10
  int r;
Packit b89d10
  /* ISO 8859-1 test */
Packit b89d10
  static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 };
Packit b89d10
  static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 };
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_SJIS, ONIG_OPTION_NONE,
Packit b89d10
	   "^a\\p{Hiragana}c$", "a\202\274c");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_EUC_JP, ONIG_OPTION_NONE,
Packit b89d10
	   "^a\\p{Hiragana}c$", "a\244\276c");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "aBc", " AbC");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   " [a-c\337z] ", "  SS  ");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   " [\330-\341] ", "  SS  ");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "\337          ", "          Ss          ");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "SS          ", "          \337          ");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "\\A\\S\\z", "ss");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "[ac]+", "bbbaAaCCC");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   (char* )pattern, (char* )str);
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   (char* )pattern, (char* )str);
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
Packit b89d10
  r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc");
Packit b89d10
  r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
Packit b89d10
  r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc");
Packit b89d10
  r = exec(ONIG_ENCODING_BIG5,   ONIG_OPTION_NONE, "a+", "bbbaaaccc");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
           "\337", "SS");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
           "SS", "\337");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
           "SSb\337ssc", "a\337bSS\337cd");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
           "[a\337]{0,2}", "aSS");
Packit b89d10
  r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
Packit b89d10
           "is", "iss");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_NONE, "a+",
Packit b89d10
                  "\000b\000a\000a\000a\000c\000c\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE,
Packit b89d10
                  ONIG_OPTION_NONE, "a+",
Packit b89d10
                  "b\000a\000a\000a\000a\000c\000\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE,
Packit b89d10
                  ONIG_OPTION_NONE,
Packit b89d10
                  "\000b\000a\000a\000a\000c\000c\000\000",
Packit b89d10
                  "x\000b\000a\000a\000a\000c\000c\000\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
                  "\337", "\000S\000S\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
                  "SS", "\000\337\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
                  "\337", "S\000S\000\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
                  "SS", "\000\000\000\337\000\000\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
                  "\337", "S\000\000\000S\000\000\000\000\000\000\000");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
Packit b89d10
	   "\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
Packit b89d10
           "\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
Packit b89d10
  /* 0x0a3d == \012\075 : is not alnum */
Packit b89d10
  /* 0x0a3e == \012\076 : is alnum */
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
Packit b89d10
	   "\000\\\000d\000+\000\000",
Packit b89d10
           "\0003\0001\377\020\377\031\377\032\000\000");
Packit b89d10
Packit b89d10
  r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
Packit b89d10
	   "(Aa\\d)+", "BaA5Aa0234");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_NONE,
Packit b89d10
		  "^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$",
Packit b89d10
		  "\060\100\060\240\060\101\060\102\060\226\060\237\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000[\000\337\000]\000\000", "\000S\000S\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000[\000\337\000]\000\000", "\000s\000S\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000^\000[\000\001\000-\377\375\000]\000$\000\000",
Packit b89d10
		  "\000s\000S\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000S\000S\000\000",
Packit b89d10
		  "\000S\000T\000\337\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000S\000T\000S\000S\000\000",
Packit b89d10
		  "\000S\000t\000s\000S\000\000");
Packit b89d10
Packit b89d10
  {
Packit b89d10
    UChar pat[]  = { 0x1f, 0xfc, 0x00, 0x00 };
Packit b89d10
    UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
Packit b89d10
    UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
Packit b89d10
Packit b89d10
    r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
		    ONIG_OPTION_IGNORECASE,
Packit b89d10
		    (char* )pat, (char* )str1);
Packit b89d10
Packit b89d10
    r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
		    ONIG_OPTION_IGNORECASE,
Packit b89d10
		    (char* )pat, (char* )str2);
Packit b89d10
  }
Packit b89d10
Packit b89d10
#if 0
Packit b89d10
  /* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */
Packit b89d10
Packit b89d10
  set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "Ii", "\304\261\304\260");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\000I\000i\000\000", "\001\061\001\060\000\000");
Packit b89d10
Packit b89d10
  r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
Packit b89d10
                  ONIG_OPTION_IGNORECASE,
Packit b89d10
		  "\001\061\001\060\000\000", "\000I\000i\000\000");
Packit b89d10
Packit b89d10
  set_case_fold(ONIGENC_CASE_FOLD_MIN);
Packit b89d10
#endif
Packit b89d10
Packit b89d10
  return r;
Packit b89d10
}