|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* fontconfig/fc-case/fc-case.c
|
|
Packit |
352660 |
*
|
|
Packit |
352660 |
* Copyright © 2004 Keith Packard
|
|
Packit |
352660 |
*
|
|
Packit |
352660 |
* Permission to use, copy, modify, distribute, and sell this software and its
|
|
Packit |
352660 |
* documentation for any purpose is hereby granted without fee, provided that
|
|
Packit |
352660 |
* the above copyright notice appear in all copies and that both that
|
|
Packit |
352660 |
* copyright notice and this permission notice appear in supporting
|
|
Packit |
352660 |
* documentation, and that the name of the author(s) not be used in
|
|
Packit |
352660 |
* advertising or publicity pertaining to distribution of the software without
|
|
Packit |
352660 |
* specific, written prior permission. The authors make no
|
|
Packit |
352660 |
* representations about the suitability of this software for any purpose. It
|
|
Packit |
352660 |
* is provided "as is" without express or implied warranty.
|
|
Packit |
352660 |
*
|
|
Packit |
352660 |
* THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
|
Packit |
352660 |
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
|
|
Packit |
352660 |
* EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
|
Packit |
352660 |
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
|
Packit |
352660 |
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
|
Packit |
352660 |
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
|
Packit |
352660 |
* PERFORMANCE OF THIS SOFTWARE.
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
|
|
Packit |
352660 |
#include "fcint.h"
|
|
Packit |
352660 |
#include <ctype.h>
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/* Capacity of CaseFoldRaw.lower: the most result code points kept from one input line. */
#define MAX_OUT 32
|
|
Packit |
352660 |
/* Size in bytes of the buffers used to read lines of input. */
#define MAX_LINE 8192
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
 * Folding class attached to each entry of the input file.  The tag text
 * for each class is listed in caseFoldClassMap.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,	/* tag "C" */
    CaseFoldFull,	/* tag "F" */
    CaseFoldSimple,	/* tag "S" */
    CaseFoldTurkic	/* tag "T" */
} CaseFoldClass;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
typedef struct _caseFoldClassMap {
|
|
Packit |
352660 |
const char *name;
|
|
Packit |
352660 |
CaseFoldClass class;
|
|
Packit |
352660 |
} CaseFoldClassMap;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static const CaseFoldClassMap caseFoldClassMap[] = {
|
|
Packit |
352660 |
{ "C", CaseFoldCommon },
|
|
Packit |
352660 |
{ "F", CaseFoldFull },
|
|
Packit |
352660 |
{ "S", CaseFoldSimple },
|
|
Packit |
352660 |
{ "T", CaseFoldTurkic },
|
|
Packit |
352660 |
{ 0, 0 }
|
|
Packit |
352660 |
};
|
|
Packit |
352660 |
|
|
Packit |
352660 |
typedef struct _caseFoldRaw {
|
|
Packit |
352660 |
FcChar32 upper;
|
|
Packit |
352660 |
CaseFoldClass class;
|
|
Packit |
352660 |
int nout;
|
|
Packit |
352660 |
FcChar32 lower[MAX_OUT];
|
|
Packit |
352660 |
} CaseFoldRaw;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
 * Report a fatal condition and stop.
 *
 * Writes "fc-case: panic <reason>" followed by a newline to stderr and
 * terminates the process with exit status 1.  Never returns.
 */
static void
panic (const char *reason)
{
    FILE *errStream = stderr;

    fprintf (errStream, "fc-case: panic %s\n", reason);
    exit (1);
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
int maxExpand;
|
|
Packit |
352660 |
static FcCaseFold *folds;
|
|
Packit |
352660 |
int nfolds;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static FcCaseFold *
|
|
Packit |
352660 |
addFold (void)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if (folds)
|
|
Packit |
352660 |
folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
folds = malloc (sizeof (FcCaseFold));
|
|
Packit |
352660 |
if (!folds)
|
|
Packit |
352660 |
panic ("out of memory");
|
|
Packit |
352660 |
return &folds[nfolds++];
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static int
|
|
Packit |
352660 |
ucs4_to_utf8 (FcChar32 ucs4,
|
|
Packit |
352660 |
FcChar8 dest[FC_UTF8_MAX_LEN])
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
int bits;
|
|
Packit |
352660 |
FcChar8 *d = dest;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
if (ucs4 < 0x80) { *d++= ucs4; bits= -6; }
|
|
Packit |
352660 |
else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; }
|
|
Packit |
352660 |
else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; }
|
|
Packit |
352660 |
else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
|
|
Packit |
352660 |
else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
|
|
Packit |
352660 |
else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
|
|
Packit |
352660 |
else return 0;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
for ( ; bits >= 0; bits-= 6) {
|
|
Packit |
352660 |
*d++= ((ucs4 >> bits) & 0x3F) | 0x80;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
return d - dest;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static int
|
|
Packit |
352660 |
utf8_size (FcChar32 ucs4)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
FcChar8 utf8[FC_UTF8_MAX_LEN];
|
|
Packit |
352660 |
return ucs4_to_utf8 (ucs4, utf8 );
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static FcChar8 *foldChars;
|
|
Packit |
352660 |
static int nfoldChars;
|
|
Packit |
352660 |
static int maxFoldChars;
|
|
Packit |
352660 |
static FcChar32 minFoldChar;
|
|
Packit |
352660 |
static FcChar32 maxFoldChar;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static void
|
|
Packit |
352660 |
addChar (FcChar32 c)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
FcChar8 utf8[FC_UTF8_MAX_LEN];
|
|
Packit |
352660 |
int len;
|
|
Packit |
352660 |
int i;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
len = ucs4_to_utf8 (c, utf8);
|
|
Packit |
352660 |
if (foldChars)
|
|
Packit |
352660 |
foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
foldChars = malloc (sizeof (FcChar8) * len);
|
|
Packit |
352660 |
if (!foldChars)
|
|
Packit |
352660 |
panic ("out of memory");
|
|
Packit |
352660 |
for (i = 0; i < len; i++)
|
|
Packit |
352660 |
foldChars[nfoldChars + i] = utf8[i];
|
|
Packit |
352660 |
nfoldChars += len;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static int
|
|
Packit |
352660 |
foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
switch (fold->method) {
|
|
Packit |
352660 |
case FC_CASE_FOLD_RANGE:
|
|
Packit |
352660 |
if ((short) (raw->lower[0] - raw->upper) != fold->offset)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
if (raw->upper != fold->upper + fold->count)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
return 1;
|
|
Packit |
352660 |
case FC_CASE_FOLD_EVEN_ODD:
|
|
Packit |
352660 |
if ((short) (raw->lower[0] - raw->upper) != 1)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
if (raw->upper != fold->upper + fold->count + 1)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
return 1;
|
|
Packit |
352660 |
case FC_CASE_FOLD_FULL:
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static const char *
|
|
Packit |
352660 |
case_fold_method_name (FcChar16 method)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
switch (method) {
|
|
Packit |
352660 |
case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,";
|
|
Packit |
352660 |
case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
|
|
Packit |
352660 |
case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,";
|
|
Packit |
352660 |
default: return "unknown";
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static void
|
|
Packit |
352660 |
dump (void)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
int i;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
|
|
Packit |
352660 |
printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
|
|
Packit |
352660 |
printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
|
|
Packit |
352660 |
printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
|
|
Packit |
352660 |
printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
|
|
Packit |
352660 |
printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
|
|
Packit |
352660 |
printf ( "\n");
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Dump out ranges
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
|
|
Packit |
352660 |
for (i = 0; i < nfolds; i++)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
|
|
Packit |
352660 |
folds[i].upper, case_fold_method_name (folds[i].method),
|
|
Packit |
352660 |
folds[i].count, folds[i].offset);
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
printf ("};\n\n");
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Dump out "other" values
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
|
|
Packit |
352660 |
printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
|
|
Packit |
352660 |
for (i = 0; i < nfoldChars; i++)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
printf ("0x%02x", foldChars[i]);
|
|
Packit |
352660 |
if (i != nfoldChars - 1)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if ((i & 0xf) == 0xf)
|
|
Packit |
352660 |
printf (",\n");
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
printf (",");
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
printf ("\n};\n");
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Read the standard Unicode CaseFolding.txt file
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
/* Delimiters passed to strtok when splitting an input line: ';', space, tab, newline. */
#define SEP "; \t\n"
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static int
|
|
Packit |
352660 |
parseRaw (char *line, CaseFoldRaw *raw)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
char *tok, *end;
|
|
Packit |
352660 |
int i;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
if (!isxdigit (line[0]))
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Get upper case value
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
tok = strtok (line, SEP);
|
|
Packit |
352660 |
if (!tok || tok[0] == '#')
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
raw->upper = strtol (tok, &end, 16);
|
|
Packit |
352660 |
if (end == tok)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Get class
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
tok = strtok (NULL, SEP);
|
|
Packit |
352660 |
if (!tok || tok[0] == '#')
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
for (i = 0; caseFoldClassMap[i].name; i++)
|
|
Packit |
352660 |
if (!strcmp (tok, caseFoldClassMap[i].name))
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
raw->class = caseFoldClassMap[i].class;
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
if (!caseFoldClassMap[i].name)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Get list of result characters
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
for (i = 0; i < MAX_OUT; i++)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
tok = strtok (NULL, SEP);
|
|
Packit |
352660 |
if (!tok || tok[0] == '#')
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
raw->lower[i] = strtol (tok, &end, 16);
|
|
Packit |
352660 |
if (end == tok)
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
if (i == 0)
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
raw->nout = i;
|
|
Packit |
352660 |
return 1;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
static int
|
|
Packit |
352660 |
caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
char line[MAX_LINE];
|
|
Packit |
352660 |
|
|
Packit |
352660 |
for (;;)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if (!fgets (line, sizeof (line) - 1, in))
|
|
Packit |
352660 |
return 0;
|
|
Packit |
352660 |
if (parseRaw (line, raw))
|
|
Packit |
352660 |
return 1;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
int
|
|
Packit |
352660 |
main (int argc, char **argv)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
FcCaseFold *fold = 0;
|
|
Packit |
352660 |
CaseFoldRaw raw;
|
|
Packit |
352660 |
int i;
|
|
Packit |
352660 |
FILE *caseFile;
|
|
Packit |
352660 |
char line[MAX_LINE];
|
|
Packit |
352660 |
int expand;
|
|
Packit |
352660 |
|
|
Packit |
352660 |
if (argc != 2)
|
|
Packit |
352660 |
panic ("usage: fc-case CaseFolding.txt");
|
|
Packit |
352660 |
caseFile = fopen (argv[1], "r");
|
|
Packit |
352660 |
if (!caseFile)
|
|
Packit |
352660 |
panic ("can't open case folding file");
|
|
Packit |
352660 |
|
|
Packit |
352660 |
while (caseFoldReadRaw (caseFile, &raw))
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if (!minFoldChar)
|
|
Packit |
352660 |
minFoldChar = raw.upper;
|
|
Packit |
352660 |
maxFoldChar = raw.upper;
|
|
Packit |
352660 |
switch (raw.class) {
|
|
Packit |
352660 |
case CaseFoldCommon:
|
|
Packit |
352660 |
case CaseFoldFull:
|
|
Packit |
352660 |
if (raw.nout == 1)
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if (fold && foldExtends (fold, &raw))
|
|
Packit |
352660 |
fold->count = raw.upper - fold->upper + 1;
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
fold = addFold ();
|
|
Packit |
352660 |
fold->upper = raw.upper;
|
|
Packit |
352660 |
fold->offset = raw.lower[0] - raw.upper;
|
|
Packit |
352660 |
if (fold->offset == 1)
|
|
Packit |
352660 |
fold->method = FC_CASE_FOLD_EVEN_ODD;
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
fold->method = FC_CASE_FOLD_RANGE;
|
|
Packit |
352660 |
fold->count = 1;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
else
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
fold = addFold ();
|
|
Packit |
352660 |
fold->upper = raw.upper;
|
|
Packit |
352660 |
fold->method = FC_CASE_FOLD_FULL;
|
|
Packit |
352660 |
fold->offset = nfoldChars;
|
|
Packit |
352660 |
for (i = 0; i < raw.nout; i++)
|
|
Packit |
352660 |
addChar (raw.lower[i]);
|
|
Packit |
352660 |
fold->count = nfoldChars - fold->offset;
|
|
Packit |
352660 |
if (fold->count > maxFoldChars)
|
|
Packit |
352660 |
maxFoldChars = fold->count;
|
|
Packit |
352660 |
expand = fold->count - utf8_size (raw.upper);
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
if (expand > maxExpand)
|
|
Packit |
352660 |
maxExpand = expand;
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
case CaseFoldSimple:
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
case CaseFoldTurkic:
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Scan the input until the marker is found
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
|
|
Packit |
352660 |
while (fgets (line, sizeof (line), stdin))
|
|
Packit |
352660 |
{
|
|
Packit |
352660 |
if (!strncmp (line, "@@@", 3))
|
|
Packit |
352660 |
break;
|
|
Packit |
352660 |
fputs (line, stdout);
|
|
Packit |
352660 |
}
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* Dump these tables
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
dump ();
|
|
Packit |
352660 |
|
|
Packit |
352660 |
/*
|
|
Packit |
352660 |
* And flush out the rest of the input file
|
|
Packit |
352660 |
*/
|
|
Packit |
352660 |
|
|
Packit |
352660 |
while (fgets (line, sizeof (line), stdin))
|
|
Packit |
352660 |
fputs (line, stdout);
|
|
Packit |
352660 |
|
|
Packit |
352660 |
fflush (stdout);
|
|
Packit |
352660 |
exit (ferror (stdout));
|
|
Packit |
352660 |
}
|