Blame fc-case/fc-case.c

Packit 352660
/*
Packit 352660
 * fontconfig/fc-case/fc-case.c
Packit 352660
 *
Packit 352660
 * Copyright © 2004 Keith Packard
Packit 352660
 *
Packit 352660
 * Permission to use, copy, modify, distribute, and sell this software and its
Packit 352660
 * documentation for any purpose is hereby granted without fee, provided that
Packit 352660
 * the above copyright notice appear in all copies and that both that
Packit 352660
 * copyright notice and this permission notice appear in supporting
Packit 352660
 * documentation, and that the name of the author(s) not be used in
Packit 352660
 * advertising or publicity pertaining to distribution of the software without
Packit 352660
 * specific, written prior permission.  The authors make no
Packit 352660
 * representations about the suitability of this software for any purpose.  It
Packit 352660
 * is provided "as is" without express or implied warranty.
Packit 352660
 *
Packit 352660
 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
Packit 352660
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
Packit 352660
 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
Packit 352660
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
Packit 352660
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
Packit 352660
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
Packit 352660
 * PERFORMANCE OF THIS SOFTWARE.
Packit 352660
 */
Packit 352660
Packit 352660
#include "fcint.h"
Packit 352660
#include <ctype.h>
Packit 352660
Packit 352660
/* Capacity of CaseFoldRaw.lower: the most result code points kept per input line. */
#define MAX_OUT	    32
Packit 352660
/* Size in bytes of the buffers used to read text lines. */
#define MAX_LINE    8192
Packit 352660
Packit 352660
/*
 * Kind of mapping described by one line of the case folding file,
 * taken from the line's class field.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;
Packit 352660
Packit 352660
typedef struct _caseFoldClassMap {
Packit 352660
    const char	    *name;
Packit 352660
    CaseFoldClass   class;
Packit 352660
} CaseFoldClassMap;
Packit 352660
Packit 352660
static const CaseFoldClassMap	caseFoldClassMap[] = {
Packit 352660
    { "C", CaseFoldCommon },
Packit 352660
    { "F", CaseFoldFull },
Packit 352660
    { "S", CaseFoldSimple },
Packit 352660
    { "T", CaseFoldTurkic },
Packit 352660
    { 0, 0 }
Packit 352660
};
Packit 352660
Packit 352660
typedef struct _caseFoldRaw {
Packit 352660
    FcChar32	    upper;
Packit 352660
    CaseFoldClass   class;
Packit 352660
    int		    nout;
Packit 352660
    FcChar32	    lower[MAX_OUT];
Packit 352660
} CaseFoldRaw;
Packit 352660
Packit 352660
/*
 * Report a fatal error on stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
panic (const char *reason)
{
    (void) fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
Packit 352660
Packit 352660
int			maxExpand;
Packit 352660
static FcCaseFold	*folds;
Packit 352660
int			nfolds;
Packit 352660
Packit 352660
static FcCaseFold *
Packit 352660
addFold (void)
Packit 352660
{
Packit 352660
    if (folds)
Packit 352660
	folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
Packit 352660
    else
Packit 352660
	folds = malloc (sizeof (FcCaseFold));
Packit 352660
    if (!folds)
Packit 352660
	panic ("out of memory");
Packit 352660
    return &folds[nfolds++];
Packit 352660
}
Packit 352660
Packit 352660
static int
Packit 352660
ucs4_to_utf8 (FcChar32	ucs4,
Packit 352660
	      FcChar8	dest[FC_UTF8_MAX_LEN])
Packit 352660
{
Packit 352660
    int	bits;
Packit 352660
    FcChar8 *d = dest;
Packit 352660
    
Packit 352660
    if      (ucs4 <       0x80) {  *d++=  ucs4;                         bits= -6; }
Packit 352660
    else if (ucs4 <      0x800) {  *d++= ((ucs4 >>  6) & 0x1F) | 0xC0;  bits=  0; }
Packit 352660
    else if (ucs4 <    0x10000) {  *d++= ((ucs4 >> 12) & 0x0F) | 0xE0;  bits=  6; }
Packit 352660
    else if (ucs4 <   0x200000) {  *d++= ((ucs4 >> 18) & 0x07) | 0xF0;  bits= 12; }
Packit 352660
    else if (ucs4 <  0x4000000) {  *d++= ((ucs4 >> 24) & 0x03) | 0xF8;  bits= 18; }
Packit 352660
    else if (ucs4 < 0x80000000) {  *d++= ((ucs4 >> 30) & 0x01) | 0xFC;  bits= 24; }
Packit 352660
    else return 0;
Packit 352660
Packit 352660
    for ( ; bits >= 0; bits-= 6) {
Packit 352660
	*d++= ((ucs4 >> bits) & 0x3F) | 0x80;
Packit 352660
    }
Packit 352660
    return d - dest;
Packit 352660
}
Packit 352660
Packit 352660
static int
Packit 352660
utf8_size (FcChar32 ucs4)
Packit 352660
{
Packit 352660
    FcChar8 utf8[FC_UTF8_MAX_LEN];
Packit 352660
    return ucs4_to_utf8 (ucs4, utf8 );
Packit 352660
}
Packit 352660
Packit 352660
static FcChar8	*foldChars;
Packit 352660
static int	nfoldChars;
Packit 352660
static int	maxFoldChars;
Packit 352660
static FcChar32	minFoldChar;
Packit 352660
static FcChar32	maxFoldChar;
Packit 352660
Packit 352660
static void
Packit 352660
addChar (FcChar32 c)
Packit 352660
{
Packit 352660
    FcChar8	utf8[FC_UTF8_MAX_LEN];
Packit 352660
    int		len;
Packit 352660
    int		i;
Packit 352660
Packit 352660
    len = ucs4_to_utf8 (c, utf8);
Packit 352660
    if (foldChars)
Packit 352660
	foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
Packit 352660
    else
Packit 352660
	foldChars = malloc (sizeof (FcChar8) * len);
Packit 352660
    if (!foldChars)
Packit 352660
	panic ("out of memory");
Packit 352660
    for (i = 0; i < len; i++)
Packit 352660
	foldChars[nfoldChars + i] = utf8[i];
Packit 352660
    nfoldChars += len;
Packit 352660
}
Packit 352660
Packit 352660
static int
Packit 352660
foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
Packit 352660
{
Packit 352660
    switch (fold->method) {
Packit 352660
    case FC_CASE_FOLD_RANGE:
Packit 352660
	if ((short) (raw->lower[0] - raw->upper) != fold->offset)
Packit 352660
	    return 0;
Packit 352660
	if (raw->upper != fold->upper + fold->count)
Packit 352660
	    return 0;
Packit 352660
	return 1;
Packit 352660
    case FC_CASE_FOLD_EVEN_ODD:
Packit 352660
	if ((short) (raw->lower[0] - raw->upper) != 1)
Packit 352660
	    return 0;
Packit 352660
	if (raw->upper != fold->upper + fold->count + 1)
Packit 352660
	    return 0;
Packit 352660
	return 1;
Packit 352660
    case FC_CASE_FOLD_FULL:
Packit 352660
	break;
Packit 352660
    }
Packit 352660
    return 0;
Packit 352660
}
Packit 352660
	    
Packit 352660
static const char *
Packit 352660
case_fold_method_name (FcChar16 method)
Packit 352660
{
Packit 352660
    switch (method) {
Packit 352660
    case FC_CASE_FOLD_RANGE:	return "FC_CASE_FOLD_RANGE,";
Packit 352660
    case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
Packit 352660
    case FC_CASE_FOLD_FULL:	return "FC_CASE_FOLD_FULL,";
Packit 352660
    default:			return "unknown";
Packit 352660
    }
Packit 352660
}
Packit 352660
Packit 352660
static void
Packit 352660
dump (void)
Packit 352660
{
Packit 352660
    int	    i;
Packit 352660
    
Packit 352660
    printf (   "#define FC_NUM_CASE_FOLD	%d\n", nfolds);
Packit 352660
    printf (   "#define FC_NUM_CASE_FOLD_CHARS	%d\n", nfoldChars);
Packit 352660
    printf (   "#define FC_MAX_CASE_FOLD_CHARS	%d\n", maxFoldChars);
Packit 352660
    printf (   "#define FC_MAX_CASE_FOLD_EXPAND	%d\n", maxExpand);
Packit 352660
    printf (   "#define FC_MIN_FOLD_CHAR	0x%08x\n", minFoldChar);
Packit 352660
    printf (   "#define FC_MAX_FOLD_CHAR	0x%08x\n", maxFoldChar);
Packit 352660
    printf (   "\n");
Packit 352660
    
Packit 352660
    /*
Packit 352660
     * Dump out ranges
Packit 352660
     */
Packit 352660
    printf ("static const FcCaseFold    fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
Packit 352660
    for (i = 0; i < nfolds; i++)
Packit 352660
    {
Packit 352660
	printf ("    { 0x%08x, %-22s 0x%04x, %6d },\n",
Packit 352660
		folds[i].upper, case_fold_method_name (folds[i].method),
Packit 352660
		folds[i].count, folds[i].offset);
Packit 352660
    }
Packit 352660
    printf ("};\n\n");
Packit 352660
Packit 352660
    /*
Packit 352660
     * Dump out "other" values
Packit 352660
     */
Packit 352660
Packit 352660
    printf ("static const FcChar8	fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
Packit 352660
    for (i = 0; i < nfoldChars; i++)
Packit 352660
    {
Packit 352660
	printf ("0x%02x", foldChars[i]);
Packit 352660
	if (i != nfoldChars - 1)
Packit 352660
	{
Packit 352660
	    if ((i & 0xf) == 0xf) 
Packit 352660
		printf (",\n");
Packit 352660
	    else
Packit 352660
		printf (",");
Packit 352660
	}
Packit 352660
    }
Packit 352660
    printf ("\n};\n");
Packit 352660
}
Packit 352660
Packit 352660
/*
Packit 352660
 * Read the standard Unicode CaseFolding.txt file
Packit 352660
 */
Packit 352660
/* Delimiter set passed to strtok: semicolon, space, tab and newline. */
#define SEP "; \t\n"
Packit 352660
Packit 352660
static int
Packit 352660
parseRaw (char *line, CaseFoldRaw *raw)
Packit 352660
{
Packit 352660
    char    *tok, *end;
Packit 352660
    int	    i;
Packit 352660
    
Packit 352660
    if (!isxdigit (line[0]))
Packit 352660
	return 0;
Packit 352660
    /*
Packit 352660
     * Get upper case value
Packit 352660
     */
Packit 352660
    tok = strtok (line, SEP);
Packit 352660
    if (!tok || tok[0] == '#')
Packit 352660
	return 0;
Packit 352660
    raw->upper = strtol (tok, &end, 16);
Packit 352660
    if (end == tok)
Packit 352660
	return 0;
Packit 352660
    /*
Packit 352660
     * Get class
Packit 352660
     */
Packit 352660
    tok = strtok (NULL, SEP);
Packit 352660
    if (!tok || tok[0] == '#')
Packit 352660
	return 0;
Packit 352660
    for (i = 0; caseFoldClassMap[i].name; i++)
Packit 352660
	if (!strcmp (tok, caseFoldClassMap[i].name))
Packit 352660
	{
Packit 352660
	    raw->class = caseFoldClassMap[i].class;
Packit 352660
	    break;
Packit 352660
	}
Packit 352660
    if (!caseFoldClassMap[i].name)
Packit 352660
	return 0;
Packit 352660
	
Packit 352660
    /*
Packit 352660
     * Get list of result characters
Packit 352660
     */
Packit 352660
    for (i = 0; i < MAX_OUT; i++)
Packit 352660
    {
Packit 352660
	tok = strtok (NULL, SEP);
Packit 352660
	if (!tok || tok[0] == '#')
Packit 352660
	    break;
Packit 352660
	raw->lower[i] = strtol (tok, &end, 16);
Packit 352660
	if (end == tok)
Packit 352660
	    break;
Packit 352660
    }
Packit 352660
    if (i == 0)
Packit 352660
	return 0;
Packit 352660
    raw->nout = i;
Packit 352660
    return 1;
Packit 352660
}
Packit 352660
Packit 352660
static int
Packit 352660
caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
Packit 352660
{
Packit 352660
    char    line[MAX_LINE];
Packit 352660
Packit 352660
    for (;;)
Packit 352660
    {
Packit 352660
	if (!fgets (line, sizeof (line) - 1, in))
Packit 352660
	    return 0;
Packit 352660
	if (parseRaw (line, raw))
Packit 352660
	    return 1;
Packit 352660
    }
Packit 352660
}
Packit 352660
Packit 352660
int
Packit 352660
main (int argc, char **argv)
Packit 352660
{
Packit 352660
    FcCaseFold		*fold = 0;
Packit 352660
    CaseFoldRaw		raw;
Packit 352660
    int			i;
Packit 352660
    FILE		*caseFile;
Packit 352660
    char		line[MAX_LINE];
Packit 352660
    int			expand;
Packit 352660
Packit 352660
    if (argc != 2)
Packit 352660
	panic ("usage: fc-case CaseFolding.txt");
Packit 352660
    caseFile = fopen (argv[1], "r");
Packit 352660
    if (!caseFile)
Packit 352660
	panic ("can't open case folding file");
Packit 352660
    
Packit 352660
    while (caseFoldReadRaw (caseFile, &raw))
Packit 352660
    {
Packit 352660
	if (!minFoldChar)
Packit 352660
	    minFoldChar = raw.upper;
Packit 352660
	maxFoldChar = raw.upper;
Packit 352660
	switch (raw.class) {
Packit 352660
	case CaseFoldCommon:
Packit 352660
	case CaseFoldFull:
Packit 352660
	    if (raw.nout == 1)
Packit 352660
	    {
Packit 352660
		if (fold && foldExtends (fold, &raw))
Packit 352660
		    fold->count = raw.upper - fold->upper + 1;
Packit 352660
		else
Packit 352660
		{
Packit 352660
		    fold = addFold ();
Packit 352660
		    fold->upper = raw.upper;
Packit 352660
		    fold->offset = raw.lower[0] - raw.upper;
Packit 352660
		    if (fold->offset == 1)
Packit 352660
			fold->method = FC_CASE_FOLD_EVEN_ODD;
Packit 352660
		    else
Packit 352660
			fold->method = FC_CASE_FOLD_RANGE;
Packit 352660
		    fold->count = 1;
Packit 352660
		}
Packit 352660
		expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
Packit 352660
	    }
Packit 352660
	    else
Packit 352660
	    {
Packit 352660
		fold = addFold ();
Packit 352660
		fold->upper = raw.upper;
Packit 352660
		fold->method = FC_CASE_FOLD_FULL;
Packit 352660
		fold->offset = nfoldChars;
Packit 352660
		for (i = 0; i < raw.nout; i++)
Packit 352660
		    addChar (raw.lower[i]);
Packit 352660
		fold->count = nfoldChars - fold->offset;
Packit 352660
		if (fold->count > maxFoldChars)
Packit 352660
		    maxFoldChars = fold->count;
Packit 352660
		expand = fold->count - utf8_size (raw.upper);
Packit 352660
	    }
Packit 352660
	    if (expand > maxExpand)
Packit 352660
		maxExpand = expand;
Packit 352660
	    break;
Packit 352660
	case CaseFoldSimple:
Packit 352660
	    break;
Packit 352660
	case CaseFoldTurkic:
Packit 352660
	    break;
Packit 352660
	}
Packit 352660
    }
Packit 352660
    /*
Packit 352660
     * Scan the input until the marker is found
Packit 352660
     */
Packit 352660
    
Packit 352660
    while (fgets (line, sizeof (line), stdin))
Packit 352660
    {
Packit 352660
	if (!strncmp (line, "@@@", 3))
Packit 352660
	    break;
Packit 352660
	fputs (line, stdout);
Packit 352660
    }
Packit 352660
    
Packit 352660
    /*
Packit 352660
     * Dump these tables
Packit 352660
     */
Packit 352660
    dump ();
Packit 352660
    
Packit 352660
    /*
Packit 352660
     * And flush out the rest of the input file
Packit 352660
     */
Packit 352660
Packit 352660
    while (fgets (line, sizeof (line), stdin))
Packit 352660
	fputs (line, stdout);
Packit 352660
    
Packit 352660
    fflush (stdout);
Packit 352660
    exit (ferror (stdout));
Packit 352660
}