Blame tests/gen-casefold-txt.pl

Packit ae235b
#! /usr/bin/perl -w
Packit ae235b
Packit ae235b
#    Copyright (C) 1998, 1999 Tom Tromey
Packit ae235b
#    Copyright (C) 2001 Red Hat Software
Packit ae235b
Packit ae235b
#    This program is free software; you can redistribute it and/or modify
Packit ae235b
#    it under the terms of the GNU General Public License as published by
Packit ae235b
#    the Free Software Foundation; either version 2, or (at your option)
Packit ae235b
#    any later version.
Packit ae235b
Packit ae235b
#    This program is distributed in the hope that it will be useful,
Packit ae235b
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit ae235b
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit ae235b
#    GNU General Public License for more details.
Packit ae235b
Packit ae235b
#    You should have received a copy of the GNU General Public License
Packit ae235b
#    along with this program; if not, see <http://www.gnu.org/licenses/>.
Packit ae235b
Packit ae235b
# gen-casefold-txt.pl - Generate test cases for casefolding from Unicode data.
Packit ae235b
# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
Packit ae235b
# Usage: gen-casefold-txt.pl UNICODE-VERSION CaseFolding.txt
Packit ae235b
# I consider the output of this program to be unrestricted.  Use it as
Packit ae235b
# you will.
Packit ae235b
Packit ae235b
require 5.006;

use strict;
use warnings;

# Overview
# --------
# Reads a Unicode CaseFolding table (second argument) and writes one test
# case per retained entry to STDOUT, in the form
#
#     <character> TAB <folded string> NEWLINE
#
# preceded by a fixed header and one hand-written test.  Entries whose
# status field is "S" (simple) or "T" (Turkic) are skipped; every other
# entry is emitted.  Malformed entries are reported on STDERR and skipped.
#
# Exit status: non-zero on a usage error (die) and 1 if the input file
# cannot be opened.

# Indices of the fields in a CaseFolding record, i.e. the ';'-separated
# columns of a line once its trailing "# ..." comment has been removed.
my $FOLDING_CODE    = 0;
my $FOLDING_STATUS  = 1;
my $FOLDING_MAPPING = 2;

if (@ARGV != 2) {
    # Report only the basename of the script in the usage message.
    $0 =~ s@.*/@@;
    die "Usage: $0 UNICODE-VERSION  CaseFolding.txt\n";
}

my ($unicode_version, $casefolding_path) = @ARGV;

# The header is pure ASCII apart from the caller-supplied version string, and
# is written before the UTF-8 layer is pushed onto STDOUT below.
print <<EOT;
# Test cases generated from Unicode $unicode_version data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#
EOT

# The test cases are characters built with pack("U"); encode them as UTF-8.
binmode STDOUT, ":utf8";

# Three-argument open: the file name is taken literally, so characters such
# as '<', '>' or '|' in the path cannot change the open mode.
my $input;
unless (open ($input, '<', $casefolding_path)) {
    print STDERR "$0: cannot open $casefolding_path: $!\n";
    exit 1;
}

while (my $line = <$input>) {
    # chomp, not chop: a final line without a trailing newline must keep
    # its last character.
    chomp $line;

    # Drop comments first (whole-line or trailing), then skip whatever is
    # left if it is empty.  This also covers indented comment-only lines.
    $line =~ s/\s*#.*//;
    next if $line =~ /^\s*$/;

    # A positive limit makes split keep trailing empty fields, so a
    # well-formed record "code; status; mapping;" yields exactly four
    # fields (the last one empty) and $#fields == 3.
    my @fields = split (/\s*;\s*/, $line, 30);

    my $raw_code = $fields[$FOLDING_CODE];

    if ($#fields != 3) {
        printf STDERR "Entry for %s has wrong number of fields (%d)\n",
            $raw_code, $#fields;
        next;
    }

    # skip simple and Turkic mappings
    next if $fields[$FOLDING_STATUS] =~ /^[ST]$/;

    # The code and the mapping are hexadecimal code points; the mapping may
    # consist of several of them separated by whitespace.
    my $code   = hex ($raw_code);
    my @values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];

    printf "%s\t%s\n", pack ("U", $code), pack ("U*", @values);
}

close $input;