Blame buildutils/convert_tags.pl

Packit 5d935b
# This utility script interprets a plain-text file that typically is generated by
Packit 5d935b
# cut&paste from the tables contained in the three reference pages of the OpenType Spec:
Packit 5d935b
#	Script tags:	http://www.microsoft.com/typography/otspec/scripttags.htm
Packit 5d935b
#	Language tags:	http://www.microsoft.com/typography/otspec/languagetags.htm
Packit 5d935b
#	Feature tags:	http://www.microsoft.com/typography/otspec/featurelist.htm
Packit 5d935b
# Alternatively, input can be VOLT's tags.txt
Packit 5d935b
# Output (to stdout) is in perl syntax for the hash initialization, e.g.:
Packit 5d935b
#	    "Arabic" => "arab",
Packit 5d935b
#	    "Armenian" => "armn",
Packit 5d935b
# This output can then be transferred to Tags.pm
Packit 5d935b
#
Packit 5d935b
# Bob Hallissy 2008-01-31
Packit 5d935b
Packit 5d935b
use strict;
use warnings;

my $which;		# Type ("SCRIPT", "LANGUAGE" or "FEATURE") of the most recent VOLT record
my %iso639list;		# Padded OpenType tag => ISO 639 text from the optional last column

# Pad a tag with trailing spaces to the four-character width used in the output.
sub pad_tag
{
	my ($tag) = @_;
	return sprintf('%-4s', $tag);
}

# Convert one input line into the text that should be written to stdout.
#
# Recognized line formats, tried in this order:
#   "TYPE","name","tag"        VOLT's tags.txt record
#   'tag' name                 feature tag followed by its name
#   'tag1-tag2' name           range of feature tags, e.g. 'cv01-cv99'
#   name<TAB>tag[<TAB>iso]     script or language name, tag, optional ISO 639 list
#
# Side effects: updates $which (to decide when a "//TYPE" header is needed) and
# records any ISO 639 list in %iso639list for the table printed after the loop.
#
# Returns the output text; blank input yields "\n" and unrecognized input
# yields an "UNEXPECTED DATA" line.
sub convert_line
{
	my ($line) = @_;

	$line =~ s/\s+$//;	# trim trailing whitespace (including line ending).
	return "\n" unless $line =~ /\S/;	# Just print empty lines
	$line =~ s/^\s+//;	# trim leading whitespace

	if ($line =~ /^"(SCRIPT|LANGUAGE|FEATURE)"\s*,\s*"([^"]+)"\s*,\s*"([^"]+)"/)
	{
		# VOLT's tags.txt
		my ($type, $name, $tag) = ($1, $2, $3);
		my $out = '';
		# The types are strings, so they must be compared with 'ne'; a numeric
		# '!=' treats every type as 0 and the header would never be printed.
		$out .= "\n\n//$type\n\n" if !defined $which || $type ne $which;
		$which = $type;
		return $out . "    \"$name\" => '$tag',\n";
	}

	if ($line =~ /^'(.{1,4})'\s+(.*)$/)
	{
		# Special reverse formatting for feature names
		my ($tag, $name) = ($1, $2);
		return "    \"$name\" => '" . pad_tag($tag) . "',\n";
	}

	if ($line =~ /^'(.{1,4})-(.{1,4})'\s+(.*)$/)
	{
		# Special reverse formatting for feature names like 'cv01-cv99'
		my ($first, $last, $name) = ($1, $2, $3);
		my $out = '';
		# The range operator on two strings steps with magical string increment.
		for my $tag ($first .. $last)
		{
			# Use the trailing digits as the index; fall back to the whole tag
			# rather than reading a stale $1 when there are no digits.
			my $index = $tag =~ /(\d+)$/ ? $1 : $tag;
			$out .= "    \"$name $index\" => '" . pad_tag($tag) . "',\n";
		}
		return $out;
	}

	if ($line =~ /^([^\t]*)\t([\w]{2,4})(?: +(\([^\t]*\)))?(?:\t(.*))?$/)
	{
		# Script and language names
		my ($name, $tag, $extra, $iso_codes) = ($1, $2, $3, $4);
		$name =~ s/\s*\(Standard\)\s*//i;	# Remove "(Standard)" from French and German entries
		$name .= " $extra" if defined $extra;	# Dhivehi has "(deprecated)" after the "DHV " tag -- move it to name.
		$tag = pad_tag($tag);
		if (defined $iso_codes)
		{
			$iso_codes =~ s/,//g;
			$iso639list{$tag} = $iso_codes;	# Save for later
		}
		return "    \"$name\" => '$tag',\n";
	}

	return "UNEXPECTED DATA: '$line'\n";
}

while (my $line = <>)
{
	print convert_line($line);
}
Packit 5d935b
Packit 5d935b
# Render a tag => ISO 639 text mapping as perl hash-initialization lines,
# one "    'tag' => 'text'," line per entry, ordered by sorted tag.
# Takes a hash reference and returns the lines as a single string
# (empty when the hash has no entries).
sub format_iso639_entries
{
	my ($codes_for) = @_;
	return join '', map { "    '$_' => '$codes_for->{$_}',\n" } sort keys %$codes_for;
}

print "\n";
# Use print, not printf: the entries are data, and any '%' in them would
# otherwise be interpreted as a format conversion.
print format_iso639_entries(\%iso639list);
Packit 5d935b
Packit 5d935b
=head1 AUTHOR
Packit 5d935b
Packit 5d935b
Bob Hallissy L<http://scripts.sil.org/FontUtils>.
Packit 5d935b
Packit 5d935b
=head1 LICENSING
Packit 5d935b
Packit 5d935b
Copyright (c) 1998-2014, SIL International (http://www.sil.org)
Packit 5d935b
Packit 5d935b
This script is released under the terms of the Artistic License 2.0.
Packit 5d935b
For details, see the full text of the license in the file LICENSE.
Packit 5d935b
Packit 5d935b
=cut