Blame bin/unidump

Packit d0f5c2
#!./perl
Packit d0f5c2
Packit d0f5c2
BEGIN { pop @INC if $INC[-1] eq '.' }
Packit d0f5c2
use strict;
Packit d0f5c2
use Encode;
Packit d0f5c2
use Getopt::Std;
Packit d0f5c2
my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt);
Packit d0f5c2
$Opt{p} ||= $Opt{P};
Packit d0f5c2
$Opt{e} ||= 'utf8';
Packit d0f5c2
$Opt{f} ||= $Opt{e};
Packit d0f5c2
$Opt{t} ||= $Opt{e};
Packit d0f5c2
$Opt{h} and help();
Packit d0f5c2
Packit d0f5c2
my ($linebuf, $outbuf);
Packit d0f5c2
my $CPL = $Opt{p} ? 64 : 8;
Packit d0f5c2
my $linenum;
Packit d0f5c2
my $linesperheading = $Opt{H};
Packit d0f5c2
my $nchars;
Packit d0f5c2
our $PrevChunk;
Packit d0f5c2
Packit d0f5c2
$Opt{h} and help();
Packit d0f5c2
$Opt{p} and do_perl($Opt{s});
Packit d0f5c2
do_dump($Opt{s});
Packit d0f5c2
exit;
Packit d0f5c2
Packit d0f5c2
#
Packit d0f5c2
Packit d0f5c2
sub do_perl{
Packit d0f5c2
    my $string = shift;
Packit d0f5c2
    $Opt{P} and print "#!$^X -w\nprint\n";
Packit d0f5c2
    unless ($string){
Packit d0f5c2
    while(<>){
Packit d0f5c2
        use utf8;
Packit d0f5c2
        $linebuf .=  Encode::decode($Opt{f}, $_);
Packit d0f5c2
        while($linebuf){
Packit d0f5c2
        my $chr =  render_p(substr($linebuf, 0, 1, ''));
Packit d0f5c2
        length($outbuf) + length($chr) > $CPL and print_P();
Packit d0f5c2
        $outbuf .= $chr;
Packit d0f5c2
        }
Packit d0f5c2
    }
Packit d0f5c2
    $outbuf and print print_P(";");
Packit d0f5c2
    }else{
Packit d0f5c2
    while($string){
Packit d0f5c2
        my $chr =  render_p(substr($string, 0, 1, ''));
Packit d0f5c2
        length($outbuf) + length($chr) > $CPL and print_P();
Packit d0f5c2
        $outbuf .= $chr;
Packit d0f5c2
    }
Packit d0f5c2
    }
Packit d0f5c2
    $outbuf and print print_P(";");
Packit d0f5c2
    exit;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub render_p{
Packit d0f5c2
    my ($chr, $format) = @_;
Packit d0f5c2
    our %S2pstr;
Packit d0f5c2
    $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
Packit d0f5c2
    $chr =~ /[\x20-\x7e]/ and return $chr;  # ascii, printable;
Packit d0f5c2
    my $fmt = ($chr =~ /[\x00-\x1f\x7F]/)  ?
Packit d0f5c2
    q(\x%x) : q(\x{%x});
Packit d0f5c2
    return sprintf $fmt, ord($chr);
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub print_P{
Packit d0f5c2
    my $end = shift;
Packit d0f5c2
    $outbuf or return;
Packit d0f5c2
    print '"', encode($Opt{t}, $outbuf), '"';
Packit d0f5c2
    my $tail = $Opt{P} ? $end ? "$end" :  "," : '';
Packit d0f5c2
    print $tail, "\n";
Packit d0f5c2
    $outbuf = '';
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub do_dump{
Packit d0f5c2
    my $string = shift;
Packit d0f5c2
    !$Opt{p} and exists $Opt{H} and print_H();
Packit d0f5c2
    unless ($string){
Packit d0f5c2
    while(<>){
Packit d0f5c2
        use utf8;
Packit d0f5c2
        $linebuf .=  Encode::decode($Opt{f}, $_);
Packit d0f5c2
        while (length($linebuf) > $CPL){
Packit d0f5c2
        my $chunk = substr($linebuf, 0, $CPL, '');
Packit d0f5c2
        print_C($chunk, $linenum++);
Packit d0f5c2
        $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
Packit d0f5c2
        }
Packit d0f5c2
    }
Packit d0f5c2
    $linebuf and print_C($linebuf);
Packit d0f5c2
    }else{
Packit d0f5c2
    while ($string){
Packit d0f5c2
        my $chunk = substr($string, 0, $CPL, '');
Packit d0f5c2
        print_C($chunk, $linenum++);
Packit d0f5c2
        $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
Packit d0f5c2
    }
Packit d0f5c2
    }
Packit d0f5c2
    exit;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub print_S{
Packit d0f5c2
    print "--------+------------------------------------------------";
Packit d0f5c2
    if ($Opt{C}){
Packit d0f5c2
    print "-+-----------------";
Packit d0f5c2
    }
Packit d0f5c2
    print "\n";
Packit d0f5c2
}
Packit d0f5c2
sub print_H{
Packit d0f5c2
    print "  Offset      0     1     2     3     4     5     6     7";
Packit d0f5c2
    if ($Opt{C}){
Packit d0f5c2
    print " |  0 1 2 3 4 5 6 7";
Packit d0f5c2
    }
Packit d0f5c2
    print "\n";
Packit d0f5c2
    print_S;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub print_C{
Packit d0f5c2
    my ($chunk, $linenum) = @_;
Packit d0f5c2
    if (!$Opt{v} and $chunk eq $PrevChunk){
Packit d0f5c2
    printf "%08x *\n", $linenum*8; return;
Packit d0f5c2
    }
Packit d0f5c2
    $PrevChunk = $chunk;
Packit d0f5c2
    my $end = length($chunk) - 1;
Packit d0f5c2
    my (@ord, @chr);
Packit d0f5c2
    for my $i (0..$end){
Packit d0f5c2
    use utf8;
Packit d0f5c2
    my $chr = substr($chunk,$i,1);
Packit d0f5c2
    my $ord = ord($chr);
Packit d0f5c2
    my $fmt = $ord <= 0xffff ? "  %04x" : " %05x";
Packit d0f5c2
    push @ord, (sprintf $fmt, $ord);
Packit d0f5c2
    $Opt{C} and push @chr, render_c($chr);
Packit d0f5c2
    }
Packit d0f5c2
    if (++$end < 7){
Packit d0f5c2
    for my $i ($end..7){
Packit d0f5c2
        push @ord, (" " x 6);
Packit d0f5c2
    }
Packit d0f5c2
    }
Packit d0f5c2
    my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
Packit d0f5c2
    $Opt{C} and $line .= sprintf " | %s",  join('', @chr);
Packit d0f5c2
    print encode($Opt{t}, $line), "\n";
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub render_c{
Packit d0f5c2
    my ($chr, $format) = @_;
Packit d0f5c2
    our (%S2str, $IsFullWidth);
Packit d0f5c2
    $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || "  ";
Packit d0f5c2
    $chr =~ $IsFullWidth and return $chr; # as is
Packit d0f5c2
    return " " . $chr;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
sub help{
Packit d0f5c2
    my $message = shift;
Packit d0f5c2
    use File::Basename;
Packit d0f5c2
    my $name = basename($0);
Packit d0f5c2
    $message and print STDERR "$name error: $message\n";
Packit d0f5c2
    print STDERR <<"EOT";
Packit d0f5c2
Usage:
Packit d0f5c2
  $name -[options...] [files...]
Packit d0f5c2
  $name -[options...] -s "string"
Packit d0f5c2
  $name -h
Packit d0f5c2
  -h prints this message.
Packit d0f5c2
Inherited from hexdump;
Packit d0f5c2
  -C Canonical unidump mode
Packit d0f5c2
  -v prints the duplicate line as is.  Without this option,
Packit d0f5c2
     single "*" will be printed instead.
Packit d0f5c2
For unidump only
Packit d0f5c2
  -p prints in perl literals that you can copy and paste directly
Packit d0f5c2
     to your perl script.
Packit d0f5c2
  -P prints in perl executable format!
Packit d0f5c2
  -u prints a bunch of "Uxxxx,".  Handy when you want to pass your
Packit d0f5c2
     characters in mailing lists. 
Packit d0f5c2
IO Options:
Packit d0f5c2
  -e io_encoding    same as "-f io_encoding -t io_encoding"
Packit d0f5c2
  -f from_encoding  convert the source stream from this encoding
Packit d0f5c2
  -t to_encoding    print to STDOUT in this encoding
Packit d0f5c2
  -s string         "string" will be converted instead of STDIN.
Packit d0f5c2
  -H nline          prints separater for each nlines of output.
Packit d0f5c2
                    0 means only the table headding be printed.
Packit d0f5c2
EOT
Packit d0f5c2
  exit;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
BEGIN{
Packit d0f5c2
    our %S2pstr= (
Packit d0f5c2
          "\\" => '\\\\',
Packit d0f5c2
          "\0" => '\0',
Packit d0f5c2
          "\t" => '\t',
Packit d0f5c2
          "\n" => '\n',
Packit d0f5c2
          "\r" => '\r',
Packit d0f5c2
          "\v" => '\v',
Packit d0f5c2
          "\a" => '\a',
Packit d0f5c2
          "\e" => '\e',
Packit d0f5c2
          "\"" => qq(\\\"),
Packit d0f5c2
          "\'" => qq(\\\'),
Packit d0f5c2
          '$'  => '\$',
Packit d0f5c2
          "@"  => '\@',
Packit d0f5c2
          "%"  => '\%',
Packit d0f5c2
         );
Packit d0f5c2
Packit d0f5c2
    our %S2str = (
Packit d0f5c2
          qq(\x00) => q(\0),  # NULL
Packit d0f5c2
          qq(\x01) => q(^A),  # START OF HEADING
Packit d0f5c2
          qq(\x02) => q(^B),  # START OF TEXT
Packit d0f5c2
          qq(\x03) => q(^C),  # END OF TEXT
Packit d0f5c2
          qq(\x04) => q(^D),  # END OF TRANSMISSION
Packit d0f5c2
          qq(\x05) => q(^E),  # ENQUIRY
Packit d0f5c2
          qq(\x06) => q(^F),  # ACKNOWLEDGE
Packit d0f5c2
          qq(\x07) => q(\a),  # BELL
Packit d0f5c2
          qq(\x08) => q(^H),  # BACKSPACE
Packit d0f5c2
          qq(\x09) => q(\t),  # HORIZONTAL TABULATION
Packit d0f5c2
          qq(\x0A) => q(\n),  # LINE FEED
Packit d0f5c2
          qq(\x0B) => q(\v),  # VERTICAL TABULATION
Packit d0f5c2
          qq(\x0C) => q(^L),  # FORM FEED
Packit d0f5c2
          qq(\x0D) => q(\r),  # CARRIAGE RETURN
Packit d0f5c2
          qq(\x0E) => q(^N),  # SHIFT OUT
Packit d0f5c2
          qq(\x0F) => q(^O),  # SHIFT IN
Packit d0f5c2
          qq(\x10) => q(^P),  # DATA LINK ESCAPE
Packit d0f5c2
          qq(\x11) => q(^Q),  # DEVICE CONTROL ONE
Packit d0f5c2
          qq(\x12) => q(^R),  # DEVICE CONTROL TWO
Packit d0f5c2
          qq(\x13) => q(^S),  # DEVICE CONTROL THREE
Packit d0f5c2
          qq(\x14) => q(^T),  # DEVICE CONTROL FOUR
Packit d0f5c2
          qq(\x15) => q(^U),  # NEGATIVE ACKNOWLEDGE
Packit d0f5c2
          qq(\x16) => q(^V),  # SYNCHRONOUS IDLE
Packit d0f5c2
          qq(\x17) => q(^W),  # END OF TRANSMISSION BLOCK
Packit d0f5c2
          qq(\x18) => q(^X),  # CANCEL
Packit d0f5c2
          qq(\x19) => q(^Y),  # END OF MEDIUM
Packit d0f5c2
          qq(\x1A) => q(^Z),  # SUBSTITUTE
Packit d0f5c2
          qq(\x1B) => q(\e),  # ESCAPE (\c[)
Packit d0f5c2
          qq(\x1C) => "^\\",  # FILE SEPARATOR
Packit d0f5c2
          qq(\x1D) => "^\]",  # GROUP SEPARATOR
Packit d0f5c2
          qq(\x1E) => q(^^),  # RECORD SEPARATOR
Packit d0f5c2
          qq(\x1F) => q(^_),  # UNIT SEPARATOR
Packit d0f5c2
          );
Packit d0f5c2
    #
Packit d0f5c2
    # Generated out of lib/unicore/EastAsianWidth.txt 
Packit d0f5c2
    # will it work ?
Packit d0f5c2
    #		  
Packit d0f5c2
    our $IsFullWidth = 
Packit d0f5c2
    qr/^[
Packit d0f5c2
         \x{1100}-\x{1159}
Packit d0f5c2
         \x{115F}-\x{115F}
Packit d0f5c2
         \x{2329}-\x{232A}
Packit d0f5c2
         \x{2E80}-\x{2E99}
Packit d0f5c2
         \x{2E9B}-\x{2EF3}
Packit d0f5c2
         \x{2F00}-\x{2FD5}
Packit d0f5c2
         \x{2FF0}-\x{2FFB}
Packit d0f5c2
         \x{3000}-\x{303E}
Packit d0f5c2
         \x{3041}-\x{3096}
Packit d0f5c2
         \x{3099}-\x{30FF}
Packit d0f5c2
         \x{3105}-\x{312C}
Packit d0f5c2
         \x{3131}-\x{318E}
Packit d0f5c2
         \x{3190}-\x{31B7}
Packit d0f5c2
         \x{31F0}-\x{321C}
Packit d0f5c2
         \x{3220}-\x{3243}
Packit d0f5c2
         \x{3251}-\x{327B}
Packit d0f5c2
         \x{327F}-\x{32CB}
Packit d0f5c2
         \x{32D0}-\x{32FE}
Packit d0f5c2
         \x{3300}-\x{3376}
Packit d0f5c2
         \x{337B}-\x{33DD}
Packit d0f5c2
         \x{3400}-\x{4DB5}
Packit d0f5c2
         \x{4E00}-\x{9FA5}
Packit d0f5c2
         \x{33E0}-\x{33FE}
Packit d0f5c2
         \x{A000}-\x{A48C}
Packit d0f5c2
         \x{AC00}-\x{D7A3}
Packit d0f5c2
         \x{A490}-\x{A4C6}
Packit d0f5c2
         \x{F900}-\x{FA2D}
Packit d0f5c2
         \x{FA30}-\x{FA6A}
Packit d0f5c2
         \x{FE30}-\x{FE46}
Packit d0f5c2
         \x{FE49}-\x{FE52}
Packit d0f5c2
         \x{FE54}-\x{FE66}
Packit d0f5c2
         \x{FE68}-\x{FE6B}
Packit d0f5c2
         \x{FF01}-\x{FF60}
Packit d0f5c2
         \x{FFE0}-\x{FFE6}
Packit d0f5c2
         \x{20000}-\x{2A6D6}
Packit d0f5c2
     ]$/xo;
Packit d0f5c2
}
Packit d0f5c2
Packit d0f5c2
__END__