#!./perl BEGIN { pop @INC if $INC[-1] eq '.' } use strict; use Encode; use Getopt::Std; my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt); $Opt{p} ||= $Opt{P}; $Opt{e} ||= 'utf8'; $Opt{f} ||= $Opt{e}; $Opt{t} ||= $Opt{e}; $Opt{h} and help(); my ($linebuf, $outbuf); my $CPL = $Opt{p} ? 64 : 8; my $linenum; my $linesperheading = $Opt{H}; my $nchars; our $PrevChunk; $Opt{h} and help(); $Opt{p} and do_perl($Opt{s}); do_dump($Opt{s}); exit; # sub do_perl{ my $string = shift; $Opt{P} and print "#!$^X -w\nprint\n"; unless ($string){ while(<>){ use utf8; $linebuf .= Encode::decode($Opt{f}, $_); while($linebuf){ my $chr = render_p(substr($linebuf, 0, 1, '')); length($outbuf) + length($chr) > $CPL and print_P(); $outbuf .= $chr; } } $outbuf and print print_P(";"); }else{ while($string){ my $chr = render_p(substr($string, 0, 1, '')); length($outbuf) + length($chr) > $CPL and print_P(); $outbuf .= $chr; } } $outbuf and print print_P(";"); exit; } sub render_p{ my ($chr, $format) = @_; our %S2pstr; $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n... $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable; my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ? q(\x%x) : q(\x{%x}); return sprintf $fmt, ord($chr); } sub print_P{ my $end = shift; $outbuf or return; print '"', encode($Opt{t}, $outbuf), '"'; my $tail = $Opt{P} ? $end ? "$end" : "," : ''; print $tail, "\n"; $outbuf = ''; } sub do_dump{ my $string = shift; !$Opt{p} and exists $Opt{H} and print_H(); unless ($string){ while(<>){ use utf8; $linebuf .= Encode::decode($Opt{f}, $_); while (length($linebuf) > $CPL){ my $chunk = substr($linebuf, 0, $CPL, ''); print_C($chunk, $linenum++); $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); } } $linebuf and print_C($linebuf); }else{ while ($string){ my $chunk = substr($string, 0, $CPL, ''); print_C($chunk, $linenum++); $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); } } exit; } sub print_S{ print "--------+------------------------------------------------"; if ($Opt{C}){ print "-+-----------------"; } print "\n"; } sub print_H{ print " Offset 0 1 2 3 4 5 6 7"; if ($Opt{C}){ print " | 0 1 2 3 4 5 6 7"; } print "\n"; print_S; } sub print_C{ my ($chunk, $linenum) = @_; if (!$Opt{v} and $chunk eq $PrevChunk){ printf "%08x *\n", $linenum*8; return; } $PrevChunk = $chunk; my $end = length($chunk) - 1; my (@ord, @chr); for my $i (0..$end){ use utf8; my $chr = substr($chunk,$i,1); my $ord = ord($chr); my $fmt = $ord <= 0xffff ? " %04x" : " %05x"; push @ord, (sprintf $fmt, $ord); $Opt{C} and push @chr, render_c($chr); } if (++$end < 7){ for my $i ($end..7){ push @ord, (" " x 6); } } my $line = sprintf "%08x %s", $linenum*8, join('', @ord); $Opt{C} and $line .= sprintf " | %s", join('', @chr); print encode($Opt{t}, $line), "\n"; } sub render_c{ my ($chr, $format) = @_; our (%S2str, $IsFullWidth); $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " "; $chr =~ $IsFullWidth and return $chr; # as is return " " . $chr; } sub help{ my $message = shift; use File::Basename; my $name = basename($0); $message and print STDERR "$name error: $message\n"; print STDERR <<"EOT"; Usage: $name -[options...] [files...] $name -[options...] -s "string" $name -h -h prints this message. Inherited from hexdump; -C Canonical unidump mode -v prints the duplicate line as is. Without this option, single "*" will be printed instead. For unidump only -p prints in perl literals that you can copy and paste directly to your perl script. -P prints in perl executable format! -u prints a bunch of "Uxxxx,". Handy when you want to pass your characters in mailing lists. IO Options: -e io_encoding same as "-f io_encoding -t io_encoding" -f from_encoding convert the source stream from this encoding -t to_encoding print to STDOUT in this encoding -s string "string" will be converted instead of STDIN. -H nline prints separater for each nlines of output. 0 means only the table headding be printed. EOT exit; } BEGIN{ our %S2pstr= ( "\\" => '\\\\', "\0" => '\0', "\t" => '\t', "\n" => '\n', "\r" => '\r', "\v" => '\v', "\a" => '\a', "\e" => '\e', "\"" => qq(\\\"), "\'" => qq(\\\'), '$' => '\$', "@" => '\@', "%" => '\%', ); our %S2str = ( qq(\x00) => q(\0), # NULL qq(\x01) => q(^A), # START OF HEADING qq(\x02) => q(^B), # START OF TEXT qq(\x03) => q(^C), # END OF TEXT qq(\x04) => q(^D), # END OF TRANSMISSION qq(\x05) => q(^E), # ENQUIRY qq(\x06) => q(^F), # ACKNOWLEDGE qq(\x07) => q(\a), # BELL qq(\x08) => q(^H), # BACKSPACE qq(\x09) => q(\t), # HORIZONTAL TABULATION qq(\x0A) => q(\n), # LINE FEED qq(\x0B) => q(\v), # VERTICAL TABULATION qq(\x0C) => q(^L), # FORM FEED qq(\x0D) => q(\r), # CARRIAGE RETURN qq(\x0E) => q(^N), # SHIFT OUT qq(\x0F) => q(^O), # SHIFT IN qq(\x10) => q(^P), # DATA LINK ESCAPE qq(\x11) => q(^Q), # DEVICE CONTROL ONE qq(\x12) => q(^R), # DEVICE CONTROL TWO qq(\x13) => q(^S), # DEVICE CONTROL THREE qq(\x14) => q(^T), # DEVICE CONTROL FOUR qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE qq(\x16) => q(^V), # SYNCHRONOUS IDLE qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK qq(\x18) => q(^X), # CANCEL qq(\x19) => q(^Y), # END OF MEDIUM qq(\x1A) => q(^Z), # SUBSTITUTE qq(\x1B) => q(\e), # ESCAPE (\c[) qq(\x1C) => "^\\", # FILE SEPARATOR qq(\x1D) => "^\]", # GROUP SEPARATOR qq(\x1E) => q(^^), # RECORD SEPARATOR qq(\x1F) => q(^_), # UNIT SEPARATOR ); # # Generated out of lib/unicore/EastAsianWidth.txt # will it work ? # our $IsFullWidth = qr/^[ \x{1100}-\x{1159} \x{115F}-\x{115F} \x{2329}-\x{232A} \x{2E80}-\x{2E99} \x{2E9B}-\x{2EF3} \x{2F00}-\x{2FD5} \x{2FF0}-\x{2FFB} \x{3000}-\x{303E} \x{3041}-\x{3096} \x{3099}-\x{30FF} \x{3105}-\x{312C} \x{3131}-\x{318E} \x{3190}-\x{31B7} \x{31F0}-\x{321C} \x{3220}-\x{3243} \x{3251}-\x{327B} \x{327F}-\x{32CB} \x{32D0}-\x{32FE} \x{3300}-\x{3376} \x{337B}-\x{33DD} \x{3400}-\x{4DB5} \x{4E00}-\x{9FA5} \x{33E0}-\x{33FE} \x{A000}-\x{A48C} \x{AC00}-\x{D7A3} \x{A490}-\x{A4C6} \x{F900}-\x{FA2D} \x{FA30}-\x{FA6A} \x{FE30}-\x{FE46} \x{FE49}-\x{FE52} \x{FE54}-\x{FE66} \x{FE68}-\x{FE6B} \x{FF01}-\x{FF60} \x{FFE0}-\x{FFE6} \x{20000}-\x{2A6D6} ]$/xo; } __END__