Blame internal/unicode

Packit Service 653591
#!/usr/bin/perl
Packit Service 653591
Packit Service 653591
# http://perlmonks.org/?node_id=1084067
Packit Service 653591
# Script by Jim
Packit Service 653591
#use v5.10;
Packit Service 653591
use strict;
Packit Service 653591
use warnings;
Packit Service 653591
use utf8;
Packit Service 653591
Packit Service 653591
use Encode qw( encode_utf8 );
Packit Service 653591
use Unicode::UCD qw( charblock );
Packit Service 653591
Packit Service 653591
binmode STDOUT, ':encoding(UTF-8)';
Packit Service 653591
Packit Service 653591
while (my $word = <DATA>) {
Packit Service 653591
    chomp $word;
Packit Service 653591
Packit Service 653591
    my $length_in_bytes       = length_in_bytes($word);
Packit Service 653591
    my $length_in_code_points = length_in_code_points($word);
Packit Service 653591
    my $length_in_graphemes   = length_in_graphemes($word);
Packit Service 653591
    my $code_points_in_blocks = code_points_in_blocks($word);
Packit Service 653591
Packit Service 653591
    printf "%-12s | Bytes: %2d | Code Points: %2d | Graphemes: %2d | Blocks: %s\n",
Packit Service 653591
        $word,
Packit Service 653591
        $length_in_bytes,
Packit Service 653591
        $length_in_code_points,
Packit Service 653591
        $length_in_graphemes,
Packit Service 653591
        $code_points_in_blocks;
Packit Service 653591
}
Packit Service 653591
Packit Service 653591
exit 0;
Packit Service 653591
Packit Service 653591
sub length_in_bytes {
Packit Service 653591
    my $word = shift;
Packit Service 653591
Packit Service 653591
    my $length = length encode_utf8($word);
Packit Service 653591
Packit Service 653591
    return $length;
Packit Service 653591
}
Packit Service 653591
Packit Service 653591
sub length_in_code_points {
Packit Service 653591
    my $word = shift;
Packit Service 653591
Packit Service 653591
    my $length = length $word;
Packit Service 653591
Packit Service 653591
    return $length;
Packit Service 653591
}
Packit Service 653591
Packit Service 653591
sub length_in_graphemes {
Packit Service 653591
    my $word = shift;
Packit Service 653591
Packit Service 653591
    my $length = () = $word =~ m/\X/g;
Packit Service 653591
Packit Service 653591
    return $length;
Packit Service 653591
}
Packit Service 653591
Packit Service 653591
sub code_points_in_blocks {
Packit Service 653591
    my $word = shift;
Packit Service 653591
Packit Service 653591
    my %total_code_points_by;
Packit Service 653591
    my $blocks = '';
Packit Service 653591
Packit Service 653591
    for my $character (split m//, $word) {
Packit Service 653591
        my $block = charblock(ord $character);
Packit Service 653591
Packit Service 653591
        $total_code_points_by{$block}++;
Packit Service 653591
    }
Packit Service 653591
Packit Service 653591
    for my $block (sort keys %total_code_points_by) {
Packit Service 653591
        my $total = $total_code_points_by{$block};
Packit Service 653591
Packit Service 653591
        $blocks .= sprintf "%s%s (%d)",
Packit Service 653591
                   (length $blocks ? ', ' : ''), $block, $total;
Packit Service 653591
    }
Packit Service 653591
Packit Service 653591
    return $blocks;
Packit Service 653591
}
Packit Service 653591
Packit Service 653591
__DATA__
Packit Service 653591
æ
Packit Service 653591
æð
Packit Service 653591
æða
Packit Service 653591
æðaber
Packit Service 653591
æðahnútur
Packit Service 653591
æðakölkun
Packit Service 653591
æðardúnn
Packit Service 653591
æðarfugl
Packit Service 653591
æðarkolla
Packit Service 653591
æðarkóngur
Packit Service 653591
æðarvarp
Packit Service 653591
æði
Packit Service 653591
æðimargur
Packit Service 653591
æðisgenginn
Packit Service 653591
æðiskast
Packit Service 653591
æðislegur
Packit Service 653591
æðrast
Packit Service 653591
æðri
Packit Service 653591
æðrulaus
Packit Service 653591
æðruleysi
Packit Service 653591
æðruorð
Packit Service 653591
æðrutónn
Packit Service 653591
æðstur
Packit Service 653591
æður
Packit Service 653591
æfa