|
Packit |
244994 |
#!/usr/bin/perl
|
|
Packit |
244994 |
# convmv 2.01 - converts filenames from one encoding to another
|
|
Packit |
244994 |
# Copyright © 2003-2017 Bjoern JACKE <bjoern@j3e.de>
|
|
Packit |
244994 |
#
|
|
Packit |
244994 |
# This program comes with ABSOLUTELY NO WARRANTY; it may be copied or modified
|
|
Packit |
244994 |
# under the terms of the GNU General Public License version 2 or 3 as
|
|
Packit |
244994 |
# published by the Free Software Foundation.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# to get a man page:
|
|
Packit |
244994 |
# pod2man --section 1 --center=" " convmv | gzip > convmv.1.gz
|
|
Packit |
244994 |
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 NAME
|
|
Packit |
244994 |
|
|
Packit |
244994 |
convmv - converts filenames from one encoding to another
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 SYNOPSIS
|
|
Packit |
244994 |
|
|
Packit |
244994 |
B<convmv> [B<options>] FILE(S) ... DIRECTORY(S)
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 OPTIONS
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=over 4
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<-f ENCODING>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
specify the current encoding of the filename(s) from which should be converted
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<-t ENCODING>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
specify the encoding to which the filename(s) should be converted
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<-i>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
interactive mode (ask y/n for each action)
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<-r>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
recursively go through directories
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--nfc>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
target files will be normalization form C for UTF-8 (Linux etc.)
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--nfd>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
target files will be normalization form D for UTF-8 (OS X etc.).
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--qfrom> , B<--qto>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
be more quiet about the "from" or "to" of a rename (if it screws up your
|
|
Packit |
244994 |
terminal e.g.). This will in fact do nothing else than replace any non-ASCII
|
|
Packit |
244994 |
character (bytewise) with ? and any control character with * on printout, this
|
|
Packit |
244994 |
does not affect rename operation itself.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--exec> command
|
|
Packit |
244994 |
|
|
Packit |
244994 |
execute the given command. You have to quote the command and #1 will be
|
|
Packit |
244994 |
substituted by the old, #2 by the new filename. Using this option link
|
|
Packit |
244994 |
targets will stay untouched. Have in mind that #1 and #2 will be quoted
|
|
Packit |
244994 |
by convmv already, you must not add extra quotation marks around them.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Example:
|
|
Packit |
244994 |
|
|
Packit |
244994 |
convmv -f latin1 -t utf-8 -r --exec "echo #1 should be renamed to #2" path/to/files
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--list>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
list all available encodings. To get support for more Chinese or Japanese
|
|
Packit |
244994 |
encodings install the Perl HanExtra or JIS2K Encode packages.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--lowmem>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
keep memory footprint low by not creating a hash of all files. This disables
|
|
Packit |
244994 |
checking if symlink targets are in subtree. Symlink target pointers will be
|
|
Packit |
244994 |
converted regardless. If you convert hundreds of thousands or millions of
|
|
Packit |
244994 |
files the memory usage of convmv might grow quite high. This option would help
|
|
Packit |
244994 |
you out in that case.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--nosmart>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
by default convmv will detect if a filename is already UTF8 encoded and will
|
|
Packit |
244994 |
skip this file if conversion from some charset to UTF8 should be performed.
|
|
Packit |
244994 |
C<--nosmart> will also force conversion to UTF-8 for such files, which might
|
|
Packit |
244994 |
result in "double encoded UTF-8" (see section below).
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--fixdouble>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
using the C<--fixdouble> option convmv does only convert files which will still
|
|
Packit |
244994 |
be UTF-8 encoded after conversion. That's useful for fixing double-encoded
|
|
Packit |
244994 |
UTF-8 files. All files which are not UTF-8 or will not result in UTF-8 after
|
|
Packit |
244994 |
conversion will not be touched. Also see chapter "How to undo double UTF-8 ..."
|
|
Packit |
244994 |
below.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--notest>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Needed to actually rename the files. By default convmv will just print what it
|
|
Packit |
244994 |
wants to do.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--parsable>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
This is an advanced option that people who want to write a GUI front end will
|
|
Packit |
244994 |
find useful (some others maybe, too). It will make convmv print out what it
|
|
Packit |
244994 |
would do in an easy parsable way. The first column contains the action or some
|
|
Packit |
244994 |
kind of information, the second column mostly contains the file that is to be
|
|
Packit |
244994 |
modified and if appropriate the third column contains the modified value. Each
|
|
Packit |
244994 |
column is separated by \0\n (nullbyte newline). Each row (one action) is
|
|
Packit |
244994 |
separated by \0\0\n (nullbyte nullbyte newline).
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--no-preserve-mtimes>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
modifying filenames usually causes the parent directory's mtime being updated.
|
|
Packit |
244994 |
Since version 2 convmv by default resets the mtime to the old value. If your
|
|
Packit |
244994 |
filesystem supports sub-second resolution the sub-second part of the atime and
|
|
Packit |
244994 |
mtime will be lost as Perl does not yet support that. With this option you can
|
|
Packit |
244994 |
B<disable> the preservation of the mtimes.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--replace>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if the file to which shall be renamed already exists, it will be overwritten if
|
|
Packit |
244994 |
the other file content is equal.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--unescape>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
this option will remove these ugly % hex sequences from filenames and turn them
|
|
Packit |
244994 |
into (hopefully) nicer 8-bit characters. After --unescape you might want to do
|
|
Packit |
244994 |
a charset conversion. These sequences like %20 etc. are sometimes produced when
|
|
Packit |
244994 |
downloading via http or ftp.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--upper> , B<--lower>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
turn filenames into all upper or all lower case. When the file is not
|
|
Packit |
244994 |
ASCII-encoded, convmv expects a charset to be entered via the -f switch.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--map=>some-extra-mapping
|
|
Packit |
244994 |
|
|
Packit |
244994 |
apply some custom character mappings, currently supported are:
|
|
Packit |
244994 |
|
|
Packit |
244994 |
ntfs-sfm(-undo), ntfs-sfu(-undo) for the mapping of illegal ntfs characters for
|
|
Packit |
244994 |
Linux or Macintosh cifs clients (see MS KB 117258 also mapchars mount option of
|
|
Packit |
244994 |
mount.cifs on Linux).
|
|
Packit |
244994 |
|
|
Packit |
244994 |
ntfs-pretty(-undo) for the mapping of illegal ntfs characters to pretty
|
|
Packit |
244994 |
legal Japanese versions of them.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
See the map_get_newname() function how to easily add own mappings if needed.
|
|
Packit |
244994 |
Let me know if you think convmv is missing some useful mapping here.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--dotlessi>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
care about the dotless i/I issue. A lowercase version of "I" will also be
|
|
Packit |
244994 |
dotless while an uppercase version of "i" will also be dotted. This is an
|
|
Packit |
244994 |
issue for Turkish and Azeri.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
By the way: The superscript dot of the letter i was added in the Middle Ages to
|
|
Packit |
244994 |
distinguish the letter (in manuscripts) from adjacent vertical strokes in such
|
|
Packit |
244994 |
letters as u, m, and n. J is a variant form of i which emerged at this time and
|
|
Packit |
244994 |
subsequently became a separate letter.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--help>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
print a short summary of available options
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=item B<--dump-options>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
print a list of all available options
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=back
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 DESCRIPTION
|
|
Packit |
244994 |
|
|
Packit |
244994 |
B<convmv> is meant to help convert a single filename, a directory tree and the
|
|
Packit |
244994 |
contained files or a whole filesystem into a different encoding. It just
|
|
Packit |
244994 |
converts the filenames, not the content of the files. A special feature of
|
|
Packit |
244994 |
convmv is that it also takes care of symlinks, also converts the symlink target
|
|
Packit |
244994 |
pointer in case the symlink target is being converted, too.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
All this comes in very handy when one wants to switch over from old 8-bit
|
|
Packit |
244994 |
locales to UTF-8 locales. It is also possible to convert directories to UTF-8
|
|
Packit |
244994 |
which are already partly UTF-8 encoded. convmv is able to detect if certain
|
|
Packit |
244994 |
files are UTF-8 encoded and will skip them by default. To turn this smartness
|
|
Packit |
244994 |
off use the C<--nosmart> switch.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head2 Filesystem issues
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Almost all POSIX filesystems do not care about how filenames are encoded, here
|
|
Packit |
244994 |
are some exceptions:
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head3 HFS+ on OS X / Darwin
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Linux and (most?) other Unix-like operating systems use the so called
|
|
Packit |
244994 |
normalization form C (NFC) for its UTF-8 encoding by default but do not enforce
|
|
Packit |
244994 |
this. Darwin, the base of the Macintosh OS enforces normalization form D
|
|
Packit |
244994 |
(NFD), where a few characters are encoded in a different way. On OS X it's not
|
|
Packit |
244994 |
possible to create NFC UTF-8 filenames because this is prevented at filesystem
|
|
Packit |
244994 |
layer. On HFS+ filenames are internally stored in UTF-16 and when converted
|
|
Packit |
244994 |
back to UTF-8, for the underlying BSD system to be handable, NFD is created.
|
|
Packit |
244994 |
See http://developer.apple.com/qa/qa2001/qa1173.html for details. I think it
|
|
Packit |
244994 |
was a very bad idea and breaks many things under OS X which expect a normal
|
|
Packit |
244994 |
POSIX conforming system. Anywhere else convmv is able to convert files from NFC
|
|
Packit |
244994 |
to NFD or vice versa which makes interoperability with such systems a lot
|
|
Packit |
244994 |
easier.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head3 JFS
|
|
Packit |
244994 |
|
|
Packit |
244994 |
If people mount JFS partitions with iocharset=utf8, there is a similar problem,
|
|
Packit |
244994 |
because JFS is designed to store filenames internally in UTF-16, too; that is
|
|
Packit |
244994 |
because Linux' JFS is really JFS2, which was a rewrite of JFS for OS/2. JFS
|
|
Packit |
244994 |
partitions should always be mounted with iocharset=iso8859-1, which is also the
|
|
Packit |
244994 |
default with recent 2.6.6 kernels. If this is not done, JFS does not behave
|
|
Packit |
244994 |
like a POSIX filesystem and it might happen that certain files cannot be
|
|
Packit |
244994 |
created at all, for example filenames in ISO-8859-1 encoding. Only when
|
|
Packit |
244994 |
interoperation with OS/2 is needed iocharset should be set according to your
|
|
Packit |
244994 |
used locale charmap.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head3 NFS4
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Unlike other POSIX filesystems, RFC3530 (NFS 4) mandates UTF-8 but also says:
|
|
Packit |
244994 |
"The nfs4_cs_prep profile does not specify a normalization form. A later
|
|
Packit |
244994 |
revision of this specification may specify a particular normalization form." In
|
|
Packit |
244994 |
other words, if you want to use NFS4 you might find the conversion and
|
|
Packit |
244994 |
normalization features of convmv quite useful.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head3 FAT/VFAT and NTFS
|
|
Packit |
244994 |
|
|
Packit |
244994 |
NTFS and VFAT (for long filenames) use UTF-16 internally to store filenames.
|
|
Packit |
244994 |
You should not need to convert filenames if you mount one of those filesystems.
|
|
Packit |
244994 |
Use appropriate mount options instead!
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head2 How to undo double UTF-8 (or other) encoded filenames
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Sometimes it might happen that you "double-encoded" certain filenames, for
|
|
Packit |
244994 |
example the file names already were UTF-8 encoded and you accidentally did
|
|
Packit |
244994 |
another conversion from some charset to UTF-8. You can simply undo that by
|
|
Packit |
244994 |
converting that the other way round. The from-charset has to be UTF-8 and the
|
|
Packit |
244994 |
to-charset has to be the from-charset you previously accidentally used. If you
|
|
Packit |
244994 |
use the C<--fixdouble> option convmv will make sure that only files will be
|
|
Packit |
244994 |
processed that will still be UTF-8 encoded after conversion and it will leave
|
|
Packit |
244994 |
non-UTF-8 files untouched. You should check to get the correct results by doing
|
|
Packit |
244994 |
the conversion without C<--notest> before, also the C<--qfrom> option might be
|
|
Packit |
244994 |
helpful, because the double utf-8 file names might screw up your terminal if
|
|
Packit |
244994 |
they are being printed - they often contain control sequences which do funny
|
|
Packit |
244994 |
things with your terminal window. If you are not sure about the charset which
|
|
Packit |
244994 |
was accidentally converted from, using C<--qfrom> is a good way to fiddle out the
|
|
Packit |
244994 |
required encoding without destroying the file names finally.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head2 How to repair Samba files
|
|
Packit |
244994 |
|
|
Packit |
244994 |
When in the smb.conf (of Samba 2.x) there hasn't been set a correct "character
|
|
Packit |
244994 |
set" variable, files which are created from Win* clients are being created in
|
|
Packit |
244994 |
the client's codepage, e.g. cp850 for western european languages. As a result
|
|
Packit |
244994 |
of that the files which contain non-ASCII characters are screwed up if you "ls"
|
|
Packit |
244994 |
them on the Unix server. If you change the "character set" variable afterwards
|
|
Packit |
244994 |
to iso8859-1, newly created files are okay, but the old files are still screwed
|
|
Packit |
244994 |
up in the Windows encoding. In this case convmv can also be used to convert the
|
|
Packit |
244994 |
old Samba-shared files from cp850 to iso8859-1.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
By the way: Samba 3.x finally maps to UTF-8 filenames by default, so also when
|
|
Packit |
244994 |
you migrate from Samba 2 to Samba 3 you might have to convert your file names.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head2 Netatalk interoperability issues
|
|
Packit |
244994 |
|
|
Packit |
244994 |
When Netatalk is being switched to UTF-8 which is supported in version 2 then
|
|
Packit |
244994 |
it is NOT sufficient to rename the file names. There needs to be done more. See
|
|
Packit |
244994 |
http://netatalk.sourceforge.net/2.0/htmldocs/upgrade.html#volumes-and-filenames
|
|
Packit |
244994 |
and the uniconv utility of Netatalk for details.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 SEE ALSO
|
|
Packit |
244994 |
|
|
Packit |
244994 |
L<locale(1)> L<utf-8(7)> L<charsets(7)>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 BUGS
|
|
Packit |
244994 |
|
|
Packit |
244994 |
no bugs or fleas known
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 DONATE
|
|
Packit |
244994 |
|
|
Packit |
244994 |
You can support convmv by doing a donation, see L<https://www.j3e.de/donate.html>
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=head1 AUTHOR
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Bjoern JACKE
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Send mail to bjoern [at] j3e.de for bug reports and suggestions.
|
|
Packit |
244994 |
|
|
Packit |
244994 |
=cut
|
|
Packit |
244994 |
|
|
Packit |
244994 |
require 5.008;
|
|
Packit |
244994 |
use Getopt::Long;
|
|
Packit |
244994 |
use File::Find;
|
|
Packit |
244994 |
use File::Basename;
|
|
Packit |
244994 |
use Cwd;
|
|
Packit |
244994 |
use Encode 'from_to','encode_utf8','decode_utf8','_utf8_on','_utf8_off';
|
|
Packit |
244994 |
#use Encode 'is_utf8';
|
|
Packit |
244994 |
use Unicode::Normalize;
|
|
Packit |
244994 |
use utf8;
|
|
Packit |
244994 |
use bytes;
|
|
Packit |
244994 |
|
|
Packit |
244994 |
Getopt::Long::Configure ("bundling");
|
|
Packit |
244994 |
binmode STDOUT, ":bytes";
|
|
Packit |
244994 |
binmode STDERR, ":bytes";
|
|
Packit |
244994 |
|
|
Packit |
244994 |
my $opt_mtimes = 1; # default 1 since convmv 2.0
|
|
Packit |
244994 |
my %opts = (
|
|
Packit |
244994 |
'nfc'=>\$opt_nfc,
|
|
Packit |
244994 |
'nfd'=>\$opt_nfd,
|
|
Packit |
244994 |
'f=s'=>\$opt_f,
|
|
Packit |
244994 |
't=s'=>\$opt_t,
|
|
Packit |
244994 |
'r'=>\$opt_r,
|
|
Packit |
244994 |
'i'=>\$opt_i,
|
|
Packit |
244994 |
'list'=>\$opt_list,
|
|
Packit |
244994 |
'help'=>\$opt_help,
|
|
Packit |
244994 |
'notest'=>\$opt_notest,
|
|
Packit |
244994 |
'qfrom'=>\$opt_qfrom,
|
|
Packit |
244994 |
'qto'=>\$opt_qto,
|
|
Packit |
244994 |
'replace'=>\$opt_replace,
|
|
Packit |
244994 |
'nosmart'=>\$opt_nosmart,
|
|
Packit |
244994 |
'lowmem'=>\$opt_lowmem,
|
|
Packit |
244994 |
'exec=s'=>\$opt_exec,
|
|
Packit |
244994 |
'unescape'=>\$opt_unescape,
|
|
Packit |
244994 |
'upper'=>\$opt_upper,
|
|
Packit |
244994 |
'lower'=>\$opt_lower,
|
|
Packit |
244994 |
'dotlessi'=>\$opt_dotlessi,
|
|
Packit |
244994 |
'parsable'=>\$opt_parsable,
|
|
Packit |
244994 |
'fixdouble'=>\$opt_fixdouble,
|
|
Packit |
244994 |
'preserve-mtimes!'=>\$opt_mtimes,
|
|
Packit |
244994 |
'dump-options'=>\$opt_dumpoptions,
|
|
Packit |
244994 |
'undo-script=s'=>\$opt_undo_script,
|
|
Packit |
244994 |
'map=s'=>\$opt_map,
|
|
Packit |
244994 |
);
|
|
Packit |
244994 |
GetOptions %opts or exit 1;
|
|
Packit |
244994 |
use File::Compare;
|
|
Packit |
244994 |
$errors_occurred=0;
|
|
Packit |
244994 |
$warnings_occurred=0;
|
|
Packit |
244994 |
$ops=0;
|
|
Packit |
244994 |
$mytime = time();
|
|
Packit |
244994 |
$maxfilenamelength=255;
|
|
Packit |
244994 |
# $maxpathlength=4096; # this might be used somehow, somewhere?
|
|
Packit |
244994 |
|
|
Packit |
244994 |
%dir_time_hash=();
|
|
Packit |
244994 |
my $this_is_valid_utf8;
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# delimiter and final delimiter for parsable mode:
|
|
Packit |
244994 |
$del = "\0\n";
|
|
Packit |
244994 |
$fin_del = "\0\0\n";
|
|
Packit |
244994 |
|
|
Packit |
244994 |
&listvalidencodings and exit 0 if ($opt_list);
|
|
Packit |
244994 |
&dumpoptions and exit 0 if ($opt_dumpoptions);
|
|
Packit |
244994 |
&printusage and exit 1 if (!@ARGV or $opt_help);
|
|
Packit |
244994 |
|
|
Packit |
244994 |
&check_for_broken_perl_release();
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_parsable) {
|
|
Packit |
244994 |
if ($opt_notest or $opt_exec or $opt_i) {
|
|
Packit |
244994 |
die "--parsable mode cannot be used with --notest, --exec or -i\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_replace and $opt_undo_script) {
|
|
Packit |
244994 |
die "--replace and --undo-script can't work together!\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_unescape) {
|
|
Packit |
244994 |
die "No charset conversion when unescaping!\n" if ($opt_f or $opt_t);
|
|
Packit |
244994 |
$checkenc=\&unescape_checkenc;
|
|
Packit |
244994 |
$get_newname=\&unescape_get_newname;
|
|
Packit |
244994 |
} elsif ($opt_upper or $opt_lower) {
|
|
Packit |
244994 |
die "No charset conversion when uppering/lowering!\n" if ($opt_t);
|
|
Packit |
244994 |
die "Not possible to --upper and --lower at the same time!\n" if ($opt_upper and $opt_lower);
|
|
Packit |
244994 |
$checkenc=\&upperlower_checkenc;
|
|
Packit |
244994 |
$get_newname=\&upperlower_get_newname;
|
|
Packit |
244994 |
$opt_f="ascii" unless ($opt_f);
|
|
Packit |
244994 |
} elsif ($opt_map) {
|
|
Packit |
244994 |
if ($opt_t or $opt_f or $opt_upper or $opt_lower or $opt_unescape) {
|
|
Packit |
244994 |
die "--map parameter not allowed with other character conversion parameters\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
$checkenc=\&dummy;
|
|
Packit |
244994 |
$get_newname=\&map_get_newname;
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
if (not ($opt_f and $opt_f=Encode::resolve_alias($opt_f))) {
|
|
Packit |
244994 |
die "wrong/unknown \"from\" encoding!\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
if (not ($opt_t and $opt_t=Encode::resolve_alias($opt_t))) {
|
|
Packit |
244994 |
die "wrong/unknown \"to\" encoding!\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
if ($opt_fixdouble) {
|
|
Packit |
244994 |
$checkenc=\&fixdouble_checkenc;
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
$checkenc=\&char_checkenc;
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
$get_newname=\&char_get_newname;
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
$to_is_utf8 = lc($opt_t) =~ m/^utf-?8/;
|
|
Packit |
244994 |
$from_is_utf8 = lc($opt_f) =~ m/^utf-?8/;
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_qfrom) {
|
|
Packit |
244994 |
$from_print=\&to_ascii;
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
$from_print=\&dummy;
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_qto) {
|
|
Packit |
244994 |
$to_print=\&to_ascii;
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
$to_print=\&dummy;
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Pick the Unicode normalization routine that char_get_newname applies to
# UTF-8 target names: NFC for --nfc, NFD for --nfd, otherwise the no-op
# "dummy" routine. Both normalization forms only make sense for a UTF-8
# target, so any other target charset is rejected here.
if ($opt_nfc) {
    # NFC is exported by Unicode::Normalize
    $norm=\&NFC;
    die "NFC requires UTF-8 as target charset\n" unless ($to_is_utf8);
} elsif ($opt_nfd) {
    # NFD is exported by Unicode::Normalize
    $norm=\&NFD;
    die "NFD requires UTF-8 as target charset\n" unless ($to_is_utf8);
} else {
    $norm=\&dummy;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
if ($opt_fixdouble) {
|
|
Packit |
244994 |
die "--fixdouble requires UTF-8 as source and non-UTF-8 as target charset\n" unless ($from_is_utf8 and $opt_t and not $to_is_utf8);
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
$opt_lowmem=1 if ($opt_exec);
|
|
Packit |
244994 |
|
|
Packit |
244994 |
$pwd=cwd();
|
|
Packit |
244994 |
@args=@ARGV;
|
|
Packit |
244994 |
undef @ARGV;
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Clean up every path given on the command line and make sure it exists.
# The paths are modified in place (the loop variable aliases @args).
for (@args) {
    # Remove "/./" components. A single global substitution cannot match
    # overlapping occurrences ("a/././b"), so repeat until none is left.
    1 while s{/\./}{/};
    # Collapse runs of slashes into a single "/".
    s{//+}{/}g;
    # Dangling symlinks fail -e but are still valid arguments, hence -l.
    die "file or directory not found: $_\n" unless (-e or -l);
}
|
|
Packit |
244994 |
if ($opt_parsable) {
|
|
Packit |
244994 |
$outerr=NUL;
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
$outerr=STDERR;
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
# Create the undo script requested with --undo-script and write its header.
# The script is Perl code; later rename/symlink operations append their
# inverse operations to the UNDOLOG handle opened here.
if ($opt_undo_script) {
    # never overwrite an existing file
    die "undo-script file already exists, exiting.\n" if (-e $opt_undo_script);
    open(UNDOLOG, ">", $opt_undo_script) or die "couldn't open undo-script for writing. Aborting.\n";
    print UNDOLOG "# this is a perl undo script generated by convmv.\n",
        "# Please check if this looks reasonable before running!\n";
    print UNDOLOG "# Example: perl $opt_undo_script\n";
    # The working directory must be emitted as a quoted Perl string, an
    # unquoted path is a syntax error in the generated script. Single
    # quotes avoid interpolation of "$" and "@"; backslashes and single
    # quotes inside the path are escaped.
    (my $quoted_pwd = $pwd) =~ s/([\\'])/\\$1/g;
    print UNDOLOG "chdir '$quoted_pwd';\n";
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
## do {print ord($_)."_" for (split(//,$_));print "\n"; } for (@args); # debug print
|
|
Packit |
244994 |
|
|
Packit |
244994 |
print $outerr "Starting a dry run without changes...\n" unless ($opt_notest);
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Choose the directory walker: File::Find's find() for recursive mode (-r),
# otherwise the local find0depth() which only visits the given arguments.
if ($opt_r) {
    $myfind=\&find;
} else {
    $myfind=\&find0depth;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
&$myfind({wanted=>\&scan,bydepth=>1,no_chdir=>1}, @args);
|
|
Packit |
244994 |
if (not $errors_occurred and $warnings_occurred) {
|
|
Packit |
244994 |
$errors_occurred=1 if (not &print_ask ("WARNINGS occurred. Do you really want to continue?",1));
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
die "To prevent damage to your files, we won't continue.\nFirst fix this or correct options!\n" if ($errors_occurred);
|
|
Packit |
244994 |
unless ($opt_exec) {
|
|
Packit |
244994 |
&$myfind({wanted=>\&process_symlink_targets,bydepth=>1,no_chdir=>1}, @args);
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
&$myfind({wanted=>\&process_main,bydepth=>1,no_chdir=>1}, @args);
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# check for unintentionally left files
|
|
Packit |
244994 |
#for (keys %dir_time_hash) {
|
|
Packit |
244994 |
# print $outerr "error: left in %dir_time_hash: $_\n";
|
|
Packit |
244994 |
#}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
$mytime = time() - $mytime;
|
|
Packit |
244994 |
if ($opt_notest) {
|
|
Packit |
244994 |
print $outerr "Ready! I converted $ops files in $mytime seconds.\n",
|
|
Packit |
244994 |
} else {
|
|
Packit |
244994 |
print $outerr "No changes to your files done. Would have converted $ops files in $mytime seconds.\nUse --notest to finally rename the files.\n";
|
|
Packit |
244994 |
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
#####
|
|
Packit |
244994 |
## subs
|
|
Packit |
244994 |
###
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# find-like function but without any depth search for not recursive mode:
|
|
Packit |
244994 |
sub find0depth {
    # Non-recursive stand-in for File::Find::find().
    #
    # Arguments: a hash reference holding a 'wanted' code reference,
    # followed by the list of paths to visit. Other keys of the hash
    # (bydepth, no_chdir, ...) are ignored.
    #
    # For every path the 'wanted' callback is invoked with the path as its
    # argument, with $_ set to the path and, like File::Find does in
    # no_chdir mode, with $File::Find::name set to the same value, because
    # the callbacks in this file read $File::Find::name.
    #
    # No prototype is declared: the sub takes arguments and is only ever
    # invoked through a code reference.
    my $opts = shift;
    my $wanted = $$opts{'wanted'};
    for (@_) {
        local $File::Find::name = $_;
        $wanted->($_);
    }
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# scan for real files and check charset first:
|
|
Packit |
244994 |
sub scan {
    # First pass over every path: validate the encoding of the name via
    # the selected $checkenc routine and, unless --lowmem is active,
    # remember inode -> full name for regular files and directories.
    # Sets $errors_occurred / $warnings_occurred; changes nothing on disk.
    $arg = $_;
    &get_dir_base_change;

    # classify once; symlinks take precedence over what they point to
    my $kind = (-l $arg) ? 'link'
             : (-d $arg) ? 'dir'
             : (-f $arg) ? 'file'
             :             '';

    if ($kind) {
        if ($kind ne 'link' and !$opt_lowmem) {
            $inod_fullname{(stat $arg)[1]} = $dir."/".$arg;
        }

        $errors_occurred = 1 unless defined($checkenc->($arg));

        # in recursive mode warn about directories that are neither
        # searchable nor readable
        if ($kind eq 'dir' and $opt_r and !(-x $arg) and !(-r $arg)) {
            print $outerr "WARNING: cannot traverse ",&$from_print($dir."/".$arg),"\n";
            $warnings_occurred = 1;
        }
    }

    # get_dir_base_change moved us into the parent directory; go back
    chdir $pwd;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# move symlink targets:
|
|
Packit |
244994 |
sub process_symlink_targets {
    # Second pass: for every symlink whose target is an existing file or
    # directory and whose target name would change under the selected
    # conversion, re-create the link so that it points to the new name.
    #
    # The target is only rewritten when it was seen during the scan pass
    # (its inode is in %inod_fullname) or when --lowmem disabled that
    # bookkeeping; otherwise a "not in subtree" note is printed.
    #
    # With --notest the link is replaced and the inverse operations are
    # written to UNDOLOG; with --parsable the planned operations are
    # printed in the parsable format. Every handled link increments $ops.
    $arg=$_;
    &get_dir_base_change;
    if (-l $arg) {
        $oldlink=readlink $arg;
        if ((-f $oldlink or -d $oldlink) and $newname=&$get_newname($oldlink)) {
            if ( $newname ne $oldlink ) {
                if ( $inod_fullname{(stat $oldlink)[1]} or $opt_lowmem) { # = if (symlink target scanned before)
                    print $outerr "symlink \"".&$from_print($File::Find::name)."\": \"";
                    print $outerr "".&$from_print($oldlink)."\" >> \"";
                    unless (&print_ask (&$to_print($newname)."\"",$opt_i)) {
                        # Declined: we are still inside the link's parent
                        # directory, so go back before leaving. Otherwise
                        # later relative paths would be resolved from the
                        # wrong directory.
                        chdir $pwd;
                        return;
                    }
                    &save_parent_mtime($dir) if ($opt_mtimes);
                    if ($opt_notest) {
                        # report failures the same way the rename code does
                        unlink $arg or print $outerr "Error: $!\n";
                        symlink ($newname, $arg) or print $outerr "Error: $!\n";
                        print UNDOLOG "unlink \"".$File::Find::name."\";\n";
                        print UNDOLOG "symlink (\"$oldlink\", \"".$File::Find::name."\");\n";
                    } elsif ($opt_parsable) {
                        print "unlink".$del.$File::Find::name.$fin_del;
                        print "symlink".$del.$newname.$del.$File::Find::name.$fin_del;
                    }
                    $ops++;
                } else {
                    print $outerr "link target \"",&$from_print($oldlink),"\" of \"",&$from_print($dir."/".$arg),"\" not in subtree, left untouched!\n";
                }
            }
        }
    }
    chdir $pwd;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# do the changes to all the real files/dirs/links:
|
|
Packit |
244994 |
sub process_main {
    # Final pass: rename every symlink, directory and regular file whose
    # converted name differs from the current one. For directories the
    # saved timestamps are restored afterwards when mtime preservation is
    # enabled.
    $arg=$_;
    &get_dir_base_change;

    # Same test order as a link/dir/file cascade: a symlink is never
    # treated as a directory, even when it points to one.
    my $is_dir = 0;
    if (-l $arg or ($is_dir = -d $arg) or -f $arg) {
        $newname = $get_newname->($arg);
        &renameit($arg,$newname) if ($newname and $newname ne $arg);
        &restore_times_if_any($dir,$arg,$newname) if ($is_dir and $opt_mtimes);
    }

    # leave the parent directory entered by get_dir_base_change
    chdir $pwd;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
sub char_get_newname {
    # Compute the name $oldfile gets under the -f/-t charset conversion.
    #
    # Returns $oldfile unchanged when the file is skipped:
    #   * smart mode: the target is UTF-8, the source is not, and the name
    #     already looks like UTF-8;
    #   * --fixdouble: the name does not look like UTF-8 before or after
    #     the conversion.
    # Otherwise returns the converted name; for a UTF-8 target this is
    # the result of the selected normalization routine applied to the
    # decoded string.
    # Dies when from_to() cannot convert the name (it does not return
    # undef in that case).
    my $oldfile=shift;
    my $newname;
    my $lets_die = 0;

    if (!$from_is_utf8 and $to_is_utf8 and !$opt_nosmart and &looks_like_utf8($oldfile)) {
        if ($opt_parsable) {
            # the keyword is plain ASCII "infomsg" so that front ends
            # parsing this output can match it
            print "infomsg".$del."skipalreadyutf8".$del.$dir."/".$oldfile.$fin_del;
        } else {
            print $outerr "Skipping, already UTF-8: ",&$from_print($dir."/".$oldfile),"\n";
        }
        return $oldfile;
    }

    if ($opt_fixdouble and not looks_like_utf8($oldfile)) {
        # this is legacy encoding which we ignore in fixdouble mode
        return $oldfile;
    }

    if ($from_is_utf8 and ! $to_is_utf8) {
        # from_to can't convert from NFD to non-UTF-8!
        $newname=encode_utf8(NFC(decode_utf8($oldfile)));
    } else {
        $newname=$oldfile;
    }

    # from_to converts $newname in place and returns a false value on failure
    from_to($newname, $opt_f, $opt_t, Encode::FB_QUIET) or $lets_die = 1;

    # In fixdouble mode a result that is not UTF-8 means the file was not
    # double encoded; leave it alone. This check comes before the error
    # check on purpose, so such files do not abort the run.
    if ($opt_fixdouble and not looks_like_utf8($newname)) {
        return $oldfile;
    }

    if ($lets_die) {
        die "SHOULD NOT HAPPEN HERE: conversion error, no suitable charset used?: \"$oldfile\"\nTo prevent damage to your files, we won't continue. First fix this!\n";
    }

    $newname=&$norm(decode_utf8($newname)) if ($to_is_utf8);
    return $newname;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
sub get_dir_base_change() {
    # Split the global $arg into parent directory ($dir) and last path
    # component ($arg), then change into that parent directory.
    # Returns the result of chdir.

    # trailing slashes would confuse the split
    $arg =~ s/\/*$//;

    # both parts are computed from the same, not yet modified path
    ($dir, $arg) = (dirname($arg), basename($arg));

    chdir $dir;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Rename (or, with --exec, run a command for) one file in the current
# working directory.
#
# Arguments:
#   $oldfile - current name of the file
#   $newname - desired name; it is UTF-8 encoded first when $to_is_utf8
#
# Depending on the global options the action is only printed (test mode),
# printed as a parsable todo list ($opt_parsable), or really carried out
# ($opt_notest). Real renames are also written to UNDOLOG. Returns early
# (without counting an operation) when the user declines in interactive
# mode; otherwise $ops is incremented.
sub renameit() {
    my $oldfile = shift;
    my $newname = shift;
    my $cmd;
    my $ci_old_new_same_inode = 0;
    $newname = encode_utf8($newname) if ($to_is_utf8);
    if ($opt_exec) {
        $cmd = $opt_exec;
        $cmd =~ s/\#2/\000f8d9hqoäd\#2/g; # make the #2 unique so that file names may contain "#2"
        $cmd =~ s/\#1/\Q$oldfile\E/g;
        $cmd =~ s/\000f8d9hqoäd\#2/\Q$newname\E/g;
        print "$cmd\n";
    } else {
        #print is_utf8($oldfile) ? 1 : 0;
        #print is_utf8($newname) ? 1 : 0;
        &print_ask ("mv \"". &$from_print($dir."/".$oldfile)."\"\t\"".&$from_print($dir)."/".&$to_print($newname)."\"",$opt_i) or return;
    }
    &save_parent_mtime($dir) if ($opt_mtimes);
    # the following is to handle case-insensitive filesystems:
    # if old and new name already resolve to the same inode, a direct
    # rename would be a no-op, so go through a temporary name first.
    if ($opt_lower or $opt_upper) {
        if ((stat $oldfile)[1] == (stat $newname)[1]) {
            $ci_old_new_same_inode = 1;
            #print $outerr "found case-insensitive filesystem...\n";
            if ($opt_notest) {
                my $try;
                for (1 .. 10000) {
                    $try = "convmvtmp".$_;
                    if (! -e $try) {
                        #print $outerr "(via temp rename to \"$try\")\n";
                        rename ($oldfile, $try);
                        # NOTE(review): $opt is not used anywhere else in
                        # view; presumably $ops was meant - confirm.
                        $opt++;
                        print UNDOLOG "rename (\"$dir/$try\", \"$dir/$oldfile\");\n";
                        $oldfile=$try;
                        last;
                    }
                }
            }
        }
    }
    if (-e $newname and !$opt_exec and !$ci_old_new_same_inode) {
        # target exists: only replace it when asked to and when compare()
        # reports the two files as equal (returns 0)
        if ($opt_replace and !&compare($oldfile,$newname)) {
            if ($opt_notest) {
                unlink $newname or print $outerr "Error: $!\n";
                rename ($oldfile, $newname) or print $outerr "Error: $!\n";
            } elsif ($opt_parsable) {
                # The todo list must mirror the real action above: the
                # existing *target* is removed, then the source is renamed.
                print "unlink".$del.$dir."/".$newname.$fin_del;
                print "rename".$del.$dir."/".$oldfile.$del.$dir."/".$newname.$fin_del;
            }
        } else {
            if ($opt_parsable) {
                print "errormsg".$del."fileexists".$del.$newname.$fin_del;
            } else {
                print $outerr "".&$to_print($newname)," exists and differs or --replace option missing - skipped\n";
            }
        }
    } else {
        if ($opt_notest) {
            if ($opt_exec) {
                system($cmd);
            } else {
                rename ($oldfile, $newname) or print $outerr "Error: $!\n";
                print UNDOLOG "rename (\"$dir/$newname\", \"$dir/$oldfile\");\n";
            }
        } elsif ($opt_parsable) {
            print "rename".$del.$dir."/".$oldfile.$del.$dir."/".$newname.$fin_del;
        }
    }
    $ops++;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Remember the timestamps of the current working directory under the key
# derived from the given directory name, unless that key is already known.
#
# The key is the argument with a leading "./" and a trailing "./" removed.
# The stored value is elements 8..10 of stat(".") (atime, mtime, ctime).
sub save_parent_mtime() {
    my ($parent) = @_;
    $parent =~ s/^\.\///;
    $parent =~ s/\.\/$//;
    # an early return for "." was tried here once and found to be broken
    return if (exists $dir_time_hash{$parent});
    #print $outerr "Putting \"$parent\" in %dir_time_hash\n"; # debug print
    my @times = (stat("."))[8..10];
    @{$dir_time_hash{$parent}} = @times;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Restore the saved timestamps of a directory after it has been processed.
#
# Arguments:
#   $dir - directory the entry lives in ("." means no prefix)
#   $old - name under which the timestamps were saved
#   $new - name the entry has now; utime() is applied to this name
#
# If timestamps were saved for "$dir/$old" they are applied ($opt_notest),
# or printed as a parsable "utime" record ($opt_parsable), and the saved
# entry is removed from %dir_time_hash.
sub restore_times_if_any() {
    my $dir = shift;
    my $old = shift;
    my $new = shift;
    my $key = ($dir eq ".") ? $old : $dir."/".$old;
    # print $outerr "Trying to delete \"$key\" now \"$new\" from %dir_time_hash\n"; # debug print
    if (exists $dir_time_hash{$key}) {
        my ($atime, $mtime, $ctime) = @{$dir_time_hash{$key}}[0..2];
        if ($opt_notest) {
            utime $atime, $mtime, $new or print $outerr "Could not run utime() on $new: $!\n";
            # Emit a complete Perl statement, like the "rename (...)" undo
            # lines do: quoted filename, escaped newline inside the message,
            # and a terminating ";\n" so the next undo line is not glued on.
            print UNDOLOG "utime ".$atime.", ".$mtime.", \"".$new."\" or print \"Could not run utime() on $new: \$!\\n\";\n";
        } elsif ($opt_parsable) {
            print "utime".$del.$key.$del.$atime.$del.$mtime.$del.$ctime.$fin_del;
        }
        delete $dir_time_hash{$key};
        #print $outerr "done\n"; # debug print
    }
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Print every encoding name known to Encode, one per line. Always returns 1.
sub listvalidencodings() {
    foreach my $encoding (Encode->encodings(":all")) {
        print "$encoding\n";
    }
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Print all option names found as keys of %opts, one per line, with the
# "=..." argument specifier removed. Names longer than one character get a
# "--" prefix, single-character names a "-" prefix. Always returns 1.
sub dumpoptions() {
    foreach my $spec (keys %opts) {
        (my $name = $spec) =~ s/=.*//;
        my $dashes = (length($name) > 1) ? "--" : "-";
        print $dashes.$name."\n";
    }
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Encoding check used in fixdouble mode: performs no validation and
# accepts every name by returning 1.
sub fixdouble_checkenc() {
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Check whether a filename can be converted from $opt_f to $opt_t.
#
# Argument: the current filename (bytes as found on disk).
# Returns 1 if the name is validly encoded in the source encoding, can be
# represented in the target encoding and does not exceed
# $maxfilenamelength; returns undef (after reporting the reason) otherwise.
# A POSIX sanity warning may be issued for the resulting name.
sub char_checkenc() {
    my ($oldfile) = @_;
    my $new = $oldfile;

    if ($from_is_utf8) {
        unless ($this_is_valid_utf8->($new)) {
            if ($opt_parsable) {
                print "errormsg".$del."filenotutf8".$del.$oldfile.$fin_del;
            } else {
                print $outerr "this file was not validly encoded in UTF-8: \"". $from_print->($dir."/".$oldfile) ."\"\n";
            }
            return undef;
        }
    } else {
        # e.g. from_enc is shift_jis but the string already is UTF-8: such a
        # name is to be "smart-skipped" when converting to UTF-8 and must
        # not raise an error here.
        my $smart_skip = ($to_is_utf8 and !$opt_nosmart and &looks_like_utf8($oldfile));
        if (!$smart_skip and !from_to($new, $opt_f, "utf8", Encode::FB_QUIET)) {
            if ($opt_parsable) {
                print "errormsg".$del."fileencodedinvalid".$del.$dir."/".$oldfile.$fin_del;
            } else {
                print $outerr "this file was not validly encoded in $opt_f: \"". $from_print->($dir."/".$oldfile) ."\"\n";
            }
            return undef;
        }
    }

    # $new is utf-8 now and $oldfile's encoding was valid ...
    my $filenamelength;
    if ($to_is_utf8) {
        $new = $norm->(decode_utf8($new));
        $filenamelength = length($new);
    } else {
        # compose first; from_to is not relied on to convert decomposed
        # UTF-8 into a non-UTF-8 charset
        $new = encode_utf8(NFC(decode_utf8($new)));
        $filenamelength = from_to($new, "utf8", $opt_t, Encode::FB_QUIET);
    }
    ## print "$oldfile|$utf8oldfile|$new|$filenamelength\n";

    unless ($filenamelength) {
        if ($opt_parsable) {
            print "errormsg".$del."charsetdoesntcoverneededcharacters".$del.$dir."/".$oldfile.$fin_del;
        } else {
            print $outerr "$opt_t doesn't cover all needed characters for: \"". $from_print->($dir."/".$oldfile) ."\"\n";
        }
        return undef;
    }
    if ($filenamelength > $maxfilenamelength) {
        print $outerr "".$from_print->($dir."/".$oldfile).": resulting filename is $filenamelength bytes long (max: $maxfilenamelength)\n";
        return undef;
    }

    &posix_check($new);
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Print the short usage summary to standard output.
#
# check_for_perl_bugs is run first, so any known Perl Encode problems it
# detects are reported ahead of the usage text.
sub printusage {
    &check_for_perl_bugs;
    # Interpolating here-document (hence the escaped "\@" below); it ends at
    # the line consisting solely of END.
    print <<END;
convmv 2.01 - converts filenames from one encoding to another
Copyright (C) 2003-2017 Bjoern JACKE <bjoern\@j3e.de>

This program comes with ABSOLUTELY NO WARRANTY; it may be copied or modified
under the terms of the GNU General Public License version 2 or 3 as published
by the Free Software Foundation.

USAGE: convmv [options] FILE(S)
-f enc encoding *from* which should be converted
-t enc encoding *to* which should be converted
-r recursively go through directories
-i interactive mode (ask for each action)
--nfc target files will be normalization form C for UTF-8 (Linux etc.)
--nfd target files will be normalization form D for UTF-8 (OS X etc.)
--qfrom be quiet about the "from" of a rename (if it screws up your terminal e.g.)
--qto be quiet about the "to" of a rename (if it screws up your terminal e.g.)
--exec c execute command instead of rename (use #1 and #2 and see man page)
--list list all available encodings
--lowmem keep memory footprint low (see man page)
--map m apply an additional character mapping
--nosmart ignore if files already seem to be UTF-8 and convert if posible
--notest actually do rename the files
--replace will replace files if they are equal
--unescape convert%20ugly%20escape%20sequences
--upper turn to upper case
--lower turn to lower case
--parsable write a parsable todo list (see man page)
--help print this help
END
    #--dotlessi care about the dotless i issue of certain locales (use with care)
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Heuristic: does this byte string look like UTF-8 text?
#
# Returns 1 only if the string contains at least one non-ASCII character
# and passes the validator referenced by $this_is_valid_utf8; returns undef
# otherwise (pure ASCII therefore does NOT count as "looks like UTF-8").
sub looks_like_utf8() {
    my ($candidate) = @_;
    return undef unless ($candidate =~ m/[^[:ascii:]]/);
    return undef unless ($this_is_valid_utf8->($candidate));
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# UTF-8 validator based on the return value of decode_utf8():
# returns 1 if decode_utf8() yields a defined value for the string,
# undef otherwise.
sub this_is_valid_utf8_decode {
    my ($octets) = @_;
    return defined(decode_utf8($octets)) ? 1 : undef;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# UTF-8 validator based on decode_utf8() with Encode::FB_CROAK:
# returns 1 if decoding does not raise an error, undef if it does.
#
# History of the approaches tried before this one:
#  - plain decode_utf8() and testing for undef was used until 1.08, but
#    see perl bug #37757 (perl 5.8.7/8);
#  - utf8::decode() is experimental and might disappear, says utf8(3pm);
#  - Encode::decode(..., Encode::FB_QUIET) does not work as one might
#    expect;
#  - from_to(utf8 -> utf8) stopped working with Perl 5.10, see perl bug
#    #49830 (convmv 1.10 with Perl 5.10 only works with --nosmart);
#  - decoding with FB_CROAK inside eval is what perluniintro suggests.
sub this_is_valid_utf8_decode_CROAK() {
    # the lexical must be called $string: the eval'ed code refers to it
    my ($string) = @_;
    eval 'decode_utf8($string, Encode::FB_CROAK);';
    return $@ ? undef : 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Make a string safe for printing on any terminal: every non-ASCII
# character becomes "?", then every control character becomes "*".
# Returns the sanitized copy; the argument itself is not modified.
sub to_ascii() {
    my ($text) = @_;
    for ($text) {
        s/[^[:ascii:]]/?/g;
        s/[[:cntrl:]]/*/g;
    }
    return $text;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Identity function: returns its first argument unchanged.
sub dummy() {
    my ($value) = @_;
    return $value;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Print a message and, if requested, ask the user for confirmation.
#
# Takes 2 arguments: the string to print and an "ask or not" flag.
# In parsable mode nothing is printed and 1 is returned right away.
# When asking, " (y/n) " is repeated until a line consisting of "y" or "n"
# (case-insensitive) is read from STDIN.
#
# Returns undef if the answer was "n", 1 otherwise (including when no
# question was asked).
sub print_ask() { # takes 2 arguments, string and askornot
    if ($opt_parsable) {
        return 1;
    }
    my $answer = "";
    print shift;
    my $ask = shift;
    while ($ask and not $answer =~ m/^[yn]$/i) {
        print " (y/n) ";
        $answer = <STDIN>;
        # At end of input readline returns undef forever; without this the
        # loop would never terminate. Declining is the safe default.
        $answer = "n" unless defined $answer;
    }
    print "\n";
    if ($answer =~ m/^n$/i) {
        return undef;
    } else {
        return 1;
    }
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Check whether a name qualifies for --unescape processing.
#
# Only pure-ASCII names are accepted (should we be more strict ?). For an
# accepted name the unescaped result is run through posix_check and 1 is
# returned; otherwise the problem is reported and undef is returned.
sub unescape_checkenc() {
    my ($name) = @_;
    unless ($name =~ m/^[[:ascii:]]*$/) {
        if ($opt_parsable) {
            print "errormsg".$del."notanescapedfile".$del.$name.$fin_del;
        } else {
            print $outerr "\"",&$from_print($name),"\" not ASCII - this does not seem to be an escaped filename.\n";
        }
        return undef;
    }
    &posix_check(&unescape_get_newname($name));
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Apply the character mapping selected by --map ($opt_map) to a filename.
#
# Argument: the filename. "." and ".." are returned untouched.
# Returns the mapped name; dies if $opt_map names an unsupported mapping.
#
# NOTE(review): the name is assigned to the global $_ (not localized), so
# the caller's $_ is overwritten - confirm that no caller depends on $_.
sub map_get_newname() {
    $_ = shift;
    return $_ if ($_ eq "." or $_ eq "..");
    _utf8_on($_); # this is needed for tr/multibyte/non-multibyte/ to work! Otherwise we would
                  # have to make a s/// for each character, grrr...
    if ($opt_map eq "ntfs-sfm") { # see MS KB 117258 (but map : instead of /)
        tr/\x01-\x1f\"\*\:\<\>\?\\\|/\x{f001}-\x{f027}/;
        s/ $/\x{f028}/; # Space, only if occurring as the last character of the name
        s/\.$/\x{f029}/; # period, only if occurring as the last character of the name
    } elsif ($opt_map eq "ntfs-sfm-undo") {
        tr/\x{f001}-\x{f027}/\x01-\x1f"*:<>?\\| /;
        s/\x{f028}$/ /; # Space, only if occurring as the last character of the name
        s/\x{f029}$/./; # period, only if occurring as the last character of the name
    } elsif ($opt_map eq "ntfs-sfu") { # +0xF000, see MS KB ???? anyone knows a link or has archived an old one?
        tr/\x01-\x1f\"\*\/\<\>\?\\\|/\x{f001}-\x{f01f}\x{f022}\x{f02a}\x{f02f}\x{f03c}\x{f03e}\x{f03f}\x{f05c}\x{f07c}/;
        #??? s/ $/space/; # Space, only if occurring as the last character of the name
        #??? s/\.$/period/; # period, only if occurring as the last character of the name
    } elsif ($opt_map eq "ntfs-sfu-undo") {
        tr/\x{f001}-\x{f01f}\x{f022}\x{f02a}\x{f02f}\x{f03c}\x{f03e}\x{f03f}\x{f05c}\x{f07c}/\x01-\x1f"*\/<>?\\|/;
        #??? s/space$/ /; # Space, only if occurring as the last character of the name
        #??? s/period$/./; # period, only if occurring as the last character of the name
    } elsif ($opt_map eq "ntfs-pretty") {
        # The fullwidth replacements are written as \x{...} escapes so they
        # cannot be folded to their ASCII look-alikes by text tooling (which
        # would turn the mapping into a no-op or break the regex).
        # NOTE(review): the asterisk glyph below appears to be U+2217, not
        # the U+2731 named in its comment - confirm which one is intended.
        s/\"/”/g; # U+201D
        s/\*/∗/g; # U+2731
        s/\?/\x{ff1f}/g; # U+FF1F
        s/\:/꞉/g; # U+A789
        s/\</\x{ff1c}/g; # U+FF1C
        s/\>/\x{ff1e}/g; # U+FF1E
        s/\|/❘/g; # U+2758
        s/\\/\x{ff3c}/g; # U+FF3C
    } elsif ($opt_map eq "ntfs-pretty-undo") {
        s/”/"/g; # U+201D
        s/∗/*/g; # U+2731
        s/\x{ff1f}/?/g; # U+FF1F
        s/꞉/:/g; # U+A789
        s/\x{ff1c}/</g; # U+FF1C
        s/\x{ff1e}/>/g; # U+FF1E
        s/❘/|/g; # U+2758
        s/\x{ff3c}/\\/g; # U+FF3C
    } else {
        die "map parameter \"$opt_map\" not supported. Use one of ",
            "ntfs-sfm, ntfs-sfm-undo, ",
            "ntfs-sfu, ntfs-sfu-undo, ",
            "ntfs-pretty, ntfs-pretty-undo\n";
    }
    return $_;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Replace every %XX escape (two hex digits, either case) in the name by the
# character with that code. Sequences that are not valid escapes are left
# untouched. Returns the resulting string.
sub unescape_get_newname() {
    my ($escaped) = @_;
    # the reverse direction was once done with:
    #   s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg
    $escaped =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex($1))/eg;
    return $escaped;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Check whether a name can be case-converted: returns undef if
# upperlower_get_newname fails for it, otherwise runs posix_check on the
# converted name and returns 1.
sub upperlower_checkenc() {
    my ($oldname) = @_;
    my $converted = upperlower_get_newname($oldname);
    return undef unless defined($converted);
    &posix_check($converted);
    return 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Convert a filename to upper case ($opt_upper) or lower case (otherwise).
#
# The name is converted from $opt_f to UTF-8, case-mapped as character
# data, and converted back to $opt_f.
# Returns the new name, or undef (after printing a message to $outerr) if
# either conversion fails.
sub upperlower_get_newname() {
    my ($oldname) = @_;
    my $work = $oldname;
    unless (from_to($work, $opt_f, "utf8", Encode::FB_QUIET)) { # should also leave NFD as it is ...
        print $outerr "\"",&$from_print($oldname),"\" not encoded in $opt_f ? Supply the correct encoding via -f option!\n";
        return undef;
    }
    _utf8_on($work); # Unicode in Perl can be a real pain ...
    # character semantics are needed for the case mapping below
    no bytes;
    if ($opt_upper) {
        if ($opt_dotlessi) {
            $work =~ s/ı/I/g;
            $work =~ s/i/İ/g;
        }
        # we do not want to upper ß to SS ! Hide it behind NUL+DWSLQH (NUL
        # may not be part of a filename) and bring it back after uc().
        # Unicode 5.1(draft) introduced U+1E9E LATIN CAPITAL LETTER SHARP S
        # as uppercase of U+00DF, but there seems to be no use for that in
        # filenames so far.
        $work =~ s/ß/\000DWSLQH/g;
        $work = uc($work);
        $work =~ s/\000DWSLQH/ß/g;
    } else {
        if ($opt_dotlessi) {
            $work =~ s/I/ı/g;
            $work =~ s/İ/i/g;
        }
        $work = lc($work);
    }
    # back to byte semantics for the rest of this sub
    use bytes;
    _utf8_off($work);
    # UTF-8 NFD of "I with dot above" would deserve special treatment in
    # byte mode here, otherwise lower-casing yields an i with an extra
    # combining dot; the problems that arise with this letter are endless:
    # $work =~ s/i\314\207/i/g if ($from_is_utf8);
    unless (from_to($work, "utf8", $opt_f, Encode::FB_QUIET)) {
        print $outerr $opt_upper?"Upper":"Lower","case of \"",&$from_print($oldname),"\" not possible in $opt_f ! Maybe supply different encoding via -f option.\n";
        return undef;
    }
    return $work;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Warn if a prospective filename contains a NUL byte or a slash, which a
# POSIX filesystem cannot store in a single name component. The warning is
# printed to $outerr and the global $warnings_occurred is set to 1.
sub posix_check() {
    my ($candidate) = @_;
    if ($candidate =~ m/[\000\/]/) {
        print $outerr "WARNING: new filename \"",&$to_print($candidate),"\" contains characters, which are not POSIX filesystem conform! This may result in data loss.\n";
        $warnings_occurred = 1;
    }
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# still unused, but might be used for Netatalk CAP encoding:
#
# Replace every ":xx" sequence (two lowercase hex digits) in the name by the
# character with that code. Names starting with ":2eDS_Store" are returned
# unchanged. Returns the resulting string.
sub cap2utf8() {
    my ($capname) = @_;
    return $capname if ($capname =~ m/^:2eDS_Store/);
    return $capname unless ($capname =~ /:/);
    $capname =~ s/:([0-9a-f][0-9a-f])/chr(hex($1))/eg;
    return $capname;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Check that the most basic Perl Encode features we use work reliably and
# decide which code path is used for &this_is_valid_utf8():
#
#  1. if decode_utf8() returns undef for a non-UTF-8 byte, the validator
#     based on decode_utf8()'s return value is selected;
#  2. otherwise, if decode_utf8(..., FB_CROAK) raises an error for the
#     non-UTF-8 byte and none for real UTF-8, the FB_CROAK based validator
#     is selected.
#
# Returns 0 once a validator has been stored in $this_is_valid_utf8. If
# neither works, the failed checks are printed and the program exits with
# status 1.
sub check_for_broken_perl_release() {
    # the lexical must be called $test: the eval'ed code refers to it
    my $test = ""."\366";
    my $problems = "";

    unless (defined(decode_utf8($test))) {
        $this_is_valid_utf8 = \&this_is_valid_utf8_decode;
        return 0;
    }
    $problems .= "decode_utf8(\$test) check failed\n";

    eval 'decode_utf8($test, Encode::FB_CROAK);';
    if ($@) {
        # rejecting the invalid input works; valid input must pass as well
        $test = ""."ö";
        eval 'decode_utf8($test, Encode::FB_CROAK);';
        unless ($@) {
            $this_is_valid_utf8 = \&this_is_valid_utf8_decode_CROAK;
            return 0;
        }
        $problems .= "eval 'decode_utf8(\$utf8, Encode::FB_CROAK);'; check failed.\n";
    } else {
        $problems .= "eval 'decode_utf8(\$non-utf8, Encode::FB_CROAK);'; check failed.\n";
    }

    print "Your Perl release is too broken to make convmv work reliably:\n",$problems;
    exit 1;
}
|
|
Packit |
244994 |
|
|
Packit |
244994 |
# Check for certain Perl fleas that we more or less have to work around,
# and print the list of detected bug numbers (if any) to standard output.
#
# Background: until 1.08 validity was tested with
# "not defined(decode_utf8($string))", but see perl bug #37757
# (perl 5.8.7/8).
sub check_for_perl_bugs() {
    my $bugs = "";
    my $test = "\366";

    my $u8test = NFD(""."ö"); # "". is intended as only so we have _utf8_off set
                              # otherwise from_to doesn't convert the $data to
                              # something else.
    # print "DEBUG: string is UTF-8 flagged: ",is_utf8($u8test) ? "yes" : "no","\n";
    # Block eval: a double-quoted string eval would interpolate the *value*
    # of $u8test into the code, so from_to() would never see the variable.
    eval { from_to($u8test, 'utf8', 'iso-8859-1'); };
    if ($u8test ne "\366") {
        # Perl::Encode guys think that conversion from decomposed UTF-8
        # to any other charset does not have to be supported by from_to.
        # Why, when NFC or NFD this is both perfectly valid UTF-8?
        $bugs .= "#22111 ";
    }
    if (decode_utf8($test)) {
        $bugs .= "#37757 ";
        # Convmv 1.08 and below would not work here!
        # Perl documentation up to 5.8.8 said that
        # decode_utf8($data_that_is_not_utf_8) should return undef
    }
    # from_to() must fail for broken UTF-8; a true result means it did not
    if (from_to($test, 'utf8', 'utf8', Encode::FB_QUIET)) {
        $bugs .= "#49830 ";
        # convmv 1.10-1.11 would not work here!
        # broken UTF-8 is silently being converted to sane UTF-8 without throwing
        # an error.
    }
    if ($bugs) {
        print "Your Perl version has fleas $bugs\n";
    }

}
|