This patch adds an option --tr=BAD/GOOD to transliterate filenames. It
can be used to remove characters illegal on the destination filesystem.
Jeff Weber expressed interest in this:
http://lists.samba.org/archive/rsync/2007-October/018996.html
To use this patch, run these commands for a successful build:
patch -p1 <patches/transliterate.diff
./configure (optional if already run)
make
based-on: d73762eea3f15f2c56bb3fa9394ad1883c25c949
diff --git a/flist.c b/flist.c
--- a/flist.c
+++ b/flist.c
@@ -75,6 +75,7 @@ extern uid_t our_uid;
extern struct stats stats;
extern char *filesfrom_host;
extern char *usermap, *groupmap;
+extern char *tr_opt;
extern char curr_dir[MAXPATHLEN];
@@ -101,6 +102,8 @@ int file_total = 0; /* total of all active items over all file-lists */
int file_old_total = 0; /* total of active items that will soon be gone */
int flist_eof = 0; /* all the file-lists are now known */
+char tr_substitutions[256];
+
#define NORMAL_NAME 0
#define SLASH_ENDING_NAME 1
#define DOTDIR_NAME 2
@@ -650,6 +653,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
stats.total_size += F_LENGTH(file);
}
+static void transliterate(char *path, int len)
+{
+ while (1) {
+ /* Find position of any char in tr_opt in path, or the end of the path. */
+ int span = strcspn(path, tr_opt);
+ if ((len -= span) == 0)
+ return;
+ path += span;
+ if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
+ memmove(path, path+1, len--); /* copies the trailing '\0' too. */
+ else {
+ path++;
+ len--;
+ }
+ }
+}
+
static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
{
static int64 modtime;
@@ -715,9 +735,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
outbuf.len = 0;
}
thisname[outbuf.len] = '\0';
+ basename_len = outbuf.len;
}
#endif
+ if (tr_opt)
+ transliterate(thisname, basename_len);
+
if (*thisname
&& (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
@@ -2434,6 +2458,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
parse_name_map(usermap, True);
if (groupmap)
parse_name_map(groupmap, False);
+ if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
+ char *f, *t;
+ if ((t = strchr(tr_opt, '/')) != NULL)
+ *t++ = '\0';
+ else
+ t = "";
+ for (f = tr_opt; *f; f++)
+ tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
+ }
}
start_read = stats.total_read;
diff --git a/options.c b/options.c
--- a/options.c
+++ b/options.c
@@ -193,6 +193,7 @@ int logfile_format_has_i = 0;
int logfile_format_has_o_or_i = 0;
int always_checksum = 0;
int list_only = 0;
+char *tr_opt = NULL;
#define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
char *batch_name = NULL;
@@ -803,6 +804,7 @@ void usage(enum logcode F)
#ifdef ICONV_OPTION
rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n");
#endif
+ rprintf(F," --tr=BAD/GOOD transliterate filenames\n");
rprintf(F," --checksum-seed=NUM set block/file checksum seed (advanced)\n");
rprintf(F," -4, --ipv4 prefer IPv4\n");
rprintf(F," -6, --ipv6 prefer IPv6\n");
@@ -1024,6 +1026,7 @@ static struct poptOption long_options[] = {
{"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
{"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
#endif
+ {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
{"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
{"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
{"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
@@ -2364,6 +2367,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
}
+ if (tr_opt) {
+ if (*tr_opt == '/' && tr_opt[1]) {
+ snprintf(err_buf, sizeof err_buf,
+ "Do not start the --tr arg with a slash\n");
+ return 0;
+ }
+ if (*tr_opt && *tr_opt != '/') {
+ need_unsorted_flist = 1;
+ arg = strchr(tr_opt, '/');
+ if (arg && strchr(arg+1, '/')) {
+ snprintf(err_buf, sizeof err_buf,
+ "--tr cannot transliterate slashes\n");
+ return 0;
+ }
+ } else
+ tr_opt = NULL;
+ }
+
am_starting_up = 0;
return 1;
@@ -2792,6 +2813,12 @@ void server_options(char **args, int *argc_p)
if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
args[ac++] = "--no-implied-dirs";
+ if (tr_opt) {
+ if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
+ goto oom;
+ args[ac++] = arg;
+ }
+
if (remove_source_files == 1)
args[ac++] = "--remove-source-files";
else if (remove_source_files)
diff --git a/rsync.yo b/rsync.yo
--- a/rsync.yo
+++ b/rsync.yo
@@ -457,6 +457,7 @@ to the detailed description below for a complete description. verb(
--read-batch=FILE read a batched update from FILE
--protocol=NUM force an older protocol version to be used
--iconv=CONVERT_SPEC request charset conversion of filenames
+ --tr=BAD/GOOD transliterate filenames
--checksum-seed=NUM set block/file checksum seed (advanced)
-4, --ipv4 prefer IPv4
-6, --ipv6 prefer IPv6
@@ -2657,6 +2658,22 @@ daemon uses the charset specified in its "charset" configuration parameter
regardless of the remote charset you actually pass. Thus, you may feel free to
specify just the local charset for a daemon transfer (e.g. bf(--iconv=utf8)).
+dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the
+iconv conversion (if any). This can be used to remove characters illegal
+on the destination filesystem. If you use this option, consider saving a
+"find . -ls" listing of the source in the destination to help you determine
+the original filenames in case of need.
+
+The argument consists of a string of characters to remove, optionally
+followed by a slash and a string of corresponding characters with which to
+replace them. The second string may be shorter, in which case any leftover
+characters in the first string are simply deleted. For example,
+bf(--tr=':\/!') replaces colons with exclamation marks and deletes backslashes.
+Slashes cannot be transliterated because it would cause havoc.
+
+If the receiver is invoked over a remote shell, use bf(--protect-args) to
+stop the shell from interpreting any nasty characters in the argument.
+
dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6
when creating sockets. This only affects sockets that rsync has direct
control over, such as the outgoing socket when directly contacting an
diff -Nurp a/rsync.1 b/rsync.1
--- a/rsync.1
+++ b/rsync.1
@@ -533,6 +533,7 @@ to the detailed description below for a
\-\-read\-batch=FILE read a batched update from FILE
\-\-protocol=NUM force an older protocol version to be used
\-\-iconv=CONVERT_SPEC request charset conversion of filenames
+ \-\-tr=BAD/GOOD transliterate filenames
\-\-checksum\-seed=NUM set block/file checksum seed (advanced)
\-4, \-\-ipv4 prefer IPv4
\-6, \-\-ipv6 prefer IPv6
@@ -3006,6 +3007,23 @@ daemon uses the charset specified in its
regardless of the remote charset you actually pass. Thus, you may feel free to
specify just the local charset for a daemon transfer (e.g. \fB\-\-iconv=utf8\fP).
.IP
+.IP "\fB\-\-tr=BAD/GOOD\fP"
+Transliterates filenames on the receiver, after the
+iconv conversion (if any). This can be used to remove characters illegal
+on the destination filesystem. If you use this option, consider saving a
+\(dq\&find . \-ls\(dq\& listing of the source in the destination to help you determine
+the original filenames in case of need.
+.IP
+The argument consists of a string of characters to remove, optionally
+followed by a slash and a string of corresponding characters with which to
+replace them. The second string may be shorter, in which case any leftover
+characters in the first string are simply deleted. For example,
+\fB\-\-tr='\&:\e/!'\&\fP replaces colons with exclamation marks and deletes backslashes.
+Slashes cannot be transliterated because it would cause havoc.
+.IP
+If the receiver is invoked over a remote shell, use \fB\-\-protect\-args\fP to
+stop the shell from interpreting any nasty characters in the argument.
+.IP
.IP "\fB\-4, \-\-ipv4\fP or \fB\-6, \-\-ipv6\fP"
Tells rsync to prefer IPv4/IPv6
when creating sockets. This only affects sockets that rsync has direct