.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.28)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{
.    if \nF \{
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "nkf 1"
.TH nkf 1 "2015-12-12" "nkf 2.1.4" " "
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
nkf \- Network Kanji Filter
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
nkf \fB[\-butjnesliohrTVvwWJESZxXFfmMBOcdILg]\fR \fB[\fR\fIfile ...\fR\fB]\fR
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
\&\fBNkf\fR is yet another kanji code converter among networks, hosts and terminals.
It converts input kanji code to designated kanji code
such as \s-1ISO\-2022\-JP,\s0 Shift_JIS, EUC-JP, \s-1UTF\-8, UTF\-16\s0 or \s-1UTF\-32.\s0
.PP
One of the most distinctive features of \fBnkf\fR is its ability to guess the input kanji encoding.
It currently recognizes \s-1ISO\-2022\-JP,\s0 Shift_JIS, EUC-JP, \s-1UTF\-8, UTF\-16\s0 and \s-1UTF\-32.\s0
So users needn't set the input kanji code explicitly.
.PP
By default, X0201 kana is converted into X0208 kana.
For X0201 kana, \s-1SO/SI, SSO\s0 and \s-1ESC\-\s0(\-I methods are supported.
For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
To accept X0201 in Shift_JIS, use \fB\-X\fR, \fB\-x\fR or \fB\-S\fR.
.PP
Multiple options are specified as separate strings, such as
.PP
.Vb 1
\&  print nkf(\*(Aq\-\-ic=UTF8\-MAC\*(Aq, \*(Aq\-w\*(Aq, $string), "\en";
.Ve
.PP
except for the last argument, which is the string to convert.
.SH "OPTIONS"
.IX Header "OPTIONS"
.IP "\fB\-J \-S \-E \-W \-W16 \-W32 \-j \-s \-e \-w \-w16 \-w32\fR" 4
.IX Item "-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32"
Specify input and output encodings. Upper case is input.
cf. \-\-ic and \-\-oc.
.RS 4
.IP "\fB\-J\fR" 4
.IX Item "-J"
\&\s-1ISO\-2022\-JP \s0(\s-1JIS\s0 code).
.IP "\fB\-S\fR" 4
.IX Item "-S"
Shift_JIS and \s-1JIS X 0201\s0 kana.
EUC-JP is recognized as X0201 kana. Without \fB\-x\fR flag,
\&\s-1JIS X 0201\s0 Katakana (a.k.a. halfwidth kana) is converted into \s-1JIS X 0208.\s0
If you use Windows, see Windows\-31J (\s-1CP932\s0).
.IP "\fB\-E\fR" 4
.IX Item "-E"
EUC-JP.
.IP "\fB\-W\fR" 4
.IX Item "-W"
\&\s-1UTF\-8N.\s0
.IP "\fB\-W16[\s-1BL\s0][0]\fR" 4
.IX Item "-W16[BL][0]"
\&\s-1UTF\-16.
B\s0 or L gives whether Big Endian or Little Endian.
0 gives whether to put \s-1BOM\s0 or not.
.IP "\fB\-W32[\s-1BL\s0][0]\fR" 4
.IX Item "-W32[BL][0]"
\&\s-1UTF\-32.
B\s0 or L gives whether Big Endian or Little Endian.
0 gives whether to put \s-1BOM\s0 or not.
.RE
.RS 4
.RE
.IP "\fB\-b \-u\fR" 4
.IX Item "-b -u"
Output is buffered (\s-1DEFAULT\s0), Output is unbuffered.
.IP "\fB\-t\fR" 4
.IX Item "-t"
No conversion.
.IP "\fB\-i[@B]\fR" 4
.IX Item "-i[@B]"
Specify the escape sequence for \s-1JIS X 0208.\s0
.RS 4
.IP "\fB\-i@\fR" 4
.IX Item "-i@"
Use \s-1ESC \s0$ @. (\s-1JIS X 0208\-1978\s0)
.IP "\fB\-iB\fR" 4
.IX Item "-iB"
Use \s-1ESC \s0$ B. (\s-1JIS X 0208\-1983/1990 DEFAULT\s0)
.RE
.RS 4
.RE
.IP "\fB\-o[\s-1BJ\s0]\fR" 4
.IX Item "-o[BJ]"
Specify the escape sequence for \s-1US\-ASCII/JIS X 0201\s0 Roman. (\s-1DEFAULT B\s0)
.IP "\fB\-r\fR" 4
.IX Item "-r"
{de/en}crypt \s-1ROT13/47\s0
.IP "\fB\-h[123] \-\-hiragana \-\-katakana \-\-katakana\-hiragana\fR" 4
.IX Item "-h[123] --hiragana --katakana --katakana-hiragana"
.RS 4
.PD 0
.IP "\fB\-h1 \-\-hiragana\fR" 4
.IX Item "-h1 --hiragana"
.PD
Katakana to Hiragana conversion.
.IP "\fB\-h2 \-\-katakana\fR" 4
.IX Item "-h2 --katakana"
Hiragana to Katakana conversion.
.IP "\fB\-h3 \-\-katakana\-hiragana\fR" 4
.IX Item "-h3 --katakana-hiragana"
Katakana to Hiragana and Hiragana to Katakana conversion.
.RE
.RS 4
.RE
.IP "\fB\-T\fR" 4
.IX Item "-T"
Text mode output (MS-DOS)
.IP "\fB\-f[\f(BIm\fB [\- \f(BIn\fB]]\fR" 4
.IX Item "-f[m [- n]]"
Folding on \fIm\fR length with \fIn\fR margin in a line.
Without this option, fold length is 60 and fold margin is 10.
.IP "\fB\-F\fR" 4
.IX Item "-F"
New line preserving line folding.
.IP "\fB\-Z[0\-3]\fR" 4
.IX Item "-Z[0-3]"
Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII.\s0
.RS 4
.IP "\fB\-Z \-Z0\fR" 4
.IX Item "-Z -Z0"
Convert X0208 alphabet to \s-1ASCII.\s0
.IP "\fB\-Z1\fR" 4
.IX Item "-Z1"
Convert X0208 kankaku to single \s-1ASCII\s0 space.
.IP "\fB\-Z2\fR" 4
.IX Item "-Z2"
Convert X0208 kankaku to double \s-1ASCII\s0 spaces.
.IP "\fB\-Z3\fR" 4
.IX Item "-Z3"
Replace fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in \s-1HTML.\s0
.RE
.RS 4
.RE
.IP "\fB\-X \-x\fR" 4
.IX Item "-X -x"
With \fB\-X\fR or without this option, X0201 is converted into X0208 Kana.
With \fB\-x\fR, try to preserve X0201 kana and do not convert X0201 kana to X0208.
In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SS2\s0 is used.
.IP "\fB\-B[0\-2]\fR" 4
.IX Item "-B[0-2]"
Assume broken JIS-Kanji input, which lost \s-1ESC.\s0
Useful when your site is using old B\-News Nihongo patch.
.RS 4
.IP "\fB\-B1\fR" 4
.IX Item "-B1"
allows any chars after \s-1ESC\-\s0( or \s-1ESC\-$.\s0
.IP "\fB\-B2\fR" 4
.IX Item "-B2"
force \s-1ASCII\s0 after \s-1NL.\s0
.RE
.RS 4
.RE
.IP "\fB\-I\fR" 4
.IX Item "-I"
Replace non\-ISO\-2022\-JP characters with a geta character
(substitute character in Japanese).
.IP "\fB\-m[\s-1BQN0\s0]\fR" 4
.IX Item "-m[BQN0]"
\&\s-1MIME ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0)
To see \s-1ISO8859\-1 \s0(Latin\-1) \-l is necessary.
.RS 4
.IP "\fB\-mB\fR" 4
.IX Item "-mB"
Decode \s-1MIME\s0 base64 encoded stream. Remove header or other part before
conversion.
.IP "\fB\-mQ\fR" 4
.IX Item "-mQ"
Decode \s-1MIME\s0 quoted stream. '_' in quoted stream is converted to space.
.IP "\fB\-mN\fR" 4
.IX Item "-mN"
Non-strict decoding.
It allows line break in the middle of the base64 encoding.
.IP "\fB\-m0\fR" 4
.IX Item "-m0"
No \s-1MIME\s0 decode.
.RE
.RS 4
.RE
.IP "\fB\-M\fR" 4
.IX Item "-M"
\&\s-1MIME\s0 encode. Header style. All \s-1ASCII\s0 code and control characters are intact.
.RS 4
.IP "\fB\-MB\fR" 4
.IX Item "-MB"
\&\s-1MIME\s0 encode Base64 stream.
Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
.IP "\fB\-MQ\fR" 4
.IX Item "-MQ"
Perform quoted encoding.
.RE
.RS 4
.RE
.IP "\fB\-l\fR" 4
.IX Item "-l"
Input and output code is \s-1ISO8859\-1 \s0(Latin\-1) and \s-1ISO\-2022\-JP.
\&\s0\fB\-s\fR, \fB\-e\fR and \fB\-x\fR are not compatible with this option.
.IP "\fB\-L[uwm] \-d \-c\fR" 4
.IX Item "-L[uwm] -d -c"
Convert line breaks.
.RS 4
.IP "\fB\-Lu \-d\fR" 4
.IX Item "-Lu -d"
unix (\s-1LF\s0)
.IP "\fB\-Lw \-c\fR" 4
.IX Item "-Lw -c"
windows (\s-1CRLF\s0)
.IP "\fB\-Lm\fR" 4
.IX Item "-Lm"
mac (\s-1CR\s0)
.Sp
Without this option, nkf doesn't convert line breaks.
.RE
.RS 4
.RE
.IP "\fB\-\-fj \-\-unix \-\-mac \-\-msdos \-\-windows\fR" 4
.IX Item "--fj --unix --mac --msdos --windows"
Convert for these systems.
.IP "\fB\-\-jis \-\-euc \-\-sjis \-\-mime \-\-base64\fR" 4
.IX Item "--jis --euc --sjis --mime --base64"
Convert to named code.
.IP "\fB\-\-jis\-input \-\-euc\-input \-\-sjis\-input \-\-mime\-input \-\-base64\-input\fR" 4
.IX Item "--jis-input --euc-input --sjis-input --mime-input --base64-input"
Assume input system.
.IP "\fB\-\-ic=\f(BIinput codeset\fB \-\-oc=\f(BIoutput codeset\fB\fR" 4
.IX Item "--ic=input codeset --oc=output codeset"
Set the input or output codeset.
\&\s-1NKF\s0 supports the following codesets; codeset names are case insensitive.
.RS 4
.IP "\s-1ISO\-2022\-JP\s0" 4
.IX Item "ISO-2022-JP"
a.k.a. \s-1RFC1468,\s0 7bit \s-1JIS, JUNET\s0
.IP "EUC-JP (eucJP-nkf)" 4
.IX Item "EUC-JP (eucJP-nkf)"
a.k.a. \s-1AT&T JIS,\s0 Japanese \s-1EUC, UJIS\s0
.IP "eucJP-ascii" 4
.IX Item "eucJP-ascii"
.PD 0
.IP "eucJP-ms" 4
.IX Item "eucJP-ms"
.IP "\s-1CP51932\s0" 4
.IX Item "CP51932"
.PD
Microsoft Version of EUC-JP.
.IP "Shift_JIS" 4
.IX Item "Shift_JIS"
a.k.a. \s-1SJIS,\s0 MS_Kanji
.IP "Windows\-31J" 4
.IX Item "Windows-31J"
a.k.a. \s-1CP932\s0
.IP "\s-1UTF\-8\s0" 4
.IX Item "UTF-8"
same as \s-1UTF\-8N\s0
.IP "\s-1UTF\-8N\s0" 4
.IX Item "UTF-8N"
\&\s-1UTF\-8\s0 without \s-1BOM\s0
.IP "\s-1UTF\-8\-BOM\s0" 4
.IX Item "UTF-8-BOM"
\&\s-1UTF\-8\s0 with \s-1BOM\s0
.IP "\s-1UTF8\-MAC \s0(input only)" 4
.IX Item "UTF8-MAC (input only)"
decomposed \s-1UTF\-8\s0
.IP "\s-1UTF\-16\s0" 4
.IX Item "UTF-16"
same as \s-1UTF\-16BE\s0
.IP "\s-1UTF\-16BE\s0" 4
.IX Item "UTF-16BE"
\&\s-1UTF\-16\s0 Big Endian without \s-1BOM\s0
.IP "\s-1UTF\-16BE\-BOM\s0" 4
.IX Item "UTF-16BE-BOM"
\&\s-1UTF\-16\s0 Big Endian with \s-1BOM\s0
.IP "\s-1UTF\-16LE\s0" 4
.IX Item "UTF-16LE"
\&\s-1UTF\-16\s0 Little Endian without \s-1BOM\s0
.IP "\s-1UTF\-16LE\-BOM\s0" 4
.IX Item "UTF-16LE-BOM"
\&\s-1UTF\-16\s0 Little Endian with \s-1BOM\s0
.IP "\s-1UTF\-32\s0" 4
.IX Item "UTF-32"
same as \s-1UTF\-32BE\s0
.IP "\s-1UTF\-32BE\s0" 4
.IX Item "UTF-32BE"
\&\s-1UTF\-32\s0 Big Endian without \s-1BOM\s0
.IP "\s-1UTF\-32BE\-BOM\s0" 4
.IX Item "UTF-32BE-BOM"
\&\s-1UTF\-32\s0 Big Endian with \s-1BOM\s0
.IP "\s-1UTF\-32LE\s0" 4
.IX Item "UTF-32LE"
\&\s-1UTF\-32\s0 Little Endian without \s-1BOM\s0
.IP "\s-1UTF\-32LE\-BOM\s0" 4
.IX Item "UTF-32LE-BOM"
\&\s-1UTF\-32\s0 Little Endian with \s-1BOM\s0
.RE
.RS 4
.RE
.IP "\fB\-\-fb\-{skip, html, xml, perl, java, subchar}\fR" 4
.IX Item "--fb-{skip, html, xml, perl, java, subchar}"
Specify the way that nkf handles unassigned characters.
Without this option, \-\-fb\-skip is assumed.
.IP "\fB\-\-prefix=\f(BIescape character\fB\f(BItarget character\fB..\fR" 4
.IX Item "--prefix=escape charactertarget character.."
When nkf converts to Shift_JIS,
nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
1st byte of argument is the escape character and following bytes are target characters.
.IP "\fB\-\-no\-cp932ext\fR" 4
.IX Item "--no-cp932ext"
Handle the characters extended in \s-1CP932\s0 as unassigned characters.
.IP "\fB\-\-no\-best\-fit\-chars\fR" 4
.IX Item "--no-best-fit-chars"
When converting from Unicode to an encoded byte sequence,
don't convert characters which are not round-trip safe.
When converting from Unicode to Unicode,
with this and the \-x option, nkf can be used as a \s-1UTF\s0 converter.
(In other words, without this and the \-x option, nkf doesn't preserve some characters.)
.Sp
When nkf converts strings related to paths, you should use this option.
.IP "\fB\-\-cap\-input\fR" 4
.IX Item "--cap-input"
Decode hex encoded characters.
.IP "\fB\-\-url\-input\fR" 4
.IX Item "--url-input"
Unescape percent escaped characters.
.IP "\fB\-\-numchar\-input\fR" 4
.IX Item "--numchar-input"
Decode character reference, such as \*(L"&#....;\*(R".
.IP "\fB\-\-in\-place[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR  \fB\-\-overwrite[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR" 4
.IX Item "--in-place[=SUFFIX] --overwrite[=SUFFIX]"
Overwrite the \fBoriginal\fR listed files with the filtered result.
.Sp
\&\fBNote\fR \-\-overwrite preserves timestamps of original files.
.IP "\fB\-\-guess=[12]\fR" 4
.IX Item "--guess=[12]"
Print guessed encoding and newline. (2 is default, 1 is only encoding)
.IP "\fB\-\-help\fR" 4
.IX Item "--help"
Print nkf's help.
.IP "\fB\-\-version\fR" 4
.IX Item "--version"
Print nkf's version.
.IP "\fB\-\-\fR" 4
.IX Item "--"
Ignore the rest of the \-options.
.SH "AUTHOR"
.IX Header "AUTHOR"
Copyright (c) 1987, Fujitsu \s-1LTD. \s0(Itaru \s-1ICHIKAWA\s0).
.PP
Copyright (c) 1996\-2015, The nkf Project.