#! /bin/bash -f

# Print the copyright and licensing notice on stdout.
function copyright()
{
  echo "Copyright (C) 2002 David Kreil <D.Kreil@IEEE.Org>"
  echo "Modified by Barak A. Pearlmutter <bap@debian.org>"
  echo " and Yaroslav Halchenko <debian@onerussian.com>"
  echo "Released under the GNU GPL v2, 21-Oct-2002."
}

# Print the GPL "no warranty" paragraph on stdout.
function warranty()
{
  echo "This program is distributed in the hope that it will be useful,"
  echo "but WITHOUT ANY WARRANTY; without even the implied warranty of"
  echo "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
  echo "GNU General Public License for more details."
}

# Print the disclaimer of the any2djvu service on stdout.
function disclaimer()
{
  echo "By using this tool you accept the following disclaimer:"
  echo " Because the any2djvu service is free of charge, there is no"
  echo " warranty of any kind. In particular, no effort is made to"
  echo " prevent anyone from downloading the files submitted to or"
  echo " produced by the any2djvu server."
}

# TO DO:
# - error handling

# Default settings; each one can be overridden by a command-line option.
rurl="http://any2djvu.djvuzone.org" # base URL of the conversion server (-u)
rcgi="any2djvu.php"                 # CGI script name on that server (-p)
res=400                             # resolution in dpi: 200, 300 or 400 (-r)
ocr=1                               # perform OCR: 0 - no, 1 - yes (-o)
docformat=2                         # input format code (-f); 2 is PS/PS.GZ/PDF

# Print the usage notes (requirements, defaults, fair-use reminder) on stdout.
function warn()
{
  echo "Notes:"
  echo " - Internet connection is required."
  echo " - Filenames are assumed to require no URL-encoding."
  echo " - Documents must be PostScript (.ps, .ps.gz) or PDF (.pdf)."
  echo " - Defaults are: conversion is at 400dpi, with English OCR enabled."
  echo " - This script should not be used for large scale conversions of"
  echo " documents, as it may badly affect and hence endanger the free web"
  echo " service to the community."
  echo " - This software comes with NO WARRANTY."
}

# Print the table of input-format codes accepted by the -f option on stdout.
function format_help()
{
  echo "Codes for the formats of the input documents to use with -f"
  echo " 1 - DjVu Document (for verification or OCR)"
  echo " 2 - PS/PS.GZ/PDF Document (default)"
  echo " 3 - Photo/Picture/Icon"
  echo " 4 - Scanned Document - B&W - <200 dpi"
  echo " 5 - Scanned Document - B&W - 200-400 dpi"
  echo " 6 - Scanned Document - B&W - >400 dpi"
  echo " 7 - Scanned Document - Color/Mixed - <200 dpi"
  echo " 8 - Scanned Document - Color/Mixed - 200-400 dpi"
  echo " 9 - Scanned Document - Color/Mixed - >400 dpi"
}

# Print the command-line help on stdout.
# Reads the globals res, ocr, rurl and rcgi to show the values currently in
# effect as defaults, and $0 for the program name.
function usage()
{
  echo "Convert files from .ps/.ps.gz/.pdf to .djvu"
  echo "Usage: $0 [options] [url] {filename(s)}"
  echo
  echo "Options:"
  echo " -h show this help and exit"
  echo " -q quiet mode: reduce chatter on the screen"
  echo " -a acknowledge internet transmission"
  echo " -c clean after work: remove log file"
  echo " -r dpi resolution in dpi (200/300/400, default: $res)"
  echo " -o ocr perform OCR (0 - no, 1 - yes, default: $ocr)"
  echo " -f format format of input document; invoke -f help for list"
  echo " -u url base URL of server (default: $rurl)"
  echo " -p s CGI script name (default: $rcgi)"
  echo
  echo "If no local file is named by the 1st argument the 1st"
  echo "argument is taken to be an external web-accessible directory and,"
  echo "the specified files are uploaded from that location."
  echo
  echo "Examples:"
  echo " cd ~bap/public_html/foo"
  echo " # uploads from web-accessible directory"
  echo " any2djvu http://www.bcl.hamilton.ie/~barak/papers mesh-preprint.ps.gz"
  echo " any2djvu http://www.inference.phy.cam.ac.uk/mackay *.ps.gz bar.pdf"
  echo " # uploads from current directory"
  echo " any2djvu b*.pdf"
  # The example uses -q: -s is still accepted but reported as deprecated.
  echo " any2djvu -q -c -r 300 -o 0 bp.ps"
}

# Parse the command-line options.
# silent and doclean are reset first so that variables of the same name
# inherited from the environment cannot switch on quiet mode or log removal.
# DJVU_ONLINE_ACK is deliberately not reset: it may be preset in the
# environment to skip the confirmation dialog.
silent=''
doclean=''
while getopts "hacqsr:o:f:u:p:" opt
do
  case "$opt" in
    h) usage; exit 0 ;;
    a) DJVU_ONLINE_ACK=1 ;;
    c) doclean=1 ;;
    s) echo "option -s is deprecated, please use -q"; silent=1 ;;
    q) silent=1 ;;
    r) res="$OPTARG" ;;
    o) ocr="$OPTARG" ;;
    f) docformat="$OPTARG" ;;
    u) rurl="$OPTARG" ;;
    p) rcgi="$OPTARG" ;;
    *) usage; exit 2 ;;
  esac
done

# Drop the parsed options; what remains is [url] {filename(s)}.
shift $((OPTIND - 1))

# check resolution
# The accepted values are matched literally. The earlier prefix-stripping test
# (`[ ${res#[234]} != '00' ]`) expanded to `[ != 00 ]` for an empty or
# one-character value such as "2"; `[` then failed with a syntax error, which
# the `if` treated as "valid", so the bad value was passed on to the server.
case "$res" in
  200|300|400) ;;
  *)
    echo 'error: -r resolution must be one of: 200, 300, 400'
    exit 2
    ;;
esac

# check OCR option
if [ "$ocr" != 0 ] && [ "$ocr" != 1 ]; then
  echo 'error: -o OCR must be 0 or 1'
  exit 2
fi

# if help is requested or docformat is not specified right - show help
if [ "$docformat" = 'help' ]; then
  format_help
  exit 0
fi

# Exactly one decimal digit is accepted.
# NOTE(review): 0 passes this check although format_help lists only 1-9;
# confirm whether the server defines format 0 before tightening this.
case "$docformat" in
  [0-9]) ;;
  *)
    echo "error: requested document format unknown: $docformat."
    format_help
    exit 2
    ;;
esac

# Decide where the input files live.
# If the first remaining argument does not name an existing local file it is
# assumed to be the URL of a web-accessible directory that holds the files;
# otherwise lurl stays empty and the files are uploaded from local disk.
lurl=''
if [ "$#" -gt 0 ] && [ ! -e "$1" ]; then
  lurl="$1"
  shift
fi

# Build a one-line description of this run (UTC time, user@host, pid, script
# name, working directory) and announce the start on stderr unless quiet mode
# is on.
log="$(date -u): $(whoami)@$(hostname), pid $$: $0 (cwd $(pwd))"
if [ -z "$silent" ]; then
  echo '/-- Started' "$log" >&2
fi

# Take the first file to convert; the conversion loop pulls the remaining
# names from the positional parameters one at a time.
in="$1"
shift
if [ -z "$in" ]; then
  echo "error: no files to convert"
  usage
  exit 1
fi

# Show the copyright notice, disclaimer and usage notes when $s is non-empty.
# NOTE(review): nothing in this script assigns $s (the -s and -q options set
# $silent), so unless s is inherited from the environment this banner is never
# printed. Presumably a leftover from an earlier variable name; confirm whether
# the intended condition is "not quiet" before changing it.
if [ -n "$s" ]; then
  copyright
  disclaimer
  warn
fi

# The conversion sends the document to an external server, so ask for explicit
# consent unless it was already given with -a or through a non-empty
# DJVU_ONLINE_ACK environment variable.
if [ -z "$DJVU_ONLINE_ACK" ]; then
  # Quoted delimiter: the text is printed literally, with no expansion.
  cat <<'EOF'
WARNING!

any2djvu uses an external server which is willing to perform the
conversion and requires the document transfer over to that server.
There is a security issue in operating on documents not intended for
widespread distribution, which could be partially although not
completely ameliorated by using a secure web connection.

Do you acknowledge and allow the transmission of the document?
(Type 'yes' to acknowledge. You can define non-empty environment
variable DJVU_ONLINE_ACK to avoid seeing this dialog, or use -a
command line parameter to any2djvu).
EOF
  echo -n "[yes/no]:"
  read -r ack
  # Only an exact "yes" (in any letter case) counts as consent; anything
  # else, including end of input, aborts.
  if [[ ! "$ack" =~ ^[yY][eE][sS]$ ]]; then
    warn
    exit 1
  fi
fi

# Extra switches that make wget and curl quiet in quiet mode; shellopts is a
# redirection fragment that discards standard output. All three stay empty
# when quiet mode is off.
wgetopts=''
curlopts=''
shellopts=''
if [ -n "$silent" ]; then
  curlopts=' -s '
  wgetopts=' -q '
  shellopts=' 1>/dev/null ' # 2>&1
fi

# Conversion parameters in query-string form, for requests that pass the
# document by URL.
cgiopts="&docformat=$docformat&resolution=$res&ocr=$ocr&legal=1"

# Copy the server reply from stdin into the log file "$1"; also echo it on
# stdout unless quiet mode is on. Replaces `eval tee "'$log'" $shellopts`,
# which broke (and could execute injected text) when the file name contained
# a single quote.
function save_reply()
{
  if [ -n "$silent" ]; then
    tee -- "$1" >/dev/null
  else
    tee -- "$1"
  fi
}

# Convert each input in turn. "$in" holds the current name; the remaining
# names are still in the positional parameters and are fetched one at a time
# at the bottom of the loop. An empty name ends the loop.
while [ -n "$in" ]; do
  # Base name for the output and log files: the input name without a known
  # document extension.
  b="$in"
  b="${b%.ps.gz}"
  b="${b%.ps}"
  b="${b%.pdf}"
  b="${b%.PDF}"
  echo "$(date -u) Processing $b ..."
  log="$b-any2djvu.log"
  if [ -z "$lurl" ]; then
    # Local file: upload it as a multipart form.
    if [ ! -e "$in" ]; then
      echo "File $in wasn't found. Conversion stopped"
      exit 2
    fi
    # $curlopts is left unquoted on purpose: it is empty or a single switch.
    # shellcheck disable=SC2086
    curl $curlopts -F "fupload=@$in" -F submit=Submit \
      -F "docformat=$docformat" -F "resolution=$res" \
      -F "ocr=$ocr" -F legal=1 "$rurl/$rcgi" \
      | save_reply "$log"
  else
    # Remote file: let the server fetch it from $lurl.
    # shellcheck disable=SC2086
    wget $wgetopts -O - "$rurl/$rcgi?urlupload=$lurl/$in$cgiopts" \
      | save_reply "$log"
  fi

  # Extract the link to the result from the reply: the text between the last
  # "href=" and the next ">" on the line(s) that reference a .djvu file.
  l="$(grep -E 'href=djvu/.*\.djvu' "$log")"
  l="${l##*href=}"
  l="${l%%>*}"
  if [ -z "$l" ]; then
    echo "error: something got wrong. check log file"
    exit 1
  fi

  # Fetch the converted document. Stop on failure instead of carrying on with
  # a missing or truncated file; the log is still in place at this point.
  # shellcheck disable=SC2086
  if ! wget $wgetopts -O "$b.djvu" "$rurl/$l"; then
    echo "error: could not download $rurl/$l. check log file"
    exit 1
  fi

  [ -z "$doclean" ] || rm -- "$log"
  if [ -z "$silent" ]; then
    ls -l "$b.djvu"
  fi

  in="$1"
  shift
done

# Report completion on stderr unless quiet mode is on.
# Written as an `if` rather than `[ ... ] && echo`: as the last command of the
# script, the short-circuit form made the script exit with status 1 whenever
# quiet mode was on. "$log" is quoted so the message is printed as built.
log="$(date -u): $(whoami)@$(hostname), pid $$: $0 (cwd $(pwd))"
if [ -z "$silent" ]; then
  echo '\-- Done' "$log" >&2
fi