1806 lines
78 KiB
Groff
Executable File
1806 lines
78 KiB
Groff
Executable File
.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
|
|
.\"
|
|
.\" Standard preamble:
|
|
.\" ========================================================================
|
|
.de Sh \" Subsection heading
|
|
.br
|
|
.if t .Sp
|
|
.ne 5
|
|
.PP
|
|
\fB\\$1\fR
|
|
.PP
|
|
..
|
|
.de Sp \" Vertical space (when we can't use .PP)
|
|
.if t .sp .5v
|
|
.if n .sp
|
|
..
|
|
.de Vb \" Begin verbatim text
|
|
.ft CW
|
|
.nf
|
|
.ne \\$1
|
|
..
|
|
.de Ve \" End verbatim text
|
|
.ft R
|
|
.fi
|
|
..
|
|
.\" Set up some character translations and predefined strings. \*(-- will
|
|
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
|
|
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
|
|
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
|
|
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
|
|
.\" nothing in troff, for use with C<>.
|
|
.tr \(*W-
|
|
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
|
|
.ie n \{\
|
|
. ds -- \(*W-
|
|
. ds PI pi
|
|
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
|
|
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
|
|
. ds L" ""
|
|
. ds R" ""
|
|
. ds C` ""
|
|
. ds C' ""
|
|
'br\}
|
|
.el\{\
|
|
. ds -- \|\(em\|
|
|
. ds PI \(*p
|
|
. ds L" ``
|
|
. ds R" ''
|
|
'br\}
|
|
.\"
|
|
.\" If the F register is turned on, we'll generate index entries on stderr for
|
|
.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
|
|
.\" entries marked with X<> in POD. Of course, you'll have to process the
|
|
.\" output yourself in some meaningful fashion.
|
|
.if \nF \{\
|
|
. de IX
|
|
. tm Index:\\$1\t\\n%\t"\\$2"
|
|
..
|
|
. nr % 0
|
|
. rr F
|
|
.\}
|
|
.\"
|
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
|
.\" way too many mistakes in technical documents.
|
|
.hy 0
|
|
.if n .na
|
|
.\"
|
|
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
|
|
.\" Fear. Run. Save yourself. No user-serviceable parts.
|
|
. \" fudge factors for nroff and troff
|
|
.if n \{\
|
|
. ds #H 0
|
|
. ds #V .8m
|
|
. ds #F .3m
|
|
. ds #[ \f1
|
|
. ds #] \fP
|
|
.\}
|
|
.if t \{\
|
|
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
|
|
. ds #V .6m
|
|
. ds #F 0
|
|
. ds #[ \&
|
|
. ds #] \&
|
|
.\}
|
|
. \" simple accents for nroff and troff
|
|
.if n \{\
|
|
. ds ' \&
|
|
. ds ` \&
|
|
. ds ^ \&
|
|
. ds , \&
|
|
. ds ~ ~
|
|
. ds /
|
|
.\}
|
|
.if t \{\
|
|
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
|
|
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
|
|
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
|
|
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
|
|
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
|
|
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
|
|
.\}
|
|
. \" troff and (daisy-wheel) nroff accents
|
|
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
|
|
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
|
|
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
|
|
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
|
|
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
|
|
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
|
|
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
|
|
.ds ae a\h'-(\w'a'u*4/10)'e
|
|
.ds Ae A\h'-(\w'A'u*4/10)'E
|
|
. \" corrections for vroff
|
|
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
|
|
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
|
|
. \" for low resolution devices (crt and lpr)
|
|
.if \n(.H>23 .if \n(.V>19 \
|
|
\{\
|
|
. ds : e
|
|
. ds 8 ss
|
|
. ds o a
|
|
. ds d- d\h'-1'\(ga
|
|
. ds D- D\h'-1'\(hy
|
|
. ds th \o'bp'
|
|
. ds Th \o'LP'
|
|
. ds ae ae
|
|
. ds Ae AE
|
|
.\}
|
|
.rm #[ #] #H #V #F C
|
|
.\" ========================================================================
|
|
.\"
|
|
.IX Title "WGET 1"
|
|
.TH WGET 1 "2008-04-30" "GNU Wget 1.11.2" "GNU Wget"
|
|
.SH "NAME"
|
|
Wget \- The non\-interactive network downloader.
|
|
.SH "SYNOPSIS"
|
|
.IX Header "SYNOPSIS"
|
|
wget [\fIoption\fR]... [\fI\s-1URL\s0\fR]...
|
|
.SH "DESCRIPTION"
|
|
.IX Header "DESCRIPTION"
|
|
\&\s-1GNU\s0 Wget is a free utility for non-interactive download of files from
|
|
the Web. It supports \s-1HTTP\s0, \s-1HTTPS\s0, and \s-1FTP\s0 protocols, as
|
|
well as retrieval through \s-1HTTP\s0 proxies.
|
|
.PP
|
|
Wget is non\-interactive, meaning that it can work in the background,
|
|
while the user is not logged on. This allows you to start a retrieval
|
|
and disconnect from the system, letting Wget finish the work. By
|
|
contrast, most Web browsers require the user's constant presence,
|
|
which can be a great hindrance when transferring a lot of data.
|
|
.PP
|
|
Wget can follow links in \s-1HTML\s0 and \s-1XHTML\s0 pages and create local
|
|
versions of remote web sites, fully recreating the directory structure of
|
|
the original site. This is sometimes referred to as \*(L"recursive
|
|
downloading.\*(R" While doing that, Wget respects the Robot Exclusion
|
|
Standard (\fI/robots.txt\fR). Wget can be instructed to convert the
|
|
links in downloaded \s-1HTML\s0 files to the local files for offline
|
|
viewing.
|
|
.PP
|
|
Wget has been designed for robustness over slow or unstable network
|
|
connections; if a download fails due to a network problem, it will
|
|
keep retrying until the whole file has been retrieved. If the server
|
|
supports regetting, it will instruct the server to continue the
|
|
download from where it left off.
|
|
.SH "OPTIONS"
|
|
.IX Header "OPTIONS"
|
|
.Sh "Option Syntax"
|
|
.IX Subsection "Option Syntax"
|
|
Since Wget uses \s-1GNU\s0 getopt to process command-line arguments, every
|
|
option has a long form along with the short one. Long options are
|
|
more convenient to remember, but take time to type. You may freely
|
|
mix different option styles, or specify options after the command-line
|
|
arguments. Thus you may write:
|
|
.PP
|
|
.Vb 1
|
|
\& wget \-r \-\-tries=10 http://fly.srk.fer.hr/ \-o log
|
|
.Ve
|
|
.PP
|
|
The space between the option accepting an argument and the argument may
|
|
be omitted. Instead of \fB\-o log\fR you can write \fB\-olog\fR.
|
|
.PP
|
|
You may put several options that do not require arguments together,
|
|
like:
|
|
.PP
|
|
.Vb 1
|
|
\& wget \-drc <URL>
|
|
.Ve
|
|
.PP
|
|
This is a complete equivalent of:
|
|
.PP
|
|
.Vb 1
|
|
\& wget \-d \-r \-c <URL>
|
|
.Ve
|
|
.PP
|
|
Since the options can be specified after the arguments, you may
|
|
terminate them with \fB\-\-\fR. So the following will try to download
|
|
\&\s-1URL\s0 \fB\-x\fR, reporting failure to \fIlog\fR:
|
|
.PP
|
|
.Vb 1
|
|
\& wget \-o log \-\- \-x
|
|
.Ve
|
|
.PP
|
|
The options that accept comma-separated lists all respect the convention
|
|
that specifying an empty list clears its value. This can be useful to
|
|
clear the \fI.wgetrc\fR settings. For instance, if your \fI.wgetrc\fR
|
|
sets \f(CW\*(C`exclude_directories\*(C'\fR to \fI/cgi\-bin\fR, the following
|
|
example will first reset it, and then set it to exclude \fI/~nobody\fR
|
|
and \fI/~somebody\fR. You can also clear the lists in \fI.wgetrc\fR.
|
|
.PP
|
|
.Vb 1
|
|
\& wget \-X "" \-X /~nobody,/~somebody
|
|
.Ve
|
|
.PP
|
|
Most options that do not accept arguments are \fIboolean\fR options,
|
|
so named because their state can be captured with a yes-or-no
|
|
(\*(L"boolean\*(R") variable. For example, \fB\-\-follow\-ftp\fR tells Wget
|
|
to follow \s-1FTP\s0 links from \s-1HTML\s0 files and, on the other hand,
|
|
\&\fB\-\-no\-glob\fR tells it not to perform file globbing on \s-1FTP\s0 URLs. A
|
|
boolean option is either \fIaffirmative\fR or \fInegative\fR
|
|
(beginning with \fB\-\-no\fR). All such options share several
|
|
properties.
|
|
.PP
|
|
Unless stated otherwise, it is assumed that the default behavior is
|
|
the opposite of what the option accomplishes. For example, the
|
|
documented existence of \fB\-\-follow\-ftp\fR assumes that the default
|
|
is to \fInot\fR follow \s-1FTP\s0 links from \s-1HTML\s0 pages.
|
|
.PP
|
|
Affirmative options can be negated by prepending the \fB\-\-no\-\fR to
|
|
the option name; negative options can be negated by omitting the
|
|
\&\fB\-\-no\-\fR prefix. This might seem superfluous\-\-\-if the default for
|
|
an affirmative option is to not do something, then why provide a way
|
|
to explicitly turn it off? But the startup file may in fact change
|
|
the default. For instance, using \f(CW\*(C`follow_ftp = on\*(C'\fR in
|
|
\&\fI.wgetrc\fR makes Wget \fIfollow\fR \s-1FTP\s0 links by default, and
|
|
using \fB\-\-no\-follow\-ftp\fR is the only way to restore the factory
|
|
default from the command line.
|
|
.Sh "Basic Startup Options"
|
|
.IX Subsection "Basic Startup Options"
|
|
.IP "\fB\-V\fR" 4
|
|
.IX Item "-V"
|
|
.PD 0
|
|
.IP "\fB\-\-version\fR" 4
|
|
.IX Item "--version"
|
|
.PD
|
|
Display the version of Wget.
|
|
.IP "\fB\-h\fR" 4
|
|
.IX Item "-h"
|
|
.PD 0
|
|
.IP "\fB\-\-help\fR" 4
|
|
.IX Item "--help"
|
|
.PD
|
|
Print a help message describing all of Wget's command-line options.
|
|
.IP "\fB\-b\fR" 4
|
|
.IX Item "-b"
|
|
.PD 0
|
|
.IP "\fB\-\-background\fR" 4
|
|
.IX Item "--background"
|
|
.PD
|
|
Go to background immediately after startup. If no output file is
|
|
specified via the \fB\-o\fR option, output is redirected to \fIwget-log\fR.
|
|
.IP "\fB\-e\fR \fIcommand\fR" 4
|
|
.IX Item "-e command"
|
|
.PD 0
|
|
.IP "\fB\-\-execute\fR \fIcommand\fR" 4
|
|
.IX Item "--execute command"
|
|
.PD
|
|
Execute \fIcommand\fR as if it were a part of \fI.wgetrc\fR. A command thus invoked will be executed
|
|
\&\fIafter\fR the commands in \fI.wgetrc\fR, thus taking precedence over
|
|
them. If you need to specify more than one wgetrc command, use multiple
|
|
instances of \fB\-e\fR.
|
|
.Sh "Logging and Input File Options"
|
|
.IX Subsection "Logging and Input File Options"
|
|
.IP "\fB\-o\fR \fIlogfile\fR" 4
|
|
.IX Item "-o logfile"
|
|
.PD 0
|
|
.IP "\fB\-\-output\-file=\fR\fIlogfile\fR" 4
|
|
.IX Item "--output-file=logfile"
|
|
.PD
|
|
Log all messages to \fIlogfile\fR. The messages are normally reported
|
|
to standard error.
|
|
.IP "\fB\-a\fR \fIlogfile\fR" 4
|
|
.IX Item "-a logfile"
|
|
.PD 0
|
|
.IP "\fB\-\-append\-output=\fR\fIlogfile\fR" 4
|
|
.IX Item "--append-output=logfile"
|
|
.PD
|
|
Append to \fIlogfile\fR. This is the same as \fB\-o\fR, only it appends
|
|
to \fIlogfile\fR instead of overwriting the old log file. If
|
|
\&\fIlogfile\fR does not exist, a new file is created.
|
|
.IP "\fB\-d\fR" 4
|
|
.IX Item "-d"
|
|
.PD 0
|
|
.IP "\fB\-\-debug\fR" 4
|
|
.IX Item "--debug"
|
|
.PD
|
|
Turn on debug output, meaning various information important to the
|
|
developers of Wget if it does not work properly. Your system
|
|
administrator may have chosen to compile Wget without debug support, in
|
|
which case \fB\-d\fR will not work. Please note that compiling with
|
|
debug support is always safe\-\-\-Wget compiled with the debug support will
|
|
\&\fInot\fR print any debug info unless requested with \fB\-d\fR.
|
|
.IP "\fB\-q\fR" 4
|
|
.IX Item "-q"
|
|
.PD 0
|
|
.IP "\fB\-\-quiet\fR" 4
|
|
.IX Item "--quiet"
|
|
.PD
|
|
Turn off Wget's output.
|
|
.IP "\fB\-v\fR" 4
|
|
.IX Item "-v"
|
|
.PD 0
|
|
.IP "\fB\-\-verbose\fR" 4
|
|
.IX Item "--verbose"
|
|
.PD
|
|
Turn on verbose output, with all the available data. The default output
|
|
is verbose.
|
|
.IP "\fB\-nv\fR" 4
|
|
.IX Item "-nv"
|
|
.PD 0
|
|
.IP "\fB\-\-no\-verbose\fR" 4
|
|
.IX Item "--no-verbose"
|
|
.PD
|
|
Turn off verbose without being completely quiet (use \fB\-q\fR for
|
|
that), which means that error messages and basic information still get
|
|
printed.
|
|
.IP "\fB\-i\fR \fIfile\fR" 4
|
|
.IX Item "-i file"
|
|
.PD 0
|
|
.IP "\fB\-\-input\-file=\fR\fIfile\fR" 4
|
|
.IX Item "--input-file=file"
|
|
.PD
|
|
Read URLs from \fIfile\fR. If \fB\-\fR is specified as
|
|
\&\fIfile\fR, URLs are read from the standard input. (Use
|
|
\&\fB./\-\fR to read from a file literally named \fB\-\fR.)
|
|
.Sp
|
|
If this option is used, no URLs need be present on the command
|
|
line. If there are URLs both on the command line and in an input
|
|
file, those on the command line will be the first ones to be
|
|
retrieved. The \fIfile\fR need not be an \s-1HTML\s0 document (but no
|
|
harm if it is)\-\-\-it is enough if the URLs are just listed
|
|
sequentially.
|
|
.Sp
|
|
However, if you specify \fB\-\-force\-html\fR, the document will be
|
|
regarded as \fBhtml\fR. In that case you may have problems with
|
|
relative links, which you can solve either by adding \f(CW\*(C`<base
|
|
href="\f(CIurl\f(CW">\*(C'\fR to the documents or by specifying
|
|
\&\fB\-\-base=\fR\fIurl\fR on the command line.
|
|
.IP "\fB\-F\fR" 4
|
|
.IX Item "-F"
|
|
.PD 0
|
|
.IP "\fB\-\-force\-html\fR" 4
|
|
.IX Item "--force-html"
|
|
.PD
|
|
When input is read from a file, force it to be treated as an \s-1HTML\s0
|
|
file. This enables you to retrieve relative links from existing
|
|
\&\s-1HTML\s0 files on your local disk, by adding \f(CW\*(C`<base
|
|
href="\f(CIurl\f(CW">\*(C'\fR to \s-1HTML\s0, or using the \fB\-\-base\fR command-line
|
|
option.
|
|
.IP "\fB\-B\fR \fI\s-1URL\s0\fR" 4
|
|
.IX Item "-B URL"
|
|
.PD 0
|
|
.IP "\fB\-\-base=\fR\fI\s-1URL\s0\fR" 4
|
|
.IX Item "--base=URL"
|
|
.PD
|
|
Prepends \fI\s-1URL\s0\fR to relative links read from the file specified with
|
|
the \fB\-i\fR option.
|
|
.Sh "Download Options"
|
|
.IX Subsection "Download Options"
|
|
.IP "\fB\-\-bind\-address=\fR\fI\s-1ADDRESS\s0\fR" 4
|
|
.IX Item "--bind-address=ADDRESS"
|
|
When making client \s-1TCP/IP\s0 connections, bind to \fI\s-1ADDRESS\s0\fR on
|
|
the local machine. \fI\s-1ADDRESS\s0\fR may be specified as a hostname or \s-1IP\s0
|
|
address. This option can be useful if your machine is bound to multiple
|
|
IPs.
|
|
.IP "\fB\-t\fR \fInumber\fR" 4
|
|
.IX Item "-t number"
|
|
.PD 0
|
|
.IP "\fB\-\-tries=\fR\fInumber\fR" 4
|
|
.IX Item "--tries=number"
|
|
.PD
|
|
Set number of retries to \fInumber\fR. Specify 0 or \fBinf\fR for
|
|
infinite retrying. The default is to retry 20 times, with the exception
|
|
of fatal errors like \*(L"connection refused\*(R" or \*(L"not found\*(R" (404),
|
|
which are not retried.
|
|
.IP "\fB\-O\fR \fIfile\fR" 4
|
|
.IX Item "-O file"
|
|
.PD 0
|
|
.IP "\fB\-\-output\-document=\fR\fIfile\fR" 4
|
|
.IX Item "--output-document=file"
|
|
.PD
|
|
The documents will not be written to the appropriate files, but all
|
|
will be concatenated together and written to \fIfile\fR. If \fB\-\fR
|
|
is used as \fIfile\fR, documents will be printed to standard output,
|
|
disabling link conversion. (Use \fB./\-\fR to print to a file
|
|
literally named \fB\-\fR.)
|
|
.Sp
|
|
Use of \fB\-O\fR is \fInot\fR intended to mean simply "use the name
|
|
\&\fIfile\fR instead of the one in the \s-1URL\s0;" rather, it is
|
|
analogous to shell redirection:
|
|
\&\fBwget \-O file http://foo\fR is intended to work like
|
|
\&\fBwget \-O \- http://foo > file\fR; \fIfile\fR will be truncated
|
|
immediately, and \fIall\fR downloaded content will be written there.
|
|
.Sp
|
|
For this reason, \fB\-N\fR (for timestamp\-checking) is not supported
|
|
in combination with \fB\-O\fR: since \fIfile\fR is always newly
|
|
created, it will always have a very new timestamp. Contrary to some
|
|
users' expectations, the combination has never worked, and as of
|
|
version 1.11, it results in an error.
|
|
.Sp
|
|
Similarly, using \fB\-r\fR or \fB\-p\fR with \fB\-O\fR may not work as
|
|
you expect: Wget won't just download the first file to \fIfile\fR and
|
|
then download the rest to their normal names: \fIall\fR downloaded
|
|
content will be placed in \fIfile\fR. This was disabled in version
|
|
1.11, but has been reinstated (with a warning) in 1.11.2, as there are
|
|
some cases where this behavior can actually have some use.
|
|
.Sp
|
|
Note that a combination with \fB\-k\fR is only permitted when
|
|
downloading a single document, as in that case it will just convert
|
|
all relative URIs to external ones; \fB\-k\fR makes no sense for
|
|
multiple URIs when they're all being downloaded to a single file.
|
|
.IP "\fB\-nc\fR" 4
|
|
.IX Item "-nc"
|
|
.PD 0
|
|
.IP "\fB\-\-no\-clobber\fR" 4
|
|
.IX Item "--no-clobber"
|
|
.PD
|
|
If a file is downloaded more than once in the same directory, Wget's
|
|
behavior depends on a few options, including \fB\-nc\fR. In certain
|
|
cases, the local file will be \fIclobbered\fR, or overwritten, upon
|
|
repeated download. In other cases it will be preserved.
|
|
.Sp
|
|
When running Wget without \fB\-N\fR, \fB\-nc\fR, \fB\-r\fR, or \fB\-p\fR,
|
|
downloading the same file in the same directory will result in the
|
|
original copy of \fIfile\fR being preserved and the second copy being
|
|
named \fIfile\fR\fB.1\fR. If that file is downloaded yet again, the
|
|
third copy will be named \fIfile\fR\fB.2\fR, and so on. When
|
|
\&\fB\-nc\fR is specified, this behavior is suppressed, and Wget will
|
|
refuse to download newer copies of \fIfile\fR. Therefore,
|
|
"\f(CW\*(C`no\-clobber\*(C'\fR" is actually a misnomer in this mode\-\-\-it's not
|
|
clobbering that's prevented (as the numeric suffixes were already
|
|
preventing clobbering), but rather the multiple version saving that's
|
|
prevented.
|
|
.Sp
|
|
When running Wget with \fB\-r\fR or \fB\-p\fR, but without \fB\-N\fR
|
|
or \fB\-nc\fR, re-downloading a file will result in the new copy
|
|
simply overwriting the old. Adding \fB\-nc\fR will prevent this
|
|
behavior, instead causing the original version to be preserved and any
|
|
newer copies on the server to be ignored.
|
|
.Sp
|
|
When running Wget with \fB\-N\fR, with or without \fB\-r\fR or
|
|
\&\fB\-p\fR, the decision as to whether or not to download a newer copy
|
|
of a file depends on the local and remote timestamp and size of the
|
|
file. \fB\-nc\fR may not be specified at the
|
|
same time as \fB\-N\fR.
|
|
.Sp
|
|
Note that when \fB\-nc\fR is specified, files with the suffixes
|
|
\&\fB.html\fR or \fB.htm\fR will be loaded from the local disk and
|
|
parsed as if they had been retrieved from the Web.
|
|
.IP "\fB\-c\fR" 4
|
|
.IX Item "-c"
|
|
.PD 0
|
|
.IP "\fB\-\-continue\fR" 4
|
|
.IX Item "--continue"
|
|
.PD
|
|
Continue getting a partially-downloaded file. This is useful when you
|
|
want to finish up a download started by a previous instance of Wget, or
|
|
by another program. For instance:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-c ftp://sunsite.doc.ic.ac.uk/ls\-lR.Z
|
|
.Ve
|
|
.Sp
|
|
If there is a file named \fIls\-lR.Z\fR in the current directory, Wget
|
|
will assume that it is the first portion of the remote file, and will
|
|
ask the server to continue the retrieval from an offset equal to the
|
|
length of the local file.
|
|
.Sp
|
|
Note that you don't need to specify this option if you just want the
|
|
current invocation of Wget to retry downloading a file should the
|
|
connection be lost midway through. This is the default behavior.
|
|
\&\fB\-c\fR only affects resumption of downloads started \fIprior\fR to
|
|
this invocation of Wget, and whose local files are still sitting around.
|
|
.Sp
|
|
Without \fB\-c\fR, the previous example would just download the remote
|
|
file to \fIls\-lR.Z.1\fR, leaving the truncated \fIls\-lR.Z\fR file
|
|
alone.
|
|
.Sp
|
|
Beginning with Wget 1.7, if you use \fB\-c\fR on a non-empty file, and
|
|
it turns out that the server does not support continued downloading,
|
|
Wget will refuse to start the download from scratch, which would
|
|
effectively ruin existing contents. If you really want the download to
|
|
start from scratch, remove the file.
|
|
.Sp
|
|
Also beginning with Wget 1.7, if you use \fB\-c\fR on a file which is of
|
|
equal size as the one on the server, Wget will refuse to download the
|
|
file and print an explanatory message. The same happens when the file
|
|
is smaller on the server than locally (presumably because it was changed
|
|
on the server since your last download attempt)\-\-\-because \*(L"continuing\*(R"
|
|
is not meaningful, no download occurs.
|
|
.Sp
|
|
On the other side of the coin, while using \fB\-c\fR, any file that's
|
|
bigger on the server than locally will be considered an incomplete
|
|
download and only \f(CW\*(C`(length(remote) \- length(local))\*(C'\fR bytes will be
|
|
downloaded and tacked onto the end of the local file. This behavior can
|
|
be desirable in certain cases\-\-\-for instance, you can use \fBwget \-c\fR
|
|
to download just the new portion that's been appended to a data
|
|
collection or log file.
|
|
.Sp
|
|
However, if the file is bigger on the server because it's been
|
|
\&\fIchanged\fR, as opposed to just \fIappended\fR to, you'll end up
|
|
with a garbled file. Wget has no way of verifying that the local file
|
|
is really a valid prefix of the remote file. You need to be especially
|
|
careful of this when using \fB\-c\fR in conjunction with \fB\-r\fR,
|
|
since every file will be considered as an \*(L"incomplete download\*(R" candidate.
|
|
.Sp
|
|
Another instance where you'll get a garbled file if you try to use
|
|
\&\fB\-c\fR is if you have a lame \s-1HTTP\s0 proxy that inserts a
|
|
\&\*(L"transfer interrupted\*(R" string into the local file. In the future a
|
|
\&\*(L"rollback\*(R" option may be added to deal with this case.
|
|
.Sp
|
|
Note that \fB\-c\fR only works with \s-1FTP\s0 servers and with \s-1HTTP\s0
|
|
servers that support the \f(CW\*(C`Range\*(C'\fR header.
|
|
.IP "\fB\-\-progress=\fR\fItype\fR" 4
|
|
.IX Item "--progress=type"
|
|
Select the type of the progress indicator you wish to use. Legal
|
|
indicators are \*(L"dot\*(R" and \*(L"bar\*(R".
|
|
.Sp
|
|
The \*(L"bar\*(R" indicator is used by default. It draws an \s-1ASCII\s0 progress
|
|
bar graphic (a.k.a. \*(L"thermometer\*(R" display) indicating the status of
|
|
retrieval. If the output is not a \s-1TTY\s0, the \*(L"dot\*(R" indicator will be used by
|
|
default.
|
|
.Sp
|
|
Use \fB\-\-progress=dot\fR to switch to the \*(L"dot\*(R" display. It traces
|
|
the retrieval by printing dots on the screen, each dot representing a
|
|
fixed amount of downloaded data.
|
|
.Sp
|
|
When using the dotted retrieval, you may also set the \fIstyle\fR by
|
|
specifying the type as \fBdot:\fR\fIstyle\fR. Different styles assign
|
|
different meaning to one dot. With the \f(CW\*(C`default\*(C'\fR style each dot
|
|
represents 1K, there are ten dots in a cluster and 50 dots in a line.
|
|
The \f(CW\*(C`binary\*(C'\fR style has a more \*(L"computer\*(R"\-like orientation\-\-\-8K
|
|
dots, 16\-dot clusters and 48 dots per line (which makes for 384K
|
|
lines). The \f(CW\*(C`mega\*(C'\fR style is suitable for downloading very large
|
|
files\-\-\-each dot represents 64K retrieved, there are eight dots in a
|
|
cluster, and 48 dots on each line (so each line contains 3M).
|
|
.Sp
|
|
Note that you can set the default style using the \f(CW\*(C`progress\*(C'\fR
|
|
command in \fI.wgetrc\fR. That setting may be overridden from the
|
|
command line. The exception is that, when the output is not a \s-1TTY\s0, the
|
|
\&\*(L"dot\*(R" progress will be favored over \*(L"bar\*(R". To force the bar output,
|
|
use \fB\-\-progress=bar:force\fR.
|
|
.IP "\fB\-N\fR" 4
|
|
.IX Item "-N"
|
|
.PD 0
|
|
.IP "\fB\-\-timestamping\fR" 4
|
|
.IX Item "--timestamping"
|
|
.PD
|
|
Turn on time\-stamping.
|
|
.IP "\fB\-S\fR" 4
|
|
.IX Item "-S"
|
|
.PD 0
|
|
.IP "\fB\-\-server\-response\fR" 4
|
|
.IX Item "--server-response"
|
|
.PD
|
|
Print the headers sent by \s-1HTTP\s0 servers and responses sent by
|
|
\&\s-1FTP\s0 servers.
|
|
.IP "\fB\-\-spider\fR" 4
|
|
.IX Item "--spider"
|
|
When invoked with this option, Wget will behave as a Web \fIspider\fR,
|
|
which means that it will not download the pages, just check that they
|
|
are there. For example, you can use Wget to check your bookmarks:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-\-spider \-\-force\-html \-i bookmarks.html
|
|
.Ve
|
|
.Sp
|
|
This feature needs much more work for Wget to get close to the
|
|
functionality of real web spiders.
|
|
.IP "\fB\-T\fR \fIseconds\fR" 4
|
|
.IX Item "-T seconds"
|
|
.PD 0
|
|
.IP "\fB\-\-timeout=\fR\fIseconds\fR" 4
|
|
.IX Item "--timeout=seconds"
|
|
.PD
|
|
Set the network timeout to \fIseconds\fR seconds. This is equivalent
|
|
to specifying \fB\-\-dns\-timeout\fR, \fB\-\-connect\-timeout\fR, and
|
|
\&\fB\-\-read\-timeout\fR, all at the same time.
|
|
.Sp
|
|
When interacting with the network, Wget can check for timeout and
|
|
abort the operation if it takes too long. This prevents anomalies
|
|
like hanging reads and infinite connects. The only timeout enabled by
|
|
default is a 900\-second read timeout. Setting a timeout to 0 disables
|
|
it altogether. Unless you know what you are doing, it is best not to
|
|
change the default timeout settings.
|
|
.Sp
|
|
All timeout-related options accept decimal values, as well as
|
|
subsecond values. For example, \fB0.1\fR seconds is a legal (though
|
|
unwise) choice of timeout. Subsecond timeouts are useful for checking
|
|
server response times or for testing network latency.
|
|
.IP "\fB\-\-dns\-timeout=\fR\fIseconds\fR" 4
|
|
.IX Item "--dns-timeout=seconds"
|
|
Set the \s-1DNS\s0 lookup timeout to \fIseconds\fR seconds. \s-1DNS\s0 lookups that
|
|
don't complete within the specified time will fail. By default, there
|
|
is no timeout on \s-1DNS\s0 lookups, other than that implemented by system
|
|
libraries.
|
|
.IP "\fB\-\-connect\-timeout=\fR\fIseconds\fR" 4
|
|
.IX Item "--connect-timeout=seconds"
|
|
Set the connect timeout to \fIseconds\fR seconds. \s-1TCP\s0 connections that
|
|
take longer to establish will be aborted. By default, there is no
|
|
connect timeout, other than that implemented by system libraries.
|
|
.IP "\fB\-\-read\-timeout=\fR\fIseconds\fR" 4
|
|
.IX Item "--read-timeout=seconds"
|
|
Set the read (and write) timeout to \fIseconds\fR seconds. The
|
|
\&\*(L"time\*(R" of this timeout refers to \fIidle time\fR: if, at any point in
|
|
the download, no data is received for more than the specified number
|
|
of seconds, reading fails and the download is restarted. This option
|
|
does not directly affect the duration of the entire download.
|
|
.Sp
|
|
Of course, the remote server may choose to terminate the connection
|
|
sooner than this option requires. The default read timeout is 900
|
|
seconds.
|
|
.IP "\fB\-\-limit\-rate=\fR\fIamount\fR" 4
|
|
.IX Item "--limit-rate=amount"
|
|
Limit the download speed to \fIamount\fR bytes per second. Amount may
|
|
be expressed in bytes, kilobytes with the \fBk\fR suffix, or megabytes
|
|
with the \fBm\fR suffix. For example, \fB\-\-limit\-rate=20k\fR will
|
|
limit the retrieval rate to 20KB/s. This is useful when, for whatever
|
|
reason, you don't want Wget to consume the entire available bandwidth.
|
|
.Sp
|
|
This option allows the use of decimal numbers, usually in conjunction
|
|
with power suffixes; for example, \fB\-\-limit\-rate=2.5k\fR is a legal
|
|
value.
|
|
.Sp
|
|
Note that Wget implements the limiting by sleeping the appropriate
|
|
amount of time after a network read that took less time than specified
|
|
by the rate. Eventually this strategy causes the \s-1TCP\s0 transfer to slow
|
|
down to approximately the specified rate. However, it may take some
|
|
time for this balance to be achieved, so don't be surprised if limiting
|
|
the rate doesn't work well with very small files.
|
|
.IP "\fB\-w\fR \fIseconds\fR" 4
|
|
.IX Item "-w seconds"
|
|
.PD 0
|
|
.IP "\fB\-\-wait=\fR\fIseconds\fR" 4
|
|
.IX Item "--wait=seconds"
|
|
.PD
|
|
Wait the specified number of seconds between the retrievals. Use of
|
|
this option is recommended, as it lightens the server load by making the
|
|
requests less frequent. Instead of in seconds, the time can be
|
|
specified in minutes using the \f(CW\*(C`m\*(C'\fR suffix, in hours using the \f(CW\*(C`h\*(C'\fR
|
|
suffix, or in days using the \f(CW\*(C`d\*(C'\fR suffix.
|
|
.Sp
|
|
Specifying a large value for this option is useful if the network or the
|
|
destination host is down, so that Wget can wait long enough to
|
|
reasonably expect the network error to be fixed before the retry. The
|
|
waiting interval specified by this option is influenced by
|
|
\&\f(CW\*(C`\-\-random\-wait\*(C'\fR, which see.
|
|
.IP "\fB\-\-waitretry=\fR\fIseconds\fR" 4
|
|
.IX Item "--waitretry=seconds"
|
|
If you don't want Wget to wait between \fIevery\fR retrieval, but only
|
|
between retries of failed downloads, you can use this option. Wget will
|
|
use \fIlinear backoff\fR, waiting 1 second after the first failure on a
|
|
given file, then waiting 2 seconds after the second failure on that
|
|
file, up to the maximum number of \fIseconds\fR you specify. Therefore,
|
|
a value of 10 will actually make Wget wait up to (1 + 2 + ... + 10) = 55
|
|
seconds per file.
|
|
.Sp
|
|
Note that this option is turned on by default in the global
|
|
\&\fIwgetrc\fR file.
|
|
.IP "\fB\-\-random\-wait\fR" 4
|
|
.IX Item "--random-wait"
|
|
Some web sites may perform log analysis to identify retrieval programs
|
|
such as Wget by looking for statistically significant similarities in
|
|
the time between requests. This option causes the time between requests
|
|
to vary between 0.5 and 1.5 * \fIwait\fR seconds, where \fIwait\fR was
|
|
specified using the \fB\-\-wait\fR option, in order to mask Wget's
|
|
presence from such analysis.
|
|
.Sp
|
|
A 2001 article in a publication devoted to development on a popular
|
|
consumer platform provided code to perform this analysis on the fly.
|
|
Its author suggested blocking at the class C address level to ensure
|
|
automated retrieval programs were blocked despite changing DHCP-supplied
|
|
addresses.
|
|
.Sp
|
|
The \fB\-\-random\-wait\fR option was inspired by this ill-advised
|
|
recommendation to block many unrelated users from a web site due to the
|
|
actions of one.
|
|
.IP "\fB\-\-no\-proxy\fR" 4
|
|
.IX Item "--no-proxy"
|
|
Don't use proxies, even if the appropriate \f(CW*_proxy\fR environment
|
|
variable is defined.
|
|
.IP "\fB\-Q\fR \fIquota\fR" 4
|
|
.IX Item "-Q quota"
|
|
.PD 0
|
|
.IP "\fB\-\-quota=\fR\fIquota\fR" 4
|
|
.IX Item "--quota=quota"
|
|
.PD
|
|
Specify download quota for automatic retrievals. The value can be
|
|
specified in bytes (default), kilobytes (with \fBk\fR suffix), or
|
|
megabytes (with \fBm\fR suffix).
|
|
.Sp
|
|
Note that quota will never affect downloading a single file. So if you
|
|
specify \fBwget \-Q10k ftp://wuarchive.wustl.edu/ls\-lR.gz\fR, all of the
|
|
\&\fIls\-lR.gz\fR will be downloaded. The same goes even when several
|
|
URLs are specified on the command\-line. However, quota is
|
|
respected when retrieving either recursively, or from an input file.
|
|
Thus you may safely type \fBwget \-Q2m \-i sites\fR\-\-\-download will be
|
|
aborted when the quota is exceeded.
|
|
.Sp
|
|
Setting quota to 0 or to \fBinf\fR unlimits the download quota.
|
|
.IP "\fB\-\-no\-dns\-cache\fR" 4
|
|
.IX Item "--no-dns-cache"
|
|
Turn off caching of \s-1DNS\s0 lookups. Normally, Wget remembers the \s-1IP\s0
|
|
addresses it looked up from \s-1DNS\s0 so it doesn't have to repeatedly
|
|
contact the \s-1DNS\s0 server for the same (typically small) set of hosts it
|
|
retrieves from. This cache exists in memory only; a new Wget run will
|
|
contact \s-1DNS\s0 again.
|
|
.Sp
|
|
However, it has been reported that in some situations it is not
|
|
desirable to cache host names, even for the duration of a
|
|
short-running application like Wget. With this option Wget issues a
|
|
new \s-1DNS\s0 lookup (more precisely, a new call to \f(CW\*(C`gethostbyname\*(C'\fR or
|
|
\&\f(CW\*(C`getaddrinfo\*(C'\fR) each time it makes a new connection. Please note
|
|
that this option will \fInot\fR affect caching that might be
|
|
performed by the resolving library or by an external caching layer,
|
|
such as \s-1NSCD\s0.
|
|
.Sp
|
|
If you don't understand exactly what this option does, you probably
|
|
won't need it.
|
|
.IP "\fB\-\-restrict\-file\-names=\fR\fImode\fR" 4
|
|
.IX Item "--restrict-file-names=mode"
|
|
Change which characters found in remote URLs may show up in local file
|
|
names generated from those URLs. Characters that are \fIrestricted\fR
|
|
by this option are escaped, i.e. replaced with \fB%HH\fR, where
|
|
\&\fB\s-1HH\s0\fR is the hexadecimal number that corresponds to the restricted
|
|
character.
|
|
.Sp
|
|
By default, Wget escapes the characters that are not valid as part of
|
|
file names on your operating system, as well as control characters that
|
|
are typically unprintable. This option is useful for changing these
|
|
defaults, either because you are downloading to a non-native partition,
|
|
or because you want to disable escaping of the control characters.
|
|
.Sp
|
|
When mode is set to \*(L"unix\*(R", Wget escapes the character \fB/\fR and
|
|
the control characters in the ranges 0\-\-31 and 128\-\-159. This is the
|
|
default on Unix-like \s-1OS\s0'es.
|
|
.Sp
|
|
When mode is set to \*(L"windows\*(R", Wget escapes the characters \fB\e\fR,
|
|
\&\fB|\fR, \fB/\fR, \fB:\fR, \fB?\fR, \fB"\fR, \fB*\fR, \fB<\fR,
|
|
\&\fB>\fR, and the control characters in the ranges 0\-\-31 and 128\-\-159.
|
|
In addition to this, Wget in Windows mode uses \fB+\fR instead of
|
|
\&\fB:\fR to separate host and port in local file names, and uses
|
|
\&\fB@\fR instead of \fB?\fR to separate the query portion of the file
|
|
name from the rest. Therefore, a \s-1URL\s0 that would be saved as
|
|
\&\fBwww.xemacs.org:4300/search.pl?input=blah\fR in Unix mode would be
|
|
saved as \fBwww.xemacs.org+4300/search.pl@input=blah\fR in Windows
|
|
mode. This mode is the default on Windows.
|
|
.Sp
|
|
If you append \fB,nocontrol\fR to the mode, as in
|
|
\&\fBunix,nocontrol\fR, escaping of the control characters is also
|
|
switched off. You can use \fB\-\-restrict\-file\-names=nocontrol\fR to
|
|
turn off escaping of control characters without affecting the choice of
|
|
the \s-1OS\s0 to use as file name restriction mode.
|
|
.IP "\fB\-4\fR" 4
|
|
.IX Item "-4"
|
|
.PD 0
|
|
.IP "\fB\-\-inet4\-only\fR" 4
|
|
.IX Item "--inet4-only"
|
|
.IP "\fB\-6\fR" 4
|
|
.IX Item "-6"
|
|
.IP "\fB\-\-inet6\-only\fR" 4
|
|
.IX Item "--inet6-only"
|
|
.PD
|
|
Force connecting to IPv4 or IPv6 addresses. With \fB\-\-inet4\-only\fR
|
|
or \fB\-4\fR, Wget will only connect to IPv4 hosts, ignoring \s-1AAAA\s0
|
|
records in \s-1DNS\s0, and refusing to connect to IPv6 addresses specified in
|
|
URLs. Conversely, with \fB\-\-inet6\-only\fR or \fB\-6\fR, Wget will
|
|
only connect to IPv6 hosts and ignore A records and IPv4 addresses.
|
|
.Sp
|
|
Neither option should be needed normally. By default, an IPv6\-aware
|
|
Wget will use the address family specified by the host's \s-1DNS\s0 record.
|
|
If the \s-1DNS\s0 responds with both IPv4 and IPv6 addresses, Wget will try
|
|
them in sequence until it finds one it can connect to. (Also see
|
|
the \f(CW\*(C`\-\-prefer\-family\*(C'\fR option described below.)
|
|
.Sp
|
|
These options can be used to deliberately force the use of IPv4 or
|
|
IPv6 address families on dual family systems, usually to aid debugging
|
|
or to deal with broken network configuration. Only one of
|
|
\&\fB\-\-inet6\-only\fR and \fB\-\-inet4\-only\fR may be specified at the
|
|
same time. Neither option is available in Wget compiled without IPv6
|
|
support.
|
|
.IP "\fB\-\-prefer\-family=IPv4/IPv6/none\fR" 4
|
|
.IX Item "--prefer-family=IPv4/IPv6/none"
|
|
When given a choice of several addresses, connect to the addresses
|
|
with the specified address family first. IPv4 addresses are preferred by
|
|
default.
|
|
.Sp
|
|
This avoids spurious errors and connect attempts when accessing hosts
|
|
that resolve to both IPv6 and IPv4 addresses from IPv4 networks. For
|
|
example, \fBwww.kame.net\fR resolves to
|
|
\&\fB2001:200:0:8002:203:47ff:fea5:3085\fR and to
|
|
\&\fB203.178.141.194\fR. When the preferred family is \f(CW\*(C`IPv4\*(C'\fR, the
|
|
IPv4 address is used first; when the preferred family is \f(CW\*(C`IPv6\*(C'\fR,
|
|
the IPv6 address is used first; if the specified value is \f(CW\*(C`none\*(C'\fR,
|
|
the address order returned by \s-1DNS\s0 is used without change.
|
|
.Sp
|
|
Unlike \fB\-4\fR and \fB\-6\fR, this option doesn't inhibit access to
|
|
any address family, it only changes the \fIorder\fR in which the
|
|
addresses are accessed. Also note that the reordering performed by
|
|
this option is \fIstable\fR\-\-\-it doesn't affect order of addresses of
|
|
the same family. That is, the relative order of all IPv4 addresses
|
|
and of all IPv6 addresses remains intact in all cases.
|
|
.IP "\fB\-\-retry\-connrefused\fR" 4
|
|
.IX Item "--retry-connrefused"
|
|
Consider \*(L"connection refused\*(R" a transient error and try again.
|
|
Normally Wget gives up on a \s-1URL\s0 when it is unable to connect to the
|
|
site because failure to connect is taken as a sign that the server is
|
|
not running at all and that retries would not help. This option is
|
|
for mirroring unreliable sites whose servers tend to disappear for
|
|
short periods of time.
|
|
.IP "\fB\-\-user=\fR\fIuser\fR" 4
|
|
.IX Item "--user=user"
|
|
.PD 0
|
|
.IP "\fB\-\-password=\fR\fIpassword\fR" 4
|
|
.IX Item "--password=password"
|
|
.PD
|
|
Specify the username \fIuser\fR and password \fIpassword\fR for both
|
|
\&\s-1FTP\s0 and \s-1HTTP\s0 file retrieval. These parameters can be overridden
|
|
using the \fB\-\-ftp\-user\fR and \fB\-\-ftp\-password\fR options for
|
|
\&\s-1FTP\s0 connections and the \fB\-\-http\-user\fR and \fB\-\-http\-password\fR
|
|
options for \s-1HTTP\s0 connections.
|
|
.Sh "Directory Options"
|
|
.IX Subsection "Directory Options"
|
|
.IP "\fB\-nd\fR" 4
|
|
.IX Item "-nd"
|
|
.PD 0
|
|
.IP "\fB\-\-no\-directories\fR" 4
|
|
.IX Item "--no-directories"
|
|
.PD
|
|
Do not create a hierarchy of directories when retrieving recursively.
|
|
With this option turned on, all files will get saved to the current
|
|
directory, without clobbering (if a name shows up more than once, the
|
|
filenames will get extensions \fB.n\fR).
|
|
.IP "\fB\-x\fR" 4
|
|
.IX Item "-x"
|
|
.PD 0
|
|
.IP "\fB\-\-force\-directories\fR" 4
|
|
.IX Item "--force-directories"
|
|
.PD
|
|
The opposite of \fB\-nd\fR\-\-\-create a hierarchy of directories, even if
|
|
one would not have been created otherwise. E.g. \fBwget \-x
|
|
http://fly.srk.fer.hr/robots.txt\fR will save the downloaded file to
|
|
\&\fIfly.srk.fer.hr/robots.txt\fR.
|
|
.IP "\fB\-nH\fR" 4
|
|
.IX Item "-nH"
|
|
.PD 0
|
|
.IP "\fB\-\-no\-host\-directories\fR" 4
|
|
.IX Item "--no-host-directories"
|
|
.PD
|
|
Disable generation of host-prefixed directories. By default, invoking
|
|
Wget with \fB\-r http://fly.srk.fer.hr/\fR will create a structure of
|
|
directories beginning with \fIfly.srk.fer.hr/\fR. This option disables
|
|
such behavior.
|
|
.IP "\fB\-\-protocol\-directories\fR" 4
|
|
.IX Item "--protocol-directories"
|
|
Use the protocol name as a directory component of local file names. For
|
|
example, with this option, \fBwget \-r http://\fR\fIhost\fR will save to
|
|
\&\fBhttp/\fR\fIhost\fR\fB/...\fR rather than just to \fIhost\fR\fB/...\fR.
|
|
.IP "\fB\-\-cut\-dirs=\fR\fInumber\fR" 4
|
|
.IX Item "--cut-dirs=number"
|
|
Ignore \fInumber\fR directory components. This is useful for getting
|
|
fine-grained control over the directory where recursive retrieval will
|
|
be saved.
|
|
.Sp
|
|
Take, for example, the directory at
|
|
\&\fBftp://ftp.xemacs.org/pub/xemacs/\fR. If you retrieve it with
|
|
\&\fB\-r\fR, it will be saved locally under
|
|
\&\fIftp.xemacs.org/pub/xemacs/\fR. While the \fB\-nH\fR option can
|
|
remove the \fIftp.xemacs.org/\fR part, you are still stuck with
|
|
\&\fIpub/xemacs\fR. This is where \fB\-\-cut\-dirs\fR comes in handy; it
|
|
makes Wget not \*(L"see\*(R" \fInumber\fR remote directory components. Here
|
|
are several examples of how the \fB\-\-cut\-dirs\fR option works.
|
|
.Sp
|
|
.Vb 4
|
|
\& No options \-> ftp.xemacs.org/pub/xemacs/
|
|
\& \-nH \-> pub/xemacs/
|
|
\& \-nH \-\-cut\-dirs=1 \-> xemacs/
|
|
\& \-nH \-\-cut\-dirs=2 \-> .
|
|
.Ve
|
|
.Sp
|
|
.Vb 2
|
|
\& \-\-cut\-dirs=1 \-> ftp.xemacs.org/xemacs/
|
|
\& ...
|
|
.Ve
|
|
.Sp
|
|
If you just want to get rid of the directory structure, this option is
|
|
similar to a combination of \fB\-nd\fR and \fB\-P\fR. However, unlike
|
|
\&\fB\-nd\fR, \fB\-\-cut\-dirs\fR does not lose subdirectories\-\-\-for
|
|
instance, with \fB\-nH \-\-cut\-dirs=1\fR, a \fIbeta/\fR subdirectory will
|
|
be placed in \fIxemacs/beta\fR, as one would expect.
|
|
.IP "\fB\-P\fR \fIprefix\fR" 4
|
|
.IX Item "-P prefix"
|
|
.PD 0
|
|
.IP "\fB\-\-directory\-prefix=\fR\fIprefix\fR" 4
|
|
.IX Item "--directory-prefix=prefix"
|
|
.PD
|
|
Set directory prefix to \fIprefix\fR. The \fIdirectory prefix\fR is the
|
|
directory where all other files and subdirectories will be saved to,
|
|
i.e. the top of the retrieval tree. The default is \fB.\fR (the
|
|
current directory).
|
|
.Sh "\s-1HTTP\s0 Options"
|
|
.IX Subsection "HTTP Options"
|
|
.IP "\fB\-E\fR" 4
|
|
.IX Item "-E"
|
|
.PD 0
|
|
.IP "\fB\-\-html\-extension\fR" 4
|
|
.IX Item "--html-extension"
|
|
.PD
|
|
If a file of type \fBapplication/xhtml+xml\fR or \fBtext/html\fR is
|
|
downloaded and the \s-1URL\s0 does not end with the regexp
|
|
\&\fB\e.[Hh][Tt][Mm][Ll]?\fR, this option will cause the suffix \fB.html\fR
|
|
to be appended to the local filename. This is useful, for instance, when
|
|
you're mirroring a remote site that uses \fB.asp\fR pages, but you want
|
|
the mirrored pages to be viewable on your stock Apache server. Another
|
|
good use for this is when you're downloading CGI-generated materials. A \s-1URL\s0
|
|
like \fBhttp://site.com/article.cgi?25\fR will be saved as
|
|
\&\fIarticle.cgi?25.html\fR.
|
|
.Sp
|
|
Note that filenames changed in this way will be re-downloaded every time
|
|
you re-mirror a site, because Wget can't tell that the local
|
|
\&\fI\fIX\fI.html\fR file corresponds to remote \s-1URL\s0 \fIX\fR (since
|
|
it doesn't yet know that the \s-1URL\s0 produces output of type
|
|
\&\fBtext/html\fR or \fBapplication/xhtml+xml\fR). To prevent this
|
|
re\-downloading, you must use \fB\-k\fR and \fB\-K\fR so that the original
|
|
version of the file will be saved as \fI\fIX\fI.orig\fR.
|
|
.IP "\fB\-\-http\-user=\fR\fIuser\fR" 4
|
|
.IX Item "--http-user=user"
|
|
.PD 0
|
|
.IP "\fB\-\-http\-password=\fR\fIpassword\fR" 4
|
|
.IX Item "--http-password=password"
|
|
.PD
|
|
Specify the username \fIuser\fR and password \fIpassword\fR on an
|
|
\&\s-1HTTP\s0 server. According to the type of the challenge, Wget will
|
|
encode them using either the \f(CW\*(C`basic\*(C'\fR (insecure),
|
|
the \f(CW\*(C`digest\*(C'\fR, or the Windows \f(CW\*(C`NTLM\*(C'\fR authentication scheme.
|
|
.Sp
|
|
Another way to specify username and password is in the \s-1URL\s0 itself. Either method reveals your password to anyone who
|
|
bothers to run \f(CW\*(C`ps\*(C'\fR. To prevent the passwords from being seen,
|
|
store them in \fI.wgetrc\fR or \fI.netrc\fR, and make sure to protect
|
|
those files from other users with \f(CW\*(C`chmod\*(C'\fR. If the passwords are
|
|
really important, do not leave them lying in those files either\-\-\-edit
|
|
the files and delete them after Wget has started the download.
|
|
.IP "\fB\-\-no\-cache\fR" 4
|
|
.IX Item "--no-cache"
|
|
Disable server-side cache. In this case, Wget will send the remote
|
|
server an appropriate directive (\fBPragma: no-cache\fR) to get the
|
|
file from the remote service, rather than returning the cached version.
|
|
This is especially useful for retrieving and flushing out-of-date
|
|
documents on proxy servers.
|
|
.Sp
|
|
Caching is allowed by default.
|
|
.IP "\fB\-\-no\-cookies\fR" 4
|
|
.IX Item "--no-cookies"
|
|
Disable the use of cookies. Cookies are a mechanism for maintaining
|
|
server-side state. The server sends the client a cookie using the
|
|
\&\f(CW\*(C`Set\-Cookie\*(C'\fR header, and the client responds with the same cookie
|
|
upon further requests. Since cookies allow the server owners to keep
|
|
track of visitors and for sites to exchange this information, some
|
|
consider them a breach of privacy. The default is to use cookies;
|
|
however, \fIstoring\fR cookies is not on by default.
|
|
.IP "\fB\-\-load\-cookies\fR \fIfile\fR" 4
|
|
.IX Item "--load-cookies file"
|
|
Load cookies from \fIfile\fR before the first \s-1HTTP\s0 retrieval.
|
|
\&\fIfile\fR is a textual file in the format originally used by Netscape's
|
|
\&\fIcookies.txt\fR file.
|
|
.Sp
|
|
You will typically use this option when mirroring sites that require
|
|
that you be logged in to access some or all of their content. The login
|
|
process typically works by the web server issuing an \s-1HTTP\s0 cookie
|
|
upon receiving and verifying your credentials. The cookie is then
|
|
resent by the browser when accessing that part of the site, and so
|
|
proves your identity.
|
|
.Sp
|
|
Mirroring such a site requires Wget to send the same cookies your
|
|
browser sends when communicating with the site. This is achieved by
|
|
\&\fB\-\-load\-cookies\fR\-\-\-simply point Wget to the location of the
|
|
\&\fIcookies.txt\fR file, and it will send the same cookies your browser
|
|
would send in the same situation. Different browsers keep textual
|
|
cookie files in different locations:
|
|
.RS 4
|
|
.IP "Netscape 4.x." 4
|
|
.IX Item "Netscape 4.x."
|
|
The cookies are in \fI~/.netscape/cookies.txt\fR.
|
|
.IP "Mozilla and Netscape 6.x." 4
|
|
.IX Item "Mozilla and Netscape 6.x."
|
|
Mozilla's cookie file is also named \fIcookies.txt\fR, located
|
|
somewhere under \fI~/.mozilla\fR, in the directory of your profile.
|
|
The full path usually ends up looking somewhat like
|
|
\&\fI~/.mozilla/default/\fIsome-weird-string\fI/cookies.txt\fR.
|
|
.IP "Internet Explorer." 4
|
|
.IX Item "Internet Explorer."
|
|
You can produce a cookie file Wget can use by using the File menu,
|
|
Import and Export, Export Cookies. This has been tested with Internet
|
|
Explorer 5; it is not guaranteed to work with earlier versions.
|
|
.IP "Other browsers." 4
|
|
.IX Item "Other browsers."
|
|
If you are using a different browser to create your cookies,
|
|
\&\fB\-\-load\-cookies\fR will only work if you can locate or produce a
|
|
cookie file in the Netscape format that Wget expects.
|
|
.RE
|
|
.RS 4
|
|
.Sp
|
|
If you cannot use \fB\-\-load\-cookies\fR, there might still be an
|
|
alternative. If your browser supports a \*(L"cookie manager\*(R", you can use
|
|
it to view the cookies used when accessing the site you're mirroring.
|
|
Write down the name and value of the cookie, and manually instruct Wget
|
|
to send those cookies, bypassing the \*(L"official\*(R" cookie support:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-\-no\-cookies \-\-header "Cookie: <name>=<value>"
|
|
.Ve
|
|
.RE
|
|
.IP "\fB\-\-save\-cookies\fR \fIfile\fR" 4
|
|
.IX Item "--save-cookies file"
|
|
Save cookies to \fIfile\fR before exiting. This will not save cookies
|
|
that have expired or that have no expiry time (so\-called \*(L"session
|
|
cookies\*(R"), but also see \fB\-\-keep\-session\-cookies\fR.
|
|
.IP "\fB\-\-keep\-session\-cookies\fR" 4
|
|
.IX Item "--keep-session-cookies"
|
|
When specified, causes \fB\-\-save\-cookies\fR to also save session
|
|
cookies. Session cookies are normally not saved because they are
|
|
meant to be kept in memory and forgotten when you exit the browser.
|
|
Saving them is useful on sites that require you to log in or to visit
|
|
the home page before you can access some pages. With this option,
|
|
multiple Wget runs are considered a single browser session as far as
|
|
the site is concerned.
|
|
.Sp
|
|
Since the cookie file format does not normally carry session cookies,
|
|
Wget marks them with an expiry timestamp of 0. Wget's
|
|
\&\fB\-\-load\-cookies\fR recognizes those as session cookies, but it might
|
|
confuse other browsers. Also note that cookies so loaded will be
|
|
treated as other session cookies, which means that if you want
|
|
\&\fB\-\-save\-cookies\fR to preserve them again, you must use
|
|
\&\fB\-\-keep\-session\-cookies\fR again.
|
|
.IP "\fB\-\-ignore\-length\fR" 4
|
|
.IX Item "--ignore-length"
|
|
Unfortunately, some \s-1HTTP\s0 servers (\s-1CGI\s0 programs, to be more
|
|
precise) send out bogus \f(CW\*(C`Content\-Length\*(C'\fR headers, which makes Wget
|
|
go wild, as it thinks not all the document was retrieved. You can spot
|
|
this syndrome if Wget retries getting the same document again and again,
|
|
each time claiming that the (otherwise normal) connection has closed on
|
|
the very same byte.
|
|
.Sp
|
|
With this option, Wget will ignore the \f(CW\*(C`Content\-Length\*(C'\fR header\-\-\-as
|
|
if it never existed.
|
|
.IP "\fB\-\-header=\fR\fIheader-line\fR" 4
|
|
.IX Item "--header=header-line"
|
|
Send \fIheader-line\fR along with the rest of the headers in each
|
|
\&\s-1HTTP\s0 request. The supplied header is sent as\-is, which means it
|
|
must contain name and value separated by colon, and must not contain
|
|
newlines.
|
|
.Sp
|
|
You may define more than one additional header by specifying
|
|
\&\fB\-\-header\fR more than once.
|
|
.Sp
|
|
.Vb 3
|
|
\& wget \-\-header='Accept\-Charset: iso\-8859\-2' \e
|
|
\& \-\-header='Accept\-Language: hr' \e
|
|
\& http://fly.srk.fer.hr/
|
|
.Ve
|
|
.Sp
|
|
Specification of an empty string as the header value will clear all
|
|
previous user-defined headers.
|
|
.Sp
|
|
As of Wget 1.10, this option can be used to override headers otherwise
|
|
generated automatically. This example instructs Wget to connect to
|
|
localhost, but to specify \fBfoo.bar\fR in the \f(CW\*(C`Host\*(C'\fR header:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-\-header="Host: foo.bar" http://localhost/
|
|
.Ve
|
|
.Sp
|
|
In versions of Wget prior to 1.10 such use of \fB\-\-header\fR caused
|
|
sending of duplicate headers.
|
|
.IP "\fB\-\-max\-redirect=\fR\fInumber\fR" 4
|
|
.IX Item "--max-redirect=number"
|
|
Specifies the maximum number of redirections to follow for a resource.
|
|
The default is 20, which is usually far more than necessary. However, on
|
|
those occasions where you want to allow more (or fewer), this is the
|
|
option to use.
|
|
.IP "\fB\-\-proxy\-user=\fR\fIuser\fR" 4
|
|
.IX Item "--proxy-user=user"
|
|
.PD 0
|
|
.IP "\fB\-\-proxy\-password=\fR\fIpassword\fR" 4
|
|
.IX Item "--proxy-password=password"
|
|
.PD
|
|
Specify the username \fIuser\fR and password \fIpassword\fR for
|
|
authentication on a proxy server. Wget will encode them using the
|
|
\&\f(CW\*(C`basic\*(C'\fR authentication scheme.
|
|
.Sp
|
|
Security considerations similar to those with \fB\-\-http\-password\fR
|
|
pertain here as well.
|
|
.IP "\fB\-\-referer=\fR\fIurl\fR" 4
|
|
.IX Item "--referer=url"
|
|
Include `Referer: \fIurl\fR' header in \s-1HTTP\s0 request. Useful for
|
|
retrieving documents with server-side processing that assume they are
|
|
always being retrieved by interactive web browsers and only come out
|
|
properly when Referer is set to one of the pages that point to them.
|
|
.IP "\fB\-\-save\-headers\fR" 4
|
|
.IX Item "--save-headers"
|
|
Save the headers sent by the \s-1HTTP\s0 server to the file, preceding the
|
|
actual contents, with an empty line as the separator.
|
|
.IP "\fB\-U\fR \fIagent-string\fR" 4
|
|
.IX Item "-U agent-string"
|
|
.PD 0
|
|
.IP "\fB\-\-user\-agent=\fR\fIagent-string\fR" 4
|
|
.IX Item "--user-agent=agent-string"
|
|
.PD
|
|
Identify as \fIagent-string\fR to the \s-1HTTP\s0 server.
|
|
.Sp
|
|
The \s-1HTTP\s0 protocol allows the clients to identify themselves using a
|
|
\&\f(CW\*(C`User\-Agent\*(C'\fR header field. This enables distinguishing the
|
|
\&\s-1WWW\s0 software, usually for statistical purposes or for tracing of
|
|
protocol violations. Wget normally identifies as
|
|
\&\fBWget/\fR\fIversion\fR, \fIversion\fR being the current version
|
|
number of Wget.
|
|
.Sp
|
|
However, some sites have been known to impose the policy of tailoring
|
|
the output according to the \f(CW\*(C`User\-Agent\*(C'\fR\-supplied information.
|
|
While this is not such a bad idea in theory, it has been abused by
|
|
servers denying information to clients other than (historically)
|
|
Netscape or, more frequently, Microsoft Internet Explorer. This
|
|
option allows you to change the \f(CW\*(C`User\-Agent\*(C'\fR line issued by Wget.
|
|
Use of this option is discouraged, unless you really know what you are
|
|
doing.
|
|
.Sp
|
|
Specifying an empty user agent with \fB\-\-user\-agent=""\fR instructs Wget
|
|
not to send the \f(CW\*(C`User\-Agent\*(C'\fR header in \s-1HTTP\s0 requests.
|
|
.IP "\fB\-\-post\-data=\fR\fIstring\fR" 4
|
|
.IX Item "--post-data=string"
|
|
.PD 0
|
|
.IP "\fB\-\-post\-file=\fR\fIfile\fR" 4
|
|
.IX Item "--post-file=file"
|
|
.PD
|
|
Use \s-1POST\s0 as the method for all \s-1HTTP\s0 requests and send the specified data
|
|
in the request body. \f(CW\*(C`\-\-post\-data\*(C'\fR sends \fIstring\fR as data,
|
|
whereas \f(CW\*(C`\-\-post\-file\*(C'\fR sends the contents of \fIfile\fR. Other than
|
|
that, they work in exactly the same way.
|
|
.Sp
|
|
Please be aware that Wget needs to know the size of the \s-1POST\s0 data in
|
|
advance. Therefore the argument to \f(CW\*(C`\-\-post\-file\*(C'\fR must be a regular
|
|
file; specifying a \s-1FIFO\s0 or something like \fI/dev/stdin\fR won't work.
|
|
It's not quite clear how to work around this limitation inherent in
|
|
\&\s-1HTTP/1\s0.0. Although \s-1HTTP/1\s0.1 introduces \fIchunked\fR transfer that
|
|
doesn't require knowing the request length in advance, a client can't
|
|
use chunked unless it knows it's talking to an \s-1HTTP/1\s0.1 server. And it
|
|
can't know that until it receives a response, which in turn requires the
|
|
request to have been completed \*(-- a chicken-and-egg problem.
|
|
.Sp
|
|
Note: if Wget is redirected after the \s-1POST\s0 request is completed, it
|
|
will not send the \s-1POST\s0 data to the redirected \s-1URL\s0. This is because
|
|
URLs that process \s-1POST\s0 often respond with a redirection to a regular
|
|
page, which does not desire or accept \s-1POST\s0. It is not completely
|
|
clear that this behavior is optimal; if it doesn't work out, it might
|
|
be changed in the future.
|
|
.Sp
|
|
This example shows how to log in to a server using \s-1POST\s0 and then proceed to
|
|
download the desired pages, presumably only accessible to authorized
|
|
users:
|
|
.Sp
|
|
.Vb 4
|
|
\& # Log in to the server. This only needs to be done once.
|
|
\& wget \-\-save\-cookies cookies.txt \e
|
|
\& \-\-post\-data 'user=foo&password=bar' \e
|
|
\& http://server.com/auth.php
|
|
.Ve
|
|
.Sp
|
|
.Vb 3
|
|
\& # Now grab the page or pages we care about.
|
|
\& wget \-\-load\-cookies cookies.txt \e
|
|
\& \-p http://server.com/interesting/article.php
|
|
.Ve
|
|
.Sp
|
|
If the server is using session cookies to track user authentication,
|
|
the above will not work because \fB\-\-save\-cookies\fR will not save
|
|
them (and neither will browsers) and the \fIcookies.txt\fR file will
|
|
be empty. In that case use \fB\-\-keep\-session\-cookies\fR along with
|
|
\&\fB\-\-save\-cookies\fR to force saving of session cookies.
|
|
.IP "\fB\-\-content\-disposition\fR" 4
|
|
.IX Item "--content-disposition"
|
|
If this is set to on, experimental (not fully\-functional) support for
|
|
\&\f(CW\*(C`Content\-Disposition\*(C'\fR headers is enabled. This can currently result in
|
|
extra round-trips to the server for a \f(CW\*(C`HEAD\*(C'\fR request, and is known
|
|
to suffer from a few bugs, which is why it is not currently enabled by default.
|
|
.Sp
|
|
This option is useful for some file-downloading \s-1CGI\s0 programs that use
|
|
\&\f(CW\*(C`Content\-Disposition\*(C'\fR headers to describe what the name of a
|
|
downloaded file should be.
|
|
.IP "\fB\-\-auth\-no\-challenge\fR" 4
|
|
.IX Item "--auth-no-challenge"
|
|
If this option is given, Wget will send Basic \s-1HTTP\s0 authentication
|
|
information (plaintext username and password) for all requests, just
|
|
like Wget 1.10.2 and prior did by default.
|
|
.Sp
|
|
Use of this option is not recommended, and is intended only to support
|
|
a few obscure servers, which never send \s-1HTTP\s0 authentication
|
|
challenges, but accept unsolicited auth info, say, in addition to
|
|
form-based authentication.
|
|
.Sh "\s-1HTTPS\s0 (\s-1SSL/TLS\s0) Options"
|
|
.IX Subsection "HTTPS (SSL/TLS) Options"
|
|
To support encrypted \s-1HTTP\s0 (\s-1HTTPS\s0) downloads, Wget must be compiled
|
|
with an external \s-1SSL\s0 library, currently OpenSSL. If Wget is compiled
|
|
without \s-1SSL\s0 support, none of these options are available.
|
|
.IP "\fB\-\-secure\-protocol=\fR\fIprotocol\fR" 4
|
|
.IX Item "--secure-protocol=protocol"
|
|
Choose the secure protocol to be used. Legal values are \fBauto\fR,
|
|
\&\fBSSLv2\fR, \fBSSLv3\fR, and \fBTLSv1\fR. If \fBauto\fR is used,
|
|
the \s-1SSL\s0 library is given the liberty of choosing the appropriate
|
|
protocol automatically, which is achieved by sending an SSLv2 greeting
|
|
and announcing support for SSLv3 and TLSv1. This is the default.
|
|
.Sp
|
|
Specifying \fBSSLv2\fR, \fBSSLv3\fR, or \fBTLSv1\fR forces the use
|
|
of the corresponding protocol. This is useful when talking to old and
|
|
buggy \s-1SSL\s0 server implementations that make it hard for OpenSSL to
|
|
choose the correct protocol version. Fortunately, such servers are
|
|
quite rare.
|
|
.IP "\fB\-\-no\-check\-certificate\fR" 4
|
|
.IX Item "--no-check-certificate"
|
|
Don't check the server certificate against the available certificate
|
|
authorities. Also don't require the \s-1URL\s0 host name to match the common
|
|
name presented by the certificate.
|
|
.Sp
|
|
As of Wget 1.10, the default is to verify the server's certificate
|
|
against the recognized certificate authorities, breaking the \s-1SSL\s0
|
|
handshake and aborting the download if the verification fails.
|
|
Although this provides more secure downloads, it does break
|
|
interoperability with some sites that worked with previous Wget
|
|
versions, particularly those using self\-signed, expired, or otherwise
|
|
invalid certificates. This option forces an \*(L"insecure\*(R" mode of
|
|
operation that turns the certificate verification errors into warnings
|
|
and allows you to proceed.
|
|
.Sp
|
|
If you encounter \*(L"certificate verification\*(R" errors or ones saying
|
|
that \*(L"common name doesn't match requested host name\*(R", you can use
|
|
this option to bypass the verification and proceed with the download.
|
|
\&\fIOnly use this option if you are otherwise convinced of the
|
|
site's authenticity, or if you really don't care about the validity of
|
|
its certificate.\fR It is almost always a bad idea not to check the
|
|
certificates when transmitting confidential or important data.
|
|
.IP "\fB\-\-certificate=\fR\fIfile\fR" 4
|
|
.IX Item "--certificate=file"
|
|
Use the client certificate stored in \fIfile\fR. This is needed for
|
|
servers that are configured to require certificates from the clients
|
|
that connect to them. Normally a certificate is not required and this
|
|
switch is optional.
|
|
.IP "\fB\-\-certificate\-type=\fR\fItype\fR" 4
|
|
.IX Item "--certificate-type=type"
|
|
Specify the type of the client certificate. Legal values are
|
|
\&\fB\s-1PEM\s0\fR (assumed by default) and \fB\s-1DER\s0\fR, also known as
|
|
\&\fB\s-1ASN1\s0\fR.
|
|
.IP "\fB\-\-private\-key=\fR\fIfile\fR" 4
|
|
.IX Item "--private-key=file"
|
|
Read the private key from \fIfile\fR. This allows you to provide the
|
|
private key in a file separate from the certificate.
|
|
.IP "\fB\-\-private\-key\-type=\fR\fItype\fR" 4
|
|
.IX Item "--private-key-type=type"
|
|
Specify the type of the private key. Accepted values are \fB\s-1PEM\s0\fR
|
|
(the default) and \fB\s-1DER\s0\fR.
|
|
.IP "\fB\-\-ca\-certificate=\fR\fIfile\fR" 4
|
|
.IX Item "--ca-certificate=file"
|
|
Use \fIfile\fR as the file with the bundle of certificate authorities
|
|
(\*(L"\s-1CA\s0\*(R") to verify the peers. The certificates must be in \s-1PEM\s0 format.
|
|
.Sp
|
|
Without this option Wget looks for \s-1CA\s0 certificates at the
|
|
system-specified locations, chosen at OpenSSL installation time.
|
|
.IP "\fB\-\-ca\-directory=\fR\fIdirectory\fR" 4
|
|
.IX Item "--ca-directory=directory"
|
|
Specifies directory containing \s-1CA\s0 certificates in \s-1PEM\s0 format. Each
|
|
file contains one \s-1CA\s0 certificate, and the file name is based on a hash
|
|
value derived from the certificate. This is achieved by processing a
|
|
certificate directory with the \f(CW\*(C`c_rehash\*(C'\fR utility supplied with
|
|
OpenSSL. Using \fB\-\-ca\-directory\fR is more efficient than
|
|
\&\fB\-\-ca\-certificate\fR when many certificates are installed because
|
|
it allows Wget to fetch certificates on demand.
|
|
.Sp
|
|
Without this option Wget looks for \s-1CA\s0 certificates at the
|
|
system-specified locations, chosen at OpenSSL installation time.
|
|
.IP "\fB\-\-random\-file=\fR\fIfile\fR" 4
|
|
.IX Item "--random-file=file"
|
|
Use \fIfile\fR as the source of random data for seeding the
|
|
pseudo-random number generator on systems without \fI/dev/random\fR.
|
|
.Sp
|
|
On such systems the \s-1SSL\s0 library needs an external source of randomness
|
|
to initialize. Randomness may be provided by \s-1EGD\s0 (see
|
|
\&\fB\-\-egd\-file\fR below) or read from an external source specified by
|
|
the user. If this option is not specified, Wget looks for random data
|
|
in \f(CW$RANDFILE\fR or, if that is unset, in \fI$HOME/.rnd\fR. If
|
|
none of those are available, it is likely that \s-1SSL\s0 encryption will not
|
|
be usable.
|
|
.Sp
|
|
If you're getting the \*(L"Could not seed OpenSSL \s-1PRNG\s0; disabling \s-1SSL\s0.\*(R"
|
|
error, you should provide random data using some of the methods
|
|
described above.
|
|
.IP "\fB\-\-egd\-file=\fR\fIfile\fR" 4
|
|
.IX Item "--egd-file=file"
|
|
Use \fIfile\fR as the \s-1EGD\s0 socket. \s-1EGD\s0 stands for \fIEntropy
|
|
Gathering Daemon\fR, a user-space program that collects data from
|
|
various unpredictable system sources and makes it available to other
|
|
programs that might need it. Encryption software, such as the \s-1SSL\s0
|
|
library, needs sources of non-repeating randomness to seed the random
|
|
number generator used to produce cryptographically strong keys.
|
|
.Sp
|
|
OpenSSL allows the user to specify his own source of entropy using the
|
|
\&\f(CW\*(C`RANDFILE\*(C'\fR environment variable. If this variable is unset, or
|
|
if the specified file does not produce enough randomness, OpenSSL will
|
|
read random data from the \s-1EGD\s0 socket specified using this option.
|
|
.Sp
|
|
If this option is not specified (and the equivalent startup command is
|
|
not used), \s-1EGD\s0 is never contacted. \s-1EGD\s0 is not needed on modern Unix
|
|
systems that support \fI/dev/random\fR.
|
|
.Sh "\s-1FTP\s0 Options"
|
|
.IX Subsection "FTP Options"
|
|
.IP "\fB\-\-ftp\-user=\fR\fIuser\fR" 4
|
|
.IX Item "--ftp-user=user"
|
|
.PD 0
|
|
.IP "\fB\-\-ftp\-password=\fR\fIpassword\fR" 4
|
|
.IX Item "--ftp-password=password"
|
|
.PD
|
|
Specify the username \fIuser\fR and password \fIpassword\fR on an
|
|
\&\s-1FTP\s0 server. Without this, or the corresponding startup option,
|
|
the password defaults to \fB\-wget@\fR, normally used for anonymous
|
|
\&\s-1FTP\s0.
|
|
.Sp
|
|
Another way to specify username and password is in the \s-1URL\s0 itself. Either method reveals your password to anyone who
|
|
bothers to run \f(CW\*(C`ps\*(C'\fR. To prevent the passwords from being seen,
|
|
store them in \fI.wgetrc\fR or \fI.netrc\fR, and make sure to protect
|
|
those files from other users with \f(CW\*(C`chmod\*(C'\fR. If the passwords are
|
|
really important, do not leave them lying in those files either\-\-\-edit
|
|
the files and delete them after Wget has started the download.
|
|
.IP "\fB\-\-no\-remove\-listing\fR" 4
|
|
.IX Item "--no-remove-listing"
|
|
Don't remove the temporary \fI.listing\fR files generated by \s-1FTP\s0
|
|
retrievals. Normally, these files contain the raw directory listings
|
|
received from \s-1FTP\s0 servers. Not removing them can be useful for
|
|
debugging purposes, or when you want to be able to easily check on the
|
|
contents of remote server directories (e.g. to verify that a mirror
|
|
you're running is complete).
|
|
.Sp
|
|
Note that even though Wget writes to a known filename for this file,
|
|
this is not a security hole in the scenario of a user making
|
|
\&\fI.listing\fR a symbolic link to \fI/etc/passwd\fR or something and
|
|
asking \f(CW\*(C`root\*(C'\fR to run Wget in his or her directory. Depending on
|
|
the options used, either Wget will refuse to write to \fI.listing\fR,
|
|
making the globbing/recursion/time\-stamping operation fail, or the
|
|
symbolic link will be deleted and replaced with the actual
|
|
\&\fI.listing\fR file, or the listing will be written to a
|
|
\&\fI.listing.\fInumber\fI\fR file.
|
|
.Sp
|
|
Even though this situation isn't a problem, though, \f(CW\*(C`root\*(C'\fR should
|
|
never run Wget in a non-trusted user's directory. A user could do
|
|
something as simple as linking \fIindex.html\fR to \fI/etc/passwd\fR
|
|
and asking \f(CW\*(C`root\*(C'\fR to run Wget with \fB\-N\fR or \fB\-r\fR so the file
|
|
will be overwritten.
|
|
.IP "\fB\-\-no\-glob\fR" 4
|
|
.IX Item "--no-glob"
|
|
Turn off \s-1FTP\s0 globbing. Globbing refers to the use of shell-like
|
|
special characters (\fIwildcards\fR), like \fB*\fR, \fB?\fR, \fB[\fR
|
|
and \fB]\fR to retrieve more than one file from the same directory at
|
|
once, like:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget ftp://gnjilux.srk.fer.hr/*.msg
|
|
.Ve
|
|
.Sp
|
|
By default, globbing will be turned on if the \s-1URL\s0 contains a
|
|
globbing character. This option may be used to turn globbing on or off
|
|
permanently.
|
|
.Sp
|
|
You may have to quote the \s-1URL\s0 to protect it from being expanded by
|
|
your shell. Globbing makes Wget look for a directory listing, which is
|
|
system\-specific. This is why it currently works only with Unix \s-1FTP\s0
|
|
servers (and the ones emulating Unix \f(CW\*(C`ls\*(C'\fR output).
|
|
.IP "\fB\-\-no\-passive\-ftp\fR" 4
|
|
.IX Item "--no-passive-ftp"
|
|
Disable the use of the \fIpassive\fR \s-1FTP\s0 transfer mode. Passive \s-1FTP\s0
|
|
mandates that the client connect to the server to establish the data
|
|
connection rather than the other way around.
|
|
.Sp
|
|
If the machine is connected to the Internet directly, both passive and
|
|
active \s-1FTP\s0 should work equally well. Behind most firewall and \s-1NAT\s0
|
|
configurations passive \s-1FTP\s0 has a better chance of working. However,
|
|
in some rare firewall configurations, active \s-1FTP\s0 actually works when
|
|
passive \s-1FTP\s0 doesn't. If you suspect this to be the case, use this
|
|
option, or set \f(CW\*(C`passive_ftp=off\*(C'\fR in your init file.
|
|
.IP "\fB\-\-retr\-symlinks\fR" 4
|
|
.IX Item "--retr-symlinks"
|
|
Usually, when retrieving \s-1FTP\s0 directories recursively and a symbolic
|
|
link is encountered, the linked-to file is not downloaded. Instead, a
|
|
matching symbolic link is created on the local filesystem. The
|
|
pointed-to file will not be downloaded unless this recursive retrieval
|
|
would have encountered it separately and downloaded it anyway.
|
|
.Sp
|
|
When \fB\-\-retr\-symlinks\fR is specified, however, symbolic links are
|
|
traversed and the pointed-to files are retrieved. At this time, this
|
|
option does not cause Wget to traverse symlinks to directories and
|
|
recurse through them, but in the future it should be enhanced to do
|
|
this.
|
|
.Sp
|
|
Note that when retrieving a file (not a directory) because it was
|
|
specified on the command\-line, rather than because it was recursed to,
|
|
this option has no effect. Symbolic links are always traversed in this
|
|
case.
|
|
.IP "\fB\-\-no\-http\-keep\-alive\fR" 4
|
|
.IX Item "--no-http-keep-alive"
|
|
Turn off the \*(L"keep\-alive\*(R" feature for \s-1HTTP\s0 downloads. Normally, Wget
|
|
asks the server to keep the connection open so that, when you download
|
|
more than one document from the same server, they get transferred over
|
|
the same \s-1TCP\s0 connection. This saves time and at the same time reduces
|
|
the load on the server.
|
|
.Sp
|
|
This option is useful when, for some reason, persistent (keep\-alive)
|
|
connections don't work for you, for example due to a server bug or due
|
|
to the inability of server-side scripts to cope with the connections.
|
|
.Sh "Recursive Retrieval Options"
|
|
.IX Subsection "Recursive Retrieval Options"
|
|
.IP "\fB\-r\fR" 4
|
|
.IX Item "-r"
|
|
.PD 0
|
|
.IP "\fB\-\-recursive\fR" 4
|
|
.IX Item "--recursive"
|
|
.PD
|
|
Turn on recursive retrieving.
|
|
.IP "\fB\-l\fR \fIdepth\fR" 4
|
|
.IX Item "-l depth"
|
|
.PD 0
|
|
.IP "\fB\-\-level=\fR\fIdepth\fR" 4
|
|
.IX Item "--level=depth"
|
|
.PD
|
|
Specify recursion maximum depth level \fIdepth\fR. The default maximum depth is 5.
|
|
.IP "\fB\-\-delete\-after\fR" 4
|
|
.IX Item "--delete-after"
|
|
This option tells Wget to delete every single file it downloads,
|
|
\&\fIafter\fR having done so. It is useful for pre-fetching popular
|
|
pages through a proxy, e.g.:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-r \-nd \-\-delete\-after http://whatever.com/~popular/page/
|
|
.Ve
|
|
.Sp
|
|
The \fB\-r\fR option is to retrieve recursively, and \fB\-nd\fR to not
|
|
create directories.
|
|
.Sp
|
|
Note that \fB\-\-delete\-after\fR deletes files on the local machine. It
|
|
does not issue the \fB\s-1DELE\s0\fR command to remote \s-1FTP\s0 sites, for
|
|
instance. Also note that when \fB\-\-delete\-after\fR is specified,
|
|
\&\fB\-\-convert\-links\fR is ignored, so \fB.orig\fR files are simply not
|
|
created in the first place.
|
|
.IP "\fB\-k\fR" 4
|
|
.IX Item "-k"
|
|
.PD 0
|
|
.IP "\fB\-\-convert\-links\fR" 4
|
|
.IX Item "--convert-links"
|
|
.PD
|
|
After the download is complete, convert the links in the document to
|
|
make them suitable for local viewing. This affects not only the visible
|
|
hyperlinks, but any part of the document that links to external content,
|
|
such as embedded images, links to style sheets, hyperlinks to non-HTML
|
|
content, etc.
|
|
.Sp
|
|
Each link will be changed in one of the two ways:
|
|
.RS 4
|
|
.IP "*" 4
|
|
The links to files that have been downloaded by Wget will be changed to
|
|
refer to the file they point to as a relative link.
|
|
.Sp
|
|
Example: if the downloaded file \fI/foo/doc.html\fR links to
|
|
\&\fI/bar/img.gif\fR, also downloaded, then the link in \fIdoc.html\fR
|
|
will be modified to point to \fB../bar/img.gif\fR. This kind of
|
|
transformation works reliably for arbitrary combinations of directories.
|
|
.IP "*" 4
|
|
The links to files that have not been downloaded by Wget will be changed
|
|
to include host name and absolute path of the location they point to.
|
|
.Sp
|
|
Example: if the downloaded file \fI/foo/doc.html\fR links to
|
|
\&\fI/bar/img.gif\fR (or to \fI../bar/img.gif\fR), then the link in
|
|
\&\fIdoc.html\fR will be modified to point to
|
|
\&\fIhttp://\fIhostname\fI/bar/img.gif\fR.
|
|
.RE
|
|
.RS 4
|
|
.Sp
|
|
Because of this, local browsing works reliably: if a linked file was
|
|
downloaded, the link will refer to its local name; if it was not
|
|
downloaded, the link will refer to its full Internet address rather than
|
|
presenting a broken link. The fact that the former links are converted
|
|
to relative links ensures that you can move the downloaded hierarchy to
|
|
another directory.
|
|
.Sp
|
|
Note that only at the end of the download can Wget know which links have
|
|
been downloaded. Because of that, the work done by \fB\-k\fR will be
|
|
performed at the end of all the downloads.
|
|
.RE
|
|
.IP "\fB\-K\fR" 4
|
|
.IX Item "-K"
|
|
.PD 0
|
|
.IP "\fB\-\-backup\-converted\fR" 4
|
|
.IX Item "--backup-converted"
|
|
.PD
|
|
When converting a file, back up the original version with a \fB.orig\fR
|
|
suffix. Affects the behavior of \fB\-N\fR.
|
|
.IP "\fB\-m\fR" 4
|
|
.IX Item "-m"
|
|
.PD 0
|
|
.IP "\fB\-\-mirror\fR" 4
|
|
.IX Item "--mirror"
|
|
.PD
|
|
Turn on options suitable for mirroring. This option turns on recursion
|
|
and time\-stamping, sets infinite recursion depth and keeps \s-1FTP\s0
|
|
directory listings. It is currently equivalent to
|
|
\&\fB\-r \-N \-l inf \-\-no\-remove\-listing\fR.
|
|
.IP "\fB\-p\fR" 4
|
|
.IX Item "-p"
|
|
.PD 0
|
|
.IP "\fB\-\-page\-requisites\fR" 4
|
|
.IX Item "--page-requisites"
|
|
.PD
|
|
This option causes Wget to download all the files that are necessary to
|
|
properly display a given \s-1HTML\s0 page. This includes such things as
|
|
inlined images, sounds, and referenced stylesheets.
|
|
.Sp
|
|
Ordinarily, when downloading a single \s-1HTML\s0 page, any requisite documents
|
|
that may be needed to display it properly are not downloaded. Using
|
|
\&\fB\-r\fR together with \fB\-l\fR can help, but since Wget does not
|
|
ordinarily distinguish between external and inlined documents, one is
|
|
generally left with \*(L"leaf documents\*(R" that are missing their
|
|
requisites.
|
|
.Sp
|
|
For instance, say document \fI1.html\fR contains an \f(CW\*(C`<IMG>\*(C'\fR tag
|
|
referencing \fI1.gif\fR and an \f(CW\*(C`<A>\*(C'\fR tag pointing to external
|
|
document \fI2.html\fR. Say that \fI2.html\fR is similar but that its
|
|
image is \fI2.gif\fR and it links to \fI3.html\fR. Say this
|
|
continues up to some arbitrarily high number.
|
|
.Sp
|
|
If one executes the command:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-r \-l 2 http://<site>/1.html
|
|
.Ve
|
|
.Sp
|
|
then \fI1.html\fR, \fI1.gif\fR, \fI2.html\fR, \fI2.gif\fR, and
|
|
\&\fI3.html\fR will be downloaded. As you can see, \fI3.html\fR is
|
|
without its requisite \fI3.gif\fR because Wget is simply counting the
|
|
number of hops (up to 2) away from \fI1.html\fR in order to determine
|
|
where to stop the recursion. However, with this command:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-r \-l 2 \-p http://<site>/1.html
|
|
.Ve
|
|
.Sp
|
|
all the above files \fIand\fR \fI3.html\fR's requisite \fI3.gif\fR
|
|
will be downloaded. Similarly,
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-r \-l 1 \-p http://<site>/1.html
|
|
.Ve
|
|
.Sp
|
|
will cause \fI1.html\fR, \fI1.gif\fR, \fI2.html\fR, and \fI2.gif\fR
|
|
to be downloaded. One might think that:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-r \-l 0 \-p http://<site>/1.html
|
|
.Ve
|
|
.Sp
|
|
would download just \fI1.html\fR and \fI1.gif\fR, but unfortunately
|
|
this is not the case, because \fB\-l 0\fR is equivalent to
|
|
\&\fB\-l inf\fR\-\-\-that is, infinite recursion. To download a single \s-1HTML\s0
|
|
page (or a handful of them, all specified on the command-line or in a
|
|
\&\fB\-i\fR \s-1URL\s0 input file) and its (or their) requisites, simply leave off
|
|
\&\fB\-r\fR and \fB\-l\fR:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-p http://<site>/1.html
|
|
.Ve
|
|
.Sp
|
|
Note that Wget will behave as if \fB\-r\fR had been specified, but only
|
|
that single page and its requisites will be downloaded. Links from that
|
|
page to external documents will not be followed. Actually, to download
|
|
a single page and all its requisites (even if they exist on separate
|
|
websites), and make sure the lot displays properly locally, this author
|
|
likes to use a few options in addition to \fB\-p\fR:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-E \-H \-k \-K \-p http://<site>/<document>
|
|
.Ve
|
|
.Sp
|
|
To finish off this topic, it's worth knowing that Wget's idea of an
|
|
external document link is any \s-1URL\s0 specified in an \f(CW\*(C`<A>\*(C'\fR tag, an
|
|
\&\f(CW\*(C`<AREA>\*(C'\fR tag, or a \f(CW\*(C`<LINK>\*(C'\fR tag other than \f(CW\*(C`<LINK
|
|
REL="stylesheet">\*(C'\fR.
|
|
.IP "\fB\-\-strict\-comments\fR" 4
|
|
.IX Item "--strict-comments"
|
|
Turn on strict parsing of \s-1HTML\s0 comments. The default is to terminate
|
|
comments at the first occurrence of \fB\-\->\fR.
|
|
.Sp
|
|
According to specifications, \s-1HTML\s0 comments are expressed as \s-1SGML\s0
|
|
\&\fIdeclarations\fR. Declaration is special markup that begins with
|
|
\&\fB<!\fR and ends with \fB>\fR, such as \fB<!DOCTYPE ...>\fR, that
|
|
may contain comments between a pair of \fB\-\-\fR delimiters. \s-1HTML\s0
|
|
comments are \*(L"empty declarations\*(R", \s-1SGML\s0 declarations without any
|
|
non-comment text. Therefore, \fB<!\-\-foo\-\->\fR is a valid comment, and
|
|
so is \fB<!\-\-one\-\- \-\-two\-\->\fR, but \fB<!\-\-1\-\-2\-\->\fR is not.
|
|
.Sp
|
|
On the other hand, most \s-1HTML\s0 writers don't perceive comments as anything
|
|
other than text delimited with \fB<!\-\-\fR and \fB\-\->\fR, which is not
|
|
quite the same. For example, something like \fB<!\-\-\-\-\-\-\-\-\-\-\-\->\fR
|
|
works as a valid comment as long as the number of dashes is a multiple
|
|
of four (!). If not, the comment technically lasts until the next
|
|
\&\fB\-\-\fR, which may be at the other end of the document. Because of
|
|
this, many popular browsers completely ignore the specification and
|
|
implement what users have come to expect: comments delimited with
|
|
\&\fB<!\-\-\fR and \fB\-\->\fR.
|
|
.Sp
|
|
Until version 1.9, Wget interpreted comments strictly, which resulted in
|
|
missing links in many web pages that displayed fine in browsers, but had
|
|
the misfortune of containing non-compliant comments. Beginning with
|
|
version 1.9, Wget has joined the ranks of clients that implement
|
|
\&\*(L"naive\*(R" comments, terminating each comment at the first occurrence of
|
|
\&\fB\-\->\fR.
|
|
.Sp
|
|
If, for whatever reason, you want strict comment parsing, use this
|
|
option to turn it on.
|
|
.Sh "Recursive Accept/Reject Options"
|
|
.IX Subsection "Recursive Accept/Reject Options"
|
|
.IP "\fB\-A\fR \fIacclist\fR \fB\-\-accept\fR \fIacclist\fR" 4
|
|
.IX Item "-A acclist --accept acclist"
|
|
.PD 0
|
|
.IP "\fB\-R\fR \fIrejlist\fR \fB\-\-reject\fR \fIrejlist\fR" 4
|
|
.IX Item "-R rejlist --reject rejlist"
|
|
.PD
|
|
Specify comma-separated lists of file name suffixes or patterns to
|
|
accept or reject. Note that if
|
|
any of the wildcard characters, \fB*\fR, \fB?\fR, \fB[\fR or
|
|
\&\fB]\fR, appear in an element of \fIacclist\fR or \fIrejlist\fR,
|
|
it will be treated as a pattern, rather than a suffix.
|
|
.IP "\fB\-D\fR \fIdomain-list\fR" 4
|
|
.IX Item "-D domain-list"
|
|
.PD 0
|
|
.IP "\fB\-\-domains=\fR\fIdomain-list\fR" 4
|
|
.IX Item "--domains=domain-list"
|
|
.PD
|
|
Set domains to be followed. \fIdomain-list\fR is a comma-separated list
|
|
of domains. Note that it does \fInot\fR turn on \fB\-H\fR.
|
|
.IP "\fB\-\-exclude\-domains\fR \fIdomain-list\fR" 4
|
|
.IX Item "--exclude-domains domain-list"
|
|
Specify the domains that are \fInot\fR to be followed.
|
|
.IP "\fB\-\-follow\-ftp\fR" 4
|
|
.IX Item "--follow-ftp"
|
|
Follow \s-1FTP\s0 links from \s-1HTML\s0 documents. Without this option,
|
|
Wget will ignore all the \s-1FTP\s0 links.
|
|
.IP "\fB\-\-follow\-tags=\fR\fIlist\fR" 4
|
|
.IX Item "--follow-tags=list"
|
|
Wget has an internal table of \s-1HTML\s0 tag / attribute pairs that it
|
|
considers when looking for linked documents during a recursive
|
|
retrieval. If a user wants only a subset of those tags to be
|
|
considered, however, he or she should specify such tags in a
|
|
comma-separated \fIlist\fR with this option.
|
|
.IP "\fB\-\-ignore\-tags=\fR\fIlist\fR" 4
|
|
.IX Item "--ignore-tags=list"
|
|
This is the opposite of the \fB\-\-follow\-tags\fR option. To skip
|
|
certain \s-1HTML\s0 tags when recursively looking for documents to download,
|
|
specify them in a comma-separated \fIlist\fR.
|
|
.Sp
|
|
In the past, this option was the best bet for downloading a single page
|
|
and its requisites, using a command-line like:
|
|
.Sp
|
|
.Vb 1
|
|
\& wget \-\-ignore\-tags=a,area \-H \-k \-K \-r http://<site>/<document>
|
|
.Ve
|
|
.Sp
|
|
However, the author of this option came across a page with tags like
|
|
\&\f(CW\*(C`<LINK REL="home" HREF="/">\*(C'\fR and came to the realization that
|
|
specifying tags to ignore was not enough. One can't just tell Wget to
|
|
ignore \f(CW\*(C`<LINK>\*(C'\fR, because then stylesheets will not be downloaded.
|
|
Now the best bet for downloading a single page and its requisites is the
|
|
dedicated \fB\-\-page\-requisites\fR option.
|
|
.IP "\fB\-\-ignore\-case\fR" 4
|
|
.IX Item "--ignore-case"
|
|
Ignore case when matching files and directories. This influences the
|
|
behavior of \-R, \-A, \-I, and \-X options, as well as globbing
|
|
implemented when downloading from \s-1FTP\s0 sites. For example, with this
|
|
option, \fB\-A *.txt\fR will match \fBfile1.txt\fR, but also
|
|
\&\fBfile2.TXT\fR, \fBfile3.TxT\fR, and so on.
|
|
.IP "\fB\-H\fR" 4
|
|
.IX Item "-H"
|
|
.PD 0
|
|
.IP "\fB\-\-span\-hosts\fR" 4
|
|
.IX Item "--span-hosts"
|
|
.PD
|
|
Enable spanning across hosts when doing recursive retrieving.
|
|
.IP "\fB\-L\fR" 4
|
|
.IX Item "-L"
|
|
.PD 0
|
|
.IP "\fB\-\-relative\fR" 4
|
|
.IX Item "--relative"
|
|
.PD
|
|
Follow relative links only. Useful for retrieving a specific home page
|
|
without any distractions, not even those from the same hosts.
|
|
.IP "\fB\-I\fR \fIlist\fR" 4
|
|
.IX Item "-I list"
|
|
.PD 0
|
|
.IP "\fB\-\-include\-directories=\fR\fIlist\fR" 4
|
|
.IX Item "--include-directories=list"
|
|
.PD
|
|
Specify a comma-separated list of directories you wish to follow when
|
|
downloading. Elements
|
|
of \fIlist\fR may contain wildcards.
|
|
.IP "\fB\-X\fR \fIlist\fR" 4
|
|
.IX Item "-X list"
|
|
.PD 0
|
|
.IP "\fB\-\-exclude\-directories=\fR\fIlist\fR" 4
|
|
.IX Item "--exclude-directories=list"
|
|
.PD
|
|
Specify a comma-separated list of directories you wish to exclude from
|
|
download. Elements of
|
|
\&\fIlist\fR may contain wildcards.
|
|
.IP "\fB\-np\fR" 4
|
|
.IX Item "-np"
|
|
.PD 0
|
|
.IP "\fB\-\-no\-parent\fR" 4
|
|
.IX Item "--no-parent"
|
|
.PD
|
|
Do not ever ascend to the parent directory when retrieving recursively.
|
|
This is a useful option, since it guarantees that only the files
|
|
\&\fIbelow\fR a certain hierarchy will be downloaded.
|
|
.SH "FILES"
|
|
.IX Header "FILES"
|
|
.IP "\fB/usr/local/etc/wgetrc\fR" 4
|
|
.IX Item "/usr/local/etc/wgetrc"
|
|
Default location of the \fIglobal\fR startup file.
|
|
.IP "\fB.wgetrc\fR" 4
|
|
.IX Item ".wgetrc"
|
|
User startup file.
|
|
.SH "BUGS"
|
|
.IX Header "BUGS"
|
|
You are welcome to submit bug reports via the \s-1GNU\s0 Wget bug tracker (see
|
|
<\fBhttp://wget.addictivecode.org/BugTracker\fR>).
|
|
.PP
|
|
Before actually submitting a bug report, please try to follow a few
|
|
simple guidelines.
|
|
.IP "1." 4
|
|
Please try to ascertain that the behavior you see really is a bug. If
|
|
Wget crashes, it's a bug. If Wget does not behave as documented,
|
|
it's a bug. If things work strangely, but you are not sure about the way
|
|
they are supposed to work, it might well be a bug, but you might want to
|
|
double-check the documentation and the mailing lists.
|
|
.IP "2." 4
|
|
Try to repeat the bug in as simple circumstances as possible. E.g. if
|
|
Wget crashes while downloading \fBwget \-rl0 \-kKE \-t5 \-\-no\-proxy
|
|
http://yoyodyne.com \-o /tmp/log\fR, you should try to see if the crash is
|
|
repeatable, and if it will occur with a simpler set of options. You might
|
|
even try to start the download at the page where the crash occurred to
|
|
see if that page somehow triggered the crash.
|
|
.Sp
|
|
Also, while I will probably be interested to know the contents of your
|
|
\&\fI.wgetrc\fR file, just dumping it into the debug message is probably
|
|
a bad idea. Instead, you should first try to see if the bug repeats
|
|
with \fI.wgetrc\fR moved out of the way. Only if it turns out that
|
|
\&\fI.wgetrc\fR settings affect the bug, mail me the relevant parts of
|
|
the file.
|
|
.IP "3." 4
|
|
Please start Wget with \fB\-d\fR option and send us the resulting
|
|
output (or relevant parts thereof). If Wget was compiled without
|
|
debug support, recompile it\-\-\-it is \fImuch\fR easier to trace bugs
|
|
with debug support on.
|
|
.Sp
|
|
Note: please make sure to remove any potentially sensitive information
|
|
from the debug log before sending it to the bug address. The
|
|
\&\f(CW\*(C`\-d\*(C'\fR won't go out of its way to collect sensitive information,
|
|
but the log \fIwill\fR contain a fairly complete transcript of Wget's
|
|
communication with the server, which may include passwords and pieces
|
|
of downloaded data. Since the bug address is publicly archived, you
|
|
may assume that all bug reports are visible to the public.
|
|
.IP "4." 4
|
|
If Wget has crashed, try to run it in a debugger, e.g. \f(CW\*(C`gdb `which
|
|
wget` core\*(C'\fR and type \f(CW\*(C`where\*(C'\fR to get the backtrace. This may not
|
|
work if the system administrator has disabled core files, but it is
|
|
safe to try.
|
|
.SH "SEE ALSO"
|
|
.IX Header "SEE ALSO"
|
|
This is \fBnot\fR the complete manual for \s-1GNU\s0 Wget.
|
|
For more complete information, including more detailed explanations of
|
|
some of the options, and a number of commands available
|
|
for use with \fI.wgetrc\fR files and the \fB\-e\fR option, see the \s-1GNU\s0
|
|
Info entry for \fIwget\fR.
|
|
.SH "AUTHOR"
|
|
.IX Header "AUTHOR"
|
|
Originally written by Hrvoje Niksic <hniksic@xemacs.org>.
|
|
Currently maintained by Micah Cowan <micah@cowan.name>.
|
|
.SH "COPYRIGHT"
|
|
.IX Header "COPYRIGHT"
|
|
Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
|
|
2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
|
.PP
|
|
Permission is granted to copy, distribute and/or modify this document
|
|
under the terms of the \s-1GNU\s0 Free Documentation License, Version 1.2 or
|
|
any later version published by the Free Software Foundation; with no
|
|
Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A
|
|
copy of the license is included in the section entitled \*(L"\s-1GNU\s0 Free
|
|
Documentation License\*(R".
|