summaryrefslogtreecommitdiff
path: root/libhttplib2.tex
blob: 0c958c3f5bb8ffeda85dbecbc61dcfe8464be3fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
% Template for a library manual section.
% PLEASE REMOVE THE COMMENTS AFTER USING THE TEMPLATE
%
% Complete documentation on the extended LaTeX markup used for Python
% documentation is available in ``Documenting Python'', which is part
% of the standard documentation for Python.  It may be found online
% at:
%
%     http://www.python.org/doc/current/doc/doc.html

% ==== 0. ====
% Copy this file to <mydir>/lib<mymodule>.tex, and edit that file
% according to the instructions below.


% ==== 1. ====
% The section prologue.  Give the section a title and provide some
% meta-information.  References to the module should use
% \refbimodindex, \refstmodindex, \refexmodindex or \refmodindex, as
% appropriate.


\section{\module{httplib2}
         A comprehensive HTTP client library.  }

% Choose one of these to specify the module module name.  If there's
% an underscore in the name, use
% \declaremodule[modname]{...}{mod_name} instead.
%
\declaremodule{}{httplib2}                     % not standard, in Python

% Portability statement:  Uncomment and fill in the parameter to specify the
% availability of the module.  The parameter can be Unix, IRIX, SunOS, Mac,
% Windows, or lots of other stuff.  When ``Mac'' is specified, the availability
% statement will say ``Macintosh'' and the Module Index may say ``Mac''.
% Please use a name that has already been used whenever applicable.  If this
% is omitted, no availability statement is produced or implied.
%
%   \platform{Unix}

% These apply to all modules, and may be given more than once:

\moduleauthor{Joe Gregorio}{joe@bitworking.org}         % Author of the module code;
                                        % omit if not known.
\sectionauthor{Joe Gregorio}{joe@bitworking.org}                % Author of the documentation,
                                        % even if not a module section.


% Leave at least one blank line after this, to simplify ad-hoc tools
% that are sometimes used to massage these files.
\modulesynopsis{A comprehensive HTTP client library, \module{httplib2} supports many features left out of other HTTP libraries.}


% ==== 2. ====
% Give a short overview of what the module does.
% If it is platform specific, mention this.
% Mention other important restrictions or general operating principles.
% For example:

The \module{httplib2} module is a comprehensive HTTP client library with the following features:

\begin{description}
\item[HTTP and HTTPS]  HTTPS support is only available if the socket module was compiled with SSL support.
\item[Keep-Alive]    Supports HTTP 1.1 Keep-Alive, keeping the socket open and performing multiple requests over the same connection if possible.
\item[Authentication] The following three types of HTTP Authentication are supported. These can be used over both HTTP and HTTPS.
    \begin{itemize}
        \item Digest
        \item Basic
        \item WSSE
    \end{itemize}
\item[Caching]
    The module can optionally operate with a private cache that understands the Cache-Control: header and uses both the ETag and Last-Modified cache validators.
\item[All Methods]
    The module can handle any HTTP request method, not just GET and POST.
\item[Redirects]
    Automatically follows 3XX redirects on GETs.
\item[Compression]
    Handles both 'deflate' and 'gzip' types of compression.
\item[Proxies]
    If the Socksipy module is installed then httplib2 can handle sock4, sock5 and http proxies.
\item[Lost update support]
    Automatically adds back ETags into PUT requests to resources we have already cached. This implements Section 3.2 of Detecting the Lost Update Problem Using Unreserved Checkout
\end{description}

% ==== 3. ====
% List the public functions defined by the module.  Begin with a
% standard phrase.  You may also list the exceptions and other data
% items defined in the module, insofar as they are important for the
% user.

The \module{httplib2} module defines the following variables:
% ---- 3.2. ----
% Data items are described using a ``datadesc'' block.  This has only
% one parameter: the item's name.

\begin{datadesc}{debuglevel}
The amount of debugging information to print. The default is 0.
\end{datadesc}

\begin{datadesc}{RETRIES}
A request will be tried 'RETRIES' times if it fails at the socket/connection level.
The default is 2.
\end{datadesc}

% --- 3.3. ---
% Exceptions are described using a ``excdesc'' block.  This has only
% one parameter: the exception name.  Exceptions defined as classes in
% the source code should be documented using this environment, but
% constructor parameters must be omitted.

The \module{httplib2} module may raise the following Exceptions. Note that
there is an option that turns exceptions into
normal responses with an HTTP status code indicating
an error occured. See \member{Http.force_exception_to_status_code}

\begin{excdesc}{HttpLib2Error}
The Base Exception for all exceptions raised by httplib2.
\end{excdesc}

\begin{excdesc}{RedirectMissingLocation}
A 3xx redirect response code was provided but no Location: header
was provided to point to the new location.
\end{excdesc}

\begin{excdesc}{RedirectLimit}
The maximum number of redirections was reached without coming to a final URI.
\end{excdesc}


\begin{excdesc}{ServerNotFoundError}
Unable to resolve the host name given.
\end{excdesc}

\begin{excdesc}{RelativeURIError}
A relative, as opposed to an absolute URI, was passed into request().
\end{excdesc}

\begin{excdesc}{FailedToDecompressContent}
The headers claimed that the content of the response was compressed but the
decompression algorithm applied to the content failed.
\end{excdesc}

\begin{excdesc}{UnimplementedDigestAuthOptionError}
The server requested a type of Digest authentication that we
are unfamiliar with.
\end{excdesc}

\begin{excdesc}{UnimplementedHmacDigestAuthOptionError}
The server requested a type of HMACDigest authentication that we
are unfamiliar with.
\end{excdesc}

% ---- 3.4. ----
% Other standard environments:
%
%  classdesc    - Python classes; same arguments are funcdesc
%  methoddesc   - methods, like funcdesc but has an optional parameter
%                 to give the type name: \begin{methoddesc}[mytype]{name}{args}
%                 By default, the type name will be the name of the
%                 last class defined using classdesc.  The type name
%                 is required if the type is implemented in C (because
%                 there's no classdesc) or if the class isn't directly
%                 documented (if it's private).
%  memberdesc   - data members, like datadesc, but with an optional
%                 type name like methoddesc.

\begin{classdesc}{Http}{\optional{cache=None}, \optional{timeout=None}, \optional{proxy_info=None}}
The class that represents a client HTTP interface.
The \var{cache} parameter is either the name of a directory
to be used as a flat file cache, or it must an object that
implements the required caching interface.
The \var{timeout} parameter is the socket level timeout.
The \var{proxy_info} is an instance of \class{ProxyInfo} and is supplied
if a proxy is to be used. Note that the Socksipy module must be
installed for proxy support to work.
\end{classdesc}

\begin{classdesc}{Response}{info}
Response is a subclass of \class{dict} and instances of this
class are returned from calls
to Http.request. The \var{info} parameter is either
an \class{rfc822.Message} or an \class{httplib.HTTPResponse} object.
\end{classdesc}

\begin{classdesc}{FileCache}{dir_name, \optional{safe=safename}}
FileCache implements a Cache as a directory of files.
The \var{dir_name} parameter is
the name of the directory to use. If the directory does
not exist then FileCache attempts to create the directory.
The optional \var{safe} parameter is a funtion which generates
the cache filename for each URI. A FileCache object is
constructed and used for caching when you pass a directory name
into the constructor of \class{Http}.
\end{classdesc}

\begin{classdesc}{ProxyInfo}{proxy_type, proxy_host, proxy_port, \optional{proxy_rdns=None}, \optional{proxy_user=None}, \optional{proxy_pass=None}}
The parameter \var{proxy_type} must be set to one of socks.PROXY_TYPE_XXX
constants. The \var{proxy_host} and \var{proxy_port} must be set to the location
of the proxy. The optional \var{proxy_rdns} should be set to True if
the DNS server on the proxy should be used. The \var{proxy_user} and
\var{proxy_pass} are supplied when the proxy is protected by authentication.
\end{classdesc}


% If your module defines new object types (for a built-in module) or
% classes (for a module written in Python), you should list the
% methods and instance variables (if any) of each type or class in a
% separate subsection.

\subsection{Http Objects}
\label{http-objects}
% This label is generally useful for referencing this section, but is
% also used to give a filename when generating HTML.

Http objects have the following methods:

\begin{methoddesc}[Http]{request}{uri, \optional{method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None}}
Performs a single HTTP request.
The \var{uri} is the URI of the HTTP resource and can begin with either \code{http} or \code{https}. The value of \var{uri} must be an absolute URI.

The \var{method} is the HTTP method to perform, such as \code{GET}, \code{POST}, \code{DELETE}, etc. There is no restriction
on the methods allowed.

The \var{body} is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
\var{headers} dictionary.

The maximum number of redirect to follow before raising an exception is \var{redirections}. The default is 5.

The \var{connection_type} is the type of connection object to use. The supplied class
should implement the interface of httplib.HTTPConnection.

The return value is a tuple of (response, content), the first being and instance of the
\class{Response} class, the second being a string that contains the response entity body.
\end{methoddesc}

\begin{methoddesc}[Http]{add_credentials}{name, password, \optional{domain=None}}
Adds a name and password that will be used when a request
requires authentication. Supplying the optional \var{domain} name will
restrict these credentials to only be sent to the specified
domain. If \var{domain} is not specified then the given credentials will
be used to try to satisfy every HTTP 401 challenge.
\end{methoddesc}

\begin{methoddesc}[Http]{add_certificate}{key, cert, domain}
Add a \var{key} and \var{cert} that will be used for an SSL connection
to the specified domain. \var{keyfile} is the name of a PEM formatted
file that contains your private key. \var{certfile} is a PEM formatted certificate chain file.
\end{methoddesc}

\begin{methoddesc}[Http]{clear_credentials}{}
Remove all the names and passwords used for authentication.
\end{methoddesc}

\begin{memberdesc}[Http]{follow_redirects}
If \code{True}, which is the default, safe redirects are followed, where
safe means that the client is only doing a \code{GET} or \code{HEAD} on the
URI to which it is being redirected. If \code{False} then no redirects are followed.
Note that a False 'follow_redirects' takes precedence over a True 'follow_all_redirects'.
Another way of saying that is for 'follow_all_redirects' to have any affect, 'follow_redirects'
must be True.
\end{memberdesc}

\begin{memberdesc}[Http]{forward_authorization_headers}
If \code{False}, which is the default, then Authorization: headers are
stripped from redirects. If \code{True} then Authorization: headers are left
in place when following redirects. This parameter only applies if following
redirects is turned on. Note that turning this on could cause your credentials
to leak, so carefully consider the consequences.
\end{memberdesc}

\begin{memberdesc}[Http]{follow_all_redirects}
If \code{False}, which is the default, only safe redirects are followed, where
safe means that the client is only doing a \code{GET} or \code{HEAD} on the
URI to which it is being redirected. If \code{True} then all redirects are followed.
Note that a False 'follow_redirects' takes precedence over a True 'follow_all_redirects'.
Another way of saying that is for 'follow_all_redirects' to have any affect, 'follow_redirects'
must be True.
\end{memberdesc}

\begin{memberdesc}[Http]{force_exception_to_status_code}
If \code{True}, which is the default, then no \module{httplib2} exceptions will be thrown. Instead,
those error conditions will be turned into \class{Response} objects
that will be returned normally.

If \code{False}, then exceptions will be thrown.
\end{memberdesc}

\begin{memberdesc}[Http]{ignore_etag}
Defaults to \code{False}. If \code{True}, then any etags present in the cached response
are ignored when processing the current request, i.e. httplib2 does \strong{not} use
'if-match' for PUT or 'if-none-match' when GET or HEAD requests are made. This
is mainly to deal with broken servers which supply an etag, but change it capriciously.
\end{memberdesc}

\subsection{Cache Objects}
\label{cache-objects}
% This label is generally useful for referencing this section, but is
% also used to give a filename when generating HTML.

If you wish to supply your own caching implementation
then you will need to pass in an object that supports the
following methods. Note that the \module{memcache} module
supports this interface natively.

\begin{methoddesc}[Cache]{get}{key}
Takes a string \var{key} and returns the value as a string.
\end{methoddesc}

\begin{methoddesc}[Cache]{set}{key, value}
Takes a string \var{key} and \var{value} and stores it in the cache.
\end{methoddesc}

\begin{methoddesc}[Cache]{delete}{key}
Deletes the cached value stored at \var{key}. The value
of \var{key} is a string.
\end{methoddesc}




\subsection{Response Objects}
\label{response-objects}
% This label is generally useful for referencing this section, but is
% also used to give a filename when generating HTML.

Response objects are derived from \class{dict} and map
header names (lower case with the trailing colon removed)
to header values. In addition to the dict methods
a Response object also has:

\begin{memberdesc}[Response]{fromcache}
If \code{true} the the response was returned from the cache.
\end{memberdesc}

\begin{memberdesc}[Response]{version}
The version of HTTP that the server supports. A value
of 11 means '1.1'.
\end{memberdesc}

\begin{memberdesc}[Response]{status}
The numerical HTTP status code returned in the response.
\end{memberdesc}

\begin{memberdesc}[Response]{reason}
The human readable component of the HTTP response status code.
\end{memberdesc}

\begin{memberdesc}[Response]{previous}
If redirects are followed then the \class{Response} object returned
is just for the very last HTTP request and \var{previous} points to
the previous \class{Response} object. In this manner they form a chain
going back through the responses to the very first response.
Will be \code{None} if there are no previous respones.
\end{memberdesc}

The Response object also populates the header \code{content-location}, that
contains the URI that was ultimately requested. This is useful if
redirects were encountered, you can determine the ultimate URI that
the request was sent to. All Response objects contain this key value,
including \code{previous} responses so you can determine the entire
chain of redirects. If \member{Http.force_exception_to_status_code} is \code{True}
and the number of redirects has exceeded the number of allowed number
of redirects then the \class{Response} object will report the error
in the status code, but the complete chain of previous responses will
still be in tact.


% ==== 4. ====
% Now is probably a good time for a complete example.  (Alternatively,
% an example giving the flavor of the module may be given before the
% detailed list of functions.)

\subsection{Examples \label{httplib2-example}}

To do a simple \code{GET} request just supply the absolute URI
of the resource:

\begin{verbatim}
import httplib2
h = httplib2.Http()
resp, content = h.request("http://bitworking.org/")
assert resp.status == 200
assert resp['content-type'] == 'text/html'
\end{verbatim}

Here is more complex example that does a PUT
of some text to a resource that requires authentication.
The Http instance also uses a file cache
in the directory \code{.cache}.

\begin{verbatim}
import httplib2
h = httplib2.Http(".cache")
h.add_credentials('name', 'password')
resp, content = h.request("https://example.org/chap/2",
    "PUT", body="This is text",
    headers={'content-type':'text/plain'} )
\end{verbatim}

Here is an example that connects to a server that
supports the Atom Publishing Protocol.

\begin{verbatim}
import httplib2
h = httplib2.Http()
h.add_credentials(myname, mypasswd)
h.follow_all_redirects = True
headers = {'Content-Type': 'application/atom+xml'}
body    = """<?xml version="1.0" ?>
    <entry xmlns="http://www.w3.org/2005/Atom">
      <title>Atom-Powered Robots Run Amok</title>
      <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
      <updated>2003-12-13T18:30:02Z</updated>
      <author><name>John Doe</name></author>
      <content>Some text.</content>
</entry>
"""
uri     = "http://www.example.com/collection/"
resp, content = h.request(uri, "POST", body=body, headers=headers)
\end{verbatim}
% Note that there is no trailing ">>> " prompt shown.

Here is an example of providing data to an HTML form processor.
In this case we presume this is a POST form. We need to take our
data and format it as "application/x-www-form-urlencoded" data and use that as a
body for a POST request.

\begin{verbatim}
>>> import httplib2
>>> import urllib
>>> data = {'name': 'fred', 'address': '123 shady lane'}
>>> body = urllib.urlencode(data)
>>> body
'name=fred&address=123+shady+lane'
>>> h = httplib2.Http()
>>> resp, content = h.request("http://example.com", method="POST", body=body)
\end{verbatim}
% Note that there is no trailing ">>> " prompt shown.

Here is an example of using a proxy server:
\begin{verbatim}
import httplib2
import socks

httplib2.debuglevel=4
h = httplib2.Http(proxy_info = httplib2.ProxyInfo(socks.PROXY_TYPE_HTTP, 'localhost', 8000))
r,c = h.request("http://bitworking.org/news/")
\end{verbatim}