… | |
… | |
12 | This module is an AnyEvent user, you need to make sure that you use and |
12 | This module is an AnyEvent user, you need to make sure that you use and |
13 | run a supported event loop. |
13 | run a supported event loop. |
14 | |
14 | |
15 | This module implements a simple, stateless and non-blocking HTTP client. |
15 | This module implements a simple, stateless and non-blocking HTTP client. |
16 | It supports GET, POST and other request methods, cookies and more, all |
16 | It supports GET, POST and other request methods, cookies and more, all |
17 | on a very low level. It can follow redirects supports proxies and |
17 | on a very low level. It can follow redirects, supports proxies, and |
18 | automatically limits the number of connections to the values specified |
18 | automatically limits the number of connections to the values specified |
19 | in the RFC. |
19 | in the RFC. |
20 | |
20 | |
21 | It should generally be a "good client" that is enough for most HTTP |
21 | It should generally be a "good client" that is enough for most HTTP |
22 | tasks. Simple tasks should be simple, but complex tasks should still be |
22 | tasks. Simple tasks should be simple, but complex tasks should still be |
… | |
… | |
28 | limited support. |
28 | limited support. |
29 | |
29 | |
30 | METHODS |
30 | METHODS |
31 | http_get $url, key => value..., $cb->($data, $headers) |
31 | http_get $url, key => value..., $cb->($data, $headers) |
32 | Executes an HTTP-GET request. See the http_request function for |
32 | Executes an HTTP-GET request. See the http_request function for |
33 | details on additional parameters. |
33 | details on additional parameters and the return value. |
34 | |
34 | |
35 | http_head $url, key => value..., $cb->($data, $headers) |
35 | http_head $url, key => value..., $cb->($data, $headers) |
36 | Executes an HTTP-HEAD request. See the http_request function for |
36 | Executes an HTTP-HEAD request. See the http_request function for |
37 | details on additional parameters. |
37 | details on additional parameters and the return value. |
38 | |
38 | |
39 | http_post $url, $body, key => value..., $cb->($data, $headers) |
39 | http_post $url, $body, key => value..., $cb->($data, $headers) |
40 | Executes an HTTP-POST request with a request body of $body. See the |
40 | Executes an HTTP-POST request with a request body of $body. See the |
41 | http_request function for details on additional parameters. |
41 | http_request function for details on additional parameters and the |
|
|
42 | return value. |
42 | |
43 | |
43 | http_request $method => $url, key => value..., $cb->($data, $headers) |
44 | http_request $method => $url, key => value..., $cb->($data, $headers) |
44 | Executes a HTTP request of type $method (e.g. "GET", "POST"). The |
45 | Executes a HTTP request of type $method (e.g. "GET", "POST"). The |
45 | URL must be an absolute http or https URL. |
46 | URL must be an absolute http or https URL. |
46 | |
47 | |
|
|
48 | When called in void context, nothing is returned. In other contexts, |
|
|
49 | "http_request" returns a "cancellation guard" - you have to keep the |
|
|
50 | object at least alive until the callback gets called. If the object |
|
|
51 | gets destroyed before the callback is called, the request will be |
|
|
52 | cancelled. |
|
|
53 | |
47 | The callback will be called with the response data as first argument |
54 | The callback will be called with the response body data as first |
48 | (or "undef" if it wasn't available due to errors), and a hash-ref |
55 | argument (or "undef" if an error occurred), and a hash-ref with |
49 | with response headers as second argument. |
56 | response headers (and trailers) as second argument. |
50 | |
57 | |
51 | All the headers in that hash are lowercased. In addition to the |
58 | All the headers in that hash are lowercased. In addition to the |
52 | response headers, the "pseudo-headers" "HTTPVersion", "Status" and |
59 | response headers, the "pseudo-headers" (uppercase to avoid clashing |
|
|
60 | with possible response headers) "HTTPVersion", "Status" and "Reason" |
53 | "Reason" contain the three parts of the HTTP Status-Line of the same |
61 | contain the three parts of the HTTP Status-Line of the same name. If |
|
|
62 | an error occurs during the body phase of a request, then the |
|
|
63 | original "Status" and "Reason" values from the header are available |
|
|
64 | as "OrigStatus" and "OrigReason". |
|
|
65 | |
54 | name. The pseudo-header "URL" contains the original URL (which can |
66 | The pseudo-header "URL" contains the actual URL (which can differ |
55 | differ from the requested URL when following redirects). |
67 | from the requested URL when following redirects - for example, you |
|
|
68 | might get an error that your URL scheme is not supported even though |
|
|
69 | your URL is a valid http URL because it redirected to an ftp URL, in |
|
|
70 | which case you can look at the URL pseudo header). |
56 | |
71 | |
|
|
72 | The pseudo-header "Redirect" only exists when the request was a |
|
|
73 | result of an internal redirect. In that case it is an array |
|
|
74 | reference with the "($data, $headers)" from the redirect response. |
|
|
75 | Note that this response could in turn be the result of a redirect |
|
|
76 | itself, and "$headers->{Redirect}[1]{Redirect}" will then contain |
|
|
77 | the original response, and so on. |
|
|
78 | |
57 | If the server sends a header multiple lines, then their contents |
79 | If the server sends a header multiple times, then their contents |
58 | will be joined together with "\x00". |
80 | will be joined together with a comma (","), as per the HTTP spec. |
59 | |
81 | |
60 | If an internal error occurs, such as not being able to resolve a |
82 | If an internal error occurs, such as not being able to resolve a |
61 | hostname, then $data will be "undef", "$headers->{Status}" will be |
83 | hostname, then $data will be "undef", "$headers->{Status}" will be |
62 | 599 and the "Reason" pseudo-header will contain an error message. |
84 | 590-599 and the "Reason" pseudo-header will contain an error |
|
|
85 | message. Currently the following status codes are used: |
|
|
86 | |
|
|
87 | 595 - errors during connection establishment, proxy handshake. |
|
|
88 | 596 - errors during TLS negotiation, request sending and header |
|
|
89 | processing. |
|
|
90 | 597 - errors during body receiving or processing. |
|
|
91 | 598 - user aborted request via "on_header" or "on_body". |
|
|
92 | 599 - other, usually nonretryable, errors (garbled URL etc.). |
63 | |
93 | |
64 | A typical callback might look like this: |
94 | A typical callback might look like this: |
65 | |
95 | |
66 | sub { |
96 | sub { |
67 | my ($body, $hdr) = @_; |
97 | my ($body, $hdr) = @_; |
… | |
… | |
81 | authentication retries and so on, and how often to do so. |
111 | authentication retries and so on, and how often to do so. |
82 | |
112 | |
83 | headers => hashref |
113 | headers => hashref |
84 | The request headers to use. Currently, "http_request" may |
114 | The request headers to use. Currently, "http_request" may |
85 | provide its own "Host:", "Content-Length:", "Connection:" and |
115 | provide its own "Host:", "Content-Length:", "Connection:" and |
86 | "Cookie:" headers and will provide defaults for "User-Agent:" |
116 | "Cookie:" headers and will provide defaults at least for "TE:", |
87 | and "Referer:". |
117 | "Referer:" and "User-Agent:" (this can be suppressed by using |
|
|
118 | "undef" for these headers in which case they won't be sent at |
|
|
119 | all). |
|
|
120 | |
|
|
121 | You really should provide your own "User-Agent:" header value |
|
|
122 | that is appropriate for your program - I wouldn't be surprised |
|
|
123 | if the default AnyEvent string gets blocked by webservers sooner |
|
|
124 | or later. |
88 | |
125 | |
89 | timeout => $seconds |
126 | timeout => $seconds |
90 | The time-out to use for various stages - each connect attempt |
127 | The time-out to use for various stages - each connect attempt |
91 | will reset the timeout, as will read or write activity. Default |
128 | will reset the timeout, as will read or write activity, i.e. |
|
|
129 | this is not an overall timeout. |
|
|
130 | |
92 | timeout is 5 minutes. |
131 | Default timeout is 5 minutes. |
93 | |
132 | |
94 | proxy => [$host, $port[, $scheme]] or undef |
133 | proxy => [$host, $port[, $scheme]] or undef |
95 | Use the given http proxy for all requests. If not specified, |
134 | Use the given http proxy for all requests, or no proxy if |
96 | then the default proxy (as specified by $ENV{http_proxy}) is |
|
|
97 | used. |
135 | "undef" is used. |
98 | |
136 | |
99 | $scheme must be either missing or "http" for HTTP, or "https" |
137 | $scheme must be either missing or must be "http" for HTTP. |
100 | for HTTPS. |
138 | |
|
|
139 | If not specified, then the default proxy is used (see |
|
|
140 | "AnyEvent::HTTP::set_proxy"). |
101 | |
141 | |
102 | body => $string |
142 | body => $string |
103 | The request body, usually empty. Will be-sent as-is (future |
143 | The request body, usually empty. Will be sent as-is (future |
104 | versions of this module might offer more options). |
144 | versions of this module might offer more options). |
105 | |
145 | |
106 | cookie_jar => $hash_ref |
146 | cookie_jar => $hash_ref |
107 | Passing this parameter enables (simplified) cookie-processing, |
147 | Passing this parameter enables (simplified) cookie-processing, |
108 | loosely based on the original netscape specification. |
148 | loosely based on the original netscape specification. |
109 | |
149 | |
110 | The $hash_ref must be an (initially empty) hash reference which |
150 | The $hash_ref must be an (initially empty) hash reference which |
111 | will get updated automatically. It is possible to save the |
151 | will get updated automatically. It is possible to save the |
112 | cookie_jar to persistent storage with something like JSON or |
152 | cookie jar to persistent storage with something like JSON or |
113 | Storable, but this is not recommended, as expire times are |
153 | Storable - see the "AnyEvent::HTTP::cookie_jar_expire" function |
114 | currently being ignored. |
154 | if you wish to remove expired or session-only cookies, and also |
|
|
155 | for documentation on the format of the cookie jar. |
115 | |
156 | |
116 | Note that this cookie implementation is not of very high |
157 | Note that this cookie implementation is not meant to be |
117 | quality, nor meant to be complete. If you want complete cookie |
158 | complete. If you want complete cookie management you have to do |
118 | management you have to do that on your own. "cookie_jar" is |
159 | that on your own. "cookie_jar" is meant as a quick fix to get |
119 | meant as a quick fix to get some cookie-using sites working. |
160 | most cookie-using sites working. Cookies are a privacy disaster, |
120 | Cookies are a privacy disaster, do not use them unless required |
161 | do not use them unless required to. |
|
|
162 | |
|
|
163 | When cookie processing is enabled, the "Cookie:" and |
|
|
164 | "Set-Cookie:" headers will be set and handled by this module, |
|
|
165 | otherwise they will be left untouched. |
|
|
166 | |
|
|
167 | tls_ctx => $scheme | $tls_ctx |
|
|
168 | Specifies the AnyEvent::TLS context to be used for https |
|
|
169 | connections. This parameter follows the same rules as the |
|
|
170 | "tls_ctx" parameter to AnyEvent::Handle, but additionally, the |
|
|
171 | two strings "low" or "high" can be specified, which give you a |
|
|
172 | predefined low-security (no verification, highest compatibility) |
|
|
173 | and high-security (CA and common-name verification) TLS context. |
|
|
174 | |
|
|
175 | The default for this option is "low", which could be interpreted |
|
|
176 | as "give me the page, no matter what". |
|
|
177 | |
|
|
178 | See also the "sessionid" parameter. |
|
|
179 | |
|
|
180 | session => $string |
|
|
181 | The module might reuse connections to the same host internally. |
|
|
182 | Sometimes (e.g. when using TLS), you do not want to reuse |
|
|
183 | connections from other sessions. This can be achieved by setting |
|
|
184 | this parameter to some unique ID (such as the address of an |
|
|
185 | object storing your state data, or the TLS context) - only |
|
|
186 | connections using the same unique ID will be reused. |
|
|
187 | |
|
|
188 | on_prepare => $callback->($fh) |
|
|
189 | In rare cases you need to "tune" the socket before it is used to |
|
|
190 | connect (for example, to bind it on a given IP address). This |
|
|
191 | parameter overrides the prepare callback passed to |
|
|
192 | "AnyEvent::Socket::tcp_connect" and behaves exactly the same way |
|
|
193 | (e.g. it has to provide a timeout). See the description for the |
|
|
194 | $prepare_cb argument of "AnyEvent::Socket::tcp_connect" for |
|
|
195 | details. |
|
|
196 | |
|
|
197 | tcp_connect => $callback->($host, $service, $connect_cb, |
|
|
198 | $prepare_cb) |
|
|
199 | In even rarer cases you want total control over how |
|
|
200 | AnyEvent::HTTP establishes connections. Normally it uses |
|
|
201 | AnyEvent::Socket::tcp_connect to do this, but you can provide |
|
|
202 | your own "tcp_connect" function - obviously, it has to follow |
|
|
203 | the same calling conventions, except that it may always return a |
|
|
204 | connection guard object. |
|
|
205 | |
|
|
206 | There are probably lots of weird uses for this function, |
|
|
207 | starting from tracing the hosts "http_request" actually tries to |
|
|
208 | connect, to (inexact but fast) host => IP address caching or |
|
|
209 | even socks protocol support. |
|
|
210 | |
|
|
211 | on_header => $callback->($headers) |
|
|
212 | When specified, this callback will be called with the header |
|
|
213 | hash as soon as headers have been successfully received from the |
|
|
214 | remote server (not on locally-generated errors). |
|
|
215 | |
|
|
216 | It has to return either true (in which case AnyEvent::HTTP will |
|
|
217 | continue), or false, in which case AnyEvent::HTTP will cancel |
|
|
218 | the download (and call the finish callback with an error code of |
121 | to. |
219 | 598). |
122 | |
220 | |
|
|
221 | This callback is useful, among other things, to quickly reject |
|
|
222 | unwanted content, which, if it is supposed to be rare, can be |
|
|
223 | faster than first doing a "HEAD" request. |
|
|
224 | |
|
|
225 | The downside is that cancelling the request makes it impossible |
|
|
226 | to re-use the connection. Also, the "on_header" callback will |
|
|
227 | not receive any trailer (headers sent after the response body). |
|
|
228 | |
|
|
229 | Example: cancel the request unless the content-type is |
|
|
230 | "text/html". |
|
|
231 | |
|
|
232 | on_header => sub { |
|
|
233 | $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/ |
|
|
234 | }, |
|
|
235 | |
|
|
236 | on_body => $callback->($partial_body, $headers) |
|
|
237 | When specified, all body data will be passed to this callback |
|
|
238 | instead of to the completion callback. The completion callback |
|
|
239 | will get the empty string instead of the body data. |
|
|
240 | |
|
|
241 | It has to return either true (in which case AnyEvent::HTTP will |
|
|
242 | continue), or false, in which case AnyEvent::HTTP will cancel |
|
|
243 | the download (and call the completion callback with an error |
|
|
244 | code of 598). |
|
|
245 | |
|
|
246 | The downside to cancelling the request is that it makes it |
|
|
247 | impossible to re-use the connection. |
|
|
248 | |
|
|
249 | This callback is useful when the data is too large to be held in |
|
|
250 | memory (so the callback writes it to a file) or when only some |
|
|
251 | information should be extracted, or when the body should be |
|
|
252 | processed incrementally. |
|
|
253 | |
|
|
254 | It is usually preferred over doing your own body handling via |
|
|
255 | "want_body_handle", but in case of streaming APIs, where HTTP is |
|
|
256 | only used to create a connection, "want_body_handle" is the |
|
|
257 | better alternative, as it allows you to install your own event |
|
|
258 | handler, reducing resource usage. |
|
|
259 | |
|
|
260 | want_body_handle => $enable |
|
|
261 | When enabled (default is disabled), the behaviour of |
|
|
262 | AnyEvent::HTTP changes considerably: after parsing the headers, |
|
|
263 | and instead of downloading the body (if any), the completion |
|
|
264 | callback will be called. Instead of the $body argument |
|
|
265 | containing the body data, the callback will receive the |
|
|
266 | AnyEvent::Handle object associated with the connection. In error |
|
|
267 | cases, "undef" will be passed. When there is no body (e.g. |
|
|
268 | status 304), the empty string will be passed. |
|
|
269 | |
|
|
270 | The handle object might or might not be in TLS mode, might be |
|
|
271 | connected to a proxy, be a persistent connection, use chunked |
|
|
272 | transfer encoding etc., and configured in unspecified ways. The |
|
|
273 | user is responsible for this handle (it will not be used by this |
|
|
274 | module anymore). |
|
|
275 | |
|
|
276 | This is useful with some push-type services, where, after the |
|
|
277 | initial headers, an interactive protocol is used (typical |
|
|
278 | example would be the push-style twitter API which starts a |
|
|
279 | JSON/XML stream). |
|
|
280 | |
|
|
281 | If you think you need this, first have a look at "on_body", to |
|
|
282 | see if that doesn't solve your problem in a better way. |
|
|
283 | |
|
|
284 | persistent => $boolean |
|
|
285 | Try to create/reuse a persistent connection. When this flag is |
|
|
286 | set (default: true for idempotent requests, false for all |
|
|
287 | others), then "http_request" tries to re-use an existing |
|
|
288 | (previously-created) persistent connection to the host and, |
|
|
289 | failing that, tries to create a new one. |
|
|
290 | |
|
|
291 | Requests failing in certain ways will be automatically retried |
|
|
292 | once, which is dangerous for non-idempotent requests, which is |
|
|
293 | why it defaults to off for them. The reason for this is because |
|
|
294 | the bozos who designed HTTP/1.1 made it impossible to |
|
|
295 | distinguish between a fatal error and a normal connection |
|
|
296 | timeout, so you never know whether there was a problem with your |
|
|
297 | request or not. |
|
|
298 | |
|
|
299 | When reusing an existent connection, many parameters (such as |
|
|
300 | TLS context) will be ignored. See the "session" parameter for a |
|
|
301 | workaround. |
|
|
302 | |
|
|
303 | keepalive => $boolean |
|
|
304 | Only used when "persistent" is also true. This parameter decides |
|
|
305 | whether "http_request" tries to handshake a HTTP/1.0-style |
|
|
306 | keep-alive connection (as opposed to only a HTTP/1.1 persistent |
|
|
307 | connection). |
|
|
308 | |
|
|
309 | The default is true, except when using a proxy, in which case it |
|
|
310 | defaults to false, as HTTP/1.0 proxies cannot support this in a |
|
|
311 | meaningful way. |
|
|
312 | |
|
|
313 | handle_params => { key => value ... } |
|
|
314 | The key-value pairs in this hash will be passed to any |
|
|
315 | AnyEvent::Handle constructor that is called - not all requests |
|
|
316 | will create a handle, and sometimes more than one is created, so |
|
|
317 | this parameter is only good for setting hints. |
|
|
318 | |
|
|
319 | Example: set the maximum read size to 4096, to potentially |
|
|
320 | conserve memory at the cost of speed. |
|
|
321 | |
|
|
322 | handle_params => { |
|
|
323 | max_read_size => 4096, |
|
|
324 | }, |
|
|
325 | |
123 | Example: make a simple HTTP GET request for http://www.nethype.de/ |
326 | Example: do a simple HTTP GET request for http://www.nethype.de/ and |
|
|
327 | print the response body. |
124 | |
328 | |
125 | http_request GET => "http://www.nethype.de/", sub { |
329 | http_request GET => "http://www.nethype.de/", sub { |
126 | my ($body, $hdr) = @_; |
330 | my ($body, $hdr) = @_; |
127 | print "$body\n"; |
331 | print "$body\n"; |
128 | }; |
332 | }; |
129 | |
333 | |
130 | Example: make a HTTP HEAD request on https://www.google.com/, use a |
334 | Example: do a HTTP HEAD request on https://www.google.com/, use a |
131 | timeout of 30 seconds. |
335 | timeout of 30 seconds. |
132 | |
336 | |
133 | http_request |
337 | http_request |
134 | GET => "https://www.google.com", |
338 | GET => "https://www.google.com", |
|
|
339 | headers => { "user-agent" => "MySearchClient 1.0" }, |
135 | timeout => 30, |
340 | timeout => 30, |
136 | sub { |
341 | sub { |
137 | my ($body, $hdr) = @_; |
342 | my ($body, $hdr) = @_; |
138 | use Data::Dumper; |
343 | use Data::Dumper; |
139 | print Dumper $hdr; |
344 | print Dumper $hdr; |
140 | } |
345 | } |
141 | ; |
346 | ; |
142 | |
347 | |
|
|
348 | Example: do another simple HTTP GET request, but immediately try to |
|
|
349 | cancel it. |
|
|
350 | |
|
|
351 | my $request = http_request GET => "http://www.nethype.de/", sub { |
|
|
352 | my ($body, $hdr) = @_; |
|
|
353 | print "$body\n"; |
|
|
354 | }; |
|
|
355 | |
|
|
356 | undef $request; |
|
|
357 | |
|
|
358 | DNS CACHING |
|
|
359 | AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for the |
|
|
360 | actual connection, which in turn uses AnyEvent::DNS to resolve |
|
|
361 | hostnames. The latter is a simple stub resolver and does no caching on |
|
|
362 | its own. If you want DNS caching, you currently have to provide your own |
|
|
363 | default resolver (by storing a suitable resolver object in |
|
|
364 | $AnyEvent::DNS::RESOLVER) or your own "tcp_connect" callback. |
|
|
365 | |
143 | GLOBAL FUNCTIONS AND VARIABLES |
366 | GLOBAL FUNCTIONS AND VARIABLES |
144 | AnyEvent::HTTP::set_proxy "proxy-url" |
367 | AnyEvent::HTTP::set_proxy "proxy-url" |
145 | Sets the default proxy server to use. The proxy-url must begin with |
368 | Sets the default proxy server to use. The proxy-url must begin with |
146 | a string of the form "http://host:port" (optionally "https:..."). |
369 | a string of the form "http://host:port", croaks otherwise. |
|
|
370 | |
|
|
371 | To clear an already-set proxy, use "undef". |
|
|
372 | |
|
|
373 | When AnyEvent::HTTP is loaded for the first time it will query the |
|
|
374 | default proxy from the operating system, currently by looking at |
|
|
375 | "$ENV{http_proxy}". |
|
|
376 | |
|
|
377 | AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
|
|
378 | Remove all cookies from the cookie jar that have been expired. If |
|
|
379 | $session_end is given and true, then additionally remove all session |
|
|
380 | cookies. |
|
|
381 | |
|
|
382 | You should call this function (with a true $session_end) before you |
|
|
383 | save cookies to disk, and you should call this function after |
|
|
384 | loading them again. If you have a long-running program you can |
|
|
385 | additionally call this function from time to time. |
|
|
386 | |
|
|
387 | A cookie jar is initially an empty hash-reference that is managed by |
|
|
388 | this module. Its format is subject to change, but currently it is |
|
|
389 | like this: |
|
|
390 | |
|
|
391 | The key "version" has to contain 1, otherwise the hash gets emptied. |
|
|
392 | All other keys are hostnames or IP addresses pointing to |
|
|
393 | hash-references. The key for these inner hash references is the |
|
|
394 | server path for which this cookie is meant, and the values are again |
|
|
395 | hash-references. The keys of those hash-references is the cookie |
|
|
396 | name, and the value, you guessed it, is another hash-reference, this |
|
|
397 | time with the key-value pairs from the cookie, except for "expires" |
|
|
398 | and "max-age", which have been replaced by a "_expires" key that |
|
|
399 | contains the cookie expiry timestamp. |
|
|
400 | |
|
|
401 | Here is an example of a cookie jar with a single cookie, so you have |
|
|
402 | a chance of understanding the above paragraph: |
|
|
403 | |
|
|
404 | { |
|
|
405 | version => 1, |
|
|
406 | "10.0.0.1" => { |
|
|
407 | "/" => { |
|
|
408 | "mythweb_id" => { |
|
|
409 | _expires => 1293917923, |
|
|
410 | value => "ooRung9dThee3ooyXooM1Ohm", |
|
|
411 | }, |
|
|
412 | }, |
|
|
413 | }, |
|
|
414 | } |
|
|
415 | |
|
|
416 | $date = AnyEvent::HTTP::format_date $timestamp |
|
|
417 | Takes a POSIX timestamp (seconds since the epoch) and formats it as |
|
|
418 | a HTTP Date (RFC 2616). |
|
|
419 | |
|
|
420 | $timestamp = AnyEvent::HTTP::parse_date $date |
|
|
421 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) |
|
|
422 | or a bunch of minor variations of those, and returns the |
|
|
423 | corresponding POSIX timestamp, or "undef" if the date cannot be |
|
|
424 | parsed. |
147 | |
425 | |
148 | $AnyEvent::HTTP::MAX_RECURSE |
426 | $AnyEvent::HTTP::MAX_RECURSE |
149 | The default value for the "recurse" request parameter (default: 10). |
427 | The default value for the "recurse" request parameter (default: 10). |
150 | |
428 | |
|
|
429 | $AnyEvent::HTTP::TIMEOUT |
|
|
430 | The default timeout for connection operations (default: 300). |
|
|
431 | |
151 | $AnyEvent::HTTP::USERAGENT |
432 | $AnyEvent::HTTP::USERAGENT |
152 | The default value for the "User-Agent" header (the default is |
433 | The default value for the "User-Agent" header (the default is |
153 | "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; |
434 | "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
154 | +http://software.schmorp.de/pkg/AnyEvent)"). |
435 | +http://software.schmorp.de/pkg/AnyEvent)"). |
155 | |
436 | |
156 | $AnyEvent::HTTP::MAX_PERSISTENT |
437 | $AnyEvent::HTTP::MAX_PER_HOST |
157 | The maximum number of persistent connections to keep open (default: |
438 | The maximum number of concurrent connections to the same host |
158 | 8). |
439 | (identified by the hostname). If the limit is exceeded, then the |
|
|
440 | additional requests are queued until previous connections are |
|
|
441 | closed. Both persistent and non-persistent connections are counted |
|
|
442 | in this limit. |
159 | |
443 | |
160 | Not implemented currently. |
444 | The default value for this is 4, and it is highly advisable to not |
|
|
445 | increase it much. |
|
|
446 | |
|
|
447 | For comparison: the RFCs recommend 4 non-persistent or 2 persistent |
|
|
448 | connections, older browsers used 2, newer ones (such as firefox 3) |
|
|
449 | typically use 6, and Opera uses 8 because like, they have the |
|
|
450 | fastest browser and give a shit for everybody else on the planet. |
161 | |
451 | |
162 | $AnyEvent::HTTP::PERSISTENT_TIMEOUT |
452 | $AnyEvent::HTTP::PERSISTENT_TIMEOUT |
163 | The maximum time to cache a persistent connection, in seconds |
453 | The time after which idle persistent connections get closed by |
164 | (default: 2). |
454 | AnyEvent::HTTP (default: 3). |
165 | |
|
|
166 | Not implemented currently. |
|
|
167 | |
455 | |
168 | $AnyEvent::HTTP::ACTIVE |
456 | $AnyEvent::HTTP::ACTIVE |
169 | The number of active connections. This is not the number of |
457 | The number of active connections. This is not the number of |
170 | currently running requests, but the number of currently open and |
458 | currently running requests, but the number of currently open and |
171 | non-idle TCP connections. This number of can be useful for |
459 | non-idle TCP connections. This number can be useful for |
172 | load-leveling. |
460 | load-leveling. |
|
|
461 | |
|
|
462 | SHOWCASE |
|
|
463 | This section contains some more elaborate "real-world" examples or code |
|
|
464 | snippets. |
|
|
465 | |
|
|
466 | HTTP/1.1 FILE DOWNLOAD |
|
|
467 | Downloading files with HTTP can be quite tricky, especially when |
|
|
468 | something goes wrong and you want to resume. |
|
|
469 | |
|
|
470 | Here is a function that initiates and resumes a download. It uses the |
|
|
471 | last modified time to check for file content changes, and works with |
|
|
472 | many HTTP/1.0 servers as well, and usually falls back to a complete |
|
|
473 | re-download on older servers. |
|
|
474 | |
|
|
475 | It calls the completion callback with either "undef", which means a |
|
|
476 | nonretryable error occurred, 0 when the download was partial and should |
|
|
477 | be retried, and 1 if it was successful. |
|
|
478 | |
|
|
479 | use AnyEvent::HTTP; |
|
|
480 | |
|
|
481 | sub download($$$) { |
|
|
482 | my ($url, $file, $cb) = @_; |
|
|
483 | |
|
|
484 | open my $fh, "+<", $file |
|
|
485 | or die "$file: $!"; |
|
|
486 | |
|
|
487 | my %hdr; |
|
|
488 | my $ofs = 0; |
|
|
489 | |
|
|
490 | warn stat $fh; |
|
|
491 | warn -s _; |
|
|
492 | if (stat $fh and -s _) { |
|
|
493 | $ofs = -s _; |
|
|
494 | warn "-s is ", $ofs;#d# |
|
|
495 | $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
|
|
496 | $hdr{"range"} = "bytes=$ofs-"; |
|
|
497 | } |
|
|
498 | |
|
|
499 | http_get $url, |
|
|
500 | headers => \%hdr, |
|
|
501 | on_header => sub { |
|
|
502 | my ($hdr) = @_; |
|
|
503 | |
|
|
504 | if ($hdr->{Status} == 200 && $ofs) { |
|
|
505 | # resume failed |
|
|
506 | truncate $fh, $ofs = 0; |
|
|
507 | } |
|
|
508 | |
|
|
509 | sysseek $fh, $ofs, 0; |
|
|
510 | |
|
|
511 | 1 |
|
|
512 | }, |
|
|
513 | on_body => sub { |
|
|
514 | my ($data, $hdr) = @_; |
|
|
515 | |
|
|
516 | if ($hdr->{Status} =~ /^2/) { |
|
|
517 | length $data == syswrite $fh, $data |
|
|
518 | or return; # abort on write errors |
|
|
519 | } |
|
|
520 | |
|
|
521 | 1 |
|
|
522 | }, |
|
|
523 | sub { |
|
|
524 | my (undef, $hdr) = @_; |
|
|
525 | |
|
|
526 | my $status = $hdr->{Status}; |
|
|
527 | |
|
|
528 | if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
|
|
529 | utime $fh, $time, $time; |
|
|
530 | } |
|
|
531 | |
|
|
532 | if ($status == 200 || $status == 206 || $status == 416) { |
|
|
533 | # download ok || resume ok || file already fully downloaded |
|
|
534 | $cb->(1, $hdr); |
|
|
535 | |
|
|
536 | } elsif ($status == 412) { |
|
|
537 | # file has changed while resuming, delete and retry |
|
|
538 | unlink $file; |
|
|
539 | $cb->(0, $hdr); |
|
|
540 | |
|
|
541 | } elsif ($status == 500 or $status == 503 or $status =~ /^59/) { |
|
|
542 | # retry later |
|
|
543 | $cb->(0, $hdr); |
|
|
544 | |
|
|
545 | } else { |
|
|
546 | $cb->(undef, $hdr); |
|
|
547 | } |
|
|
548 | } |
|
|
549 | ; |
|
|
550 | } |
|
|
551 | |
|
|
552 | download "http://server/somelargefile", "/tmp/somelargefile", sub { |
|
|
553 | if ($_[0]) { |
|
|
554 | print "OK!\n"; |
|
|
555 | } elsif (defined $_[0]) { |
|
|
556 | print "please retry later\n"; |
|
|
557 | } else { |
|
|
558 | print "ERROR\n"; |
|
|
559 | } |
|
|
560 | }; |
|
|
561 | |
|
|
562 | SOCKS PROXIES |
|
|
563 | Socks proxies are not directly supported by AnyEvent::HTTP. You can |
|
|
564 | compile your perl to support socks, or use an external program such as |
|
|
565 | socksify (dante) or tsocks to make your program use a socks proxy |
|
|
566 | transparently. |
|
|
567 | |
|
|
568 | Alternatively, for AnyEvent::HTTP only, you can use your own |
|
|
569 | "tcp_connect" function that does the proxy handshake - here is an |
|
|
570 | example that works with socks4a proxies: |
|
|
571 | |
|
|
572 | use Errno; |
|
|
573 | use AnyEvent::Util; |
|
|
574 | use AnyEvent::Socket; |
|
|
575 | use AnyEvent::Handle; |
|
|
576 | |
|
|
577 | # host, port and username of/for your socks4a proxy |
|
|
578 | my $socks_host = "10.0.0.23"; |
|
|
579 | my $socks_port = 9050; |
|
|
580 | my $socks_user = ""; |
|
|
581 | |
|
|
582 | sub socks4a_connect { |
|
|
583 | my ($host, $port, $connect_cb, $prepare_cb) = @_; |
|
|
584 | |
|
|
585 | my $hdl = new AnyEvent::Handle |
|
|
586 | connect => [$socks_host, $socks_port], |
|
|
587 | on_prepare => sub { $prepare_cb->($_[0]{fh}) }, |
|
|
588 | on_error => sub { $connect_cb->() }, |
|
|
589 | ; |
|
|
590 | |
|
|
591 | $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host); |
|
|
592 | |
|
|
593 | $hdl->push_read (chunk => 8, sub { |
|
|
594 | my ($hdl, $chunk) = @_; |
|
|
595 | my ($status, $port, $ipn) = unpack "xCna4", $chunk; |
|
|
596 | |
|
|
597 | if ($status == 0x5a) { |
|
|
598 | $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port"); |
|
|
599 | } else { |
|
|
600 | $! = Errno::ENXIO; $connect_cb->(); |
|
|
601 | } |
|
|
602 | }); |
|
|
603 | |
|
|
604 | $hdl |
|
|
605 | } |
|
|
606 | |
|
|
607 | Use "socks4a_connect" instead of "tcp_connect" when doing |
|
|
608 | "http_request"s, possibly after switching off other proxy types: |
|
|
609 | |
|
|
610 | AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies |
|
|
611 | |
|
|
612 | http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub { |
|
|
613 | my ($data, $headers) = @_; |
|
|
614 | ... |
|
|
615 | }; |
173 | |
616 | |
174 | SEE ALSO |
617 | SEE ALSO |
175 | AnyEvent. |
618 | AnyEvent. |
176 | |
619 | |
177 | AUTHOR |
620 | AUTHOR |
178 | Marc Lehmann <schmorp@schmorp.de> |
621 | Marc Lehmann <schmorp@schmorp.de> |
179 | http://home.schmorp.de/ |
622 | http://home.schmorp.de/ |
180 | |
623 | |
|
|
624 | With many thanks to Дмитрий Шалашов, who provided |
|
|
625 | countless testcases and bugreports. |
|
|
626 | |