ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.2
Committed: Wed Jun 4 11:37:41 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.1: +69 -19 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4
5 =head1 SYNOPSIS
6
7 use AnyEvent::HTTP;
8
9 =head1 DESCRIPTION
10
11 This module is an L<AnyEvent> user, you need to make sure that you use and
12 run a supported event loop.
13
14 =head2 METHODS
15
16 =over 4
17
18 =cut
19
20 package AnyEvent::HTTP;
21
22 use strict;
23 no warnings;
24
25 use Carp;
26
27 use AnyEvent ();
28 use AnyEvent::Util ();
29 use AnyEvent::Socket ();
30 use AnyEvent::Handle ();
31
32 use base Exporter::;
33
34 our $VERSION = '1.0';
35
36 our @EXPORT = qw(http_get http_request);
37
38 our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 our $MAX_REDIRECTS = 10;
40 our $MAX_PERSISTENT = 8;
41 our $PERSISTENT_TIMEOUT = 2;
42 our $TIMEOUT = 300;
43
44 # changing these is evil
45 our $MAX_PERSISTENT_PER_HOST = 2;
46 our $MAX_PER_HOST = 4; # not respected yet :(
47
48 our $PROXY;
49
50 my %KA_COUNT; # number of open keep-alive connections per host
51
52 =item http_get $url, key => value..., $cb->($data, $headers)
53
54 Executes an HTTP-GET request. See the http_request function for details on
55 additional parameters.
56
57 =item http_request $method => $url, key => value..., $cb->($data, $headers)
58
59 Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
60 must be an absolute http or https URL.
61
62 The callback will be called with the response data as first argument
63 (or C<undef> if it wasn't available due to errors), and a hash-ref with
64 response headers as second argument.
65
66 All the headers in that has are lowercased. In addition to the response
67 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
68 C<Reason> contain the three parts of the HTTP Status-Line of the same
69 name.
70
71 If an internal error occurs, such as not being able to resolve a hostname,
72 then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
73 and the C<Reason> pseudo-header will contain an error message.
74
75 Additional parameters are key-value pairs, and are fully optional. They
76 include:
77
78 =over 4
79
80 =item recurse => $boolean (default: true)
81
82 Whether to recurse requests or not, e.g. on redirects, authentication
83 retries and so on.
84
85 =item headers => hashref
86
87 The request headers to use.
88
89 =item timeout => $seconds
90
91 The time-out to use for various stages - each connect attempt will reset
92 the timeout, as will read or write activity. Default timeout is 5 minutes.
93
94 =item proxy => [$host, $port[, $scheme]] or undef
95
96 Use the given http proxy for all requests. If not specified, then the
97 default proxy (as specified by C<$ENV{http_proxy}>) is used.
98
99 C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
100 HTTPS.
101
102 =back
103
104 =back
105
106 =cut
107
108 sub http_request($$$;@) {
109 my $cb = pop;
110 my ($method, $url, %arg) = @_;
111
112 my %hdr;
113
114 if (my $hdr = delete $arg{headers}) {
115 while (my ($k, $v) = each %$hdr) {
116 $hdr{lc $k} = $v;
117 }
118 }
119
120 my $proxy = $arg{proxy} || $PROXY;
121 my $timeout = $arg{timeout} || $TIMEOUT;
122
123 $hdr{"user-agent"} ||= $USERAGENT;
124
125 my ($host, $port, $path, $scheme);
126
127 if ($proxy) {
128 ($host, $port, $scheme) = @$proxy;
129 $path = $url;
130 } else {
131 ($scheme, my $authority, $path, my $query, my $fragment) =
132 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
133
134 $port = $scheme eq "http" ? 80
135 : $scheme eq "https" ? 443
136 : croak "$url: only http and https URLs supported";
137
138 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
139 or croak "$authority: unparsable URL";
140
141 $host = $1;
142 $port = $2 if defined $2;
143
144 $host =~ s/^\[(.*)\]$/$1/;
145 $path .= "?$query" if length $query;
146
147 $path = "/" unless $path;
148
149 $hdr{host} = $host = lc $host;
150 }
151
152 $scheme = lc $scheme;
153
154 my %state;
155
156 my $body = "";
157 $state{body} = $body;
158
159 $hdr{"content-length"} = length $body;
160
161 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
162 $state{fh} = shift
163 or return $cb->(undef, { Status => 599, Reason => "$!" });
164
165 delete $state{connect_guard}; # reduce memory usage, save a tree
166
167 # get handle
168 $state{handle} = new AnyEvent::Handle
169 fh => $state{fh},
170 ($scheme eq "https" ? (tls => "connect") : ());
171
172 # limit the number of persistent connections
173 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
174 ++$KA_COUNT{$_[1]};
175 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
176 $hdr{connection} = "keep-alive";
177 delete $hdr{connection}; # keep-alive not yet supported
178 } else {
179 delete $hdr{connection};
180 }
181
182 # (re-)configure handle
183 $state{handle}->timeout ($timeout);
184 $state{handle}->on_error (sub {
185 %state = ();
186 $cb->(undef, { Status => 599, Reason => "$!" });
187 });
188 $state{handle}->on_eof (sub {
189 %state = ();
190 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
191 });
192
193 # send request
194 $state{handle}->push_write (
195 "\U$method\E $path HTTP/1.0\015\012"
196 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
197 . "\015\012"
198 . (delete $state{body})
199 );
200
201 %hdr = (); # reduce memory usage, save a kitten
202
203 # status line
204 $state{handle}->push_read (line => qr/\015?\012/, sub {
205 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
206 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
207
208 my %hdr = ( # response headers
209 HTTPVersion => ",$1",
210 Status => ",$2",
211 Reason => ",$3",
212 );
213
214 # headers, could be optimized a bit
215 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
216 for ("$_[1]\012") {
217 # we support spaces in field names, as lotus domino
218 # creates them.
219 $hdr{lc $1} .= ",$2"
220 while /\G
221 ([^:\000-\037]+):
222 [\011\040]*
223 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
224 \015?\012
225 /gxc;
226
227 /\G$/
228 or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
229 }
230
231 substr $_, 0, 1, ""
232 for values %hdr;
233
234 if (exists $hdr{"content-length"}) {
235 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
236 # could cache persistent connection now
237 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
238 # but we don't, due to misdesigns, this is annoyingly complex
239 };
240
241 %state = ();
242 $cb->($_[1], \%hdr);
243 });
244 } else {
245 # too bad, need to read until we get an error or EOF,
246 # no way to detect winged data.
247 $_[0]->on_error (sub {
248 %state = ();
249 $cb->($_[0]{rbuf}, \%hdr);
250 });
251 $_[0]->on_eof (undef);
252 $_[0]->on_read (sub { });
253 }
254 });
255 });
256 }, sub {
257 $timeout
258 };
259
260 defined wantarray && AnyEvent::Util::guard { %state = () }
261 }
262
263 sub http_get($$;@) {
264 unshift @_, "GET";
265 &http_request
266 }
267
268 =head2 GLOBAL FUNCTIONS AND VARIABLES
269
270 =over 4
271
272 =item AnyEvent::HTTP::set_proxy "proxy-url"
273
274 Sets the default proxy server to use. The proxy-url must begin with a
275 string of the form C<http://host:port> (optionally C<https:...>).
276
277 =item $AnyEvent::HTTP::MAX_REDIRECTS
278
279 The default value for the C<max_redirects> request parameter
280 (default: C<10>).
281
282 =item $AnyEvent::HTTP::USERAGENT
283
284 The default value for the C<User-Agent> header (the default is
285 C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
286
287 =item $AnyEvent::HTTP::MAX_PERSISTENT
288
289 The maximum number of persistent connections to keep open (default: 8).
290
291 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
292
293 The maximum time to cache a persistent connection, in seconds (default: 2).
294
295 =back
296
297 =cut
298
299 sub set_proxy($) {
300 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
301 }
302
303 # initialise proxy from environment
304 set_proxy $ENV{http_proxy};
305
306 =head1 SEE ALSO
307
308 L<AnyEvent>.
309
310 =head1 AUTHOR
311
312 Marc Lehmann <schmorp@schmorp.de>
313 http://home.schmorp.de/
314
315 =cut
316
317 1
318