ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.10 by root, Thu Jun 5 13:06:43 2008 UTC vs.
Revision 1.13 by root, Thu Jun 5 16:43:45 2008 UTC

8 8
9=head1 DESCRIPTION 9=head1 DESCRIPTION
10 10
11This module is an L<AnyEvent> user; you need to make sure that you use and 11This module is an L<AnyEvent> user; you need to make sure that you use and
12run a supported event loop. 12run a supported event loop.
13
14This module implements a simple, stateless and non-blocking HTTP
15client. It supports GET, POST and other request methods, cookies and more,
16all on a very low level. It can follow redirects, supports proxies and
17automatically limits the number of connections to the values specified in
18the RFC.
19
20It should generally be a "good client" that is enough for most HTTP
21tasks. Simple tasks should be simple, but complex tasks should still be
22possible as the user retains control over request and response headers.
23
24The caller is responsible for authentication management, cookies (if
25the simplistic implementation in this module doesn't suffice), referer
26and other high-level protocol details for which this module offers only
27limited support.
13 28
14=head2 METHODS 29=head2 METHODS
15 30
16=over 4 31=over 4
17 32
41our $PERSISTENT_TIMEOUT = 2; 56our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 300; 57our $TIMEOUT = 300;
43 58
44# changing these is evil 59# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 60our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 61our $MAX_PER_HOST = 4;
47 62
48our $PROXY; 63our $PROXY;
49 64
50my %KA_COUNT; # number of open keep-alive connections per host 65my %KA_COUNT; # number of open keep-alive connections per host
66my %CO_SLOT; # number of open connections, and wait queue, per host
51 67
52=item http_get $url, key => value..., $cb->($data, $headers) 68=item http_get $url, key => value..., $cb->($data, $headers)
53 69
54Executes an HTTP-GET request. See the http_request function for details on 70Executes an HTTP-GET request. See the http_request function for details on
55additional parameters. 71additional parameters.
105Whether to recurse requests or not, e.g. on redirects, authentication 121Whether to recurse requests or not, e.g. on redirects, authentication
106retries and so on, and how often to do so. 122retries and so on, and how often to do so.
107 123
108=item headers => hashref 124=item headers => hashref
109 125
110The request headers to use. 126The request headers to use. Currently, C<http_request> may provide its
127own C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers
128and will provide defaults for C<User-Agent:> and C<Referer:>.
111 129
112=item timeout => $seconds 130=item timeout => $seconds
113 131
114The time-out to use for various stages - each connect attempt will reset 132The time-out to use for various stages - each connect attempt will reset
115the timeout, as will read or write activity. Default timeout is 5 minutes. 133the timeout, as will read or write activity. Default timeout is 5 minutes.
165 } 183 }
166 ; 184 ;
167 185
168=cut 186=cut
169 187
188sub _slot_schedule;
189sub _slot_schedule($) {
190 my $host = shift;
191
192 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
193 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
194 # somebody wants that slot
195 ++$CO_SLOT{$host}[0];
196
197 $cb->(AnyEvent::Util::guard {
198 --$CO_SLOT{$host}[0];
199 _slot_schedule $host;
200 });
201 } else {
202 # nobody wants the slot, maybe we can forget about it
203 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
204 last;
205 }
206 }
207}
208
209# wait for a free slot on host, call callback
210sub _get_slot($$) {
211 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
212
213 _slot_schedule $_[0];
214}
215
170sub http_request($$$;@) { 216sub http_request($$$;@) {
171 my $cb = pop; 217 my $cb = pop;
172 my ($method, $url, %arg) = @_; 218 my ($method, $url, %arg) = @_;
173 219
174 my %hdr; 220 my %hdr;
197 $scheme = lc $scheme; 243 $scheme = lc $scheme;
198 244
199 my $uport = $scheme eq "http" ? 80 245 my $uport = $scheme eq "http" ? 80
200 : $scheme eq "https" ? 443 246 : $scheme eq "https" ? 443
201 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 247 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" });
248
249 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
202 250
203 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 251 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
204 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 252 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" });
205 253
206 my $uhost = $1; 254 my $uhost = $1;
245 $hdr{host} = $uhost; 293 $hdr{host} = $uhost;
246 } 294 }
247 295
248 $hdr{"content-length"} = length $arg{body}; 296 $hdr{"content-length"} = length $arg{body};
249 297
250 my %state; 298 my %state = (connect_guard => 1);
251 299
300 _get_slot $uhost, sub {
301 $state{slot_guard} = shift;
302
303 return unless $state{connect_guard};
304
252 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 305 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
253 $state{fh} = shift 306 $state{fh} = shift
254 or return $cb->(undef, { Status => 599, Reason => "$!" }); 307 or return $cb->(undef, { Status => 599, Reason => "$!" });
255 308
256 delete $state{connect_guard}; # reduce memory usage, save a tree 309 delete $state{connect_guard}; # reduce memory usage, save a tree
257 310
258 # get handle 311 # get handle
259 $state{handle} = new AnyEvent::Handle 312 $state{handle} = new AnyEvent::Handle
260 fh => $state{fh}, 313 fh => $state{fh},
261 ($scheme eq "https" ? (tls => "connect") : ()); 314 ($scheme eq "https" ? (tls => "connect") : ());
262 315
263 # limit the number of persistent connections 316 # limit the number of persistent connections
264 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 317 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
265 ++$KA_COUNT{$_[1]}; 318 ++$KA_COUNT{$_[1]};
266 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 319 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
267 $hdr{connection} = "keep-alive"; 320 $hdr{connection} = "keep-alive";
268 delete $hdr{connection}; # keep-alive not yet supported 321 delete $hdr{connection}; # keep-alive not yet supported
269 } else { 322 } else {
270 delete $hdr{connection}; 323 delete $hdr{connection};
271 } 324 }
272 325
273 # (re-)configure handle 326 # (re-)configure handle
274 $state{handle}->timeout ($timeout); 327 $state{handle}->timeout ($timeout);
275 $state{handle}->on_error (sub { 328 $state{handle}->on_error (sub {
276 %state = (); 329 %state = ();
277 $cb->(undef, { Status => 599, Reason => "$!" }); 330 $cb->(undef, { Status => 599, Reason => "$!" });
278 }); 331 });
279 $state{handle}->on_eof (sub { 332 $state{handle}->on_eof (sub {
280 %state = (); 333 %state = ();
281 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 334 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
282 }); 335 });
283 336
284 # send request 337 # send request
285 $state{handle}->push_write ( 338 $state{handle}->push_write (
286 "$method $rpath HTTP/1.0\015\012" 339 "$method $rpath HTTP/1.0\015\012"
287 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 340 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
288 . "\015\012" 341 . "\015\012"
289 . (delete $arg{body}) 342 . (delete $arg{body})
290 );
291
292 %hdr = (); # reduce memory usage, save a kitten
293
294 # status line
295 $state{handle}->push_read (line => qr/\015?\012/, sub {
296 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
297 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
298
299 my %hdr = ( # response headers
300 HTTPVersion => "\x00$1",
301 Status => "\x00$2",
302 Reason => "\x00$3",
303 ); 343 );
304 344
345 %hdr = (); # reduce memory usage, save a kitten
346
347 # status line
348 $state{handle}->push_read (line => qr/\015?\012/, sub {
349 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
350 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
351
352 my %hdr = ( # response headers
353 HTTPVersion => "\x00$1",
354 Status => "\x00$2",
355 Reason => "\x00$3",
356 );
357
305 # headers, could be optimized a bit 358 # headers, could be optimized a bit
306 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 359 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
307 for ("$_[1]\012") { 360 for ("$_[1]\012") {
308 # we support spaces in field names, as lotus domino 361 # we support spaces in field names, as lotus domino
309 # creates them. 362 # creates them.
310 $hdr{lc $1} .= "\x00$2" 363 $hdr{lc $1} .= "\x00$2"
311 while /\G 364 while /\G
312 ([^:\000-\037]+): 365 ([^:\000-\037]+):
313 [\011\040]* 366 [\011\040]*
314 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 367 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
315 \015?\012 368 \015?\012
316 /gxc; 369 /gxc;
317 370
318 /\G$/ 371 /\G$/
319 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 372 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" }));
320 } 373 }
321 374
322 substr $_, 0, 1, "" 375 substr $_, 0, 1, ""
323 for values %hdr; 376 for values %hdr;
324 377
325 my $finish = sub { 378 my $finish = sub {
326 %state = (); 379 %state = ();
327 380
328 # set-cookie processing 381 # set-cookie processing
329 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 382 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330 for (split /\x00/, $hdr{"set-cookie"}) { 383 for (split /\x00/, $hdr{"set-cookie"}) {
331 my ($cookie, @arg) = split /;\s*/; 384 my ($cookie, @arg) = split /;\s*/;
332 my ($name, $value) = split /=/, $cookie, 2; 385 my ($name, $value) = split /=/, $cookie, 2;
333 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 386 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334 387
335 my $cdom = (delete $kv{domain}) || $uhost; 388 my $cdom = (delete $kv{domain}) || $uhost;
336 my $cpath = (delete $kv{path}) || "/"; 389 my $cpath = (delete $kv{path}) || "/";
337 390
338 $cdom =~ s/^.?/./; # make sure it starts with a "." 391 $cdom =~ s/^.?/./; # make sure it starts with a "."
339 392
393 next if $cdom =~ /\.$/;
394
395 # this is not rfc-like and not netscape-like. go figure.
340 my $ndots = $cdom =~ y/.//; 396 my $ndots = $cdom =~ y/.//;
341 next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3); 397 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
342 398
343 # store it 399 # store it
344 $arg{cookie_jar}{version} = 1; 400 $arg{cookie_jar}{version} = 1;
345 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 401 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
402 }
403 }
404
405 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
406 # microsoft and other assholes don't give a shit for following standards,
407 # try to support a common form of broken Location header.
408 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
409
410 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
411 } else {
412 $cb->($_[0], $_[1]);
413 }
414 };
415
416 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
417 $finish->(undef, \%hdr);
418 } else {
419 if (exists $hdr{"content-length"}) {
420 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
421 # could cache persistent connection now
422 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
423 # but we don't, due to misdesigns, this is annoyingly complex
424 };
425
426 $finish->($_[1], \%hdr);
427 });
428 } else {
429 # too bad, need to read until we get an error or EOF,
430 # no way to detect winged data.
431 $_[0]->on_error (sub {
432 $finish->($_[0]{rbuf}, \%hdr);
433 });
434 $_[0]->on_eof (undef);
435 $_[0]->on_read (sub { });
346 } 436 }
347 } 437 }
348
349 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350 # microsoft and other assholes don't give a shit for following standards,
351 # try to support a common form of broken Location header.
352 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353
354 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
355 } else {
356 $cb->($_[0], $_[1]);
357 }
358 }; 438 });
359
360 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
361 $finish->(undef, \%hdr);
362 } else {
363 if (exists $hdr{"content-length"}) {
364 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
365 # could cache persistent connection now
366 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
367 # but we don't, due to misdesigns, this is annoyingly complex
368 };
369
370 $finish->($_[1], \%hdr);
371 });
372 } else {
373 # too bad, need to read until we get an error or EOF,
374 # no way to detect winged data.
375 $_[0]->on_error (sub {
376 $finish->($_[0]{rbuf}, \%hdr);
377 });
378 $_[0]->on_eof (undef);
379 $_[0]->on_read (sub { });
380 }
381 }
382 }); 439 });
440 }, sub {
441 $timeout
383 }); 442 };
384 }, sub {
385 $timeout
386 }; 443 };
387 444
388 defined wantarray && AnyEvent::Util::guard { %state = () } 445 defined wantarray && AnyEvent::Util::guard { %state = () }
389} 446}
390 447

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines