ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.17 by root, Fri Jun 6 13:02:38 2008 UTC vs.
Revision 1.28 by root, Mon Sep 29 13:50:39 2008 UTC

48use AnyEvent::Socket (); 48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 49use AnyEvent::Handle ();
50 50
51use base Exporter::; 51use base Exporter::;
52 52
53our $VERSION = '1.01'; 53our $VERSION = '1.05';
54 54
55our @EXPORT = qw(http_get http_post http_head http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
56 56
57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
80Executes an HTTP-HEAD request. See the http_request function for details on 80Executes an HTTP-HEAD request. See the http_request function for details on
81additional parameters. 81additional parameters.
82 82
83=item http_post $url, $body, key => value..., $cb->($data, $headers) 83=item http_post $url, $body, key => value..., $cb->($data, $headers)
84 84
85Executes an HTTP-POST request with a request body of C<$bod>. See the 85Executes an HTTP-POST request with a request body of C<$body>. See the
86http_request function for details on additional parameters. 86http_request function for details on additional parameters.
87 87
88=item http_request $method => $url, key => value..., $cb->($data, $headers) 88=item http_request $method => $url, key => value..., $cb->($data, $headers)
89 89
90Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 90Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
93The callback will be called with the response data as first argument 93The callback will be called with the response data as first argument
94(or C<undef> if it wasn't available due to errors), and a hash-ref with 94(or C<undef> if it wasn't available due to errors), and a hash-ref with
95response headers as second argument. 95response headers as second argument.
96 96
97All the headers in that hash are lowercased. In addition to the response 97All the headers in that hash are lowercased. In addition to the response
98headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and 98headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason>
99C<Reason> contain the three parts of the HTTP Status-Line of the same 99contain the three parts of the HTTP Status-Line of the same name. The
100pseudo-header C<URL> contains the original URL (which can differ from the
101requested URL when following redirects).
102
100name. If the server sends a header multiple lines, then their contents 103If the server sends a header multiple lines, then their contents will be
101will be joined together with C<\x00>. 104joined together with C<\x00>.
102 105
103If an internal error occurs, such as not being able to resolve a hostname, 106If an internal error occurs, such as not being able to resolve a hostname,
104then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 107then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
105and the C<Reason> pseudo-header will contain an error message. 108and the C<Reason> pseudo-header will contain an error message.
106 109
232 while (my ($k, $v) = each %$hdr) { 235 while (my ($k, $v) = each %$hdr) {
233 $hdr{lc $k} = $v; 236 $hdr{lc $k} = $v;
234 } 237 }
235 } 238 }
236 239
237 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; 240 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
238 241
239 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) 242 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url })
240 if $recurse < 0; 243 if $recurse < 0;
241 244
242 my $proxy = $arg{proxy} || $PROXY; 245 my $proxy = $arg{proxy} || $PROXY;
243 my $timeout = $arg{timeout} || $TIMEOUT; 246 my $timeout = $arg{timeout} || $TIMEOUT;
244 247
249 252
250 $scheme = lc $scheme; 253 $scheme = lc $scheme;
251 254
252 my $uport = $scheme eq "http" ? 80 255 my $uport = $scheme eq "http" ? 80
253 : $scheme eq "https" ? 443 256 : $scheme eq "https" ? 443
254 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 257 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
255 258
256 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic 259 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
257 260
258 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 261 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
259 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 262 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
260 263
261 my $uhost = $1; 264 my $uhost = $1;
262 $uport = $2 if defined $2; 265 $uport = $2 if defined $2;
263 266
264 $uhost =~ s/^\[(.*)\]$/$1/; 267 $uhost =~ s/^\[(.*)\]$/$1/;
309 312
310 return unless $state{connect_guard}; 313 return unless $state{connect_guard};
311 314
312 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 315 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
313 $state{fh} = shift 316 $state{fh} = shift
314 or return $cb->(undef, { Status => 599, Reason => "$!" }); 317 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url });
315 318
316 delete $state{connect_guard}; # reduce memory usage, save a tree 319 delete $state{connect_guard}; # reduce memory usage, save a tree
317 320
318 # get handle 321 # get handle
319 $state{handle} = new AnyEvent::Handle 322 $state{handle} = new AnyEvent::Handle
333 # (re-)configure handle 336 # (re-)configure handle
334 $state{handle}->timeout ($timeout); 337 $state{handle}->timeout ($timeout);
335 $state{handle}->on_error (sub { 338 $state{handle}->on_error (sub {
336 my $errno = "$!"; 339 my $errno = "$!";
337 %state = (); 340 %state = ();
338 $cb->(undef, { Status => 599, Reason => $errno }); 341 $cb->(undef, { Status => 599, Reason => $errno, URL => $url });
339 }); 342 });
340 $state{handle}->on_eof (sub { 343 $state{handle}->on_eof (sub {
341 %state = (); 344 %state = ();
342 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 345 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
343 }); 346 });
344 347
345 # send request 348 # send request
346 $state{handle}->push_write ( 349 $state{handle}->push_write (
347 "$method $rpath HTTP/1.0\015\012" 350 "$method $rpath HTTP/1.0\015\012"
352 355
353 %hdr = (); # reduce memory usage, save a kitten 356 %hdr = (); # reduce memory usage, save a kitten
354 357
355 # status line 358 # status line
356 $state{handle}->push_read (line => qr/\015?\012/, sub { 359 $state{handle}->push_read (line => qr/\015?\012/, sub {
357 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix 360 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
358 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); 361 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
359 362
360 my %hdr = ( # response headers 363 my %hdr = ( # response headers
361 HTTPVersion => "\x00$1", 364 HTTPVersion => "\x00$1",
362 Status => "\x00$2", 365 Status => "\x00$2",
363 Reason => "\x00$3", 366 Reason => "\x00$3",
367 URL => "\x00$url"
364 ); 368 );
365 369
366 # headers, could be optimized a bit 370 # headers, could be optimized a bit
367 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 371 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
368 for ("$_[1]\012") { 372 for ("$_[1]\012") {
375 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 379 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
376 \015?\012 380 \015?\012
377 /gxc; 381 /gxc;
378 382
379 /\G$/ 383 /\G$/
380 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 384 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
381 } 385 }
382 386
383 substr $_, 0, 1, "" 387 substr $_, 0, 1, ""
384 for values %hdr; 388 for values %hdr;
385 389
394 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 398 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
395 399
396 my $cdom = (delete $kv{domain}) || $uhost; 400 my $cdom = (delete $kv{domain}) || $uhost;
397 my $cpath = (delete $kv{path}) || "/"; 401 my $cpath = (delete $kv{path}) || "/";
398 402
399 $cdom =~ s/^.?/./; # make sure it starts with a "." 403 $cdom =~ s/^\.?/./; # make sure it starts with a "."
400 404
401 next if $cdom =~ /\.$/; 405 next if $cdom =~ /\.$/;
402 406
403 # this is not rfc-like and not netscape-like. go figure. 407 # this is not rfc-like and not netscape-like. go figure.
404 my $ndots = $cdom =~ y/.//; 408 my $ndots = $cdom =~ y/.//;
408 $arg{cookie_jar}{version} = 1; 412 $arg{cookie_jar}{version} = 1;
409 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 413 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
410 } 414 }
411 } 415 }
412 416
413 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
414 # microsoft and other assholes don't give a shit for following standards, 417 # microsoft and other shitheads don't give a shit for following standards,
415 # try to support a common form of broken Location header. 418 # try to support some common forms of broken Location headers.
419 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
420 $_[1]{location} =~ s/^\.\/+//;
421
416 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; 422 my $url = "$scheme://$uhost:$uport";
417 423
424 unless ($_[1]{location} =~ s/^\///) {
425 $url .= $upath;
426 $url =~ s/\/[^\/]*$//;
427 }
428
429 $_[1]{location} = "$url/$_[1]{location}";
430 }
431
432 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
433 # apparently, mozilla et al. just change POST to GET here
434 # more research is needed before we do the same
418 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 435 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
436 } elsif ($_[1]{Status} == 303 && $recurse) {
437 # even http/1.1 is unlear on how to mutate the method
438 $method = "GET" unless $method eq "HEAD";
439 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
440 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
441 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
419 } else { 442 } else {
420 $cb->($_[0], $_[1]); 443 $cb->($_[0], $_[1]);
421 } 444 }
422 }; 445 };
423 446
462 unshift @_, "HEAD"; 485 unshift @_, "HEAD";
463 &http_request 486 &http_request
464} 487}
465 488
466sub http_post($$@) { 489sub http_post($$@) {
490 my $url = shift;
467 unshift @_, "POST", "body"; 491 unshift @_, "POST", $url, "body";
468 &http_request 492 &http_request
469} 493}
470 494
471=back 495=back
472 496
521 545
522L<AnyEvent>. 546L<AnyEvent>.
523 547
524=head1 AUTHOR 548=head1 AUTHOR
525 549
526 Marc Lehmann <schmorp@schmorp.de> 550 Marc Lehmann <schmorp@schmorp.de>
527 http://home.schmorp.de/ 551 http://home.schmorp.de/
528 552
529=cut 553=cut
530 554
5311 5551
532 556

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines