ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.17 by root, Fri Jun 6 13:02:38 2008 UTC vs.
Revision 1.23 by root, Wed Jul 2 01:23:41 2008 UTC

48use AnyEvent::Socket (); 48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 49use AnyEvent::Handle ();
50 50
51use base Exporter::; 51use base Exporter::;
52 52
53our $VERSION = '1.01'; 53our $VERSION = '1.03';
54 54
55our @EXPORT = qw(http_get http_post http_head http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
56 56
57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
93The callback will be called with the response data as first argument 93The callback will be called with the response data as first argument
94(or C<undef> if it wasn't available due to errors), and a hash-ref with 94(or C<undef> if it wasn't available due to errors), and a hash-ref with
95response headers as second argument. 95response headers as second argument.
96 96
97All the headers in that hash are lowercased. In addition to the response 97All the headers in that hash are lowercased. In addition to the response
98headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and 98headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason>
99C<Reason> contain the three parts of the HTTP Status-Line of the same 99contain the three parts of the HTTP Status-Line of the same name. The
100pseudo-header C<URL> contains the original URL (which can differ from the
101requested URL when following redirects).
102
100name. If the server sends a header multiple lines, then their contents 103If the server sends a header multiple lines, then their contents will be
101will be joined together with C<\x00>. 104joined together with C<\x00>.
102 105
103If an internal error occurs, such as not being able to resolve a hostname, 106If an internal error occurs, such as not being able to resolve a hostname,
104then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 107then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
105and the C<Reason> pseudo-header will contain an error message. 108and the C<Reason> pseudo-header will contain an error message.
106 109
232 while (my ($k, $v) = each %$hdr) { 235 while (my ($k, $v) = each %$hdr) {
233 $hdr{lc $k} = $v; 236 $hdr{lc $k} = $v;
234 } 237 }
235 } 238 }
236 239
237 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; 240 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
238 241
239 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) 242 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url })
240 if $recurse < 0; 243 if $recurse < 0;
241 244
242 my $proxy = $arg{proxy} || $PROXY; 245 my $proxy = $arg{proxy} || $PROXY;
243 my $timeout = $arg{timeout} || $TIMEOUT; 246 my $timeout = $arg{timeout} || $TIMEOUT;
244 247
249 252
250 $scheme = lc $scheme; 253 $scheme = lc $scheme;
251 254
252 my $uport = $scheme eq "http" ? 80 255 my $uport = $scheme eq "http" ? 80
253 : $scheme eq "https" ? 443 256 : $scheme eq "https" ? 443
254 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 257 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
255 258
256 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic 259 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
257 260
258 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 261 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
259 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 262 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
260 263
261 my $uhost = $1; 264 my $uhost = $1;
262 $uport = $2 if defined $2; 265 $uport = $2 if defined $2;
263 266
264 $uhost =~ s/^\[(.*)\]$/$1/; 267 $uhost =~ s/^\[(.*)\]$/$1/;
309 312
310 return unless $state{connect_guard}; 313 return unless $state{connect_guard};
311 314
312 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 315 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
313 $state{fh} = shift 316 $state{fh} = shift
314 or return $cb->(undef, { Status => 599, Reason => "$!" }); 317 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url });
315 318
316 delete $state{connect_guard}; # reduce memory usage, save a tree 319 delete $state{connect_guard}; # reduce memory usage, save a tree
317 320
318 # get handle 321 # get handle
319 $state{handle} = new AnyEvent::Handle 322 $state{handle} = new AnyEvent::Handle
333 # (re-)configure handle 336 # (re-)configure handle
334 $state{handle}->timeout ($timeout); 337 $state{handle}->timeout ($timeout);
335 $state{handle}->on_error (sub { 338 $state{handle}->on_error (sub {
336 my $errno = "$!"; 339 my $errno = "$!";
337 %state = (); 340 %state = ();
338 $cb->(undef, { Status => 599, Reason => $errno }); 341 $cb->(undef, { Status => 599, Reason => $errno, URL => $url });
339 }); 342 });
340 $state{handle}->on_eof (sub { 343 $state{handle}->on_eof (sub {
341 %state = (); 344 %state = ();
342 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 345 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
343 }); 346 });
344 347
345 # send request 348 # send request
346 $state{handle}->push_write ( 349 $state{handle}->push_write (
347 "$method $rpath HTTP/1.0\015\012" 350 "$method $rpath HTTP/1.0\015\012"
353 %hdr = (); # reduce memory usage, save a kitten 356 %hdr = (); # reduce memory usage, save a kitten
354 357
355 # status line 358 # status line
356 $state{handle}->push_read (line => qr/\015?\012/, sub { 359 $state{handle}->push_read (line => qr/\015?\012/, sub {
357 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix 360 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
358 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); 361 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
359 362
360 my %hdr = ( # response headers 363 my %hdr = ( # response headers
361 HTTPVersion => "\x00$1", 364 HTTPVersion => "\x00$1",
362 Status => "\x00$2", 365 Status => "\x00$2",
363 Reason => "\x00$3", 366 Reason => "\x00$3",
367 URL => "\x00$url"
364 ); 368 );
365 369
366 # headers, could be optimized a bit 370 # headers, could be optimized a bit
367 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 371 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
368 for ("$_[1]\012") { 372 for ("$_[1]\012") {
375 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 379 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
376 \015?\012 380 \015?\012
377 /gxc; 381 /gxc;
378 382
379 /\G$/ 383 /\G$/
380 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 384 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
381 } 385 }
382 386
383 substr $_, 0, 1, "" 387 substr $_, 0, 1, ""
384 for values %hdr; 388 for values %hdr;
385 389
408 $arg{cookie_jar}{version} = 1; 412 $arg{cookie_jar}{version} = 1;
409 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 413 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
410 } 414 }
411 } 415 }
412 416
413 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) { 417 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
414 # microsoft and other assholes don't give a shit for following standards, 418 # microsoft and other assholes don't give a shit for following standards,
415 # try to support a common form of broken Location header. 419 # try to support a common form of broken Location header.
416 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; 420 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
417 421
422 # apparently, mozilla et al. just change POST to GET here
423 # more research is needed before we do the same
424
418 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 425 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
426 } elsif ($_[1]{Status} == 303 && $recurse) {
427 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
428
429 http_request (GET => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
419 } else { 430 } else {
420 $cb->($_[0], $_[1]); 431 $cb->($_[0], $_[1]);
421 } 432 }
422 }; 433 };
423 434
462 unshift @_, "HEAD"; 473 unshift @_, "HEAD";
463 &http_request 474 &http_request
464} 475}
465 476
466sub http_post($$@) { 477sub http_post($$@) {
478 my $url = shift;
467 unshift @_, "POST", "body"; 479 unshift @_, "POST", $url, "body";
468 &http_request 480 &http_request
469} 481}
470 482
471=back 483=back
472 484
521 533
522L<AnyEvent>. 534L<AnyEvent>.
523 535
524=head1 AUTHOR 536=head1 AUTHOR
525 537
526 Marc Lehmann <schmorp@schmorp.de> 538 Marc Lehmann <schmorp@schmorp.de>
527 http://home.schmorp.de/ 539 http://home.schmorp.de/
528 540
529=cut 541=cut
530 542
5311 5431
532 544

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines