ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.69 by root, Fri Dec 31 19:32:47 2010 UTC vs.
Revision 1.89 by root, Mon Jan 3 00:23:25 2011 UTC

36 36
37=cut 37=cut
38 38
39package AnyEvent::HTTP; 39package AnyEvent::HTTP;
40 40
41use strict; 41use common::sense;
42no warnings;
43 42
44use Errno (); 43use Errno ();
45 44
46use AnyEvent 5.0 (); 45use AnyEvent 5.0 ();
47use AnyEvent::Util (); 46use AnyEvent::Util ();
58our $MAX_PERSISTENT = 8; 57our $MAX_PERSISTENT = 8;
59our $PERSISTENT_TIMEOUT = 2; 58our $PERSISTENT_TIMEOUT = 2;
60our $TIMEOUT = 300; 59our $TIMEOUT = 300;
61 60
62# changing these is evil 61# changing these is evil
63our $MAX_PERSISTENT_PER_HOST = 0; 62our $MAX_PERSISTENT_PER_HOST = 2;
64our $MAX_PER_HOST = 4; 63our $MAX_PER_HOST = 4;
65 64
66our $PROXY; 65our $PROXY;
67our $ACTIVE = 0; 66our $ACTIVE = 0;
68 67
122 121
123If the server sends a header multiple times, then their contents will be 122If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 123joined together with a comma (C<,>), as per the HTTP spec.
125 124
126If an internal error occurs, such as not being able to resolve a hostname, 125If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 126then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 127C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 128message. Currently the following status codes are used:
129
130=over 4
131
132=item 595 - errors during connection etsbalishment, proxy handshake.
133
134=item 596 - errors during TLS negotiation, request sending and header processing.
135
136=item 597 - errors during body receiving or processing.
137
138=item 598 - user aborted request via C<on_header> or C<on_body>.
139
140=item 599 - other, usually nonretryable, errors (garbled URL etc.).
141
142=back
130 143
131A typical callback might look like this: 144A typical callback might look like this:
132 145
133 sub { 146 sub {
134 my ($body, $hdr) = @_; 147 my ($body, $hdr) = @_;
182=item cookie_jar => $hash_ref 195=item cookie_jar => $hash_ref
183 196
184Passing this parameter enables (simplified) cookie-processing, loosely 197Passing this parameter enables (simplified) cookie-processing, loosely
185based on the original netscape specification. 198based on the original netscape specification.
186 199
187The C<$hash_ref> must be an (initially empty) hash reference which will 200The C<$hash_ref> must be an (initially empty) hash reference which
188get updated automatically. It is possible to save the cookie_jar to 201will get updated automatically. It is possible to save the cookie jar
189persistent storage with something like JSON or Storable, but this is not 202to persistent storage with something like JSON or Storable - see the
190recommended, as expiry times are currently being ignored. 203C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
204expired or session-only cookies, and also for documentation on the format
205of the cookie jar.
191 206
192Note that this cookie implementation is not of very high quality, nor 207Note that this cookie implementation is not meant to be complete. If
193meant to be complete. If you want complete cookie management you have to 208you want complete cookie management you have to do that on your
194do that on your own. C<cookie_jar> is meant as a quick fix to get some 209own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
195cookie-using sites working. Cookies are a privacy disaster, do not use 210working. Cookies are a privacy disaster, do not use them unless required
196them unless required to. 211to.
197 212
198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 213When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
199headers will be ste and handled by this module, otherwise they will be 214headers will be set and handled by this module, otherwise they will be
200left untouched. 215left untouched.
201 216
202=item tls_ctx => $scheme | $tls_ctx 217=item tls_ctx => $scheme | $tls_ctx
203 218
204Specifies the AnyEvent::TLS context to be used for https connections. This 219Specifies the AnyEvent::TLS context to be used for https connections. This
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 379 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 380
366 _slot_schedule $_[0]; 381 _slot_schedule $_[0];
367} 382}
368 383
384#############################################################################
385
386# expire cookies
387sub cookie_jar_expire($;$) {
388 my ($jar, $session_end) = @_;
389
390 %$jar = () if $jar->{version} != 1;
391
392 my $anow = AE::now;
393
394 while (my ($chost, $paths) = each %$jar) {
395 next unless ref $paths;
396
397 while (my ($cpath, $cookies) = each %$paths) {
398 while (my ($cookie, $kv) = each %$cookies) {
399 if (exists $kv->{_expires}) {
400 delete $cookies->{$cookie}
401 if $anow > $kv->{_expires};
402 } elsif ($session_end) {
403 delete $cookies->{$cookie};
404 }
405 }
406
407 delete $paths->{$cpath}
408 unless %$cookies;
409 }
410
411 delete $jar->{$chost}
412 unless %$paths;
413 }
414}
415
416# extract cookies from jar
417sub cookie_jar_extract($$$$) {
418 my ($jar, $uscheme, $uhost, $upath) = @_;
419
420 %$jar = () if $jar->{version} != 1;
421
422 my @cookies;
423
424 while (my ($chost, $paths) = each %$jar) {
425 next unless ref $paths;
426
427 if ($chost =~ /^\./) {
428 next unless $chost eq substr $uhost, -length $chost;
429 } elsif ($chost =~ /\./) {
430 next unless $chost eq $uhost;
431 } else {
432 next;
433 }
434
435 while (my ($cpath, $cookies) = each %$paths) {
436 next unless $cpath eq substr $upath, 0, length $cpath;
437
438 while (my ($cookie, $kv) = each %$cookies) {
439 next if $uscheme ne "https" && exists $kv->{secure};
440
441 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
442 delete $cookies->{$cookie};
443 next;
444 }
445
446 my $value = $kv->{value};
447
448 if ($value =~ /[=;,[:space:]]/) {
449 $value =~ s/([\\"])/\\$1/g;
450 $value = "\"$value\"";
451 }
452
453 push @cookies, "$cookie=$value";
454 }
455 }
456 }
457
458 \@cookies
459}
460
461# parse set_cookie header into jar
462sub cookie_jar_set_cookie($$$$) {
463 my ($jar, $set_cookie, $uhost, $date) = @_;
464
465 my $anow = int AE::now;
466 my $snow; # server-now
467
468 for ($set_cookie) {
469 # parse NAME=VALUE
470 my @kv;
471
472 # expires is not http-compliant in the original cookie-spec,
473 # we support the official date format and some extensions
474 while (
475 m{
476 \G\s*
477 (?:
478 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
479 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
480 )
481 }gcxsi
482 ) {
483 my $name = $2;
484 my $value = $4;
485
486 if (defined $1) {
487 # expires
488 $name = "expires";
489 $value = $1;
490 } elsif (defined $3) {
491 # quoted
492 $value = $3;
493 $value =~ s/\\(.)/$1/gs;
494 }
495
496 push @kv, lc $name, $value;
497
498 last unless /\G\s*;/gc;
499 }
500
501 last unless @kv;
502
503 my $name = shift @kv;
504 my %kv = (value => shift @kv, @kv);
505
506 if (exists $kv{"max-age"}) {
507 $kv{_expires} = $anow + delete $kv{"max-age"};
508 } elsif (exists $kv{expires}) {
509 $snow ||= parse_date ($date) || $anow;
510 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
511 } else {
512 delete $kv{_expires};
513 }
514
515 my $cdom;
516 my $cpath = (delete $kv{path}) || "/";
517
518 if (exists $kv{domain}) {
519 $cdom = delete $kv{domain};
520
521 $cdom =~ s/^\.?/./; # make sure it starts with a "."
522
523 next if $cdom =~ /\.$/;
524
525 # this is not rfc-like and not netscape-like. go figure.
526 my $ndots = $cdom =~ y/.//;
527 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
528 } else {
529 $cdom = $uhost;
530 }
531
532 # store it
533 $jar->{version} = 1;
534 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
535
536 redo if /\G\s*,/gc;
537 }
538}
539
369# continue to parse $_ for headers and place them into the arg 540# continue to parse $_ for headers and place them into the arg
370sub parse_hdr() { 541sub parse_hdr() {
371 my %hdr; 542 my %hdr;
372 543
373 # things seen, not parsed: 544 # things seen, not parsed:
435 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 606 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
436 607
437 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 608 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
438 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 609 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
439 610
440 my $uhost = $1; 611 my $uhost = lc $1;
441 $uport = $2 if defined $2; 612 $uport = $2 if defined $2;
442 613
443 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 614 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
444 unless exists $hdr{host}; 615 unless exists $hdr{host};
445 616
448 619
449 $upath =~ s%^/?%/%; 620 $upath =~ s%^/?%/%;
450 621
451 # cookie processing 622 # cookie processing
452 if (my $jar = $arg{cookie_jar}) { 623 if (my $jar = $arg{cookie_jar}) {
453 %$jar = () if $jar->{version} != 1; 624 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
454 625
455 my @cookie;
456
457 while (my ($chost, $v) = each %$jar) {
458 if ($chost =~ /^\./) {
459 next unless $chost eq substr $uhost, -length $chost;
460 } elsif ($chost =~ /\./) {
461 next unless $chost eq $uhost;
462 } else {
463 next;
464 }
465
466 while (my ($cpath, $v) = each %$v) {
467 next unless $cpath eq substr $upath, 0, length $cpath;
468
469 while (my ($k, $v) = each %$v) {
470 next if $uscheme ne "https" && exists $v->{secure};
471 my $value = $v->{value};
472 $value =~ s/([\\"])/\\$1/g;
473 push @cookie, "$k=\"$value\"";
474 }
475 }
476 }
477
478 $hdr{cookie} = join "; ", @cookie 626 $hdr{cookie} = join "; ", @$cookies
479 if @cookie; 627 if @$cookies;
480 } 628 }
481 629
482 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path 630 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
483 631
484 if ($proxy) { 632 if ($proxy) {
487 $rscheme = "http" unless defined $rscheme; 635 $rscheme = "http" unless defined $rscheme;
488 636
489 # don't support https requests over https-proxy transport, 637 # don't support https requests over https-proxy transport,
490 # can't be done with tls as spec'ed, unless you double-encrypt. 638 # can't be done with tls as spec'ed, unless you double-encrypt.
491 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 639 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
640
641 $rhost = lc $rhost;
642 $rscheme = lc $rscheme;
492 } else { 643 } else {
493 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 644 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
494 } 645 }
495 646
496 # leave out fragment and query string, just a heuristic 647 # leave out fragment and query string, just a heuristic
498 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
499 650
500 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
501 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
502 653
503 $hdr{connection} = "close TE"; #1.1 654 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/;
655
656 # default value for keepalive is true iff the request is for an idempotent method
657 my $keepalive = exists $arg{keepalive}
658 ? $arg{keepalive}*1
659 : $idempotent ? $PERSISTENT_TIMEOUT : 0;
660
661 $hdr{connection} = ($keepalive ? "" : "close ") . "Te"; #1.1
504 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 662 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
505 663
506 my %state = (connect_guard => 1); 664 my %state = (connect_guard => 1);
507 665
508 _get_slot $uhost, sub { 666 my $ae_error = 595; # connecting
509 $state{slot_guard} = shift;
510 667
668 # handle actual, non-tunneled, request
669 my $handle_actual_request = sub {
670 $ae_error = 596; # request phase
671
672 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
673
674 # send request
675 $state{handle}->push_write (
676 "$method $rpath HTTP/1.1\015\012"
677 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
678 . "\015\012"
679 . (delete $arg{body})
680 );
681
682 # return if error occured during push_write()
511 return unless $state{connect_guard}; 683 return unless %state;
512 684
513 my $connect_cb = sub { 685 # reduce memory usage, save a kitten, also re-use it for the response headers.
514 $state{fh} = shift 686 %hdr = ();
687
688 # status line and headers
689 $state{read_response} = sub {
690 for ("$_[1]") {
691 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
692
693 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
694 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
695
696 # 100 Continue handling
697 # should not happen as we don't send expect: 100-continue,
698 # but we handle it just in case.
699 # since we send the request body regardless, if we get an error
700 # we are out of-sync, which we currently do NOT handle correctly.
701 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
702 if $2 eq 100;
703
704 push @pseudo,
705 HTTPVersion => $1,
706 Status => $2,
707 Reason => $3,
515 or do { 708 ;
516 my $err = "$!"; 709
710 my $hdr = parse_hdr
711 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
712
713 %hdr = (%$hdr, @pseudo);
714 }
715
716 # redirect handling
717 # microsoft and other shitheads don't give a shit for following standards,
718 # try to support some common forms of broken Location headers.
719 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
720 $hdr{location} =~ s/^\.\/+//;
721
722 my $url = "$rscheme://$uhost:$uport";
723
724 unless ($hdr{location} =~ s/^\///) {
725 $url .= $upath;
726 $url =~ s/\/[^\/]*$//;
727 }
728
729 $hdr{location} = "$url/$hdr{location}";
730 }
731
732 my $redirect;
733
734 if ($recurse) {
735 my $status = $hdr{Status};
736
737 # industry standard is to redirect POST as GET for
738 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
739 # also, the UA should ask the user for 301 and 307 and POST,
740 # industry standard seems to be to simply follow.
741 # we go with the industry standard.
742 if ($status == 301 or $status == 302 or $status == 303) {
743 # HTTP/1.1 is unclear on how to mutate the method
744 $method = "GET" unless $method eq "HEAD";
745 $redirect = 1;
746 } elsif ($status == 307) {
747 $redirect = 1;
748 }
749 }
750
751 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
752 my $may_keep_alive = $_[3];
753
754 $state{handle}->destroy if $state{handle};
517 %state = (); 755 %state = ();
518 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 756
757 if (defined $_[1]) {
758 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
759 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
760 }
761
762 # set-cookie processing
763 if ($arg{cookie_jar}) {
764 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
765 }
766
767 if ($redirect && exists $hdr{location}) {
768 # we ignore any errors, as it is very common to receive
769 # Content-Length != 0 but no actual body
770 # we also access %hdr, as $_[1] might be an erro
771 http_request (
772 $method => $hdr{location},
773 %arg,
774 recurse => $recurse - 1,
775 Redirect => [$_[0], \%hdr],
776 $cb);
777 } else {
778 $cb->($_[0], \%hdr);
779 }
780 };
781
782 $ae_error = 597; # body phase
783
784 my $len = $hdr{"content-length"};
785
786 # body handling, many different code paths
787 # - no body expected
788 # - want_body_handle
789 # - te chunked
790 # - 2x length known (with or without on_body)
791 # - 2x length not known (with or without on_body)
792 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
793 $finish->(undef, 598 => "Request cancelled by on_header");
794 } elsif (
795 $hdr{Status} =~ /^(?:1..|204|205|304)$/
796 or $method eq "HEAD"
797 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
798 ) {
799 # no body
800 $finish->("", undef, undef, 1);
801
802 } elsif (!$redirect && $arg{want_body_handle}) {
803 $_[0]->on_eof (undef);
804 $_[0]->on_error (undef);
805 $_[0]->on_read (undef);
806
807 $finish->(delete $state{handle});
808
809 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
810 my $cl = 0;
811 my $body = undef;
812 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
813
814 $state{read_chunk} = sub {
815 $_[1] =~ /^([0-9a-fA-F]+)/
816 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
817
818 my $len = hex $1;
819
820 if ($len) {
821 $cl += $len;
822
823 $_[0]->push_read (chunk => $len, sub {
824 $on_body->($_[1], \%hdr)
825 or return $finish->(undef, 598 => "Request cancelled by on_body");
826
827 $_[0]->push_read (line => sub {
828 length $_[1]
829 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
830 $_[0]->push_read (line => $state{read_chunk});
831 });
832 });
833 } else {
834 $hdr{"content-length"} ||= $cl;
835
836 $_[0]->push_read (line => $qr_nlnl, sub {
837 if (length $_[1]) {
838 for ("$_[1]") {
839 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
840
841 my $hdr = parse_hdr
842 or return $finish->(undef, $ae_error => "Garbled response trailers");
843
844 %hdr = (%hdr, %$hdr);
845 }
846 }
847
848 $finish->($body, undef, undef, 1);
849 });
850 }
519 }; 851 };
520 852
521 pop; # free memory, save a tree 853 $_[0]->push_read (line => $state{read_chunk});
522 854
855 } elsif ($arg{on_body}) {
856 if (defined $len) {
857 $_[0]->on_read (sub {
858 $len -= length $_[0]{rbuf};
859
860 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
861 or return $finish->(undef, 598 => "Request cancelled by on_body");
862
863 $len > 0
864 or $finish->("", undef, undef, 1);
865 });
866 } else {
867 $_[0]->on_eof (sub {
868 $finish->("");
869 });
870 $_[0]->on_read (sub {
871 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
872 or $finish->(undef, 598 => "Request cancelled by on_body");
873 });
874 }
875 } else {
876 $_[0]->on_eof (undef);
877
878 if (defined $len) {
879 $_[0]->on_read (sub {
880 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
881 if $len <= length $_[0]{rbuf};
882 });
883 } else {
884 $_[0]->on_error (sub {
885 ($! == Errno::EPIPE || !$!)
886 ? $finish->(delete $_[0]{rbuf})
887 : $finish->(undef, $ae_error => $_[2]);
888 });
889 $_[0]->on_read (sub { });
890 }
891 }
892 };
893
894 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
895 };
896
897 my $connect_cb = sub {
898 $state{fh} = shift
899 or do {
900 my $err = "$!";
901 %state = ();
902 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
903 };
904
523 return unless delete $state{connect_guard}; 905 return unless delete $state{connect_guard};
524 906
525 # get handle 907 # get handle
526 $state{handle} = new AnyEvent::Handle 908 $state{handle} = new AnyEvent::Handle
527 fh => $state{fh}, 909 fh => $state{fh},
528 peername => $rhost, 910 peername => $rhost,
529 tls_ctx => $arg{tls_ctx}, 911 tls_ctx => $arg{tls_ctx},
530 # these need to be reconfigured on keepalive handles 912 # these need to be reconfigured on keepalive handles
531 timeout => $timeout, 913 timeout => $timeout,
532 on_error => sub { 914 on_error => sub {
533 %state = (); 915 %state = ();
534 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 916 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
535 }, 917 },
536 on_eof => sub { 918 on_eof => sub {
537 %state = (); 919 %state = ();
538 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 920 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
539 }, 921 },
540 ; 922 ;
541 923
542 # limit the number of persistent connections 924 # limit the number of persistent connections
543 # keepalive not yet supported 925 # keepalive not yet supported
544# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 926# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
545# ++$KA_COUNT{$_[1]}; 927# ++$KA_COUNT{$_[1]};
546# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 928# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
547# --$KA_COUNT{$_[1]} 929# --$KA_COUNT{$_[1]}
548# }; 930# };
549# $hdr{connection} = "keep-alive"; 931# $hdr{connection} = "keep-alive";
550# } 932# }
551 933
552 $state{handle}->starttls ("connect") if $rscheme eq "https"; 934 $state{handle}->starttls ("connect") if $rscheme eq "https";
553 935
554 # handle actual, non-tunneled, request
555 my $handle_actual_request = sub {
556 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
557
558 # send request
559 $state{handle}->push_write (
560 "$method $rpath HTTP/1.1\015\012"
561 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
562 . "\015\012"
563 . (delete $arg{body})
564 );
565
566 # return if error occured during push_write()
567 return unless %state;
568
569 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
570
571 # status line and headers
572 $state{read_response} = sub {
573 for ("$_[1]") {
574 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
575
576 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc
577 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
578
579 # 100 Continue handling
580 # should not happen as we don't send expect: 100-continue,
581 # but we handle it just in case.
582 # since we send the request body regardless, if we get an error
583 # we are out of-sync, which we currently do NOT handle correctly.
584 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
585 if $2 eq 100;
586
587 push @pseudo,
588 HTTPVersion => $1,
589 Status => $2,
590 Reason => $3,
591 ;
592
593 my $hdr = parse_hdr
594 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
595
596 %hdr = (%$hdr, @pseudo);
597 }
598
599 # redirect handling
600 # microsoft and other shitheads don't give a shit for following standards,
601 # try to support some common forms of broken Location headers.
602 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
603 $hdr{location} =~ s/^\.\/+//;
604
605 my $url = "$rscheme://$uhost:$uport";
606
607 unless ($hdr{location} =~ s/^\///) {
608 $url .= $upath;
609 $url =~ s/\/[^\/]*$//;
610 }
611
612 $hdr{location} = "$url/$hdr{location}";
613 }
614
615 my $redirect;
616
617 if ($recurse) {
618 my $status = $hdr{Status};
619
620 # industry standard is to redirect POST as GET for
621 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
622 # also, the UA should ask the user for 301 and 307 and POST,
623 # industry standard seems to be to simply follow.
624 # we go with the industry standard.
625 if ($status == 301 or $status == 302 or $status == 303) {
626 # HTTP/1.1 is unclear on how to mutate the method
627 $method = "GET" unless $method eq "HEAD";
628 $redirect = 1;
629 } elsif ($status == 307) {
630 $redirect = 1;
631 }
632 }
633
634 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
635 my $keepalive = pop;
636
637 $state{handle}->destroy if $state{handle};
638 %state = ();
639
640 if (defined $_[1]) {
641 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
642 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
643 }
644
645 # set-cookie processing
646 if ($arg{cookie_jar}) {
647 for ($hdr{"set-cookie"}) {
648 # parse NAME=VALUE
649 my @kv;
650
651 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
652 my $name = $1;
653 my $value = $3;
654
655 unless ($value) {
656 $value = $2;
657 $value =~ s/\\(.)/$1/gs;
658 }
659
660 push @kv, $name => $value;
661
662 last unless /\G\s*;/gc;
663 }
664
665 last unless @kv;
666
667 my $name = shift @kv;
668 my %kv = (value => shift @kv, @kv);
669
670 my $cdom;
671 my $cpath = (delete $kv{path}) || "/";
672
673 if (exists $kv{domain}) {
674 $cdom = delete $kv{domain};
675
676 $cdom =~ s/^\.?/./; # make sure it starts with a "."
677
678 next if $cdom =~ /\.$/;
679
680 # this is not rfc-like and not netscape-like. go figure.
681 my $ndots = $cdom =~ y/.//;
682 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
683 } else {
684 $cdom = $uhost;
685 }
686
687 # store it
688 $arg{cookie_jar}{version} = 1;
689 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
690
691 redo if /\G\s*,/gc;
692 }
693 }
694
695 if ($redirect && exists $hdr{location}) {
696 # we ignore any errors, as it is very common to receive
697 # Content-Length != 0 but no actual body
698 # we also access %hdr, as $_[1] might be an erro
699 http_request (
700 $method => $hdr{location},
701 %arg,
702 recurse => $recurse - 1,
703 Redirect => [$_[0], \%hdr],
704 $cb);
705 } else {
706 $cb->($_[0], \%hdr);
707 }
708 };
709
710 my $len = $hdr{"content-length"};
711
712 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
713 $finish->(undef, 598 => "Request cancelled by on_header");
714 } elsif (
715 $hdr{Status} =~ /^(?:1..|204|205|304)$/
716 or $method eq "HEAD"
717 or (defined $len && !$len)
718 ) {
719 # no body
720 $finish->("", undef, undef, 1);
721 } else {
722 # body handling, many different code paths
723 # - no body expected
724 # - want_body_handle
725 # - te chunked
726 # - 2x length known (with or without on_body)
727 # - 2x length not known (with or without on_body)
728 if (!$redirect && $arg{want_body_handle}) {
729 $_[0]->on_eof (undef);
730 $_[0]->on_error (undef);
731 $_[0]->on_read (undef);
732
733 $finish->(delete $state{handle});
734
735 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
736 my $cl = 0;
737 my $body = undef;
738 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
739
740 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
741
742 my $read_chunk; $read_chunk = sub {
743 $_[1] =~ /^([0-9a-fA-F]+)/
744 or $finish->(undef, 599 => "Garbled chunked transfer encoding");
745
746 my $len = hex $1;
747
748 if ($len) {
749 $cl += $len;
750
751 $_[0]->push_read (chunk => $len, sub {
752 $on_body->($_[1], \%hdr)
753 or return $finish->(undef, 598 => "Request cancelled by on_body");
754
755 $_[0]->push_read (line => sub {
756 length $_[1]
757 and return $finish->(undef, 599 => "Garbled chunked transfer encoding");
758 $_[0]->push_read (line => $read_chunk);
759 });
760 });
761 } else {
762 $hdr{"content-length"} ||= $cl;
763
764 $_[0]->push_read (line => $qr_nlnl, sub {
765 if (length $_[1]) {
766 for ("$_[1]") {
767 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
768
769 my $hdr = parse_hdr
770 or return $finish->(undef, 599 => "Garbled response trailers");
771
772 %hdr = (%hdr, %$hdr);
773 }
774 }
775
776 $finish->($body, undef, undef, 1);
777 });
778 }
779 };
780
781 $_[0]->push_read (line => $read_chunk);
782
783 } elsif ($arg{on_body}) {
784 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
785
786 if ($len) {
787 $_[0]->on_read (sub {
788 $len -= length $_[0]{rbuf};
789
790 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
791 or return $finish->(undef, 598 => "Request cancelled by on_body");
792
793 $len > 0
794 or $finish->("", undef, undef, 1);
795 });
796 } else {
797 $_[0]->on_eof (sub {
798 $finish->("");
799 });
800 $_[0]->on_read (sub {
801 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
802 or $finish->(undef, 598 => "Request cancelled by on_body");
803 });
804 }
805 } else {
806 $_[0]->on_eof (undef);
807
808 if ($len) {
809 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
810 $_[0]->on_read (sub {
811 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
812 if $len <= length $_[0]{rbuf};
813 });
814 } else {
815 $_[0]->on_error (sub {
816 ($! == Errno::EPIPE || !$!)
817 ? $finish->(delete $_[0]{rbuf})
818 : $finish->(undef, 599 => $_[2]);
819 });
820 $_[0]->on_read (sub { });
821 }
822 }
823 }
824 };
825
826 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
827 };
828
829 # now handle proxy-CONNECT method 936 # now handle proxy-CONNECT method
830 if ($proxy && $uscheme eq "https") { 937 if ($proxy && $uscheme eq "https") {
831 # oh dear, we have to wrap it into a connect request 938 # oh dear, we have to wrap it into a connect request
832 939
833 # maybe re-use $uauthority with patched port? 940 # maybe re-use $uauthority with patched port?
834 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 941 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
835 $state{handle}->push_read (line => $qr_nlnl, sub { 942 $state{handle}->push_read (line => $qr_nlnl, sub {
836 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 943 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
837 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 944 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
838 945
839 if ($2 == 200) { 946 if ($2 == 200) {
840 $rpath = $upath; 947 $rpath = $upath;
841 &$handle_actual_request; 948 $handle_actual_request->();
842 } else { 949 } else {
843 %state = (); 950 %state = ();
844 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 951 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
845 }
846 }); 952 }
847 } else {
848 &$handle_actual_request;
849 } 953 });
954 } else {
955 $handle_actual_request->();
850 }; 956 }
957 };
958
959 _get_slot $uhost, sub {
960 $state{slot_guard} = shift;
961
962 return unless $state{connect_guard};
851 963
852 my $tcp_connect = $arg{tcp_connect} 964 my $tcp_connect = $arg{tcp_connect}
853 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 965 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
854 966
855 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 967 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
856
857 }; 968 };
858 969
859 defined wantarray && AnyEvent::Util::guard { %state = () } 970 defined wantarray && AnyEvent::Util::guard { %state = () }
860} 971}
861 972
896string of the form C<http://host:port> (optionally C<https:...>), croaks 1007string of the form C<http://host:port> (optionally C<https:...>), croaks
897otherwise. 1008otherwise.
898 1009
899To clear an already-set proxy, use C<undef>. 1010To clear an already-set proxy, use C<undef>.
900 1011
1012=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1013
1014Remove all cookies from the cookie jar that have been expired. If
1015C<$session_end> is given and true, then additionally remove all session
1016cookies.
1017
1018You should call this function (with a true C<$session_end>) before you
1019save cookies to disk, and you should call this function after loading them
1020again. If you have a long-running program you can additonally call this
1021function from time to time.
1022
1023A cookie jar is initially an empty hash-reference that is managed by this
1024module. It's format is subject to change, but currently it is like this:
1025
1026The key C<version> has to contain C<1>, otherwise the hash gets
1027emptied. All other keys are hostnames or IP addresses pointing to
1028hash-references. The key for these inner hash references is the
1029server path for which this cookie is meant, and the values are again
1030hash-references. The keys of those hash-references is the cookie name, and
1031the value, you guessed it, is another hash-reference, this time with the
1032key-value pairs from the cookie, except for C<expires> and C<max-age>,
1033which have been replaced by a C<_expires> key that contains the cookie
1034expiry timestamp.
1035
1036Here is an example of a cookie jar with a single cookie, so you have a
1037chance of understanding the above paragraph:
1038
1039 {
1040 version => 1,
1041 "10.0.0.1" => {
1042 "/" => {
1043 "mythweb_id" => {
1044 _expires => 1293917923,
1045 value => "ooRung9dThee3ooyXooM1Ohm",
1046 },
1047 },
1048 },
1049 }
1050
901=item $date = AnyEvent::HTTP::format_date $timestamp 1051=item $date = AnyEvent::HTTP::format_date $timestamp
902 1052
903Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1053Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
904Date (RFC 2616). 1054Date (RFC 2616).
905 1055
906=item $timestamp = AnyEvent::HTTP::parse_date $date 1056=item $timestamp = AnyEvent::HTTP::parse_date $date
907 1057
908Takes a HTTP Date (RFC 2616) and returns the corresponding POSIX 1058Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1059bunch of minor variations of those, and returns the corresponding POSIX
909timestamp, or C<undef> if the date cannot be parsed. 1060timestamp, or C<undef> if the date cannot be parsed.
910 1061
911=item $AnyEvent::HTTP::MAX_RECURSE 1062=item $AnyEvent::HTTP::MAX_RECURSE
912 1063
913The default value for the C<recurse> request parameter (default: C<10>). 1064The default value for the C<recurse> request parameter (default: C<10>).
953sub parse_date($) { 1104sub parse_date($) {
954 my ($date) = @_; 1105 my ($date) = @_;
955 1106
956 my ($d, $m, $y, $H, $M, $S); 1107 my ($d, $m, $y, $H, $M, $S);
957 1108
958 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1109 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
959 # RFC 822/1123, required by RFC 2616 1110 # RFC 822/1123, required by RFC 2616 (with " ")
1111 # cookie dates (with "-")
1112
960 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1113 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
961 1114
962 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1115 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
963 # RFC 850 1116 # RFC 850
964 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1117 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
965 1118
966 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1119 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
967 # ISO C's asctime 1120 # ISO C's asctime
968 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1121 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
969 } 1122 }
970 # other formats fail in the loop below 1123 # other formats fail in the loop below
971 1124

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines