ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.72 by root, Fri Dec 31 20:59:19 2010 UTC vs.
Revision 1.79 by root, Sat Jan 1 20:01:07 2011 UTC

122 122
123If the server sends a header multiple times, then their contents will be 123If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 124joined together with a comma (C<,>), as per the HTTP spec.
125 125
126If an internal error occurs, such as not being able to resolve a hostname, 126If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 128C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 129message. Currently the following status codes are used:
130
131=over 4
132
133=item 595 - errors during connection etsbalishment, proxy handshake.
134
135=item 596 - errors during TLS negotiation, request sending and header processing.
136
137=item 597 - errors during body receiving or processing.
138
139=item 598 - user aborted request via C<on_header> or C<on_body>.
140
141=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142
143=back
130 144
131A typical callback might look like this: 145A typical callback might look like this:
132 146
133 sub { 147 sub {
134 my ($body, $hdr) = @_; 148 my ($body, $hdr) = @_;
412 426
413 \@cookies 427 \@cookies
414} 428}
415 429
416# parse set_cookie header into jar 430# parse set_cookie header into jar
417sub cookie_jar_set_cookie($$) { 431sub cookie_jar_set_cookie($$$) {
418 my ($jar, $set_cookie) = @_; 432 my ($jar, $set_cookie, $uhost) = @_;
419 433
420 for ($set_cookie) { 434 for ($set_cookie) {
421 # parse NAME=VALUE 435 # parse NAME=VALUE
422 my @kv; 436 my @kv;
423 437
438 # expires is not http-compliant in the original cookie-spec,
439 # we support the official date format and some extensions
424 while ( 440 while (
425 m{ 441 m{
426 \G\s* 442 \G\s*
427 (?: 443 (?:
428 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) 444 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
429 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 445 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
430 ) 446 )
431 }gcxsi 447 }gcxsi
432 ) { 448 ) {
433 my $name = $2; 449 my $name = $2;
472 } else { 488 } else {
473 $cdom = $uhost; 489 $cdom = $uhost;
474 } 490 }
475 491
476 # store it 492 # store it
477 $arg{cookie_jar}{version} = 1; 493 $jar->{version} = 1;
478 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 494 $jar->{$cdom}{$cpath}{$name} = \%kv;
479 495
480 redo if /\G\s*,/gc; 496 redo if /\G\s*,/gc;
481 } 497 }
482}
483} 498}
484 499
485# continue to parse $_ for headers and place them into the arg 500# continue to parse $_ for headers and place them into the arg
486sub parse_hdr() { 501sub parse_hdr() {
487 my %hdr; 502 my %hdr;
601 _get_slot $uhost, sub { 616 _get_slot $uhost, sub {
602 $state{slot_guard} = shift; 617 $state{slot_guard} = shift;
603 618
604 return unless $state{connect_guard}; 619 return unless $state{connect_guard};
605 620
621 my $ae_error = 595; # connecting
622
606 my $connect_cb = sub { 623 my $connect_cb = sub {
607 $state{fh} = shift 624 $state{fh} = shift
608 or do { 625 or do {
609 my $err = "$!"; 626 my $err = "$!";
610 %state = (); 627 %state = ();
611 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 628 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
612 }; 629 };
613
614 pop; # free memory, save a tree
615 630
616 return unless delete $state{connect_guard}; 631 return unless delete $state{connect_guard};
617 632
618 # get handle 633 # get handle
619 $state{handle} = new AnyEvent::Handle 634 $state{handle} = new AnyEvent::Handle
622 tls_ctx => $arg{tls_ctx}, 637 tls_ctx => $arg{tls_ctx},
623 # these need to be reconfigured on keepalive handles 638 # these need to be reconfigured on keepalive handles
624 timeout => $timeout, 639 timeout => $timeout,
625 on_error => sub { 640 on_error => sub {
626 %state = (); 641 %state = ();
627 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 642 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
628 }, 643 },
629 on_eof => sub { 644 on_eof => sub {
630 %state = (); 645 %state = ();
631 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 646 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
632 }, 647 },
633 ; 648 ;
634 649
635 # limit the number of persistent connections 650 # limit the number of persistent connections
636 # keepalive not yet supported 651 # keepalive not yet supported
644 659
645 $state{handle}->starttls ("connect") if $rscheme eq "https"; 660 $state{handle}->starttls ("connect") if $rscheme eq "https";
646 661
647 # handle actual, non-tunneled, request 662 # handle actual, non-tunneled, request
648 my $handle_actual_request = sub { 663 my $handle_actual_request = sub {
664 $ae_error = 596; # request phase
665
649 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 666 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
650 667
651 # send request 668 # send request
652 $state{handle}->push_write ( 669 $state{handle}->push_write (
653 "$method $rpath HTTP/1.1\015\012" 670 "$method $rpath HTTP/1.1\015\012"
664 # status line and headers 681 # status line and headers
665 $state{read_response} = sub { 682 $state{read_response} = sub {
666 for ("$_[1]") { 683 for ("$_[1]") {
667 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 684 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
668 685
669 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc 686 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
670 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 687 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
671 688
672 # 100 Continue handling 689 # 100 Continue handling
673 # should not happen as we don't send expect: 100-continue, 690 # should not happen as we don't send expect: 100-continue,
674 # but we handle it just in case. 691 # but we handle it just in case.
709 726
710 if ($recurse) { 727 if ($recurse) {
711 my $status = $hdr{Status}; 728 my $status = $hdr{Status};
712 729
713 # industry standard is to redirect POST as GET for 730 # industry standard is to redirect POST as GET for
714 # 301, 302 and 303, in contrast to http/1.0 and 1.1. 731 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
715 # also, the UA should ask the user for 301 and 307 and POST, 732 # also, the UA should ask the user for 301 and 307 and POST,
716 # industry standard seems to be to simply follow. 733 # industry standard seems to be to simply follow.
717 # we go with the industry standard. 734 # we go with the industry standard.
718 if ($status == 301 or $status == 302 or $status == 303) { 735 if ($status == 301 or $status == 302 or $status == 303) {
719 # HTTP/1.1 is unclear on how to mutate the method 736 # HTTP/1.1 is unclear on how to mutate the method
723 $redirect = 1; 740 $redirect = 1;
724 } 741 }
725 } 742 }
726 743
727 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 744 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
728 my $keepalive = pop; 745 my $may_keep_alive = $_[3];
729 746
730 $state{handle}->destroy if $state{handle}; 747 $state{handle}->destroy if $state{handle};
731 %state = (); 748 %state = ();
732 749
733 if (defined $_[1]) { 750 if (defined $_[1]) {
735 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; 752 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
736 } 753 }
737 754
738 # set-cookie processing 755 # set-cookie processing
739 if ($arg{cookie_jar}) { 756 if ($arg{cookie_jar}) {
740 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}; 757 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
758 }
741 759
742 if ($redirect && exists $hdr{location}) { 760 if ($redirect && exists $hdr{location}) {
743 # we ignore any errors, as it is very common to receive 761 # we ignore any errors, as it is very common to receive
744 # Content-Length != 0 but no actual body 762 # Content-Length != 0 but no actual body
745 # we also access %hdr, as $_[1] might be an erro 763 # we also access %hdr, as $_[1] might be an erro
751 $cb); 769 $cb);
752 } else { 770 } else {
753 $cb->($_[0], \%hdr); 771 $cb->($_[0], \%hdr);
754 } 772 }
755 }; 773 };
774
775 $ae_error = 597; # body phase
756 776
757 my $len = $hdr{"content-length"}; 777 my $len = $hdr{"content-length"};
758 778
759 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { 779 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
760 $finish->(undef, 598 => "Request cancelled by on_header"); 780 $finish->(undef, 598 => "Request cancelled by on_header");
782 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { 802 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
783 my $cl = 0; 803 my $cl = 0;
784 my $body = undef; 804 my $body = undef;
785 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 805 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
786 806
787 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
788
789 my $read_chunk; $read_chunk = sub { 807 my $read_chunk; $read_chunk = sub {
790 $_[1] =~ /^([0-9a-fA-F]+)/ 808 $_[1] =~ /^([0-9a-fA-F]+)/
791 or $finish->(undef, 599 => "Garbled chunked transfer encoding"); 809 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
792 810
793 my $len = hex $1; 811 my $len = hex $1;
794 812
795 if ($len) { 813 if ($len) {
796 $cl += $len; 814 $cl += $len;
799 $on_body->($_[1], \%hdr) 817 $on_body->($_[1], \%hdr)
800 or return $finish->(undef, 598 => "Request cancelled by on_body"); 818 or return $finish->(undef, 598 => "Request cancelled by on_body");
801 819
802 $_[0]->push_read (line => sub { 820 $_[0]->push_read (line => sub {
803 length $_[1] 821 length $_[1]
804 and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); 822 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
805 $_[0]->push_read (line => $read_chunk); 823 $_[0]->push_read (line => $read_chunk);
806 }); 824 });
807 }); 825 });
808 } else { 826 } else {
809 $hdr{"content-length"} ||= $cl; 827 $hdr{"content-length"} ||= $cl;
812 if (length $_[1]) { 830 if (length $_[1]) {
813 for ("$_[1]") { 831 for ("$_[1]") {
814 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 832 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
815 833
816 my $hdr = parse_hdr 834 my $hdr = parse_hdr
817 or return $finish->(undef, 599 => "Garbled response trailers"); 835 or return $finish->(undef, $ae_error => "Garbled response trailers");
818 836
819 %hdr = (%hdr, %$hdr); 837 %hdr = (%hdr, %$hdr);
820 } 838 }
821 } 839 }
822 840
826 }; 844 };
827 845
828 $_[0]->push_read (line => $read_chunk); 846 $_[0]->push_read (line => $read_chunk);
829 847
830 } elsif ($arg{on_body}) { 848 } elsif ($arg{on_body}) {
831 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
832
833 if ($len) { 849 if ($len) {
834 $_[0]->on_read (sub { 850 $_[0]->on_read (sub {
835 $len -= length $_[0]{rbuf}; 851 $len -= length $_[0]{rbuf};
836 852
837 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 853 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
851 } 867 }
852 } else { 868 } else {
853 $_[0]->on_eof (undef); 869 $_[0]->on_eof (undef);
854 870
855 if ($len) { 871 if ($len) {
856 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
857 $_[0]->on_read (sub { 872 $_[0]->on_read (sub {
858 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 873 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
859 if $len <= length $_[0]{rbuf}; 874 if $len <= length $_[0]{rbuf};
860 }); 875 });
861 } else { 876 } else {
862 $_[0]->on_error (sub { 877 $_[0]->on_error (sub {
863 ($! == Errno::EPIPE || !$!) 878 ($! == Errno::EPIPE || !$!)
864 ? $finish->(delete $_[0]{rbuf}) 879 ? $finish->(delete $_[0]{rbuf})
865 : $finish->(undef, 599 => $_[2]); 880 : $finish->(undef, $ae_error => $_[2]);
866 }); 881 });
867 $_[0]->on_read (sub { }); 882 $_[0]->on_read (sub { });
868 } 883 }
869 } 884 }
870 } 885 }
950Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 965Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
951Date (RFC 2616). 966Date (RFC 2616).
952 967
953=item $timestamp = AnyEvent::HTTP::parse_date $date 968=item $timestamp = AnyEvent::HTTP::parse_date $date
954 969
955Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 970Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
956returns the corresponding POSIX timestamp, or C<undef> if the date cannot 971bunch of minor variations of those, and returns the corresponding POSIX
957be parsed. 972timestamp, or C<undef> if the date cannot be parsed.
958 973
959=item $AnyEvent::HTTP::MAX_RECURSE 974=item $AnyEvent::HTTP::MAX_RECURSE
960 975
961The default value for the C<recurse> request parameter (default: C<10>). 976The default value for the C<recurse> request parameter (default: C<10>).
962 977
1001sub parse_date($) { 1016sub parse_date($) {
1002 my ($date) = @_; 1017 my ($date) = @_;
1003 1018
1004 my ($d, $m, $y, $H, $M, $S); 1019 my ($d, $m, $y, $H, $M, $S);
1005 1020
1006 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1021 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1007 # RFC 822/1123, required by RFC 2616 (with " ") 1022 # RFC 822/1123, required by RFC 2616 (with " ")
1008 # cookie dates (with "-") 1023 # cookie dates (with "-")
1009 1024
1010 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1025 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1011 1026
1012 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1027 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1013 # RFC 850 1028 # RFC 850
1014 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1029 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1015 1030
1016 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1031 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1017 # ISO C's asctime 1032 # ISO C's asctime
1018 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1033 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1019 } 1034 }
1020 # other formats fail in the loop below 1035 # other formats fail in the loop below
1021 1036

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines