ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.71 by root, Fri Dec 31 20:50:58 2010 UTC vs.
Revision 1.79 by root, Sat Jan 1 20:01:07 2011 UTC

122 122
123If the server sends a header multiple times, then their contents will be 123If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 124joined together with a comma (C<,>), as per the HTTP spec.
125 125
126If an internal error occurs, such as not being able to resolve a hostname, 126If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 128C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 129message. Currently the following status codes are used:
130
131=over 4
132
133=item 595 - errors during connection establishment, proxy handshake.
134
135=item 596 - errors during TLS negotiation, request sending and header processing.
136
137=item 597 - errors during body receiving or processing.
138
139=item 598 - user aborted request via C<on_header> or C<on_body>.
140
141=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142
143=back
130 144
131A typical callback might look like this: 145A typical callback might look like this:
132 146
133 sub { 147 sub {
134 my ($body, $hdr) = @_; 148 my ($body, $hdr) = @_;
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 378 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 379
366 _slot_schedule $_[0]; 380 _slot_schedule $_[0];
367} 381}
368 382
383# extract cookies from jar
369sub cookie_jar_extract($$$$) { 384sub cookie_jar_extract($$$$) {
370 my ($jar, $uscheme, $uhost, $upath) = @_; 385 my ($jar, $uscheme, $uhost, $upath) = @_;
371 386
372 %$jar = () if $jar->{version} != 1; 387 %$jar = () if $jar->{version} != 1;
373 388
410 } 425 }
411 426
412 \@cookies 427 \@cookies
413} 428}
414 429
430# parse set_cookie header into jar
431sub cookie_jar_set_cookie($$$) {
432 my ($jar, $set_cookie, $uhost) = @_;
433
434 for ($set_cookie) {
435 # parse NAME=VALUE
436 my @kv;
437
438 # expires is not http-compliant in the original cookie-spec,
439 # we support the official date format and some extensions
440 while (
441 m{
442 \G\s*
443 (?:
444 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
445 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
446 )
447 }gcxsi
448 ) {
449 my $name = $2;
450 my $value = $4;
451
452 unless (defined $name) {
453 # expires
454 $name = "expires";
455 $value = $1;
456 } elsif (!defined $value) {
457 # quoted
458 $value = $3;
459 $value =~ s/\\(.)/$1/gs;
460 }
461
462 push @kv, lc $name, $value;
463
464 last unless /\G\s*;/gc;
465 }
466
467 last unless @kv;
468
469 my $name = shift @kv;
470 my %kv = (value => shift @kv, @kv);
471
472 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
473 if exists $kv{"max-age"};
474
475 my $cdom;
476 my $cpath = (delete $kv{path}) || "/";
477
478 if (exists $kv{domain}) {
479 $cdom = delete $kv{domain};
480
481 $cdom =~ s/^\.?/./; # make sure it starts with a "."
482
483 next if $cdom =~ /\.$/;
484
485 # this is not rfc-like and not netscape-like. go figure.
486 my $ndots = $cdom =~ y/.//;
487 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
488 } else {
489 $cdom = $uhost;
490 }
491
492 # store it
493 $jar->{version} = 1;
494 $jar->{$cdom}{$cpath}{$name} = \%kv;
495
496 redo if /\G\s*,/gc;
497 }
498}
499
415# continue to parse $_ for headers and place them into the arg 500# continue to parse $_ for headers and place them into the arg
416sub parse_hdr() { 501sub parse_hdr() {
417 my %hdr; 502 my %hdr;
418 503
419 # things seen, not parsed: 504 # things seen, not parsed:
531 _get_slot $uhost, sub { 616 _get_slot $uhost, sub {
532 $state{slot_guard} = shift; 617 $state{slot_guard} = shift;
533 618
534 return unless $state{connect_guard}; 619 return unless $state{connect_guard};
535 620
621 my $ae_error = 595; # connecting
622
536 my $connect_cb = sub { 623 my $connect_cb = sub {
537 $state{fh} = shift 624 $state{fh} = shift
538 or do { 625 or do {
539 my $err = "$!"; 626 my $err = "$!";
540 %state = (); 627 %state = ();
541 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 628 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
542 }; 629 };
543
544 pop; # free memory, save a tree
545 630
546 return unless delete $state{connect_guard}; 631 return unless delete $state{connect_guard};
547 632
548 # get handle 633 # get handle
549 $state{handle} = new AnyEvent::Handle 634 $state{handle} = new AnyEvent::Handle
552 tls_ctx => $arg{tls_ctx}, 637 tls_ctx => $arg{tls_ctx},
553 # these need to be reconfigured on keepalive handles 638 # these need to be reconfigured on keepalive handles
554 timeout => $timeout, 639 timeout => $timeout,
555 on_error => sub { 640 on_error => sub {
556 %state = (); 641 %state = ();
557 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 642 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
558 }, 643 },
559 on_eof => sub { 644 on_eof => sub {
560 %state = (); 645 %state = ();
561 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 646 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
562 }, 647 },
563 ; 648 ;
564 649
565 # limit the number of persistent connections 650 # limit the number of persistent connections
566 # keepalive not yet supported 651 # keepalive not yet supported
574 659
575 $state{handle}->starttls ("connect") if $rscheme eq "https"; 660 $state{handle}->starttls ("connect") if $rscheme eq "https";
576 661
577 # handle actual, non-tunneled, request 662 # handle actual, non-tunneled, request
578 my $handle_actual_request = sub { 663 my $handle_actual_request = sub {
664 $ae_error = 596; # request phase
665
579 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 666 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
580 667
581 # send request 668 # send request
582 $state{handle}->push_write ( 669 $state{handle}->push_write (
583 "$method $rpath HTTP/1.1\015\012" 670 "$method $rpath HTTP/1.1\015\012"
594 # status line and headers 681 # status line and headers
595 $state{read_response} = sub { 682 $state{read_response} = sub {
596 for ("$_[1]") { 683 for ("$_[1]") {
597 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 684 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
598 685
599 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc 686 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
600 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 687 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
601 688
602 # 100 Continue handling 689 # 100 Continue handling
603 # should not happen as we don't send expect: 100-continue, 690 # should not happen as we don't send expect: 100-continue,
604 # but we handle it just in case. 691 # but we handle it just in case.
639 726
640 if ($recurse) { 727 if ($recurse) {
641 my $status = $hdr{Status}; 728 my $status = $hdr{Status};
642 729
643 # industry standard is to redirect POST as GET for 730 # industry standard is to redirect POST as GET for
644 # 301, 302 and 303, in contrast to http/1.0 and 1.1. 731 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
645 # also, the UA should ask the user for 301 and 307 and POST, 732 # also, the UA should ask the user for 301 and 307 and POST,
646 # industry standard seems to be to simply follow. 733 # industry standard seems to be to simply follow.
647 # we go with the industry standard. 734 # we go with the industry standard.
648 if ($status == 301 or $status == 302 or $status == 303) { 735 if ($status == 301 or $status == 302 or $status == 303) {
649 # HTTP/1.1 is unclear on how to mutate the method 736 # HTTP/1.1 is unclear on how to mutate the method
653 $redirect = 1; 740 $redirect = 1;
654 } 741 }
655 } 742 }
656 743
657 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 744 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
658 my $keepalive = pop; 745 my $may_keep_alive = $_[3];
659 746
660 $state{handle}->destroy if $state{handle}; 747 $state{handle}->destroy if $state{handle};
661 %state = (); 748 %state = ();
662 749
663 if (defined $_[1]) { 750 if (defined $_[1]) {
665 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; 752 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
666 } 753 }
667 754
668 # set-cookie processing 755 # set-cookie processing
669 if ($arg{cookie_jar}) { 756 if ($arg{cookie_jar}) {
670 for ($hdr{"set-cookie"}) { 757 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
671 # parse NAME=VALUE
672 my @kv;
673
674 while (
675 m{
676 \G\s*
677 (?:
678 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+)
679 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
680 )
681 }gcxsi
682 ) {
683 my $name = $2;
684 my $value = $4;
685
686 unless (defined $name) {
687 # expires
688 $name = "expires";
689 $value = $1;
690 } elsif (!defined $value) {
691 # quoted
692 $value = $3;
693 $value =~ s/\\(.)/$1/gs;
694 }
695
696 push @kv, lc $name, $value;
697
698 last unless /\G\s*;/gc;
699 }
700
701 last unless @kv;
702
703 my $name = shift @kv;
704 my %kv = (value => shift @kv, @kv);
705
706 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
707 if exists $kv{"max-age"};
708
709 my $cdom;
710 my $cpath = (delete $kv{path}) || "/";
711
712 if (exists $kv{domain}) {
713 $cdom = delete $kv{domain};
714
715 $cdom =~ s/^\.?/./; # make sure it starts with a "."
716
717 next if $cdom =~ /\.$/;
718
719 # this is not rfc-like and not netscape-like. go figure.
720 my $ndots = $cdom =~ y/.//;
721 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
722 } else {
723 $cdom = $uhost;
724 }
725
726 # store it
727 $arg{cookie_jar}{version} = 1;
728 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
729
730 redo if /\G\s*,/gc;
731 }
732 } 758 }
733 759
734 if ($redirect && exists $hdr{location}) { 760 if ($redirect && exists $hdr{location}) {
735 # we ignore any errors, as it is very common to receive 761 # we ignore any errors, as it is very common to receive
736 # Content-Length != 0 but no actual body 762 # Content-Length != 0 but no actual body
743 $cb); 769 $cb);
744 } else { 770 } else {
745 $cb->($_[0], \%hdr); 771 $cb->($_[0], \%hdr);
746 } 772 }
747 }; 773 };
774
775 $ae_error = 597; # body phase
748 776
749 my $len = $hdr{"content-length"}; 777 my $len = $hdr{"content-length"};
750 778
751 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { 779 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
752 $finish->(undef, 598 => "Request cancelled by on_header"); 780 $finish->(undef, 598 => "Request cancelled by on_header");
774 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { 802 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
775 my $cl = 0; 803 my $cl = 0;
776 my $body = undef; 804 my $body = undef;
777 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 805 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
778 806
779 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
780
781 my $read_chunk; $read_chunk = sub { 807 my $read_chunk; $read_chunk = sub {
782 $_[1] =~ /^([0-9a-fA-F]+)/ 808 $_[1] =~ /^([0-9a-fA-F]+)/
783 or $finish->(undef, 599 => "Garbled chunked transfer encoding"); 809 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
784 810
785 my $len = hex $1; 811 my $len = hex $1;
786 812
787 if ($len) { 813 if ($len) {
788 $cl += $len; 814 $cl += $len;
791 $on_body->($_[1], \%hdr) 817 $on_body->($_[1], \%hdr)
792 or return $finish->(undef, 598 => "Request cancelled by on_body"); 818 or return $finish->(undef, 598 => "Request cancelled by on_body");
793 819
794 $_[0]->push_read (line => sub { 820 $_[0]->push_read (line => sub {
795 length $_[1] 821 length $_[1]
796 and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); 822 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
797 $_[0]->push_read (line => $read_chunk); 823 $_[0]->push_read (line => $read_chunk);
798 }); 824 });
799 }); 825 });
800 } else { 826 } else {
801 $hdr{"content-length"} ||= $cl; 827 $hdr{"content-length"} ||= $cl;
804 if (length $_[1]) { 830 if (length $_[1]) {
805 for ("$_[1]") { 831 for ("$_[1]") {
806 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 832 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
807 833
808 my $hdr = parse_hdr 834 my $hdr = parse_hdr
809 or return $finish->(undef, 599 => "Garbled response trailers"); 835 or return $finish->(undef, $ae_error => "Garbled response trailers");
810 836
811 %hdr = (%hdr, %$hdr); 837 %hdr = (%hdr, %$hdr);
812 } 838 }
813 } 839 }
814 840
818 }; 844 };
819 845
820 $_[0]->push_read (line => $read_chunk); 846 $_[0]->push_read (line => $read_chunk);
821 847
822 } elsif ($arg{on_body}) { 848 } elsif ($arg{on_body}) {
823 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
824
825 if ($len) { 849 if ($len) {
826 $_[0]->on_read (sub { 850 $_[0]->on_read (sub {
827 $len -= length $_[0]{rbuf}; 851 $len -= length $_[0]{rbuf};
828 852
829 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 853 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
843 } 867 }
844 } else { 868 } else {
845 $_[0]->on_eof (undef); 869 $_[0]->on_eof (undef);
846 870
847 if ($len) { 871 if ($len) {
848 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
849 $_[0]->on_read (sub { 872 $_[0]->on_read (sub {
850 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 873 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
851 if $len <= length $_[0]{rbuf}; 874 if $len <= length $_[0]{rbuf};
852 }); 875 });
853 } else { 876 } else {
854 $_[0]->on_error (sub { 877 $_[0]->on_error (sub {
855 ($! == Errno::EPIPE || !$!) 878 ($! == Errno::EPIPE || !$!)
856 ? $finish->(delete $_[0]{rbuf}) 879 ? $finish->(delete $_[0]{rbuf})
857 : $finish->(undef, 599 => $_[2]); 880 : $finish->(undef, $ae_error => $_[2]);
858 }); 881 });
859 $_[0]->on_read (sub { }); 882 $_[0]->on_read (sub { });
860 } 883 }
861 } 884 }
862 } 885 }
942Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 965Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
943Date (RFC 2616). 966Date (RFC 2616).
944 967
945=item $timestamp = AnyEvent::HTTP::parse_date $date 968=item $timestamp = AnyEvent::HTTP::parse_date $date
946 969
947Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 970Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
948returns the corresponding POSIX timestamp, or C<undef> if the date cannot 971bunch of minor variations of those, and returns the corresponding POSIX
949be parsed. 972timestamp, or C<undef> if the date cannot be parsed.
950 973
951=item $AnyEvent::HTTP::MAX_RECURSE 974=item $AnyEvent::HTTP::MAX_RECURSE
952 975
953The default value for the C<recurse> request parameter (default: C<10>). 976The default value for the C<recurse> request parameter (default: C<10>).
954 977
993sub parse_date($) { 1016sub parse_date($) {
994 my ($date) = @_; 1017 my ($date) = @_;
995 1018
996 my ($d, $m, $y, $H, $M, $S); 1019 my ($d, $m, $y, $H, $M, $S);
997 1020
998 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1021 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
999 # RFC 822/1123, required by RFC 2616 (with " ") 1022 # RFC 822/1123, required by RFC 2616 (with " ")
1000 # cookie dates (with "-") 1023 # cookie dates (with "-")
1001 1024
1002 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1025 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1003 1026
1004 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1027 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1005 # RFC 850 1028 # RFC 850
1006 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1029 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1007 1030
1008 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1031 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1009 # ISO C's asctime 1032 # ISO C's asctime
1010 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1033 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1011 } 1034 }
1012 # other formats fail in the loop below 1035 # other formats fail in the loop below
1013 1036

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines