ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.71 by root, Fri Dec 31 20:50:58 2010 UTC vs.
Revision 1.77 by root, Sat Jan 1 19:13:41 2011 UTC

122 122
123If the server sends a header multiple times, then their contents will be 123If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 124joined together with a comma (C<,>), as per the HTTP spec.
125 125
126If an internal error occurs, such as not being able to resolve a hostname, 126If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 128C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 129message. Currently the following status codes are used:
130
131=over 4
132
133=item 595 - errors during connection etsbalishment, proxy handshake.
134
135=item 596 - errors during TLS negotiation, request sending and header processing.
136
137=item 597 - errors during body receive or processing.
138
139=item 598 - user aborted request in C<on_header> or C<on_body>.
140
141=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142
143=back
130 144
131A typical callback might look like this: 145A typical callback might look like this:
132 146
133 sub { 147 sub {
134 my ($body, $hdr) = @_; 148 my ($body, $hdr) = @_;
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 378 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 379
366 _slot_schedule $_[0]; 380 _slot_schedule $_[0];
367} 381}
368 382
383# extract cookies from jar
369sub cookie_jar_extract($$$$) { 384sub cookie_jar_extract($$$$) {
370 my ($jar, $uscheme, $uhost, $upath) = @_; 385 my ($jar, $uscheme, $uhost, $upath) = @_;
371 386
372 %$jar = () if $jar->{version} != 1; 387 %$jar = () if $jar->{version} != 1;
373 388
410 } 425 }
411 426
412 \@cookies 427 \@cookies
413} 428}
414 429
430# parse set_cookie header into jar
431sub cookie_jar_set_cookie($$$) {
432 my ($jar, $set_cookie, $uhost) = @_;
433
434 for ($set_cookie) {
435 # parse NAME=VALUE
436 my @kv;
437
438 while (
439 m{
440 \G\s*
441 (?:
442 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+)
443 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
444 )
445 }gcxsi
446 ) {
447 my $name = $2;
448 my $value = $4;
449
450 unless (defined $name) {
451 # expires
452 $name = "expires";
453 $value = $1;
454 } elsif (!defined $value) {
455 # quoted
456 $value = $3;
457 $value =~ s/\\(.)/$1/gs;
458 }
459
460 push @kv, lc $name, $value;
461
462 last unless /\G\s*;/gc;
463 }
464
465 last unless @kv;
466
467 my $name = shift @kv;
468 my %kv = (value => shift @kv, @kv);
469
470 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
471 if exists $kv{"max-age"};
472
473 my $cdom;
474 my $cpath = (delete $kv{path}) || "/";
475
476 if (exists $kv{domain}) {
477 $cdom = delete $kv{domain};
478
479 $cdom =~ s/^\.?/./; # make sure it starts with a "."
480
481 next if $cdom =~ /\.$/;
482
483 # this is not rfc-like and not netscape-like. go figure.
484 my $ndots = $cdom =~ y/.//;
485 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
486 } else {
487 $cdom = $uhost;
488 }
489
490 # store it
491 $jar->{version} = 1;
492 $jar->{$cdom}{$cpath}{$name} = \%kv;
493
494 redo if /\G\s*,/gc;
495 }
496}
497
415# continue to parse $_ for headers and place them into the arg 498# continue to parse $_ for headers and place them into the arg
416sub parse_hdr() { 499sub parse_hdr() {
417 my %hdr; 500 my %hdr;
418 501
419 # things seen, not parsed: 502 # things seen, not parsed:
531 _get_slot $uhost, sub { 614 _get_slot $uhost, sub {
532 $state{slot_guard} = shift; 615 $state{slot_guard} = shift;
533 616
534 return unless $state{connect_guard}; 617 return unless $state{connect_guard};
535 618
619 my $ae_error = 595; # connecting
620
536 my $connect_cb = sub { 621 my $connect_cb = sub {
537 $state{fh} = shift 622 $state{fh} = shift
538 or do { 623 or do {
539 my $err = "$!"; 624 my $err = "$!";
540 %state = (); 625 %state = ();
541 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 626 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
542 }; 627 };
543
544 pop; # free memory, save a tree
545 628
546 return unless delete $state{connect_guard}; 629 return unless delete $state{connect_guard};
547 630
548 # get handle 631 # get handle
549 $state{handle} = new AnyEvent::Handle 632 $state{handle} = new AnyEvent::Handle
552 tls_ctx => $arg{tls_ctx}, 635 tls_ctx => $arg{tls_ctx},
553 # these need to be reconfigured on keepalive handles 636 # these need to be reconfigured on keepalive handles
554 timeout => $timeout, 637 timeout => $timeout,
555 on_error => sub { 638 on_error => sub {
556 %state = (); 639 %state = ();
557 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 640 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
558 }, 641 },
559 on_eof => sub { 642 on_eof => sub {
560 %state = (); 643 %state = ();
561 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 644 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
562 }, 645 },
563 ; 646 ;
564 647
565 # limit the number of persistent connections 648 # limit the number of persistent connections
566 # keepalive not yet supported 649 # keepalive not yet supported
574 657
575 $state{handle}->starttls ("connect") if $rscheme eq "https"; 658 $state{handle}->starttls ("connect") if $rscheme eq "https";
576 659
577 # handle actual, non-tunneled, request 660 # handle actual, non-tunneled, request
578 my $handle_actual_request = sub { 661 my $handle_actual_request = sub {
662 $ae_error = 596; # request phase
663
579 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 664 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
580 665
581 # send request 666 # send request
582 $state{handle}->push_write ( 667 $state{handle}->push_write (
583 "$method $rpath HTTP/1.1\015\012" 668 "$method $rpath HTTP/1.1\015\012"
594 # status line and headers 679 # status line and headers
595 $state{read_response} = sub { 680 $state{read_response} = sub {
596 for ("$_[1]") { 681 for ("$_[1]") {
597 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 682 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
598 683
599 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc 684 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
600 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 685 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
601 686
602 # 100 Continue handling 687 # 100 Continue handling
603 # should not happen as we don't send expect: 100-continue, 688 # should not happen as we don't send expect: 100-continue,
604 # but we handle it just in case. 689 # but we handle it just in case.
639 724
640 if ($recurse) { 725 if ($recurse) {
641 my $status = $hdr{Status}; 726 my $status = $hdr{Status};
642 727
643 # industry standard is to redirect POST as GET for 728 # industry standard is to redirect POST as GET for
644 # 301, 302 and 303, in contrast to http/1.0 and 1.1. 729 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
645 # also, the UA should ask the user for 301 and 307 and POST, 730 # also, the UA should ask the user for 301 and 307 and POST,
646 # industry standard seems to be to simply follow. 731 # industry standard seems to be to simply follow.
647 # we go with the industry standard. 732 # we go with the industry standard.
648 if ($status == 301 or $status == 302 or $status == 303) { 733 if ($status == 301 or $status == 302 or $status == 303) {
649 # HTTP/1.1 is unclear on how to mutate the method 734 # HTTP/1.1 is unclear on how to mutate the method
653 $redirect = 1; 738 $redirect = 1;
654 } 739 }
655 } 740 }
656 741
657 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 742 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
658 my $keepalive = pop; 743 my $may_keep_alive = $_[3];
659 744
660 $state{handle}->destroy if $state{handle}; 745 $state{handle}->destroy if $state{handle};
661 %state = (); 746 %state = ();
662 747
663 if (defined $_[1]) { 748 if (defined $_[1]) {
665 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; 750 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
666 } 751 }
667 752
668 # set-cookie processing 753 # set-cookie processing
669 if ($arg{cookie_jar}) { 754 if ($arg{cookie_jar}) {
670 for ($hdr{"set-cookie"}) { 755 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
671 # parse NAME=VALUE
672 my @kv;
673
674 while (
675 m{
676 \G\s*
677 (?:
678 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+)
679 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
680 )
681 }gcxsi
682 ) {
683 my $name = $2;
684 my $value = $4;
685
686 unless (defined $name) {
687 # expires
688 $name = "expires";
689 $value = $1;
690 } elsif (!defined $value) {
691 # quoted
692 $value = $3;
693 $value =~ s/\\(.)/$1/gs;
694 }
695
696 push @kv, lc $name, $value;
697
698 last unless /\G\s*;/gc;
699 }
700
701 last unless @kv;
702
703 my $name = shift @kv;
704 my %kv = (value => shift @kv, @kv);
705
706 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
707 if exists $kv{"max-age"};
708
709 my $cdom;
710 my $cpath = (delete $kv{path}) || "/";
711
712 if (exists $kv{domain}) {
713 $cdom = delete $kv{domain};
714
715 $cdom =~ s/^\.?/./; # make sure it starts with a "."
716
717 next if $cdom =~ /\.$/;
718
719 # this is not rfc-like and not netscape-like. go figure.
720 my $ndots = $cdom =~ y/.//;
721 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
722 } else {
723 $cdom = $uhost;
724 }
725
726 # store it
727 $arg{cookie_jar}{version} = 1;
728 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
729
730 redo if /\G\s*,/gc;
731 }
732 } 756 }
733 757
734 if ($redirect && exists $hdr{location}) { 758 if ($redirect && exists $hdr{location}) {
735 # we ignore any errors, as it is very common to receive 759 # we ignore any errors, as it is very common to receive
736 # Content-Length != 0 but no actual body 760 # Content-Length != 0 but no actual body
743 $cb); 767 $cb);
744 } else { 768 } else {
745 $cb->($_[0], \%hdr); 769 $cb->($_[0], \%hdr);
746 } 770 }
747 }; 771 };
772
773 $ae_error = 597; # body phase
748 774
749 my $len = $hdr{"content-length"}; 775 my $len = $hdr{"content-length"};
750 776
751 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { 777 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
752 $finish->(undef, 598 => "Request cancelled by on_header"); 778 $finish->(undef, 598 => "Request cancelled by on_header");
774 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { 800 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
775 my $cl = 0; 801 my $cl = 0;
776 my $body = undef; 802 my $body = undef;
777 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 803 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
778 804
779 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
780
781 my $read_chunk; $read_chunk = sub { 805 my $read_chunk; $read_chunk = sub {
782 $_[1] =~ /^([0-9a-fA-F]+)/ 806 $_[1] =~ /^([0-9a-fA-F]+)/
783 or $finish->(undef, 599 => "Garbled chunked transfer encoding"); 807 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
784 808
785 my $len = hex $1; 809 my $len = hex $1;
786 810
787 if ($len) { 811 if ($len) {
788 $cl += $len; 812 $cl += $len;
791 $on_body->($_[1], \%hdr) 815 $on_body->($_[1], \%hdr)
792 or return $finish->(undef, 598 => "Request cancelled by on_body"); 816 or return $finish->(undef, 598 => "Request cancelled by on_body");
793 817
794 $_[0]->push_read (line => sub { 818 $_[0]->push_read (line => sub {
795 length $_[1] 819 length $_[1]
796 and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); 820 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
797 $_[0]->push_read (line => $read_chunk); 821 $_[0]->push_read (line => $read_chunk);
798 }); 822 });
799 }); 823 });
800 } else { 824 } else {
801 $hdr{"content-length"} ||= $cl; 825 $hdr{"content-length"} ||= $cl;
804 if (length $_[1]) { 828 if (length $_[1]) {
805 for ("$_[1]") { 829 for ("$_[1]") {
806 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 830 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
807 831
808 my $hdr = parse_hdr 832 my $hdr = parse_hdr
809 or return $finish->(undef, 599 => "Garbled response trailers"); 833 or return $finish->(undef, $ae_error => "Garbled response trailers");
810 834
811 %hdr = (%hdr, %$hdr); 835 %hdr = (%hdr, %$hdr);
812 } 836 }
813 } 837 }
814 838
818 }; 842 };
819 843
820 $_[0]->push_read (line => $read_chunk); 844 $_[0]->push_read (line => $read_chunk);
821 845
822 } elsif ($arg{on_body}) { 846 } elsif ($arg{on_body}) {
823 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
824
825 if ($len) { 847 if ($len) {
826 $_[0]->on_read (sub { 848 $_[0]->on_read (sub {
827 $len -= length $_[0]{rbuf}; 849 $len -= length $_[0]{rbuf};
828 850
829 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 851 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
843 } 865 }
844 } else { 866 } else {
845 $_[0]->on_eof (undef); 867 $_[0]->on_eof (undef);
846 868
847 if ($len) { 869 if ($len) {
848 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
849 $_[0]->on_read (sub { 870 $_[0]->on_read (sub {
850 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 871 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
851 if $len <= length $_[0]{rbuf}; 872 if $len <= length $_[0]{rbuf};
852 }); 873 });
853 } else { 874 } else {
854 $_[0]->on_error (sub { 875 $_[0]->on_error (sub {
855 ($! == Errno::EPIPE || !$!) 876 ($! == Errno::EPIPE || !$!)
856 ? $finish->(delete $_[0]{rbuf}) 877 ? $finish->(delete $_[0]{rbuf})
857 : $finish->(undef, 599 => $_[2]); 878 : $finish->(undef, $ae_error => $_[2]);
858 }); 879 });
859 $_[0]->on_read (sub { }); 880 $_[0]->on_read (sub { });
860 } 881 }
861 } 882 }
862 } 883 }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines