ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.77 by root, Sat Jan 1 19:13:41 2011 UTC vs.
Revision 1.89 by root, Mon Jan 3 00:23:25 2011 UTC

36 36
37=cut 37=cut
38 38
39package AnyEvent::HTTP; 39package AnyEvent::HTTP;
40 40
41use strict; 41use common::sense;
42no warnings;
43 42
44use Errno (); 43use Errno ();
45 44
46use AnyEvent 5.0 (); 45use AnyEvent 5.0 ();
47use AnyEvent::Util (); 46use AnyEvent::Util ();
58our $MAX_PERSISTENT = 8; 57our $MAX_PERSISTENT = 8;
59our $PERSISTENT_TIMEOUT = 2; 58our $PERSISTENT_TIMEOUT = 2;
60our $TIMEOUT = 300; 59our $TIMEOUT = 300;
61 60
62# changing these is evil 61# changing these is evil
63our $MAX_PERSISTENT_PER_HOST = 0; 62our $MAX_PERSISTENT_PER_HOST = 2;
64our $MAX_PER_HOST = 4; 63our $MAX_PER_HOST = 4;
65 64
66our $PROXY; 65our $PROXY;
67our $ACTIVE = 0; 66our $ACTIVE = 0;
68 67
132 131
133=item 595 - errors during connection etsbalishment, proxy handshake. 132=item 595 - errors during connection etsbalishment, proxy handshake.
134 133
135=item 596 - errors during TLS negotiation, request sending and header processing. 134=item 596 - errors during TLS negotiation, request sending and header processing.
136 135
137=item 597 - errors during body receive or processing. 136=item 597 - errors during body receiving or processing.
138 137
139=item 598 - user aborted request in C<on_header> or C<on_body>. 138=item 598 - user aborted request via C<on_header> or C<on_body>.
140 139
141=item 599 - other, usually nonretryable, errors (garbled URL etc.). 140=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142 141
143=back 142=back
144 143
196=item cookie_jar => $hash_ref 195=item cookie_jar => $hash_ref
197 196
198Passing this parameter enables (simplified) cookie-processing, loosely 197Passing this parameter enables (simplified) cookie-processing, loosely
199based on the original netscape specification. 198based on the original netscape specification.
200 199
201The C<$hash_ref> must be an (initially empty) hash reference which will 200The C<$hash_ref> must be an (initially empty) hash reference which
202get updated automatically. It is possible to save the cookie jar to 201will get updated automatically. It is possible to save the cookie jar
203persistent storage with something like JSON or Storable, but this is not 202to persistent storage with something like JSON or Storable - see the
204recommended, as session-only cookies might survive longer than expected. 203C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
204expired or session-only cookies, and also for documentation on the format
205of the cookie jar.
205 206
206Note that this cookie implementation is not meant to be complete. If 207Note that this cookie implementation is not meant to be complete. If
207you want complete cookie management you have to do that on your 208you want complete cookie management you have to do that on your
208own. C<cookie_jar> is meant as a quick fix to get some cookie-using sites 209own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
209working. Cookies are a privacy disaster, do not use them unless required 210working. Cookies are a privacy disaster, do not use them unless required
210to. 211to.
211 212
212When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 213When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
213headers will be set and handled by this module, otherwise they will be 214headers will be set and handled by this module, otherwise they will be
378 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 379 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
379 380
380 _slot_schedule $_[0]; 381 _slot_schedule $_[0];
381} 382}
382 383
384#############################################################################
385
386# expire cookies
387sub cookie_jar_expire($;$) {
388 my ($jar, $session_end) = @_;
389
390 %$jar = () if $jar->{version} != 1;
391
392 my $anow = AE::now;
393
394 while (my ($chost, $paths) = each %$jar) {
395 next unless ref $paths;
396
397 while (my ($cpath, $cookies) = each %$paths) {
398 while (my ($cookie, $kv) = each %$cookies) {
399 if (exists $kv->{_expires}) {
400 delete $cookies->{$cookie}
401 if $anow > $kv->{_expires};
402 } elsif ($session_end) {
403 delete $cookies->{$cookie};
404 }
405 }
406
407 delete $paths->{$cpath}
408 unless %$cookies;
409 }
410
411 delete $jar->{$chost}
412 unless %$paths;
413 }
414}
415
383# extract cookies from jar 416# extract cookies from jar
384sub cookie_jar_extract($$$$) { 417sub cookie_jar_extract($$$$) {
385 my ($jar, $uscheme, $uhost, $upath) = @_; 418 my ($jar, $uscheme, $uhost, $upath) = @_;
386 419
387 %$jar = () if $jar->{version} != 1; 420 %$jar = () if $jar->{version} != 1;
403 next unless $cpath eq substr $upath, 0, length $cpath; 436 next unless $cpath eq substr $upath, 0, length $cpath;
404 437
405 while (my ($cookie, $kv) = each %$cookies) { 438 while (my ($cookie, $kv) = each %$cookies) {
406 next if $uscheme ne "https" && exists $kv->{secure}; 439 next if $uscheme ne "https" && exists $kv->{secure};
407 440
408 if (exists $kv->{expires}) { 441 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
409 if (AE::now > parse_date ($kv->{expires})) {
410 delete $cookies->{$cookie}; 442 delete $cookies->{$cookie};
411 next; 443 next;
412 }
413 } 444 }
414 445
415 my $value = $kv->{value}; 446 my $value = $kv->{value};
416 447
417 if ($value =~ /[=;,[:space:]]/) { 448 if ($value =~ /[=;,[:space:]]/) {
426 457
427 \@cookies 458 \@cookies
428} 459}
429 460
430# parse set_cookie header into jar 461# parse set_cookie header into jar
431sub cookie_jar_set_cookie($$$) { 462sub cookie_jar_set_cookie($$$$) {
432 my ($jar, $set_cookie, $uhost) = @_; 463 my ($jar, $set_cookie, $uhost, $date) = @_;
464
465 my $anow = int AE::now;
466 my $snow; # server-now
433 467
434 for ($set_cookie) { 468 for ($set_cookie) {
435 # parse NAME=VALUE 469 # parse NAME=VALUE
436 my @kv; 470 my @kv;
437 471
472 # expires is not http-compliant in the original cookie-spec,
473 # we support the official date format and some extensions
438 while ( 474 while (
439 m{ 475 m{
440 \G\s* 476 \G\s*
441 (?: 477 (?:
442 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) 478 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
443 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 479 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
444 ) 480 )
445 }gcxsi 481 }gcxsi
446 ) { 482 ) {
447 my $name = $2; 483 my $name = $2;
448 my $value = $4; 484 my $value = $4;
449 485
450 unless (defined $name) { 486 if (defined $1) {
451 # expires 487 # expires
452 $name = "expires"; 488 $name = "expires";
453 $value = $1; 489 $value = $1;
454 } elsif (!defined $value) { 490 } elsif (defined $3) {
455 # quoted 491 # quoted
456 $value = $3; 492 $value = $3;
457 $value =~ s/\\(.)/$1/gs; 493 $value =~ s/\\(.)/$1/gs;
458 } 494 }
459 495
465 last unless @kv; 501 last unless @kv;
466 502
467 my $name = shift @kv; 503 my $name = shift @kv;
468 my %kv = (value => shift @kv, @kv); 504 my %kv = (value => shift @kv, @kv);
469 505
470 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
471 if exists $kv{"max-age"}; 506 if (exists $kv{"max-age"}) {
507 $kv{_expires} = $anow + delete $kv{"max-age"};
508 } elsif (exists $kv{expires}) {
509 $snow ||= parse_date ($date) || $anow;
510 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
511 } else {
512 delete $kv{_expires};
513 }
472 514
473 my $cdom; 515 my $cdom;
474 my $cpath = (delete $kv{path}) || "/"; 516 my $cpath = (delete $kv{path}) || "/";
475 517
476 if (exists $kv{domain}) { 518 if (exists $kv{domain}) {
487 $cdom = $uhost; 529 $cdom = $uhost;
488 } 530 }
489 531
490 # store it 532 # store it
491 $jar->{version} = 1; 533 $jar->{version} = 1;
492 $jar->{$cdom}{$cpath}{$name} = \%kv; 534 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
493 535
494 redo if /\G\s*,/gc; 536 redo if /\G\s*,/gc;
495 } 537 }
496} 538}
497 539
564 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 606 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
565 607
566 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 608 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
567 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 609 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
568 610
569 my $uhost = $1; 611 my $uhost = lc $1;
570 $uport = $2 if defined $2; 612 $uport = $2 if defined $2;
571 613
572 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 614 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
573 unless exists $hdr{host}; 615 unless exists $hdr{host};
574 616
593 $rscheme = "http" unless defined $rscheme; 635 $rscheme = "http" unless defined $rscheme;
594 636
595 # don't support https requests over https-proxy transport, 637 # don't support https requests over https-proxy transport,
596 # can't be done with tls as spec'ed, unless you double-encrypt. 638 # can't be done with tls as spec'ed, unless you double-encrypt.
597 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 639 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
640
641 $rhost = lc $rhost;
642 $rscheme = lc $rscheme;
598 } else { 643 } else {
599 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 644 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
600 } 645 }
601 646
602 # leave out fragment and query string, just a heuristic 647 # leave out fragment and query string, just a heuristic
604 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
605 650
606 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
607 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
608 653
609 $hdr{connection} = "close TE"; #1.1 654 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/;
655
656 # default value for keepalive is true iff the request is for an idempotent method
657 my $keepalive = exists $arg{keepalive}
658 ? $arg{keepalive}*1
659 : $idempotent ? $PERSISTENT_TIMEOUT : 0;
660
661 $hdr{connection} = ($keepalive ? "" : "close ") . "Te"; #1.1
610 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 662 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
611 663
612 my %state = (connect_guard => 1); 664 my %state = (connect_guard => 1);
613 665
614 _get_slot $uhost, sub {
615 $state{slot_guard} = shift;
616
617 return unless $state{connect_guard};
618
619 my $ae_error = 595; # connecting 666 my $ae_error = 595; # connecting
620 667
621 my $connect_cb = sub { 668 # handle actual, non-tunneled, request
622 $state{fh} = shift 669 my $handle_actual_request = sub {
670 $ae_error = 596; # request phase
671
672 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
673
674 # send request
675 $state{handle}->push_write (
676 "$method $rpath HTTP/1.1\015\012"
677 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
678 . "\015\012"
679 . (delete $arg{body})
680 );
681
682 # return if error occured during push_write()
683 return unless %state;
684
685 # reduce memory usage, save a kitten, also re-use it for the response headers.
686 %hdr = ();
687
688 # status line and headers
689 $state{read_response} = sub {
690 for ("$_[1]") {
691 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
692
693 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
694 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
695
696 # 100 Continue handling
697 # should not happen as we don't send expect: 100-continue,
698 # but we handle it just in case.
699 # since we send the request body regardless, if we get an error
700 # we are out of-sync, which we currently do NOT handle correctly.
701 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
702 if $2 eq 100;
703
704 push @pseudo,
705 HTTPVersion => $1,
706 Status => $2,
707 Reason => $3,
623 or do { 708 ;
624 my $err = "$!"; 709
710 my $hdr = parse_hdr
711 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
712
713 %hdr = (%$hdr, @pseudo);
714 }
715
716 # redirect handling
717 # microsoft and other shitheads don't give a shit for following standards,
718 # try to support some common forms of broken Location headers.
719 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
720 $hdr{location} =~ s/^\.\/+//;
721
722 my $url = "$rscheme://$uhost:$uport";
723
724 unless ($hdr{location} =~ s/^\///) {
725 $url .= $upath;
726 $url =~ s/\/[^\/]*$//;
727 }
728
729 $hdr{location} = "$url/$hdr{location}";
730 }
731
732 my $redirect;
733
734 if ($recurse) {
735 my $status = $hdr{Status};
736
737 # industry standard is to redirect POST as GET for
738 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
739 # also, the UA should ask the user for 301 and 307 and POST,
740 # industry standard seems to be to simply follow.
741 # we go with the industry standard.
742 if ($status == 301 or $status == 302 or $status == 303) {
743 # HTTP/1.1 is unclear on how to mutate the method
744 $method = "GET" unless $method eq "HEAD";
745 $redirect = 1;
746 } elsif ($status == 307) {
747 $redirect = 1;
748 }
749 }
750
751 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
752 my $may_keep_alive = $_[3];
753
754 $state{handle}->destroy if $state{handle};
625 %state = (); 755 %state = ();
626 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err }); 756
757 if (defined $_[1]) {
758 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
759 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
760 }
761
762 # set-cookie processing
763 if ($arg{cookie_jar}) {
764 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
765 }
766
767 if ($redirect && exists $hdr{location}) {
768 # we ignore any errors, as it is very common to receive
769 # Content-Length != 0 but no actual body
770 # we also access %hdr, as $_[1] might be an erro
771 http_request (
772 $method => $hdr{location},
773 %arg,
774 recurse => $recurse - 1,
775 Redirect => [$_[0], \%hdr],
776 $cb);
777 } else {
778 $cb->($_[0], \%hdr);
779 }
780 };
781
782 $ae_error = 597; # body phase
783
784 my $len = $hdr{"content-length"};
785
786 # body handling, many different code paths
787 # - no body expected
788 # - want_body_handle
789 # - te chunked
790 # - 2x length known (with or without on_body)
791 # - 2x length not known (with or without on_body)
792 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
793 $finish->(undef, 598 => "Request cancelled by on_header");
794 } elsif (
795 $hdr{Status} =~ /^(?:1..|204|205|304)$/
796 or $method eq "HEAD"
797 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
798 ) {
799 # no body
800 $finish->("", undef, undef, 1);
801
802 } elsif (!$redirect && $arg{want_body_handle}) {
803 $_[0]->on_eof (undef);
804 $_[0]->on_error (undef);
805 $_[0]->on_read (undef);
806
807 $finish->(delete $state{handle});
808
809 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
810 my $cl = 0;
811 my $body = undef;
812 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
813
814 $state{read_chunk} = sub {
815 $_[1] =~ /^([0-9a-fA-F]+)/
816 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
817
818 my $len = hex $1;
819
820 if ($len) {
821 $cl += $len;
822
823 $_[0]->push_read (chunk => $len, sub {
824 $on_body->($_[1], \%hdr)
825 or return $finish->(undef, 598 => "Request cancelled by on_body");
826
827 $_[0]->push_read (line => sub {
828 length $_[1]
829 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
830 $_[0]->push_read (line => $state{read_chunk});
831 });
832 });
833 } else {
834 $hdr{"content-length"} ||= $cl;
835
836 $_[0]->push_read (line => $qr_nlnl, sub {
837 if (length $_[1]) {
838 for ("$_[1]") {
839 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
840
841 my $hdr = parse_hdr
842 or return $finish->(undef, $ae_error => "Garbled response trailers");
843
844 %hdr = (%hdr, %$hdr);
845 }
846 }
847
848 $finish->($body, undef, undef, 1);
849 });
850 }
627 }; 851 };
628 852
853 $_[0]->push_read (line => $state{read_chunk});
854
855 } elsif ($arg{on_body}) {
856 if (defined $len) {
857 $_[0]->on_read (sub {
858 $len -= length $_[0]{rbuf};
859
860 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
861 or return $finish->(undef, 598 => "Request cancelled by on_body");
862
863 $len > 0
864 or $finish->("", undef, undef, 1);
865 });
866 } else {
867 $_[0]->on_eof (sub {
868 $finish->("");
869 });
870 $_[0]->on_read (sub {
871 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
872 or $finish->(undef, 598 => "Request cancelled by on_body");
873 });
874 }
875 } else {
876 $_[0]->on_eof (undef);
877
878 if (defined $len) {
879 $_[0]->on_read (sub {
880 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
881 if $len <= length $_[0]{rbuf};
882 });
883 } else {
884 $_[0]->on_error (sub {
885 ($! == Errno::EPIPE || !$!)
886 ? $finish->(delete $_[0]{rbuf})
887 : $finish->(undef, $ae_error => $_[2]);
888 });
889 $_[0]->on_read (sub { });
890 }
891 }
892 };
893
894 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
895 };
896
897 my $connect_cb = sub {
898 $state{fh} = shift
899 or do {
900 my $err = "$!";
901 %state = ();
902 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
903 };
904
629 return unless delete $state{connect_guard}; 905 return unless delete $state{connect_guard};
630 906
631 # get handle 907 # get handle
632 $state{handle} = new AnyEvent::Handle 908 $state{handle} = new AnyEvent::Handle
633 fh => $state{fh}, 909 fh => $state{fh},
634 peername => $rhost, 910 peername => $rhost,
635 tls_ctx => $arg{tls_ctx}, 911 tls_ctx => $arg{tls_ctx},
636 # these need to be reconfigured on keepalive handles 912 # these need to be reconfigured on keepalive handles
637 timeout => $timeout, 913 timeout => $timeout,
638 on_error => sub { 914 on_error => sub {
639 %state = (); 915 %state = ();
640 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); 916 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
641 }, 917 },
642 on_eof => sub { 918 on_eof => sub {
643 %state = (); 919 %state = ();
644 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); 920 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
645 }, 921 },
646 ; 922 ;
647 923
648 # limit the number of persistent connections 924 # limit the number of persistent connections
649 # keepalive not yet supported 925 # keepalive not yet supported
650# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 926# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
651# ++$KA_COUNT{$_[1]}; 927# ++$KA_COUNT{$_[1]};
652# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 928# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
653# --$KA_COUNT{$_[1]} 929# --$KA_COUNT{$_[1]}
654# }; 930# };
655# $hdr{connection} = "keep-alive"; 931# $hdr{connection} = "keep-alive";
656# } 932# }
657 933
658 $state{handle}->starttls ("connect") if $rscheme eq "https"; 934 $state{handle}->starttls ("connect") if $rscheme eq "https";
659 935
660 # handle actual, non-tunneled, request
661 my $handle_actual_request = sub {
662 $ae_error = 596; # request phase
663
664 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
665
666 # send request
667 $state{handle}->push_write (
668 "$method $rpath HTTP/1.1\015\012"
669 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
670 . "\015\012"
671 . (delete $arg{body})
672 );
673
674 # return if error occured during push_write()
675 return unless %state;
676
677 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
678
679 # status line and headers
680 $state{read_response} = sub {
681 for ("$_[1]") {
682 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
683
684 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
685 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
686
687 # 100 Continue handling
688 # should not happen as we don't send expect: 100-continue,
689 # but we handle it just in case.
690 # since we send the request body regardless, if we get an error
691 # we are out of-sync, which we currently do NOT handle correctly.
692 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
693 if $2 eq 100;
694
695 push @pseudo,
696 HTTPVersion => $1,
697 Status => $2,
698 Reason => $3,
699 ;
700
701 my $hdr = parse_hdr
702 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
703
704 %hdr = (%$hdr, @pseudo);
705 }
706
707 # redirect handling
708 # microsoft and other shitheads don't give a shit for following standards,
709 # try to support some common forms of broken Location headers.
710 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
711 $hdr{location} =~ s/^\.\/+//;
712
713 my $url = "$rscheme://$uhost:$uport";
714
715 unless ($hdr{location} =~ s/^\///) {
716 $url .= $upath;
717 $url =~ s/\/[^\/]*$//;
718 }
719
720 $hdr{location} = "$url/$hdr{location}";
721 }
722
723 my $redirect;
724
725 if ($recurse) {
726 my $status = $hdr{Status};
727
728 # industry standard is to redirect POST as GET for
729 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
730 # also, the UA should ask the user for 301 and 307 and POST,
731 # industry standard seems to be to simply follow.
732 # we go with the industry standard.
733 if ($status == 301 or $status == 302 or $status == 303) {
734 # HTTP/1.1 is unclear on how to mutate the method
735 $method = "GET" unless $method eq "HEAD";
736 $redirect = 1;
737 } elsif ($status == 307) {
738 $redirect = 1;
739 }
740 }
741
742 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
743 my $may_keep_alive = $_[3];
744
745 $state{handle}->destroy if $state{handle};
746 %state = ();
747
748 if (defined $_[1]) {
749 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
750 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
751 }
752
753 # set-cookie processing
754 if ($arg{cookie_jar}) {
755 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
756 }
757
758 if ($redirect && exists $hdr{location}) {
759 # we ignore any errors, as it is very common to receive
760 # Content-Length != 0 but no actual body
761 # we also access %hdr, as $_[1] might be an erro
762 http_request (
763 $method => $hdr{location},
764 %arg,
765 recurse => $recurse - 1,
766 Redirect => [$_[0], \%hdr],
767 $cb);
768 } else {
769 $cb->($_[0], \%hdr);
770 }
771 };
772
773 $ae_error = 597; # body phase
774
775 my $len = $hdr{"content-length"};
776
777 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
778 $finish->(undef, 598 => "Request cancelled by on_header");
779 } elsif (
780 $hdr{Status} =~ /^(?:1..|204|205|304)$/
781 or $method eq "HEAD"
782 or (defined $len && !$len)
783 ) {
784 # no body
785 $finish->("", undef, undef, 1);
786 } else {
787 # body handling, many different code paths
788 # - no body expected
789 # - want_body_handle
790 # - te chunked
791 # - 2x length known (with or without on_body)
792 # - 2x length not known (with or without on_body)
793 if (!$redirect && $arg{want_body_handle}) {
794 $_[0]->on_eof (undef);
795 $_[0]->on_error (undef);
796 $_[0]->on_read (undef);
797
798 $finish->(delete $state{handle});
799
800 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
801 my $cl = 0;
802 my $body = undef;
803 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
804
805 my $read_chunk; $read_chunk = sub {
806 $_[1] =~ /^([0-9a-fA-F]+)/
807 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
808
809 my $len = hex $1;
810
811 if ($len) {
812 $cl += $len;
813
814 $_[0]->push_read (chunk => $len, sub {
815 $on_body->($_[1], \%hdr)
816 or return $finish->(undef, 598 => "Request cancelled by on_body");
817
818 $_[0]->push_read (line => sub {
819 length $_[1]
820 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
821 $_[0]->push_read (line => $read_chunk);
822 });
823 });
824 } else {
825 $hdr{"content-length"} ||= $cl;
826
827 $_[0]->push_read (line => $qr_nlnl, sub {
828 if (length $_[1]) {
829 for ("$_[1]") {
830 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
831
832 my $hdr = parse_hdr
833 or return $finish->(undef, $ae_error => "Garbled response trailers");
834
835 %hdr = (%hdr, %$hdr);
836 }
837 }
838
839 $finish->($body, undef, undef, 1);
840 });
841 }
842 };
843
844 $_[0]->push_read (line => $read_chunk);
845
846 } elsif ($arg{on_body}) {
847 if ($len) {
848 $_[0]->on_read (sub {
849 $len -= length $_[0]{rbuf};
850
851 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
852 or return $finish->(undef, 598 => "Request cancelled by on_body");
853
854 $len > 0
855 or $finish->("", undef, undef, 1);
856 });
857 } else {
858 $_[0]->on_eof (sub {
859 $finish->("");
860 });
861 $_[0]->on_read (sub {
862 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
863 or $finish->(undef, 598 => "Request cancelled by on_body");
864 });
865 }
866 } else {
867 $_[0]->on_eof (undef);
868
869 if ($len) {
870 $_[0]->on_read (sub {
871 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
872 if $len <= length $_[0]{rbuf};
873 });
874 } else {
875 $_[0]->on_error (sub {
876 ($! == Errno::EPIPE || !$!)
877 ? $finish->(delete $_[0]{rbuf})
878 : $finish->(undef, $ae_error => $_[2]);
879 });
880 $_[0]->on_read (sub { });
881 }
882 }
883 }
884 };
885
886 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
887 };
888
889 # now handle proxy-CONNECT method 936 # now handle proxy-CONNECT method
890 if ($proxy && $uscheme eq "https") { 937 if ($proxy && $uscheme eq "https") {
891 # oh dear, we have to wrap it into a connect request 938 # oh dear, we have to wrap it into a connect request
892 939
893 # maybe re-use $uauthority with patched port? 940 # maybe re-use $uauthority with patched port?
894 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 941 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
895 $state{handle}->push_read (line => $qr_nlnl, sub { 942 $state{handle}->push_read (line => $qr_nlnl, sub {
896 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 943 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
897 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 944 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
898 945
899 if ($2 == 200) { 946 if ($2 == 200) {
900 $rpath = $upath; 947 $rpath = $upath;
901 &$handle_actual_request; 948 $handle_actual_request->();
902 } else { 949 } else {
903 %state = (); 950 %state = ();
904 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 951 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
905 }
906 }); 952 }
907 } else {
908 &$handle_actual_request;
909 } 953 });
954 } else {
955 $handle_actual_request->();
910 }; 956 }
957 };
958
959 _get_slot $uhost, sub {
960 $state{slot_guard} = shift;
961
962 return unless $state{connect_guard};
911 963
912 my $tcp_connect = $arg{tcp_connect} 964 my $tcp_connect = $arg{tcp_connect}
913 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 965 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
914 966
915 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 967 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
916
917 }; 968 };
918 969
919 defined wantarray && AnyEvent::Util::guard { %state = () } 970 defined wantarray && AnyEvent::Util::guard { %state = () }
920} 971}
921 972
956string of the form C<http://host:port> (optionally C<https:...>), croaks 1007string of the form C<http://host:port> (optionally C<https:...>), croaks
957otherwise. 1008otherwise.
958 1009
959To clear an already-set proxy, use C<undef>. 1010To clear an already-set proxy, use C<undef>.
960 1011
1012=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1013
1014Remove all cookies from the cookie jar that have been expired. If
1015C<$session_end> is given and true, then additionally remove all session
1016cookies.
1017
1018You should call this function (with a true C<$session_end>) before you
1019save cookies to disk, and you should call this function after loading them
1020again. If you have a long-running program you can additonally call this
1021function from time to time.
1022
1023A cookie jar is initially an empty hash-reference that is managed by this
1024module. It's format is subject to change, but currently it is like this:
1025
1026The key C<version> has to contain C<1>, otherwise the hash gets
1027emptied. All other keys are hostnames or IP addresses pointing to
1028hash-references. The key for these inner hash references is the
1029server path for which this cookie is meant, and the values are again
1030hash-references. The keys of those hash-references is the cookie name, and
1031the value, you guessed it, is another hash-reference, this time with the
1032key-value pairs from the cookie, except for C<expires> and C<max-age>,
1033which have been replaced by a C<_expires> key that contains the cookie
1034expiry timestamp.
1035
1036Here is an example of a cookie jar with a single cookie, so you have a
1037chance of understanding the above paragraph:
1038
1039 {
1040 version => 1,
1041 "10.0.0.1" => {
1042 "/" => {
1043 "mythweb_id" => {
1044 _expires => 1293917923,
1045 value => "ooRung9dThee3ooyXooM1Ohm",
1046 },
1047 },
1048 },
1049 }
1050
961=item $date = AnyEvent::HTTP::format_date $timestamp 1051=item $date = AnyEvent::HTTP::format_date $timestamp
962 1052
963Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1053Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
964Date (RFC 2616). 1054Date (RFC 2616).
965 1055
966=item $timestamp = AnyEvent::HTTP::parse_date $date 1056=item $timestamp = AnyEvent::HTTP::parse_date $date
967 1057
968Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 1058Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
969returns the corresponding POSIX timestamp, or C<undef> if the date cannot 1059bunch of minor variations of those, and returns the corresponding POSIX
970be parsed. 1060timestamp, or C<undef> if the date cannot be parsed.
971 1061
972=item $AnyEvent::HTTP::MAX_RECURSE 1062=item $AnyEvent::HTTP::MAX_RECURSE
973 1063
974The default value for the C<recurse> request parameter (default: C<10>). 1064The default value for the C<recurse> request parameter (default: C<10>).
975 1065
1014sub parse_date($) { 1104sub parse_date($) {
1015 my ($date) = @_; 1105 my ($date) = @_;
1016 1106
1017 my ($d, $m, $y, $H, $M, $S); 1107 my ($d, $m, $y, $H, $M, $S);
1018 1108
1019 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1109 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1020 # RFC 822/1123, required by RFC 2616 (with " ") 1110 # RFC 822/1123, required by RFC 2616 (with " ")
1021 # cookie dates (with "-") 1111 # cookie dates (with "-")
1022 1112
1023 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1113 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1024 1114
1025 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1115 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1026 # RFC 850 1116 # RFC 850
1027 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1117 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1028 1118
1029 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1119 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1030 # ISO C's asctime 1120 # ISO C's asctime
1031 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1121 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1032 } 1122 }
1033 # other formats fail in the loop below 1123 # other formats fail in the loop below
1034 1124

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines