ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.76 by root, Sat Jan 1 02:20:49 2011 UTC vs.
Revision 1.87 by root, Sun Jan 2 08:51:53 2011 UTC

36 36
37=cut 37=cut
38 38
39package AnyEvent::HTTP; 39package AnyEvent::HTTP;
40 40
41use strict; 41use common::sense;
42no warnings;
43 42
44use Errno (); 43use Errno ();
45 44
46use AnyEvent 5.0 (); 45use AnyEvent 5.0 ();
47use AnyEvent::Util (); 46use AnyEvent::Util ();
122 121
123If the server sends a header multiple times, then their contents will be 122If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 123joined together with a comma (C<,>), as per the HTTP spec.
125 124
126If an internal error occurs, such as not being able to resolve a hostname, 125If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 126then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 127C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 128message. Currently the following status codes are used:
129
130=over 4
131
132=item 595 - errors during connection establishment, proxy handshake.
133
134=item 596 - errors during TLS negotiation, request sending and header processing.
135
136=item 597 - errors during body receiving or processing.
137
138=item 598 - user aborted request via C<on_header> or C<on_body>.
139
140=item 599 - other, usually nonretryable, errors (garbled URL etc.).
141
142=back
130 143
131A typical callback might look like this: 144A typical callback might look like this:
132 145
133 sub { 146 sub {
134 my ($body, $hdr) = @_; 147 my ($body, $hdr) = @_;
182=item cookie_jar => $hash_ref 195=item cookie_jar => $hash_ref
183 196
184Passing this parameter enables (simplified) cookie-processing, loosely 197Passing this parameter enables (simplified) cookie-processing, loosely
185based on the original netscape specification. 198based on the original netscape specification.
186 199
187The C<$hash_ref> must be an (initially empty) hash reference which will 200The C<$hash_ref> must be an (initially empty) hash reference which
188get updated automatically. It is possible to save the cookie jar to 201will get updated automatically. It is possible to save the cookie jar
189persistent storage with something like JSON or Storable, but this is not 202to persistent storage with something like JSON or Storable - see the
190recommended, as session-only cookies might survive longer than expected. 203C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
204expired or session-only cookies, and also for documentation on the format
205of the cookie jar.
191 206
192Note that this cookie implementation is not meant to be complete. If 207Note that this cookie implementation is not meant to be complete. If
193you want complete cookie management you have to do that on your 208you want complete cookie management you have to do that on your
194own. C<cookie_jar> is meant as a quick fix to get some cookie-using sites 209own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
195working. Cookies are a privacy disaster, do not use them unless required 210working. Cookies are a privacy disaster, do not use them unless required
196to. 211to.
197 212
198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 213When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
199headers will be set and handled by this module, otherwise they will be 214headers will be set and handled by this module, otherwise they will be
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 379 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 380
366 _slot_schedule $_[0]; 381 _slot_schedule $_[0];
367} 382}
368 383
384#############################################################################
385
386# expire cookies
387sub cookie_jar_expire($;$) {
388 my ($jar, $session_end) = @_;
389
390 %$jar = () if $jar->{version} != 1;
391
392 my $anow = AE::now;
393
394 while (my ($chost, $paths) = each %$jar) {
395 next unless ref $paths;
396
397 while (my ($cpath, $cookies) = each %$paths) {
398 while (my ($cookie, $kv) = each %$cookies) {
399 if (exists $kv->{_expires}) {
400 delete $cookies->{$cookie}
401 if $anow > $kv->{_expires};
402 } elsif ($session_end) {
403 delete $cookies->{$cookie};
404 }
405 }
406
407 delete $paths->{$cpath}
408 unless %$cookies;
409 }
410
411 delete $jar->{$chost}
412 unless %$paths;
413 }
414}
415
369# extract cookies from jar 416# extract cookies from jar
370sub cookie_jar_extract($$$$) { 417sub cookie_jar_extract($$$$) {
371 my ($jar, $uscheme, $uhost, $upath) = @_; 418 my ($jar, $uscheme, $uhost, $upath) = @_;
372 419
373 %$jar = () if $jar->{version} != 1; 420 %$jar = () if $jar->{version} != 1;
389 next unless $cpath eq substr $upath, 0, length $cpath; 436 next unless $cpath eq substr $upath, 0, length $cpath;
390 437
391 while (my ($cookie, $kv) = each %$cookies) { 438 while (my ($cookie, $kv) = each %$cookies) {
392 next if $uscheme ne "https" && exists $kv->{secure}; 439 next if $uscheme ne "https" && exists $kv->{secure};
393 440
394 if (exists $kv->{expires}) { 441 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
395 if (AE::now > parse_date ($kv->{expires})) {
396 delete $cookies->{$cookie}; 442 delete $cookies->{$cookie};
397 next; 443 next;
398 }
399 } 444 }
400 445
401 my $value = $kv->{value}; 446 my $value = $kv->{value};
402 447
403 if ($value =~ /[=;,[:space:]]/) { 448 if ($value =~ /[=;,[:space:]]/) {
412 457
413 \@cookies 458 \@cookies
414} 459}
415 460
416# parse set_cookie header into jar 461# parse set_cookie header into jar
417sub cookie_jar_set_cookie($$$) { 462sub cookie_jar_set_cookie($$$$) {
418 my ($jar, $set_cookie, $uhost) = @_; 463 my ($jar, $set_cookie, $uhost, $date) = @_;
464
465 my $anow = int AE::now;
466 my $snow; # server-now
419 467
420 for ($set_cookie) { 468 for ($set_cookie) {
421 # parse NAME=VALUE 469 # parse NAME=VALUE
422 my @kv; 470 my @kv;
423 471
472 # expires is not http-compliant in the original cookie-spec,
473 # we support the official date format and some extensions
424 while ( 474 while (
425 m{ 475 m{
426 \G\s* 476 \G\s*
427 (?: 477 (?:
428 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) 478 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
429 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 479 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
430 ) 480 )
431 }gcxsi 481 }gcxsi
432 ) { 482 ) {
433 my $name = $2; 483 my $name = $2;
434 my $value = $4; 484 my $value = $4;
435 485
436 unless (defined $name) { 486 if (defined $1) {
437 # expires 487 # expires
438 $name = "expires"; 488 $name = "expires";
439 $value = $1; 489 $value = $1;
440 } elsif (!defined $value) { 490 } elsif (defined $3) {
441 # quoted 491 # quoted
442 $value = $3; 492 $value = $3;
443 $value =~ s/\\(.)/$1/gs; 493 $value =~ s/\\(.)/$1/gs;
444 } 494 }
445 495
451 last unless @kv; 501 last unless @kv;
452 502
453 my $name = shift @kv; 503 my $name = shift @kv;
454 my %kv = (value => shift @kv, @kv); 504 my %kv = (value => shift @kv, @kv);
455 505
456 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
457 if exists $kv{"max-age"}; 506 if (exists $kv{"max-age"}) {
507 $kv{_expires} = $anow + delete $kv{"max-age"};
508 } elsif (exists $kv{expires}) {
509 $snow ||= parse_date ($date) || $anow;
510 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
511 } else {
512 delete $kv{_expires};
513 }
458 514
459 my $cdom; 515 my $cdom;
460 my $cpath = (delete $kv{path}) || "/"; 516 my $cpath = (delete $kv{path}) || "/";
461 517
462 if (exists $kv{domain}) { 518 if (exists $kv{domain}) {
473 $cdom = $uhost; 529 $cdom = $uhost;
474 } 530 }
475 531
476 # store it 532 # store it
477 $jar->{version} = 1; 533 $jar->{version} = 1;
478 $jar->{$cdom}{$cpath}{$name} = \%kv; 534 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
479 535
480 redo if /\G\s*,/gc; 536 redo if /\G\s*,/gc;
481 } 537 }
482} 538}
483 539
550 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 606 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
551 607
552 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 608 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
553 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 609 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
554 610
555 my $uhost = $1; 611 my $uhost = lc $1;
556 $uport = $2 if defined $2; 612 $uport = $2 if defined $2;
557 613
558 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 614 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
559 unless exists $hdr{host}; 615 unless exists $hdr{host};
560 616
579 $rscheme = "http" unless defined $rscheme; 635 $rscheme = "http" unless defined $rscheme;
580 636
581 # don't support https requests over https-proxy transport, 637 # don't support https requests over https-proxy transport,
582 # can't be done with tls as spec'ed, unless you double-encrypt. 638 # can't be done with tls as spec'ed, unless you double-encrypt.
583 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 639 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
640
641 $rhost = lc $rhost;
642 $rscheme = lc $rscheme;
584 } else { 643 } else {
585 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 644 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
586 } 645 }
587 646
588 # leave out fragment and query string, just a heuristic 647 # leave out fragment and query string, just a heuristic
590 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
591 650
592 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
593 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
594 653
595 $hdr{connection} = "close TE"; #1.1 654 $hdr{connection} = "close Te"; #1.1
596 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
597 656
598 my %state = (connect_guard => 1); 657 my %state = (connect_guard => 1);
599 658
600 _get_slot $uhost, sub { 659 _get_slot $uhost, sub {
601 $state{slot_guard} = shift; 660 $state{slot_guard} = shift;
602 661
603 return unless $state{connect_guard}; 662 return unless $state{connect_guard};
663
664 my $ae_error = 595; # connecting
665
666 # handle actual, non-tunneled, request
667 my $handle_actual_request = sub {
668 $ae_error = 596; # request phase
669
670 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
671
672 # send request
673 $state{handle}->push_write (
674 "$method $rpath HTTP/1.1\015\012"
675 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
676 . "\015\012"
677 . (delete $arg{body})
678 );
679
680 # return if error occurred during push_write()
681 return unless %state;
682
683 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
684
685 # status line and headers
686 $state{read_response} = sub {
687 for ("$_[1]") {
688 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
689
690 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
691 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
692
693 # 100 Continue handling
694 # should not happen as we don't send expect: 100-continue,
695 # but we handle it just in case.
696 # since we send the request body regardless, if we get an error
697 # we are out of-sync, which we currently do NOT handle correctly.
698 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
699 if $2 eq 100;
700
701 push @pseudo,
702 HTTPVersion => $1,
703 Status => $2,
704 Reason => $3,
705 ;
706
707 my $hdr = parse_hdr
708 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
709
710 %hdr = (%$hdr, @pseudo);
711 }
712
713 # redirect handling
714 # microsoft and other shitheads don't give a shit for following standards,
715 # try to support some common forms of broken Location headers.
716 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
717 $hdr{location} =~ s/^\.\/+//;
718
719 my $url = "$rscheme://$uhost:$uport";
720
721 unless ($hdr{location} =~ s/^\///) {
722 $url .= $upath;
723 $url =~ s/\/[^\/]*$//;
724 }
725
726 $hdr{location} = "$url/$hdr{location}";
727 }
728
729 my $redirect;
730
731 if ($recurse) {
732 my $status = $hdr{Status};
733
734 # industry standard is to redirect POST as GET for
735 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
736 # also, the UA should ask the user for 301 and 307 and POST,
737 # industry standard seems to be to simply follow.
738 # we go with the industry standard.
739 if ($status == 301 or $status == 302 or $status == 303) {
740 # HTTP/1.1 is unclear on how to mutate the method
741 $method = "GET" unless $method eq "HEAD";
742 $redirect = 1;
743 } elsif ($status == 307) {
744 $redirect = 1;
745 }
746 }
747
748 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
749 my $may_keep_alive = $_[3];
750
751 $state{handle}->destroy if $state{handle};
752 %state = ();
753
754 if (defined $_[1]) {
755 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
756 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
757 }
758
759 # set-cookie processing
760 if ($arg{cookie_jar}) {
761 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
762 }
763
764 if ($redirect && exists $hdr{location}) {
765 # we ignore any errors, as it is very common to receive
766 # Content-Length != 0 but no actual body
767 # we also access %hdr, as $_[1] might be an error
768 http_request (
769 $method => $hdr{location},
770 %arg,
771 recurse => $recurse - 1,
772 Redirect => [$_[0], \%hdr],
773 $cb);
774 } else {
775 $cb->($_[0], \%hdr);
776 }
777 };
778
779 $ae_error = 597; # body phase
780
781 my $len = $hdr{"content-length"};
782
783 # body handling, many different code paths
784 # - no body expected
785 # - want_body_handle
786 # - te chunked
787 # - 2x length known (with or without on_body)
788 # - 2x length not known (with or without on_body)
789 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
790 $finish->(undef, 598 => "Request cancelled by on_header");
791 } elsif (
792 $hdr{Status} =~ /^(?:1..|204|205|304)$/
793 or $method eq "HEAD"
794 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
795 ) {
796 # no body
797 $finish->("", undef, undef, 1);
798
799 } elsif (!$redirect && $arg{want_body_handle}) {
800 $_[0]->on_eof (undef);
801 $_[0]->on_error (undef);
802 $_[0]->on_read (undef);
803
804 $finish->(delete $state{handle});
805
806 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
807 my $cl = 0;
808 my $body = undef;
809 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
810
811 $state{read_chunk} = sub {
812 $_[1] =~ /^([0-9a-fA-F]+)/
813 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
814
815 my $len = hex $1;
816
817 if ($len) {
818 $cl += $len;
819
820 $_[0]->push_read (chunk => $len, sub {
821 $on_body->($_[1], \%hdr)
822 or return $finish->(undef, 598 => "Request cancelled by on_body");
823
824 $_[0]->push_read (line => sub {
825 length $_[1]
826 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
827 $_[0]->push_read (line => $state{read_chunk});
828 });
829 });
830 } else {
831 $hdr{"content-length"} ||= $cl;
832
833 $_[0]->push_read (line => $qr_nlnl, sub {
834 if (length $_[1]) {
835 for ("$_[1]") {
836 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
837
838 my $hdr = parse_hdr
839 or return $finish->(undef, $ae_error => "Garbled response trailers");
840
841 %hdr = (%hdr, %$hdr);
842 }
843 }
844
845 $finish->($body, undef, undef, 1);
846 });
847 }
848 };
849
850 $_[0]->push_read (line => $state{read_chunk});
851
852 } elsif ($arg{on_body}) {
853 if (defined $len) {
854 $_[0]->on_read (sub {
855 $len -= length $_[0]{rbuf};
856
857 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
858 or return $finish->(undef, 598 => "Request cancelled by on_body");
859
860 $len > 0
861 or $finish->("", undef, undef, 1);
862 });
863 } else {
864 $_[0]->on_eof (sub {
865 $finish->("");
866 });
867 $_[0]->on_read (sub {
868 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
869 or $finish->(undef, 598 => "Request cancelled by on_body");
870 });
871 }
872 } else {
873 $_[0]->on_eof (undef);
874
875 if (defined $len) {
876 $_[0]->on_read (sub {
877 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
878 if $len <= length $_[0]{rbuf};
879 });
880 } else {
881 $_[0]->on_error (sub {
882 ($! == Errno::EPIPE || !$!)
883 ? $finish->(delete $_[0]{rbuf})
884 : $finish->(undef, $ae_error => $_[2]);
885 });
886 $_[0]->on_read (sub { });
887 }
888 }
889 };
890
891 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
892 };
604 893
605 my $connect_cb = sub { 894 my $connect_cb = sub {
606 $state{fh} = shift 895 $state{fh} = shift
607 or do { 896 or do {
608 my $err = "$!"; 897 my $err = "$!";
609 %state = (); 898 %state = ();
610 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 899 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
611 }; 900 };
612 901
613 return unless delete $state{connect_guard}; 902 return unless delete $state{connect_guard};
614 903
615 # get handle 904 # get handle
619 tls_ctx => $arg{tls_ctx}, 908 tls_ctx => $arg{tls_ctx},
620 # these need to be reconfigured on keepalive handles 909 # these need to be reconfigured on keepalive handles
621 timeout => $timeout, 910 timeout => $timeout,
622 on_error => sub { 911 on_error => sub {
623 %state = (); 912 %state = ();
624 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 913 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
625 }, 914 },
626 on_eof => sub { 915 on_eof => sub {
627 %state = (); 916 %state = ();
628 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 917 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
629 }, 918 },
630 ; 919 ;
631 920
632 # limit the number of persistent connections 921 # limit the number of persistent connections
633 # keepalive not yet supported 922 # keepalive not yet supported
639# $hdr{connection} = "keep-alive"; 928# $hdr{connection} = "keep-alive";
640# } 929# }
641 930
642 $state{handle}->starttls ("connect") if $rscheme eq "https"; 931 $state{handle}->starttls ("connect") if $rscheme eq "https";
643 932
644 # handle actual, non-tunneled, request
645 my $handle_actual_request = sub {
646 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
647
648 # send request
649 $state{handle}->push_write (
650 "$method $rpath HTTP/1.1\015\012"
651 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
652 . "\015\012"
653 . (delete $arg{body})
654 );
655
656 # return if error occurred during push_write()
657 return unless %state;
658
659 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
660
661 # status line and headers
662 $state{read_response} = sub {
663 for ("$_[1]") {
664 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
665
666 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
667 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
668
669 # 100 Continue handling
670 # should not happen as we don't send expect: 100-continue,
671 # but we handle it just in case.
672 # since we send the request body regardless, if we get an error
673 # we are out of-sync, which we currently do NOT handle correctly.
674 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
675 if $2 eq 100;
676
677 push @pseudo,
678 HTTPVersion => $1,
679 Status => $2,
680 Reason => $3,
681 ;
682
683 my $hdr = parse_hdr
684 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
685
686 %hdr = (%$hdr, @pseudo);
687 }
688
689 # redirect handling
690 # microsoft and other shitheads don't give a shit for following standards,
691 # try to support some common forms of broken Location headers.
692 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
693 $hdr{location} =~ s/^\.\/+//;
694
695 my $url = "$rscheme://$uhost:$uport";
696
697 unless ($hdr{location} =~ s/^\///) {
698 $url .= $upath;
699 $url =~ s/\/[^\/]*$//;
700 }
701
702 $hdr{location} = "$url/$hdr{location}";
703 }
704
705 my $redirect;
706
707 if ($recurse) {
708 my $status = $hdr{Status};
709
710 # industry standard is to redirect POST as GET for
711 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
712 # also, the UA should ask the user for 301 and 307 and POST,
713 # industry standard seems to be to simply follow.
714 # we go with the industry standard.
715 if ($status == 301 or $status == 302 or $status == 303) {
716 # HTTP/1.1 is unclear on how to mutate the method
717 $method = "GET" unless $method eq "HEAD";
718 $redirect = 1;
719 } elsif ($status == 307) {
720 $redirect = 1;
721 }
722 }
723
724 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
725 my $may_keep_alive = $_[3];
726
727 $state{handle}->destroy if $state{handle};
728 %state = ();
729
730 if (defined $_[1]) {
731 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
732 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
733 }
734
735 # set-cookie processing
736 if ($arg{cookie_jar}) {
737 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
738 }
739
740 if ($redirect && exists $hdr{location}) {
741 # we ignore any errors, as it is very common to receive
742 # Content-Length != 0 but no actual body
743 # we also access %hdr, as $_[1] might be an error
744 http_request (
745 $method => $hdr{location},
746 %arg,
747 recurse => $recurse - 1,
748 Redirect => [$_[0], \%hdr],
749 $cb);
750 } else {
751 $cb->($_[0], \%hdr);
752 }
753 };
754
755 my $len = $hdr{"content-length"};
756
757 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
758 $finish->(undef, 598 => "Request cancelled by on_header");
759 } elsif (
760 $hdr{Status} =~ /^(?:1..|204|205|304)$/
761 or $method eq "HEAD"
762 or (defined $len && !$len)
763 ) {
764 # no body
765 $finish->("", undef, undef, 1);
766 } else {
767 # body handling, many different code paths
768 # - no body expected
769 # - want_body_handle
770 # - te chunked
771 # - 2x length known (with or without on_body)
772 # - 2x length not known (with or without on_body)
773 if (!$redirect && $arg{want_body_handle}) {
774 $_[0]->on_eof (undef);
775 $_[0]->on_error (undef);
776 $_[0]->on_read (undef);
777
778 $finish->(delete $state{handle});
779
780 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
781 my $cl = 0;
782 my $body = undef;
783 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
784
785 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
786
787 my $read_chunk; $read_chunk = sub {
788 $_[1] =~ /^([0-9a-fA-F]+)/
789 or $finish->(undef, 599 => "Garbled chunked transfer encoding");
790
791 my $len = hex $1;
792
793 if ($len) {
794 $cl += $len;
795
796 $_[0]->push_read (chunk => $len, sub {
797 $on_body->($_[1], \%hdr)
798 or return $finish->(undef, 598 => "Request cancelled by on_body");
799
800 $_[0]->push_read (line => sub {
801 length $_[1]
802 and return $finish->(undef, 599 => "Garbled chunked transfer encoding");
803 $_[0]->push_read (line => $read_chunk);
804 });
805 });
806 } else {
807 $hdr{"content-length"} ||= $cl;
808
809 $_[0]->push_read (line => $qr_nlnl, sub {
810 if (length $_[1]) {
811 for ("$_[1]") {
812 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
813
814 my $hdr = parse_hdr
815 or return $finish->(undef, 599 => "Garbled response trailers");
816
817 %hdr = (%hdr, %$hdr);
818 }
819 }
820
821 $finish->($body, undef, undef, 1);
822 });
823 }
824 };
825
826 $_[0]->push_read (line => $read_chunk);
827
828 } elsif ($arg{on_body}) {
829 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
830
831 if ($len) {
832 $_[0]->on_read (sub {
833 $len -= length $_[0]{rbuf};
834
835 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
836 or return $finish->(undef, 598 => "Request cancelled by on_body");
837
838 $len > 0
839 or $finish->("", undef, undef, 1);
840 });
841 } else {
842 $_[0]->on_eof (sub {
843 $finish->("");
844 });
845 $_[0]->on_read (sub {
846 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
847 or $finish->(undef, 598 => "Request cancelled by on_body");
848 });
849 }
850 } else {
851 $_[0]->on_eof (undef);
852
853 if ($len) {
854 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
855 $_[0]->on_read (sub {
856 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
857 if $len <= length $_[0]{rbuf};
858 });
859 } else {
860 $_[0]->on_error (sub {
861 ($! == Errno::EPIPE || !$!)
862 ? $finish->(delete $_[0]{rbuf})
863 : $finish->(undef, 599 => $_[2]);
864 });
865 $_[0]->on_read (sub { });
866 }
867 }
868 }
869 };
870
871 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
872 };
873
874 # now handle proxy-CONNECT method 933 # now handle proxy-CONNECT method
875 if ($proxy && $uscheme eq "https") { 934 if ($proxy && $uscheme eq "https") {
876 # oh dear, we have to wrap it into a connect request 935 # oh dear, we have to wrap it into a connect request
877 936
878 # maybe re-use $uauthority with patched port? 937 # maybe re-use $uauthority with patched port?
879 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 938 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
880 $state{handle}->push_read (line => $qr_nlnl, sub { 939 $state{handle}->push_read (line => $qr_nlnl, sub {
881 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 940 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
882 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 941 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
883 942
884 if ($2 == 200) { 943 if ($2 == 200) {
885 $rpath = $upath; 944 $rpath = $upath;
886 &$handle_actual_request; 945 $handle_actual_request->();
887 } else { 946 } else {
888 %state = (); 947 %state = ();
889 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 948 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
890 } 949 }
891 }); 950 });
892 } else { 951 } else {
893 &$handle_actual_request; 952 $handle_actual_request->();
894 } 953 }
895 }; 954 };
896 955
897 my $tcp_connect = $arg{tcp_connect} 956 my $tcp_connect = $arg{tcp_connect}
898 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
899 958
900 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
901
902 }; 960 };
903 961
904 defined wantarray && AnyEvent::Util::guard { %state = () } 962 defined wantarray && AnyEvent::Util::guard { %state = () }
905} 963}
906 964
941string of the form C<http://host:port> (optionally C<https:...>), croaks 999string of the form C<http://host:port> (optionally C<https:...>), croaks
942otherwise. 1000otherwise.
943 1001
944To clear an already-set proxy, use C<undef>. 1002To clear an already-set proxy, use C<undef>.
945 1003
1004=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1005
1006Remove all cookies from the cookie jar that have expired. If
1007C<$session_end> is given and true, then additionally remove all session
1008cookies.
1009
1010You should call this function (with a true C<$session_end>) before you
1011save cookies to disk, and you should call this function after loading them
1012again. If you have a long-running program you can additionally call this
1013function from time to time.
1014
1015A cookie jar is initially an empty hash-reference that is managed by this
1016module. Its format is subject to change, but currently it is like this:
1017
1018The key C<version> has to contain C<1>, otherwise the hash gets
1019emptied. All other keys are hostnames or IP addresses pointing to
1020hash-references. The key for these inner hash references is the
1021server path for which this cookie is meant, and the values are again
1022hash-references. The keys of those hash-references are the cookie names, and
1023the value, you guessed it, is another hash-reference, this time with the
1024key-value pairs from the cookie, except for C<expires> and C<max-age>,
1025which have been replaced by a C<_expires> key that contains the cookie
1026expiry timestamp.
1027
1028Here is an example of a cookie jar with a single cookie, so you have a
1029chance of understanding the above paragraph:
1030
1031 {
1032 version => 1,
1033 "10.0.0.1" => {
1034 "/" => {
1035 "mythweb_id" => {
1036 _expires => 1293917923,
1037 value => "ooRung9dThee3ooyXooM1Ohm",
1038 },
1039 },
1040 },
1041 }
1042
946=item $date = AnyEvent::HTTP::format_date $timestamp 1043=item $date = AnyEvent::HTTP::format_date $timestamp
947 1044
948Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1045Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
949Date (RFC 2616). 1046Date (RFC 2616).
950 1047
951=item $timestamp = AnyEvent::HTTP::parse_date $date 1048=item $timestamp = AnyEvent::HTTP::parse_date $date
952 1049
953Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 1050Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
954returns the corresponding POSIX timestamp, or C<undef> if the date cannot 1051bunch of minor variations of those, and returns the corresponding POSIX
955be parsed. 1052timestamp, or C<undef> if the date cannot be parsed.
956 1053
957=item $AnyEvent::HTTP::MAX_RECURSE 1054=item $AnyEvent::HTTP::MAX_RECURSE
958 1055
959The default value for the C<recurse> request parameter (default: C<10>). 1056The default value for the C<recurse> request parameter (default: C<10>).
960 1057
999sub parse_date($) { 1096sub parse_date($) {
1000 my ($date) = @_; 1097 my ($date) = @_;
1001 1098
1002 my ($d, $m, $y, $H, $M, $S); 1099 my ($d, $m, $y, $H, $M, $S);
1003 1100
1004 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1101 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1005 # RFC 822/1123, required by RFC 2616 (with " ") 1102 # RFC 822/1123, required by RFC 2616 (with " ")
1006 # cookie dates (with "-") 1103 # cookie dates (with "-")
1007 1104
1008 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1105 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1009 1106
1010 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1107 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1011 # RFC 850 1108 # RFC 850
1012 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1109 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1013 1110
1014 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1111 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1015 # ISO C's asctime 1112 # ISO C's asctime
1016 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1113 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1017 } 1114 }
1018 # other formats fail in the loop below 1115 # other formats fail in the loop below
1019 1116

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines