ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.71 by root, Fri Dec 31 20:50:58 2010 UTC vs.
Revision 1.88 by root, Sun Jan 2 20:57:03 2011 UTC

36 36
37=cut 37=cut
38 38
39package AnyEvent::HTTP; 39package AnyEvent::HTTP;
40 40
41use strict; 41use common::sense;
42no warnings;
43 42
44use Errno (); 43use Errno ();
45 44
46use AnyEvent 5.0 (); 45use AnyEvent 5.0 ();
47use AnyEvent::Util (); 46use AnyEvent::Util ();
58our $MAX_PERSISTENT = 8; 57our $MAX_PERSISTENT = 8;
59our $PERSISTENT_TIMEOUT = 2; 58our $PERSISTENT_TIMEOUT = 2;
60our $TIMEOUT = 300; 59our $TIMEOUT = 300;
61 60
62# changing these is evil 61# changing these is evil
63our $MAX_PERSISTENT_PER_HOST = 0; 62our $MAX_PERSISTENT_PER_HOST = 2;
64our $MAX_PER_HOST = 4; 63our $MAX_PER_HOST = 4;
65 64
66our $PROXY; 65our $PROXY;
67our $ACTIVE = 0; 66our $ACTIVE = 0;
68 67
122 121
123If the server sends a header multiple times, then their contents will be 122If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 123joined together with a comma (C<,>), as per the HTTP spec.
125 124
126If an internal error occurs, such as not being able to resolve a hostname, 125If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 126then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 127C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 128message. Currently the following status codes are used:
129
130=over 4
131
132=item 595 - errors during connection establishment, proxy handshake.
133
134=item 596 - errors during TLS negotiation, request sending and header processing.
135
136=item 597 - errors during body receiving or processing.
137
138=item 598 - user aborted request via C<on_header> or C<on_body>.
139
140=item 599 - other, usually nonretryable, errors (garbled URL etc.).
141
142=back
130 143
131A typical callback might look like this: 144A typical callback might look like this:
132 145
133 sub { 146 sub {
134 my ($body, $hdr) = @_; 147 my ($body, $hdr) = @_;
182=item cookie_jar => $hash_ref 195=item cookie_jar => $hash_ref
183 196
184Passing this parameter enables (simplified) cookie-processing, loosely 197Passing this parameter enables (simplified) cookie-processing, loosely
185based on the original netscape specification. 198based on the original netscape specification.
186 199
187The C<$hash_ref> must be an (initially empty) hash reference which will 200The C<$hash_ref> must be an (initially empty) hash reference which
188get updated automatically. It is possible to save the cookie jar to 201will get updated automatically. It is possible to save the cookie jar
189persistent storage with something like JSON or Storable, but this is not 202to persistent storage with something like JSON or Storable - see the
190recommended, as session-only cookies might survive longer than expected. 203C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
204expired or session-only cookies, and also for documentation on the format
205of the cookie jar.
191 206
192Note that this cookie implementation is not meant to be complete. If 207Note that this cookie implementation is not meant to be complete. If
193you want complete cookie management you have to do that on your 208you want complete cookie management you have to do that on your
194own. C<cookie_jar> is meant as a quick fix to get some cookie-using sites 209own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
195working. Cookies are a privacy disaster, do not use them unless required 210working. Cookies are a privacy disaster, do not use them unless required
196to. 211to.
197 212
198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 213When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
199headers will be set and handled by this module, otherwise they will be 214headers will be set and handled by this module, otherwise they will be
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 379 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 380
366 _slot_schedule $_[0]; 381 _slot_schedule $_[0];
367} 382}
368 383
384#############################################################################
385
386# expire cookies
387sub cookie_jar_expire($;$) {
388 my ($jar, $session_end) = @_;
389
390 %$jar = () if $jar->{version} != 1;
391
392 my $anow = AE::now;
393
394 while (my ($chost, $paths) = each %$jar) {
395 next unless ref $paths;
396
397 while (my ($cpath, $cookies) = each %$paths) {
398 while (my ($cookie, $kv) = each %$cookies) {
399 if (exists $kv->{_expires}) {
400 delete $cookies->{$cookie}
401 if $anow > $kv->{_expires};
402 } elsif ($session_end) {
403 delete $cookies->{$cookie};
404 }
405 }
406
407 delete $paths->{$cpath}
408 unless %$cookies;
409 }
410
411 delete $jar->{$chost}
412 unless %$paths;
413 }
414}
415
416# extract cookies from jar
369sub cookie_jar_extract($$$$) { 417sub cookie_jar_extract($$$$) {
370 my ($jar, $uscheme, $uhost, $upath) = @_; 418 my ($jar, $uscheme, $uhost, $upath) = @_;
371 419
372 %$jar = () if $jar->{version} != 1; 420 %$jar = () if $jar->{version} != 1;
373 421
388 next unless $cpath eq substr $upath, 0, length $cpath; 436 next unless $cpath eq substr $upath, 0, length $cpath;
389 437
390 while (my ($cookie, $kv) = each %$cookies) { 438 while (my ($cookie, $kv) = each %$cookies) {
391 next if $uscheme ne "https" && exists $kv->{secure}; 439 next if $uscheme ne "https" && exists $kv->{secure};
392 440
393 if (exists $kv->{expires}) { 441 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
394 if (AE::now > parse_date ($kv->{expires})) {
395 delete $cookies->{$cookie}; 442 delete $cookies->{$cookie};
396 next; 443 next;
397 }
398 } 444 }
399 445
400 my $value = $kv->{value}; 446 my $value = $kv->{value};
401 447
402 if ($value =~ /[=;,[:space:]]/) { 448 if ($value =~ /[=;,[:space:]]/) {
410 } 456 }
411 457
412 \@cookies 458 \@cookies
413} 459}
414 460
461# parse set_cookie header into jar
462sub cookie_jar_set_cookie($$$$) {
463 my ($jar, $set_cookie, $uhost, $date) = @_;
464
465 my $anow = int AE::now;
466 my $snow; # server-now
467
468 for ($set_cookie) {
469 # parse NAME=VALUE
470 my @kv;
471
472 # expires is not http-compliant in the original cookie-spec,
473 # we support the official date format and some extensions
474 while (
475 m{
476 \G\s*
477 (?:
478 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
479 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
480 )
481 }gcxsi
482 ) {
483 my $name = $2;
484 my $value = $4;
485
486 if (defined $1) {
487 # expires
488 $name = "expires";
489 $value = $1;
490 } elsif (defined $3) {
491 # quoted
492 $value = $3;
493 $value =~ s/\\(.)/$1/gs;
494 }
495
496 push @kv, lc $name, $value;
497
498 last unless /\G\s*;/gc;
499 }
500
501 last unless @kv;
502
503 my $name = shift @kv;
504 my %kv = (value => shift @kv, @kv);
505
506 if (exists $kv{"max-age"}) {
507 $kv{_expires} = $anow + delete $kv{"max-age"};
508 } elsif (exists $kv{expires}) {
509 $snow ||= parse_date ($date) || $anow;
510 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
511 } else {
512 delete $kv{_expires};
513 }
514
515 my $cdom;
516 my $cpath = (delete $kv{path}) || "/";
517
518 if (exists $kv{domain}) {
519 $cdom = delete $kv{domain};
520
521 $cdom =~ s/^\.?/./; # make sure it starts with a "."
522
523 next if $cdom =~ /\.$/;
524
525 # this is not rfc-like and not netscape-like. go figure.
526 my $ndots = $cdom =~ y/.//;
527 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
528 } else {
529 $cdom = $uhost;
530 }
531
532 # store it
533 $jar->{version} = 1;
534 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
535
536 redo if /\G\s*,/gc;
537 }
538}
539
415# continue to parse $_ for headers and place them into the arg 540# continue to parse $_ for headers and place them into the arg
416sub parse_hdr() { 541sub parse_hdr() {
417 my %hdr; 542 my %hdr;
418 543
419 # things seen, not parsed: 544 # things seen, not parsed:
481 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 606 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
482 607
483 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 608 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
484 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 609 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
485 610
486 my $uhost = $1; 611 my $uhost = lc $1;
487 $uport = $2 if defined $2; 612 $uport = $2 if defined $2;
488 613
489 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 614 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
490 unless exists $hdr{host}; 615 unless exists $hdr{host};
491 616
510 $rscheme = "http" unless defined $rscheme; 635 $rscheme = "http" unless defined $rscheme;
511 636
512 # don't support https requests over https-proxy transport, 637 # don't support https requests over https-proxy transport,
513 # can't be done with tls as spec'ed, unless you double-encrypt. 638 # can't be done with tls as spec'ed, unless you double-encrypt.
514 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 639 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
640
641 $rhost = lc $rhost;
642 $rscheme = lc $rscheme;
515 } else { 643 } else {
516 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 644 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
517 } 645 }
518 646
519 # leave out fragment and query string, just a heuristic 647 # leave out fragment and query string, just a heuristic
521 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
522 650
523 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
524 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
525 653
526 $hdr{connection} = "close TE"; #1.1 654 $hdr{connection} = "close Te"; #1.1
527 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
528 656
529 my %state = (connect_guard => 1); 657 my %state = (connect_guard => 1);
530 658
531 _get_slot $uhost, sub { 659 my $ae_error = 595; # connecting
532 $state{slot_guard} = shift;
533 660
661 # handle actual, non-tunneled, request
662 my $handle_actual_request = sub {
663 $ae_error = 596; # request phase
664
665 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
666
667 # send request
668 $state{handle}->push_write (
669 "$method $rpath HTTP/1.1\015\012"
670 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
671 . "\015\012"
672 . (delete $arg{body})
673 );
674
675 # return if error occured during push_write()
534 return unless $state{connect_guard}; 676 return unless %state;
535 677
536 my $connect_cb = sub { 678 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
537 $state{fh} = shift 679
680 # status line and headers
681 $state{read_response} = sub {
682 for ("$_[1]") {
683 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
684
685 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
686 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
687
688 # 100 Continue handling
689 # should not happen as we don't send expect: 100-continue,
690 # but we handle it just in case.
691 # since we send the request body regardless, if we get an error
692 # we are out of-sync, which we currently do NOT handle correctly.
693 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
694 if $2 eq 100;
695
696 push @pseudo,
697 HTTPVersion => $1,
698 Status => $2,
699 Reason => $3,
538 or do { 700 ;
539 my $err = "$!"; 701
702 my $hdr = parse_hdr
703 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
704
705 %hdr = (%$hdr, @pseudo);
706 }
707
708 # redirect handling
709 # microsoft and other shitheads don't give a shit for following standards,
710 # try to support some common forms of broken Location headers.
711 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
712 $hdr{location} =~ s/^\.\/+//;
713
714 my $url = "$rscheme://$uhost:$uport";
715
716 unless ($hdr{location} =~ s/^\///) {
717 $url .= $upath;
718 $url =~ s/\/[^\/]*$//;
719 }
720
721 $hdr{location} = "$url/$hdr{location}";
722 }
723
724 my $redirect;
725
726 if ($recurse) {
727 my $status = $hdr{Status};
728
729 # industry standard is to redirect POST as GET for
730 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
731 # also, the UA should ask the user for 301 and 307 and POST,
732 # industry standard seems to be to simply follow.
733 # we go with the industry standard.
734 if ($status == 301 or $status == 302 or $status == 303) {
735 # HTTP/1.1 is unclear on how to mutate the method
736 $method = "GET" unless $method eq "HEAD";
737 $redirect = 1;
738 } elsif ($status == 307) {
739 $redirect = 1;
740 }
741 }
742
743 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
744 my $may_keep_alive = $_[3];
745
746 $state{handle}->destroy if $state{handle};
540 %state = (); 747 %state = ();
541 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 748
749 if (defined $_[1]) {
750 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
751 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
752 }
753
754 # set-cookie processing
755 if ($arg{cookie_jar}) {
756 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
757 }
758
759 if ($redirect && exists $hdr{location}) {
760 # we ignore any errors, as it is very common to receive
761 # Content-Length != 0 but no actual body
762 # we also access %hdr, as $_[1] might be an erro
763 http_request (
764 $method => $hdr{location},
765 %arg,
766 recurse => $recurse - 1,
767 Redirect => [$_[0], \%hdr],
768 $cb);
769 } else {
770 $cb->($_[0], \%hdr);
771 }
772 };
773
774 $ae_error = 597; # body phase
775
776 my $len = $hdr{"content-length"};
777
778 # body handling, many different code paths
779 # - no body expected
780 # - want_body_handle
781 # - te chunked
782 # - 2x length known (with or without on_body)
783 # - 2x length not known (with or without on_body)
784 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
785 $finish->(undef, 598 => "Request cancelled by on_header");
786 } elsif (
787 $hdr{Status} =~ /^(?:1..|204|205|304)$/
788 or $method eq "HEAD"
789 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
790 ) {
791 # no body
792 $finish->("", undef, undef, 1);
793
794 } elsif (!$redirect && $arg{want_body_handle}) {
795 $_[0]->on_eof (undef);
796 $_[0]->on_error (undef);
797 $_[0]->on_read (undef);
798
799 $finish->(delete $state{handle});
800
801 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
802 my $cl = 0;
803 my $body = undef;
804 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
805
806 $state{read_chunk} = sub {
807 $_[1] =~ /^([0-9a-fA-F]+)/
808 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
809
810 my $len = hex $1;
811
812 if ($len) {
813 $cl += $len;
814
815 $_[0]->push_read (chunk => $len, sub {
816 $on_body->($_[1], \%hdr)
817 or return $finish->(undef, 598 => "Request cancelled by on_body");
818
819 $_[0]->push_read (line => sub {
820 length $_[1]
821 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
822 $_[0]->push_read (line => $state{read_chunk});
823 });
824 });
825 } else {
826 $hdr{"content-length"} ||= $cl;
827
828 $_[0]->push_read (line => $qr_nlnl, sub {
829 if (length $_[1]) {
830 for ("$_[1]") {
831 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
832
833 my $hdr = parse_hdr
834 or return $finish->(undef, $ae_error => "Garbled response trailers");
835
836 %hdr = (%hdr, %$hdr);
837 }
838 }
839
840 $finish->($body, undef, undef, 1);
841 });
842 }
542 }; 843 };
543 844
544 pop; # free memory, save a tree 845 $_[0]->push_read (line => $state{read_chunk});
545 846
847 } elsif ($arg{on_body}) {
848 if (defined $len) {
849 $_[0]->on_read (sub {
850 $len -= length $_[0]{rbuf};
851
852 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
853 or return $finish->(undef, 598 => "Request cancelled by on_body");
854
855 $len > 0
856 or $finish->("", undef, undef, 1);
857 });
858 } else {
859 $_[0]->on_eof (sub {
860 $finish->("");
861 });
862 $_[0]->on_read (sub {
863 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
864 or $finish->(undef, 598 => "Request cancelled by on_body");
865 });
866 }
867 } else {
868 $_[0]->on_eof (undef);
869
870 if (defined $len) {
871 $_[0]->on_read (sub {
872 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
873 if $len <= length $_[0]{rbuf};
874 });
875 } else {
876 $_[0]->on_error (sub {
877 ($! == Errno::EPIPE || !$!)
878 ? $finish->(delete $_[0]{rbuf})
879 : $finish->(undef, $ae_error => $_[2]);
880 });
881 $_[0]->on_read (sub { });
882 }
883 }
884 };
885
886 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
887 };
888
889 my $connect_cb = sub {
890 $state{fh} = shift
891 or do {
892 my $err = "$!";
893 %state = ();
894 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
895 };
896
546 return unless delete $state{connect_guard}; 897 return unless delete $state{connect_guard};
547 898
548 # get handle 899 # get handle
549 $state{handle} = new AnyEvent::Handle 900 $state{handle} = new AnyEvent::Handle
550 fh => $state{fh}, 901 fh => $state{fh},
551 peername => $rhost, 902 peername => $rhost,
552 tls_ctx => $arg{tls_ctx}, 903 tls_ctx => $arg{tls_ctx},
553 # these need to be reconfigured on keepalive handles 904 # these need to be reconfigured on keepalive handles
554 timeout => $timeout, 905 timeout => $timeout,
555 on_error => sub { 906 on_error => sub {
556 %state = (); 907 %state = ();
557 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 908 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
558 }, 909 },
559 on_eof => sub { 910 on_eof => sub {
560 %state = (); 911 %state = ();
561 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 912 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
562 }, 913 },
563 ; 914 ;
564 915
565 # limit the number of persistent connections 916 # limit the number of persistent connections
566 # keepalive not yet supported 917 # keepalive not yet supported
567# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 918# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
568# ++$KA_COUNT{$_[1]}; 919# ++$KA_COUNT{$_[1]};
569# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 920# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
570# --$KA_COUNT{$_[1]} 921# --$KA_COUNT{$_[1]}
571# }; 922# };
572# $hdr{connection} = "keep-alive"; 923# $hdr{connection} = "keep-alive";
573# } 924# }
574 925
575 $state{handle}->starttls ("connect") if $rscheme eq "https"; 926 $state{handle}->starttls ("connect") if $rscheme eq "https";
576 927
577 # handle actual, non-tunneled, request
578 my $handle_actual_request = sub {
579 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
580
581 # send request
582 $state{handle}->push_write (
583 "$method $rpath HTTP/1.1\015\012"
584 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
585 . "\015\012"
586 . (delete $arg{body})
587 );
588
589 # return if error occured during push_write()
590 return unless %state;
591
592 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
593
594 # status line and headers
595 $state{read_response} = sub {
596 for ("$_[1]") {
597 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
598
599 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc
600 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
601
602 # 100 Continue handling
603 # should not happen as we don't send expect: 100-continue,
604 # but we handle it just in case.
605 # since we send the request body regardless, if we get an error
606 # we are out of-sync, which we currently do NOT handle correctly.
607 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
608 if $2 eq 100;
609
610 push @pseudo,
611 HTTPVersion => $1,
612 Status => $2,
613 Reason => $3,
614 ;
615
616 my $hdr = parse_hdr
617 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
618
619 %hdr = (%$hdr, @pseudo);
620 }
621
622 # redirect handling
623 # microsoft and other shitheads don't give a shit for following standards,
624 # try to support some common forms of broken Location headers.
625 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
626 $hdr{location} =~ s/^\.\/+//;
627
628 my $url = "$rscheme://$uhost:$uport";
629
630 unless ($hdr{location} =~ s/^\///) {
631 $url .= $upath;
632 $url =~ s/\/[^\/]*$//;
633 }
634
635 $hdr{location} = "$url/$hdr{location}";
636 }
637
638 my $redirect;
639
640 if ($recurse) {
641 my $status = $hdr{Status};
642
643 # industry standard is to redirect POST as GET for
644 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
645 # also, the UA should ask the user for 301 and 307 and POST,
646 # industry standard seems to be to simply follow.
647 # we go with the industry standard.
648 if ($status == 301 or $status == 302 or $status == 303) {
649 # HTTP/1.1 is unclear on how to mutate the method
650 $method = "GET" unless $method eq "HEAD";
651 $redirect = 1;
652 } elsif ($status == 307) {
653 $redirect = 1;
654 }
655 }
656
657 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
658 my $keepalive = pop;
659
660 $state{handle}->destroy if $state{handle};
661 %state = ();
662
663 if (defined $_[1]) {
664 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
665 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
666 }
667
668 # set-cookie processing
669 if ($arg{cookie_jar}) {
670 for ($hdr{"set-cookie"}) {
671 # parse NAME=VALUE
672 my @kv;
673
674 while (
675 m{
676 \G\s*
677 (?:
678 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+)
679 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
680 )
681 }gcxsi
682 ) {
683 my $name = $2;
684 my $value = $4;
685
686 unless (defined $name) {
687 # expires
688 $name = "expires";
689 $value = $1;
690 } elsif (!defined $value) {
691 # quoted
692 $value = $3;
693 $value =~ s/\\(.)/$1/gs;
694 }
695
696 push @kv, lc $name, $value;
697
698 last unless /\G\s*;/gc;
699 }
700
701 last unless @kv;
702
703 my $name = shift @kv;
704 my %kv = (value => shift @kv, @kv);
705
706 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
707 if exists $kv{"max-age"};
708
709 my $cdom;
710 my $cpath = (delete $kv{path}) || "/";
711
712 if (exists $kv{domain}) {
713 $cdom = delete $kv{domain};
714
715 $cdom =~ s/^\.?/./; # make sure it starts with a "."
716
717 next if $cdom =~ /\.$/;
718
719 # this is not rfc-like and not netscape-like. go figure.
720 my $ndots = $cdom =~ y/.//;
721 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
722 } else {
723 $cdom = $uhost;
724 }
725
726 # store it
727 $arg{cookie_jar}{version} = 1;
728 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
729
730 redo if /\G\s*,/gc;
731 }
732 }
733
734 if ($redirect && exists $hdr{location}) {
735 # we ignore any errors, as it is very common to receive
736 # Content-Length != 0 but no actual body
737 # we also access %hdr, as $_[1] might be an erro
738 http_request (
739 $method => $hdr{location},
740 %arg,
741 recurse => $recurse - 1,
742 Redirect => [$_[0], \%hdr],
743 $cb);
744 } else {
745 $cb->($_[0], \%hdr);
746 }
747 };
748
749 my $len = $hdr{"content-length"};
750
751 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
752 $finish->(undef, 598 => "Request cancelled by on_header");
753 } elsif (
754 $hdr{Status} =~ /^(?:1..|204|205|304)$/
755 or $method eq "HEAD"
756 or (defined $len && !$len)
757 ) {
758 # no body
759 $finish->("", undef, undef, 1);
760 } else {
761 # body handling, many different code paths
762 # - no body expected
763 # - want_body_handle
764 # - te chunked
765 # - 2x length known (with or without on_body)
766 # - 2x length not known (with or without on_body)
767 if (!$redirect && $arg{want_body_handle}) {
768 $_[0]->on_eof (undef);
769 $_[0]->on_error (undef);
770 $_[0]->on_read (undef);
771
772 $finish->(delete $state{handle});
773
774 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
775 my $cl = 0;
776 my $body = undef;
777 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
778
779 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
780
781 my $read_chunk; $read_chunk = sub {
782 $_[1] =~ /^([0-9a-fA-F]+)/
783 or $finish->(undef, 599 => "Garbled chunked transfer encoding");
784
785 my $len = hex $1;
786
787 if ($len) {
788 $cl += $len;
789
790 $_[0]->push_read (chunk => $len, sub {
791 $on_body->($_[1], \%hdr)
792 or return $finish->(undef, 598 => "Request cancelled by on_body");
793
794 $_[0]->push_read (line => sub {
795 length $_[1]
796 and return $finish->(undef, 599 => "Garbled chunked transfer encoding");
797 $_[0]->push_read (line => $read_chunk);
798 });
799 });
800 } else {
801 $hdr{"content-length"} ||= $cl;
802
803 $_[0]->push_read (line => $qr_nlnl, sub {
804 if (length $_[1]) {
805 for ("$_[1]") {
806 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
807
808 my $hdr = parse_hdr
809 or return $finish->(undef, 599 => "Garbled response trailers");
810
811 %hdr = (%hdr, %$hdr);
812 }
813 }
814
815 $finish->($body, undef, undef, 1);
816 });
817 }
818 };
819
820 $_[0]->push_read (line => $read_chunk);
821
822 } elsif ($arg{on_body}) {
823 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
824
825 if ($len) {
826 $_[0]->on_read (sub {
827 $len -= length $_[0]{rbuf};
828
829 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
830 or return $finish->(undef, 598 => "Request cancelled by on_body");
831
832 $len > 0
833 or $finish->("", undef, undef, 1);
834 });
835 } else {
836 $_[0]->on_eof (sub {
837 $finish->("");
838 });
839 $_[0]->on_read (sub {
840 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
841 or $finish->(undef, 598 => "Request cancelled by on_body");
842 });
843 }
844 } else {
845 $_[0]->on_eof (undef);
846
847 if ($len) {
848 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
849 $_[0]->on_read (sub {
850 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
851 if $len <= length $_[0]{rbuf};
852 });
853 } else {
854 $_[0]->on_error (sub {
855 ($! == Errno::EPIPE || !$!)
856 ? $finish->(delete $_[0]{rbuf})
857 : $finish->(undef, 599 => $_[2]);
858 });
859 $_[0]->on_read (sub { });
860 }
861 }
862 }
863 };
864
865 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
866 };
867
868 # now handle proxy-CONNECT method 928 # now handle proxy-CONNECT method
869 if ($proxy && $uscheme eq "https") { 929 if ($proxy && $uscheme eq "https") {
870 # oh dear, we have to wrap it into a connect request 930 # oh dear, we have to wrap it into a connect request
871 931
872 # maybe re-use $uauthority with patched port? 932 # maybe re-use $uauthority with patched port?
873 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 933 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
874 $state{handle}->push_read (line => $qr_nlnl, sub { 934 $state{handle}->push_read (line => $qr_nlnl, sub {
875 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 935 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
876 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 936 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
877 937
878 if ($2 == 200) { 938 if ($2 == 200) {
879 $rpath = $upath; 939 $rpath = $upath;
880 &$handle_actual_request; 940 $handle_actual_request->();
881 } else { 941 } else {
882 %state = (); 942 %state = ();
883 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 943 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
884 }
885 }); 944 }
886 } else {
887 &$handle_actual_request;
888 } 945 });
946 } else {
947 $handle_actual_request->();
889 }; 948 }
949 };
950
951 _get_slot $uhost, sub {
952 $state{slot_guard} = shift;
953
954 return unless $state{connect_guard};
890 955
891 my $tcp_connect = $arg{tcp_connect} 956 my $tcp_connect = $arg{tcp_connect}
892 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
893 958
894 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
895
896 }; 960 };
897 961
898 defined wantarray && AnyEvent::Util::guard { %state = () } 962 defined wantarray && AnyEvent::Util::guard { %state = () }
899} 963}
900 964
935string of the form C<http://host:port> (optionally C<https:...>), croaks 999string of the form C<http://host:port> (optionally C<https:...>), croaks
936otherwise. 1000otherwise.
937 1001
938To clear an already-set proxy, use C<undef>. 1002To clear an already-set proxy, use C<undef>.
939 1003
1004=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1005
1006Remove all cookies from the cookie jar that have expired. If
1007C<$session_end> is given and true, then additionally remove all session
1008cookies.
1009
1010You should call this function (with a true C<$session_end>) before you
1011save cookies to disk, and you should call this function after loading them
1012again. If you have a long-running program you can additionally call this
1013function from time to time.
1014
1015A cookie jar is initially an empty hash-reference that is managed by this
1016module. Its format is subject to change, but currently it is like this:
1017
1018The key C<version> has to contain C<1>, otherwise the hash gets
1019emptied. All other keys are hostnames or IP addresses pointing to
1020hash-references. The key for these inner hash references is the
1021server path for which this cookie is meant, and the values are again
1022hash-references. The key of those hash-references is the cookie name, and
1023the value, you guessed it, is another hash-reference, this time with the
1024key-value pairs from the cookie, except for C<expires> and C<max-age>,
1025which have been replaced by a C<_expires> key that contains the cookie
1026expiry timestamp.
1027
1028Here is an example of a cookie jar with a single cookie, so you have a
1029chance of understanding the above paragraph:
1030
1031 {
1032 version => 1,
1033 "10.0.0.1" => {
1034 "/" => {
1035 "mythweb_id" => {
1036 _expires => 1293917923,
1037 value => "ooRung9dThee3ooyXooM1Ohm",
1038 },
1039 },
1040 },
1041 }
1042
940=item $date = AnyEvent::HTTP::format_date $timestamp 1043=item $date = AnyEvent::HTTP::format_date $timestamp
941 1044
942Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1045Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
943Date (RFC 2616). 1046Date (RFC 2616).
944 1047
945=item $timestamp = AnyEvent::HTTP::parse_date $date 1048=item $timestamp = AnyEvent::HTTP::parse_date $date
946 1049
947Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 1050Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
948returns the corresponding POSIX timestamp, or C<undef> if the date cannot 1051bunch of minor variations of those, and returns the corresponding POSIX
949be parsed. 1052timestamp, or C<undef> if the date cannot be parsed.
950 1053
951=item $AnyEvent::HTTP::MAX_RECURSE 1054=item $AnyEvent::HTTP::MAX_RECURSE
952 1055
953The default value for the C<recurse> request parameter (default: C<10>). 1056The default value for the C<recurse> request parameter (default: C<10>).
954 1057
993sub parse_date($) { 1096sub parse_date($) {
994 my ($date) = @_; 1097 my ($date) = @_;
995 1098
996 my ($d, $m, $y, $H, $M, $S); 1099 my ($d, $m, $y, $H, $M, $S);
997 1100
998 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1101 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
999 # RFC 822/1123, required by RFC 2616 (with " ") 1102 # RFC 822/1123, required by RFC 2616 (with " ")
1000 # cookie dates (with "-") 1103 # cookie dates (with "-")
1001 1104
1002 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1105 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1003 1106
1004 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1107 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1005 # RFC 850 1108 # RFC 850
1006 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1109 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1007 1110
1008 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1111 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1009 # ISO C's asctime 1112 # ISO C's asctime
1010 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1113 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1011 } 1114 }
1012 # other formats fail in the loop below 1115 # other formats fail in the loop below
1013 1116

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines