ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.69 by root, Fri Dec 31 19:32:47 2010 UTC vs.
Revision 1.87 by root, Sun Jan 2 08:51:53 2011 UTC

36 36
37=cut 37=cut
38 38
39package AnyEvent::HTTP; 39package AnyEvent::HTTP;
40 40
41use strict; 41use common::sense;
42no warnings;
43 42
44use Errno (); 43use Errno ();
45 44
46use AnyEvent 5.0 (); 45use AnyEvent 5.0 ();
47use AnyEvent::Util (); 46use AnyEvent::Util ();
122 121
123If the server sends a header multiple times, then their contents will be 122If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 123joined together with a comma (C<,>), as per the HTTP spec.
125 124
126If an internal error occurs, such as not being able to resolve a hostname, 125If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 126then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 127C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 128message. Currently the following status codes are used:
129
130=over 4
131
132=item 595 - errors during connection etsbalishment, proxy handshake.
133
134=item 596 - errors during TLS negotiation, request sending and header processing.
135
136=item 597 - errors during body receiving or processing.
137
138=item 598 - user aborted request via C<on_header> or C<on_body>.
139
140=item 599 - other, usually nonretryable, errors (garbled URL etc.).
141
142=back
130 143
131A typical callback might look like this: 144A typical callback might look like this:
132 145
133 sub { 146 sub {
134 my ($body, $hdr) = @_; 147 my ($body, $hdr) = @_;
182=item cookie_jar => $hash_ref 195=item cookie_jar => $hash_ref
183 196
184Passing this parameter enables (simplified) cookie-processing, loosely 197Passing this parameter enables (simplified) cookie-processing, loosely
185based on the original netscape specification. 198based on the original netscape specification.
186 199
187The C<$hash_ref> must be an (initially empty) hash reference which will 200The C<$hash_ref> must be an (initially empty) hash reference which
188get updated automatically. It is possible to save the cookie_jar to 201will get updated automatically. It is possible to save the cookie jar
189persistent storage with something like JSON or Storable, but this is not 202to persistent storage with something like JSON or Storable - see the
190recommended, as expiry times are currently being ignored. 203C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
204expired or session-only cookies, and also for documentation on the format
205of the cookie jar.
191 206
192Note that this cookie implementation is not of very high quality, nor 207Note that this cookie implementation is not meant to be complete. If
193meant to be complete. If you want complete cookie management you have to 208you want complete cookie management you have to do that on your
194do that on your own. C<cookie_jar> is meant as a quick fix to get some 209own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
195cookie-using sites working. Cookies are a privacy disaster, do not use 210working. Cookies are a privacy disaster, do not use them unless required
196them unless required to. 211to.
197 212
198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 213When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
199headers will be ste and handled by this module, otherwise they will be 214headers will be set and handled by this module, otherwise they will be
200left untouched. 215left untouched.
201 216
202=item tls_ctx => $scheme | $tls_ctx 217=item tls_ctx => $scheme | $tls_ctx
203 218
204Specifies the AnyEvent::TLS context to be used for https connections. This 219Specifies the AnyEvent::TLS context to be used for https connections. This
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 379 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 380
366 _slot_schedule $_[0]; 381 _slot_schedule $_[0];
367} 382}
368 383
384#############################################################################
385
386# expire cookies
387sub cookie_jar_expire($;$) {
388 my ($jar, $session_end) = @_;
389
390 %$jar = () if $jar->{version} != 1;
391
392 my $anow = AE::now;
393
394 while (my ($chost, $paths) = each %$jar) {
395 next unless ref $paths;
396
397 while (my ($cpath, $cookies) = each %$paths) {
398 while (my ($cookie, $kv) = each %$cookies) {
399 if (exists $kv->{_expires}) {
400 delete $cookies->{$cookie}
401 if $anow > $kv->{_expires};
402 } elsif ($session_end) {
403 delete $cookies->{$cookie};
404 }
405 }
406
407 delete $paths->{$cpath}
408 unless %$cookies;
409 }
410
411 delete $jar->{$chost}
412 unless %$paths;
413 }
414}
415
416# extract cookies from jar
417sub cookie_jar_extract($$$$) {
418 my ($jar, $uscheme, $uhost, $upath) = @_;
419
420 %$jar = () if $jar->{version} != 1;
421
422 my @cookies;
423
424 while (my ($chost, $paths) = each %$jar) {
425 next unless ref $paths;
426
427 if ($chost =~ /^\./) {
428 next unless $chost eq substr $uhost, -length $chost;
429 } elsif ($chost =~ /\./) {
430 next unless $chost eq $uhost;
431 } else {
432 next;
433 }
434
435 while (my ($cpath, $cookies) = each %$paths) {
436 next unless $cpath eq substr $upath, 0, length $cpath;
437
438 while (my ($cookie, $kv) = each %$cookies) {
439 next if $uscheme ne "https" && exists $kv->{secure};
440
441 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
442 delete $cookies->{$cookie};
443 next;
444 }
445
446 my $value = $kv->{value};
447
448 if ($value =~ /[=;,[:space:]]/) {
449 $value =~ s/([\\"])/\\$1/g;
450 $value = "\"$value\"";
451 }
452
453 push @cookies, "$cookie=$value";
454 }
455 }
456 }
457
458 \@cookies
459}
460
461# parse set_cookie header into jar
462sub cookie_jar_set_cookie($$$$) {
463 my ($jar, $set_cookie, $uhost, $date) = @_;
464
465 my $anow = int AE::now;
466 my $snow; # server-now
467
468 for ($set_cookie) {
469 # parse NAME=VALUE
470 my @kv;
471
472 # expires is not http-compliant in the original cookie-spec,
473 # we support the official date format and some extensions
474 while (
475 m{
476 \G\s*
477 (?:
478 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
479 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
480 )
481 }gcxsi
482 ) {
483 my $name = $2;
484 my $value = $4;
485
486 if (defined $1) {
487 # expires
488 $name = "expires";
489 $value = $1;
490 } elsif (defined $3) {
491 # quoted
492 $value = $3;
493 $value =~ s/\\(.)/$1/gs;
494 }
495
496 push @kv, lc $name, $value;
497
498 last unless /\G\s*;/gc;
499 }
500
501 last unless @kv;
502
503 my $name = shift @kv;
504 my %kv = (value => shift @kv, @kv);
505
506 if (exists $kv{"max-age"}) {
507 $kv{_expires} = $anow + delete $kv{"max-age"};
508 } elsif (exists $kv{expires}) {
509 $snow ||= parse_date ($date) || $anow;
510 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
511 } else {
512 delete $kv{_expires};
513 }
514
515 my $cdom;
516 my $cpath = (delete $kv{path}) || "/";
517
518 if (exists $kv{domain}) {
519 $cdom = delete $kv{domain};
520
521 $cdom =~ s/^\.?/./; # make sure it starts with a "."
522
523 next if $cdom =~ /\.$/;
524
525 # this is not rfc-like and not netscape-like. go figure.
526 my $ndots = $cdom =~ y/.//;
527 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
528 } else {
529 $cdom = $uhost;
530 }
531
532 # store it
533 $jar->{version} = 1;
534 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
535
536 redo if /\G\s*,/gc;
537 }
538}
539
369# continue to parse $_ for headers and place them into the arg 540# continue to parse $_ for headers and place them into the arg
370sub parse_hdr() { 541sub parse_hdr() {
371 my %hdr; 542 my %hdr;
372 543
373 # things seen, not parsed: 544 # things seen, not parsed:
435 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 606 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
436 607
437 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 608 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
438 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 609 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
439 610
440 my $uhost = $1; 611 my $uhost = lc $1;
441 $uport = $2 if defined $2; 612 $uport = $2 if defined $2;
442 613
443 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 614 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
444 unless exists $hdr{host}; 615 unless exists $hdr{host};
445 616
448 619
449 $upath =~ s%^/?%/%; 620 $upath =~ s%^/?%/%;
450 621
451 # cookie processing 622 # cookie processing
452 if (my $jar = $arg{cookie_jar}) { 623 if (my $jar = $arg{cookie_jar}) {
453 %$jar = () if $jar->{version} != 1; 624 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
454 625
455 my @cookie;
456
457 while (my ($chost, $v) = each %$jar) {
458 if ($chost =~ /^\./) {
459 next unless $chost eq substr $uhost, -length $chost;
460 } elsif ($chost =~ /\./) {
461 next unless $chost eq $uhost;
462 } else {
463 next;
464 }
465
466 while (my ($cpath, $v) = each %$v) {
467 next unless $cpath eq substr $upath, 0, length $cpath;
468
469 while (my ($k, $v) = each %$v) {
470 next if $uscheme ne "https" && exists $v->{secure};
471 my $value = $v->{value};
472 $value =~ s/([\\"])/\\$1/g;
473 push @cookie, "$k=\"$value\"";
474 }
475 }
476 }
477
478 $hdr{cookie} = join "; ", @cookie 626 $hdr{cookie} = join "; ", @$cookies
479 if @cookie; 627 if @$cookies;
480 } 628 }
481 629
482 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path 630 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
483 631
484 if ($proxy) { 632 if ($proxy) {
487 $rscheme = "http" unless defined $rscheme; 635 $rscheme = "http" unless defined $rscheme;
488 636
489 # don't support https requests over https-proxy transport, 637 # don't support https requests over https-proxy transport,
490 # can't be done with tls as spec'ed, unless you double-encrypt. 638 # can't be done with tls as spec'ed, unless you double-encrypt.
491 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 639 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
640
641 $rhost = lc $rhost;
642 $rscheme = lc $rscheme;
492 } else { 643 } else {
493 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 644 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
494 } 645 }
495 646
496 # leave out fragment and query string, just a heuristic 647 # leave out fragment and query string, just a heuristic
498 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
499 650
500 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
501 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
502 653
503 $hdr{connection} = "close TE"; #1.1 654 $hdr{connection} = "close Te"; #1.1
504 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
505 656
506 my %state = (connect_guard => 1); 657 my %state = (connect_guard => 1);
507 658
508 _get_slot $uhost, sub { 659 _get_slot $uhost, sub {
509 $state{slot_guard} = shift; 660 $state{slot_guard} = shift;
510 661
511 return unless $state{connect_guard}; 662 return unless $state{connect_guard};
663
664 my $ae_error = 595; # connecting
665
666 # handle actual, non-tunneled, request
667 my $handle_actual_request = sub {
668 $ae_error = 596; # request phase
669
670 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
671
672 # send request
673 $state{handle}->push_write (
674 "$method $rpath HTTP/1.1\015\012"
675 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
676 . "\015\012"
677 . (delete $arg{body})
678 );
679
680 # return if error occured during push_write()
681 return unless %state;
682
683 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
684
685 # status line and headers
686 $state{read_response} = sub {
687 for ("$_[1]") {
688 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
689
690 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
691 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
692
693 # 100 Continue handling
694 # should not happen as we don't send expect: 100-continue,
695 # but we handle it just in case.
696 # since we send the request body regardless, if we get an error
697 # we are out of-sync, which we currently do NOT handle correctly.
698 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
699 if $2 eq 100;
700
701 push @pseudo,
702 HTTPVersion => $1,
703 Status => $2,
704 Reason => $3,
705 ;
706
707 my $hdr = parse_hdr
708 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
709
710 %hdr = (%$hdr, @pseudo);
711 }
712
713 # redirect handling
714 # microsoft and other shitheads don't give a shit for following standards,
715 # try to support some common forms of broken Location headers.
716 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
717 $hdr{location} =~ s/^\.\/+//;
718
719 my $url = "$rscheme://$uhost:$uport";
720
721 unless ($hdr{location} =~ s/^\///) {
722 $url .= $upath;
723 $url =~ s/\/[^\/]*$//;
724 }
725
726 $hdr{location} = "$url/$hdr{location}";
727 }
728
729 my $redirect;
730
731 if ($recurse) {
732 my $status = $hdr{Status};
733
734 # industry standard is to redirect POST as GET for
735 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
736 # also, the UA should ask the user for 301 and 307 and POST,
737 # industry standard seems to be to simply follow.
738 # we go with the industry standard.
739 if ($status == 301 or $status == 302 or $status == 303) {
740 # HTTP/1.1 is unclear on how to mutate the method
741 $method = "GET" unless $method eq "HEAD";
742 $redirect = 1;
743 } elsif ($status == 307) {
744 $redirect = 1;
745 }
746 }
747
748 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
749 my $may_keep_alive = $_[3];
750
751 $state{handle}->destroy if $state{handle};
752 %state = ();
753
754 if (defined $_[1]) {
755 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
756 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
757 }
758
759 # set-cookie processing
760 if ($arg{cookie_jar}) {
761 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
762 }
763
764 if ($redirect && exists $hdr{location}) {
765 # we ignore any errors, as it is very common to receive
766 # Content-Length != 0 but no actual body
767 # we also access %hdr, as $_[1] might be an erro
768 http_request (
769 $method => $hdr{location},
770 %arg,
771 recurse => $recurse - 1,
772 Redirect => [$_[0], \%hdr],
773 $cb);
774 } else {
775 $cb->($_[0], \%hdr);
776 }
777 };
778
779 $ae_error = 597; # body phase
780
781 my $len = $hdr{"content-length"};
782
783 # body handling, many different code paths
784 # - no body expected
785 # - want_body_handle
786 # - te chunked
787 # - 2x length known (with or without on_body)
788 # - 2x length not known (with or without on_body)
789 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
790 $finish->(undef, 598 => "Request cancelled by on_header");
791 } elsif (
792 $hdr{Status} =~ /^(?:1..|204|205|304)$/
793 or $method eq "HEAD"
794 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
795 ) {
796 # no body
797 $finish->("", undef, undef, 1);
798
799 } elsif (!$redirect && $arg{want_body_handle}) {
800 $_[0]->on_eof (undef);
801 $_[0]->on_error (undef);
802 $_[0]->on_read (undef);
803
804 $finish->(delete $state{handle});
805
806 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
807 my $cl = 0;
808 my $body = undef;
809 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
810
811 $state{read_chunk} = sub {
812 $_[1] =~ /^([0-9a-fA-F]+)/
813 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
814
815 my $len = hex $1;
816
817 if ($len) {
818 $cl += $len;
819
820 $_[0]->push_read (chunk => $len, sub {
821 $on_body->($_[1], \%hdr)
822 or return $finish->(undef, 598 => "Request cancelled by on_body");
823
824 $_[0]->push_read (line => sub {
825 length $_[1]
826 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
827 $_[0]->push_read (line => $state{read_chunk});
828 });
829 });
830 } else {
831 $hdr{"content-length"} ||= $cl;
832
833 $_[0]->push_read (line => $qr_nlnl, sub {
834 if (length $_[1]) {
835 for ("$_[1]") {
836 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
837
838 my $hdr = parse_hdr
839 or return $finish->(undef, $ae_error => "Garbled response trailers");
840
841 %hdr = (%hdr, %$hdr);
842 }
843 }
844
845 $finish->($body, undef, undef, 1);
846 });
847 }
848 };
849
850 $_[0]->push_read (line => $state{read_chunk});
851
852 } elsif ($arg{on_body}) {
853 if (defined $len) {
854 $_[0]->on_read (sub {
855 $len -= length $_[0]{rbuf};
856
857 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
858 or return $finish->(undef, 598 => "Request cancelled by on_body");
859
860 $len > 0
861 or $finish->("", undef, undef, 1);
862 });
863 } else {
864 $_[0]->on_eof (sub {
865 $finish->("");
866 });
867 $_[0]->on_read (sub {
868 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
869 or $finish->(undef, 598 => "Request cancelled by on_body");
870 });
871 }
872 } else {
873 $_[0]->on_eof (undef);
874
875 if (defined $len) {
876 $_[0]->on_read (sub {
877 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
878 if $len <= length $_[0]{rbuf};
879 });
880 } else {
881 $_[0]->on_error (sub {
882 ($! == Errno::EPIPE || !$!)
883 ? $finish->(delete $_[0]{rbuf})
884 : $finish->(undef, $ae_error => $_[2]);
885 });
886 $_[0]->on_read (sub { });
887 }
888 }
889 };
890
891 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
892 };
512 893
513 my $connect_cb = sub { 894 my $connect_cb = sub {
514 $state{fh} = shift 895 $state{fh} = shift
515 or do { 896 or do {
516 my $err = "$!"; 897 my $err = "$!";
517 %state = (); 898 %state = ();
518 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 899 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
519 }; 900 };
520
521 pop; # free memory, save a tree
522 901
523 return unless delete $state{connect_guard}; 902 return unless delete $state{connect_guard};
524 903
525 # get handle 904 # get handle
526 $state{handle} = new AnyEvent::Handle 905 $state{handle} = new AnyEvent::Handle
529 tls_ctx => $arg{tls_ctx}, 908 tls_ctx => $arg{tls_ctx},
530 # these need to be reconfigured on keepalive handles 909 # these need to be reconfigured on keepalive handles
531 timeout => $timeout, 910 timeout => $timeout,
532 on_error => sub { 911 on_error => sub {
533 %state = (); 912 %state = ();
534 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 913 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
535 }, 914 },
536 on_eof => sub { 915 on_eof => sub {
537 %state = (); 916 %state = ();
538 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 917 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
539 }, 918 },
540 ; 919 ;
541 920
542 # limit the number of persistent connections 921 # limit the number of persistent connections
543 # keepalive not yet supported 922 # keepalive not yet supported
549# $hdr{connection} = "keep-alive"; 928# $hdr{connection} = "keep-alive";
550# } 929# }
551 930
552 $state{handle}->starttls ("connect") if $rscheme eq "https"; 931 $state{handle}->starttls ("connect") if $rscheme eq "https";
553 932
554 # handle actual, non-tunneled, request
555 my $handle_actual_request = sub {
556 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
557
558 # send request
559 $state{handle}->push_write (
560 "$method $rpath HTTP/1.1\015\012"
561 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
562 . "\015\012"
563 . (delete $arg{body})
564 );
565
566 # return if error occured during push_write()
567 return unless %state;
568
569 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
570
571 # status line and headers
572 $state{read_response} = sub {
573 for ("$_[1]") {
574 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
575
576 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc
577 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
578
579 # 100 Continue handling
580 # should not happen as we don't send expect: 100-continue,
581 # but we handle it just in case.
582 # since we send the request body regardless, if we get an error
583 # we are out of-sync, which we currently do NOT handle correctly.
584 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
585 if $2 eq 100;
586
587 push @pseudo,
588 HTTPVersion => $1,
589 Status => $2,
590 Reason => $3,
591 ;
592
593 my $hdr = parse_hdr
594 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
595
596 %hdr = (%$hdr, @pseudo);
597 }
598
599 # redirect handling
600 # microsoft and other shitheads don't give a shit for following standards,
601 # try to support some common forms of broken Location headers.
602 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
603 $hdr{location} =~ s/^\.\/+//;
604
605 my $url = "$rscheme://$uhost:$uport";
606
607 unless ($hdr{location} =~ s/^\///) {
608 $url .= $upath;
609 $url =~ s/\/[^\/]*$//;
610 }
611
612 $hdr{location} = "$url/$hdr{location}";
613 }
614
615 my $redirect;
616
617 if ($recurse) {
618 my $status = $hdr{Status};
619
620 # industry standard is to redirect POST as GET for
621 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
622 # also, the UA should ask the user for 301 and 307 and POST,
623 # industry standard seems to be to simply follow.
624 # we go with the industry standard.
625 if ($status == 301 or $status == 302 or $status == 303) {
626 # HTTP/1.1 is unclear on how to mutate the method
627 $method = "GET" unless $method eq "HEAD";
628 $redirect = 1;
629 } elsif ($status == 307) {
630 $redirect = 1;
631 }
632 }
633
634 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
635 my $keepalive = pop;
636
637 $state{handle}->destroy if $state{handle};
638 %state = ();
639
640 if (defined $_[1]) {
641 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
642 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
643 }
644
645 # set-cookie processing
646 if ($arg{cookie_jar}) {
647 for ($hdr{"set-cookie"}) {
648 # parse NAME=VALUE
649 my @kv;
650
651 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
652 my $name = $1;
653 my $value = $3;
654
655 unless ($value) {
656 $value = $2;
657 $value =~ s/\\(.)/$1/gs;
658 }
659
660 push @kv, $name => $value;
661
662 last unless /\G\s*;/gc;
663 }
664
665 last unless @kv;
666
667 my $name = shift @kv;
668 my %kv = (value => shift @kv, @kv);
669
670 my $cdom;
671 my $cpath = (delete $kv{path}) || "/";
672
673 if (exists $kv{domain}) {
674 $cdom = delete $kv{domain};
675
676 $cdom =~ s/^\.?/./; # make sure it starts with a "."
677
678 next if $cdom =~ /\.$/;
679
680 # this is not rfc-like and not netscape-like. go figure.
681 my $ndots = $cdom =~ y/.//;
682 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
683 } else {
684 $cdom = $uhost;
685 }
686
687 # store it
688 $arg{cookie_jar}{version} = 1;
689 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
690
691 redo if /\G\s*,/gc;
692 }
693 }
694
695 if ($redirect && exists $hdr{location}) {
696 # we ignore any errors, as it is very common to receive
697 # Content-Length != 0 but no actual body
698 # we also access %hdr, as $_[1] might be an erro
699 http_request (
700 $method => $hdr{location},
701 %arg,
702 recurse => $recurse - 1,
703 Redirect => [$_[0], \%hdr],
704 $cb);
705 } else {
706 $cb->($_[0], \%hdr);
707 }
708 };
709
710 my $len = $hdr{"content-length"};
711
712 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
713 $finish->(undef, 598 => "Request cancelled by on_header");
714 } elsif (
715 $hdr{Status} =~ /^(?:1..|204|205|304)$/
716 or $method eq "HEAD"
717 or (defined $len && !$len)
718 ) {
719 # no body
720 $finish->("", undef, undef, 1);
721 } else {
722 # body handling, many different code paths
723 # - no body expected
724 # - want_body_handle
725 # - te chunked
726 # - 2x length known (with or without on_body)
727 # - 2x length not known (with or without on_body)
728 if (!$redirect && $arg{want_body_handle}) {
729 $_[0]->on_eof (undef);
730 $_[0]->on_error (undef);
731 $_[0]->on_read (undef);
732
733 $finish->(delete $state{handle});
734
735 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
736 my $cl = 0;
737 my $body = undef;
738 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
739
740 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
741
742 my $read_chunk; $read_chunk = sub {
743 $_[1] =~ /^([0-9a-fA-F]+)/
744 or $finish->(undef, 599 => "Garbled chunked transfer encoding");
745
746 my $len = hex $1;
747
748 if ($len) {
749 $cl += $len;
750
751 $_[0]->push_read (chunk => $len, sub {
752 $on_body->($_[1], \%hdr)
753 or return $finish->(undef, 598 => "Request cancelled by on_body");
754
755 $_[0]->push_read (line => sub {
756 length $_[1]
757 and return $finish->(undef, 599 => "Garbled chunked transfer encoding");
758 $_[0]->push_read (line => $read_chunk);
759 });
760 });
761 } else {
762 $hdr{"content-length"} ||= $cl;
763
764 $_[0]->push_read (line => $qr_nlnl, sub {
765 if (length $_[1]) {
766 for ("$_[1]") {
767 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
768
769 my $hdr = parse_hdr
770 or return $finish->(undef, 599 => "Garbled response trailers");
771
772 %hdr = (%hdr, %$hdr);
773 }
774 }
775
776 $finish->($body, undef, undef, 1);
777 });
778 }
779 };
780
781 $_[0]->push_read (line => $read_chunk);
782
783 } elsif ($arg{on_body}) {
784 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
785
786 if ($len) {
787 $_[0]->on_read (sub {
788 $len -= length $_[0]{rbuf};
789
790 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
791 or return $finish->(undef, 598 => "Request cancelled by on_body");
792
793 $len > 0
794 or $finish->("", undef, undef, 1);
795 });
796 } else {
797 $_[0]->on_eof (sub {
798 $finish->("");
799 });
800 $_[0]->on_read (sub {
801 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
802 or $finish->(undef, 598 => "Request cancelled by on_body");
803 });
804 }
805 } else {
806 $_[0]->on_eof (undef);
807
808 if ($len) {
809 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
810 $_[0]->on_read (sub {
811 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
812 if $len <= length $_[0]{rbuf};
813 });
814 } else {
815 $_[0]->on_error (sub {
816 ($! == Errno::EPIPE || !$!)
817 ? $finish->(delete $_[0]{rbuf})
818 : $finish->(undef, 599 => $_[2]);
819 });
820 $_[0]->on_read (sub { });
821 }
822 }
823 }
824 };
825
826 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
827 };
828
829 # now handle proxy-CONNECT method 933 # now handle proxy-CONNECT method
830 if ($proxy && $uscheme eq "https") { 934 if ($proxy && $uscheme eq "https") {
831 # oh dear, we have to wrap it into a connect request 935 # oh dear, we have to wrap it into a connect request
832 936
833 # maybe re-use $uauthority with patched port? 937 # maybe re-use $uauthority with patched port?
834 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 938 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
835 $state{handle}->push_read (line => $qr_nlnl, sub { 939 $state{handle}->push_read (line => $qr_nlnl, sub {
836 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 940 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
837 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 941 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
838 942
839 if ($2 == 200) { 943 if ($2 == 200) {
840 $rpath = $upath; 944 $rpath = $upath;
841 &$handle_actual_request; 945 $handle_actual_request->();
842 } else { 946 } else {
843 %state = (); 947 %state = ();
844 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 948 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
845 } 949 }
846 }); 950 });
847 } else { 951 } else {
848 &$handle_actual_request; 952 $handle_actual_request->();
849 } 953 }
850 }; 954 };
851 955
852 my $tcp_connect = $arg{tcp_connect} 956 my $tcp_connect = $arg{tcp_connect}
853 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
854 958
855 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
856
857 }; 960 };
858 961
859 defined wantarray && AnyEvent::Util::guard { %state = () } 962 defined wantarray && AnyEvent::Util::guard { %state = () }
860} 963}
861 964
896string of the form C<http://host:port> (optionally C<https:...>), croaks 999string of the form C<http://host:port> (optionally C<https:...>), croaks
897otherwise. 1000otherwise.
898 1001
899To clear an already-set proxy, use C<undef>. 1002To clear an already-set proxy, use C<undef>.
900 1003
1004=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1005
1006Remove all cookies from the cookie jar that have been expired. If
1007C<$session_end> is given and true, then additionally remove all session
1008cookies.
1009
1010You should call this function (with a true C<$session_end>) before you
1011save cookies to disk, and you should call this function after loading them
1012again. If you have a long-running program you can additonally call this
1013function from time to time.
1014
1015A cookie jar is initially an empty hash-reference that is managed by this
1016module. It's format is subject to change, but currently it is like this:
1017
1018The key C<version> has to contain C<1>, otherwise the hash gets
1019emptied. All other keys are hostnames or IP addresses pointing to
1020hash-references. The key for these inner hash references is the
1021server path for which this cookie is meant, and the values are again
1022hash-references. The keys of those hash-references is the cookie name, and
1023the value, you guessed it, is another hash-reference, this time with the
1024key-value pairs from the cookie, except for C<expires> and C<max-age>,
1025which have been replaced by a C<_expires> key that contains the cookie
1026expiry timestamp.
1027
1028Here is an example of a cookie jar with a single cookie, so you have a
1029chance of understanding the above paragraph:
1030
1031 {
1032 version => 1,
1033 "10.0.0.1" => {
1034 "/" => {
1035 "mythweb_id" => {
1036 _expires => 1293917923,
1037 value => "ooRung9dThee3ooyXooM1Ohm",
1038 },
1039 },
1040 },
1041 }
1042
901=item $date = AnyEvent::HTTP::format_date $timestamp 1043=item $date = AnyEvent::HTTP::format_date $timestamp
902 1044
903Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1045Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
904Date (RFC 2616). 1046Date (RFC 2616).
905 1047
906=item $timestamp = AnyEvent::HTTP::parse_date $date 1048=item $timestamp = AnyEvent::HTTP::parse_date $date
907 1049
908Takes a HTTP Date (RFC 2616) and returns the corresponding POSIX 1050Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1051bunch of minor variations of those, and returns the corresponding POSIX
909timestamp, or C<undef> if the date cannot be parsed. 1052timestamp, or C<undef> if the date cannot be parsed.
910 1053
911=item $AnyEvent::HTTP::MAX_RECURSE 1054=item $AnyEvent::HTTP::MAX_RECURSE
912 1055
913The default value for the C<recurse> request parameter (default: C<10>). 1056The default value for the C<recurse> request parameter (default: C<10>).
953sub parse_date($) { 1096sub parse_date($) {
954 my ($date) = @_; 1097 my ($date) = @_;
955 1098
956 my ($d, $m, $y, $H, $M, $S); 1099 my ($d, $m, $y, $H, $M, $S);
957 1100
958 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1101 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
959 # RFC 822/1123, required by RFC 2616 1102 # RFC 822/1123, required by RFC 2616 (with " ")
1103 # cookie dates (with "-")
1104
960 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1105 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
961 1106
962 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1107 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
963 # RFC 850 1108 # RFC 850
964 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1109 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
965 1110
966 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1111 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
967 # ISO C's asctime 1112 # ISO C's asctime
968 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1113 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
969 } 1114 }
970 # other formats fail in the loop below 1115 # other formats fail in the loop below
971 1116

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines