ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.77 by root, Sat Jan 1 19:13:41 2011 UTC vs.
Revision 1.81 by root, Sun Jan 2 01:20:17 2011 UTC

132 132
133=item 595 - errors during connection etsbalishment, proxy handshake. 133=item 595 - errors during connection etsbalishment, proxy handshake.
134 134
135=item 596 - errors during TLS negotiation, request sending and header processing. 135=item 596 - errors during TLS negotiation, request sending and header processing.
136 136
137=item 597 - errors during body receive or processing. 137=item 597 - errors during body receiving or processing.
138 138
139=item 598 - user aborted request in C<on_header> or C<on_body>. 139=item 598 - user aborted request via C<on_header> or C<on_body>.
140 140
141=item 599 - other, usually nonretryable, errors (garbled URL etc.). 141=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142 142
143=back 143=back
144 144
196=item cookie_jar => $hash_ref 196=item cookie_jar => $hash_ref
197 197
198Passing this parameter enables (simplified) cookie-processing, loosely 198Passing this parameter enables (simplified) cookie-processing, loosely
199based on the original netscape specification. 199based on the original netscape specification.
200 200
201The C<$hash_ref> must be an (initially empty) hash reference which will 201The C<$hash_ref> must be an (initially empty) hash reference which
202get updated automatically. It is possible to save the cookie jar to 202will get updated automatically. It is possible to save the cookie jar
203persistent storage with something like JSON or Storable, but this is not 203to persistent storage with something like JSON or Storable - see the
204recommended, as session-only cookies might survive longer than expected. 204C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
205expired or session-only cookies, and also for documentation on the format
206of the cookie jar.
205 207
206Note that this cookie implementation is not meant to be complete. If 208Note that this cookie implementation is not meant to be complete. If
207you want complete cookie management you have to do that on your 209you want complete cookie management you have to do that on your
208own. C<cookie_jar> is meant as a quick fix to get some cookie-using sites 210own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
209working. Cookies are a privacy disaster, do not use them unless required 211working. Cookies are a privacy disaster, do not use them unless required
210to. 212to.
211 213
212When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 214When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
213headers will be set and handled by this module, otherwise they will be 215headers will be set and handled by this module, otherwise they will be
378 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 380 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
379 381
380 _slot_schedule $_[0]; 382 _slot_schedule $_[0];
381} 383}
382 384
385#############################################################################
386
387# expire cookies
388sub cookie_jar_expire($;$) {
389 my ($jar, $session_end) = @_;
390
391 %$jar = () if $jar->{version} != 1;
392
393 my $anow = AE::now;
394
395 while (my ($chost, $paths) = each %$jar) {
396 next unless ref $paths;
397
398 while (my ($cpath, $cookies) = each %$paths) {
399 while (my ($cookie, $kv) = each %$cookies) {
400 if (exists $kv->{_expires}) {
401 delete $cookies->{$cookie}
402 if $anow > $kv->{_expires};
403 } elsif ($session_end) {
404 delete $cookies->{$cookie};
405 }
406 }
407
408 delete $paths->{$cpath}
409 unless %$cookies;
410 }
411
412 delete $jar->{$chost}
413 unless %$paths;
414 }
415}
416
383# extract cookies from jar 417# extract cookies from jar
384sub cookie_jar_extract($$$$) { 418sub cookie_jar_extract($$$$) {
385 my ($jar, $uscheme, $uhost, $upath) = @_; 419 my ($jar, $uscheme, $uhost, $upath) = @_;
386 420
387 %$jar = () if $jar->{version} != 1; 421 %$jar = () if $jar->{version} != 1;
403 next unless $cpath eq substr $upath, 0, length $cpath; 437 next unless $cpath eq substr $upath, 0, length $cpath;
404 438
405 while (my ($cookie, $kv) = each %$cookies) { 439 while (my ($cookie, $kv) = each %$cookies) {
406 next if $uscheme ne "https" && exists $kv->{secure}; 440 next if $uscheme ne "https" && exists $kv->{secure};
407 441
408 if (exists $kv->{expires}) { 442 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
409 if (AE::now > parse_date ($kv->{expires})) {
410 delete $cookies->{$cookie}; 443 delete $cookies->{$cookie};
411 next; 444 next;
412 }
413 } 445 }
414 446
415 my $value = $kv->{value}; 447 my $value = $kv->{value};
416 448
417 if ($value =~ /[=;,[:space:]]/) { 449 if ($value =~ /[=;,[:space:]]/) {
426 458
427 \@cookies 459 \@cookies
428} 460}
429 461
430# parse set_cookie header into jar 462# parse set_cookie header into jar
431sub cookie_jar_set_cookie($$$) { 463sub cookie_jar_set_cookie($$$$) {
432 my ($jar, $set_cookie, $uhost) = @_; 464 my ($jar, $set_cookie, $uhost, $date) = @_;
465
466 my $anow = int AE::now;
467 my $snow; # server-now
433 468
434 for ($set_cookie) { 469 for ($set_cookie) {
435 # parse NAME=VALUE 470 # parse NAME=VALUE
436 my @kv; 471 my @kv;
437 472
473 # expires is not http-compliant in the original cookie-spec,
474 # we support the official date format and some extensions
438 while ( 475 while (
439 m{ 476 m{
440 \G\s* 477 \G\s*
441 (?: 478 (?:
442 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) 479 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
443 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 480 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
444 ) 481 )
445 }gcxsi 482 }gcxsi
446 ) { 483 ) {
447 my $name = $2; 484 my $name = $2;
465 last unless @kv; 502 last unless @kv;
466 503
467 my $name = shift @kv; 504 my $name = shift @kv;
468 my %kv = (value => shift @kv, @kv); 505 my %kv = (value => shift @kv, @kv);
469 506
470 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
471 if exists $kv{"max-age"}; 507 if (exists $kv{"max-age"}) {
508 $kv{_expires} = $anow + delete $kv{"max-age"};
509 } elsif (exists $kv{expires}) {
510 $snow ||= parse_date ($date) || $anow;
511 $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
512 } else {
513 delete $kv{_expires};
514 }
472 515
473 my $cdom; 516 my $cdom;
474 my $cpath = (delete $kv{path}) || "/"; 517 my $cpath = (delete $kv{path}) || "/";
475 518
476 if (exists $kv{domain}) { 519 if (exists $kv{domain}) {
750 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; 793 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
751 } 794 }
752 795
753 # set-cookie processing 796 # set-cookie processing
754 if ($arg{cookie_jar}) { 797 if ($arg{cookie_jar}) {
755 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost; 798 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
756 } 799 }
757 800
758 if ($redirect && exists $hdr{location}) { 801 if ($redirect && exists $hdr{location}) {
759 # we ignore any errors, as it is very common to receive 802 # we ignore any errors, as it is very common to receive
760 # Content-Length != 0 but no actual body 803 # Content-Length != 0 but no actual body
800 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { 843 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
801 my $cl = 0; 844 my $cl = 0;
802 my $body = undef; 845 my $body = undef;
803 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 846 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
804 847
805 my $read_chunk; $read_chunk = sub { 848 $state{read_chunk} = sub {
806 $_[1] =~ /^([0-9a-fA-F]+)/ 849 $_[1] =~ /^([0-9a-fA-F]+)/
807 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); 850 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
808 851
809 my $len = hex $1; 852 my $len = hex $1;
810 853
816 or return $finish->(undef, 598 => "Request cancelled by on_body"); 859 or return $finish->(undef, 598 => "Request cancelled by on_body");
817 860
818 $_[0]->push_read (line => sub { 861 $_[0]->push_read (line => sub {
819 length $_[1] 862 length $_[1]
820 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); 863 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
821 $_[0]->push_read (line => $read_chunk); 864 $_[0]->push_read (line => $state{read_chunk});
822 }); 865 });
823 }); 866 });
824 } else { 867 } else {
825 $hdr{"content-length"} ||= $cl; 868 $hdr{"content-length"} ||= $cl;
826 869
839 $finish->($body, undef, undef, 1); 882 $finish->($body, undef, undef, 1);
840 }); 883 });
841 } 884 }
842 }; 885 };
843 886
844 $_[0]->push_read (line => $read_chunk); 887 $_[0]->push_read (line => $state{read_chunk});
845 888
846 } elsif ($arg{on_body}) { 889 } elsif ($arg{on_body}) {
847 if ($len) { 890 if ($len) {
848 $_[0]->on_read (sub { 891 $_[0]->on_read (sub {
849 $len -= length $_[0]{rbuf}; 892 $len -= length $_[0]{rbuf};
896 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 939 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
897 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 940 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
898 941
899 if ($2 == 200) { 942 if ($2 == 200) {
900 $rpath = $upath; 943 $rpath = $upath;
901 &$handle_actual_request; 944 $handle_actual_request->();
902 } else { 945 } else {
903 %state = (); 946 %state = ();
904 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 947 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
905 } 948 }
906 }); 949 });
907 } else { 950 } else {
908 &$handle_actual_request; 951 $handle_actual_request->();
909 } 952 }
910 }; 953 };
911 954
912 my $tcp_connect = $arg{tcp_connect} 955 my $tcp_connect = $arg{tcp_connect}
913 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 956 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
956string of the form C<http://host:port> (optionally C<https:...>), croaks 999string of the form C<http://host:port> (optionally C<https:...>), croaks
957otherwise. 1000otherwise.
958 1001
959To clear an already-set proxy, use C<undef>. 1002To clear an already-set proxy, use C<undef>.
960 1003
1004=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1005
1006Remove all cookies from the cookie jar that have been expired. If
1007C<$session_end> is given and true, then additionally remove all session
1008cookies.
1009
1010You should call this function (with a true C<$session_end>) before you
1011save cookies to disk, and you should call this function after loading them
1012again. If you have a long-running program you can additonally call this
1013function from time to time.
1014
1015A cookie jar is initially an empty hash-reference that is managed by this
1016module. It's format is subject to change, but currently it is like this:
1017
1018The key C<version> has to contain C<1>, otherwise the hash gets
1019emptied. All other keys are hostnames or IP addresses pointing to
1020hash-references. The key for these inner hash references is the
1021server path for which this cookie is meant, and the values are again
1022hash-references. The keys of those hash-references is the cookie name, and
1023the value, you guessed it, is another hash-reference, this time with the
1024key-value pairs from the cookie, except for C<expires> and C<max-age>,
1025which have been replaced by a C<_expires> key that contains the cookie
1026expiry timestamp.
1027
1028Here is an example of a cookie jar with a single cookie, so you have a
1029chance of understanding the above paragraph:
1030
1031 {
1032 version => 1,
1033 "10.0.0.1" => {
1034 "/" => {
1035 "mythweb_id" => {
1036 _expires => 1293917923,
1037 value => "ooRung9dThee3ooyXooM1Ohm",
1038 },
1039 },
1040 },
1041 }
1042
961=item $date = AnyEvent::HTTP::format_date $timestamp 1043=item $date = AnyEvent::HTTP::format_date $timestamp
962 1044
963Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 1045Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
964Date (RFC 2616). 1046Date (RFC 2616).
965 1047
966=item $timestamp = AnyEvent::HTTP::parse_date $date 1048=item $timestamp = AnyEvent::HTTP::parse_date $date
967 1049
968Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 1050Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
969returns the corresponding POSIX timestamp, or C<undef> if the date cannot 1051bunch of minor variations of those, and returns the corresponding POSIX
970be parsed. 1052timestamp, or C<undef> if the date cannot be parsed.
971 1053
972=item $AnyEvent::HTTP::MAX_RECURSE 1054=item $AnyEvent::HTTP::MAX_RECURSE
973 1055
974The default value for the C<recurse> request parameter (default: C<10>). 1056The default value for the C<recurse> request parameter (default: C<10>).
975 1057
1014sub parse_date($) { 1096sub parse_date($) {
1015 my ($date) = @_; 1097 my ($date) = @_;
1016 1098
1017 my ($d, $m, $y, $H, $M, $S); 1099 my ($d, $m, $y, $H, $M, $S);
1018 1100
1019 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1101 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1020 # RFC 822/1123, required by RFC 2616 (with " ") 1102 # RFC 822/1123, required by RFC 2616 (with " ")
1021 # cookie dates (with "-") 1103 # cookie dates (with "-")
1022 1104
1023 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1105 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1024 1106
1025 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1107 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1026 # RFC 850 1108 # RFC 850
1027 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1109 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1028 1110
1029 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1111 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1030 # ISO C's asctime 1112 # ISO C's asctime
1031 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1113 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1032 } 1114 }
1033 # other formats fail in the loop below 1115 # other formats fail in the loop below
1034 1116

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines