ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.69 by root, Fri Dec 31 19:32:47 2010 UTC vs.
Revision 1.76 by root, Sat Jan 1 02:20:49 2011 UTC

183 183
184Passing this parameter enables (simplified) cookie-processing, loosely 184Passing this parameter enables (simplified) cookie-processing, loosely
185based on the original netscape specification. 185based on the original netscape specification.
186 186
187The C<$hash_ref> must be an (initially empty) hash reference which will 187The C<$hash_ref> must be an (initially empty) hash reference which will
188get updated automatically. It is possible to save the cookie_jar to 188get updated automatically. It is possible to save the cookie jar to
189persistent storage with something like JSON or Storable, but this is not 189persistent storage with something like JSON or Storable, but this is not
190recommended, as expiry times are currently being ignored. 190recommended, as session-only cookies might survive longer than expected.
191 191
192Note that this cookie implementation is not of very high quality, nor 192Note that this cookie implementation is not meant to be complete. If
193meant to be complete. If you want complete cookie management you have to 193you want complete cookie management you have to do that on your
194do that on your own. C<cookie_jar> is meant as a quick fix to get some 194own. C<cookie_jar> is meant as a quick fix to get some cookie-using sites
195cookie-using sites working. Cookies are a privacy disaster, do not use 195working. Cookies are a privacy disaster, do not use them unless required
196them unless required to. 196to.
197 197
198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> 198When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
199headers will be ste and handled by this module, otherwise they will be 199headers will be set and handled by this module, otherwise they will be
200left untouched. 200left untouched.
201 201
202=item tls_ctx => $scheme | $tls_ctx 202=item tls_ctx => $scheme | $tls_ctx
203 203
204Specifies the AnyEvent::TLS context to be used for https connections. This 204Specifies the AnyEvent::TLS context to be used for https connections. This
364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 364 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
365 365
366 _slot_schedule $_[0]; 366 _slot_schedule $_[0];
367} 367}
368 368
369# extract cookies from jar
370sub cookie_jar_extract($$$$) {
371 my ($jar, $uscheme, $uhost, $upath) = @_;
372
373 %$jar = () if $jar->{version} != 1;
374
375 my @cookies;
376
377 while (my ($chost, $paths) = each %$jar) {
378 next unless ref $paths;
379
380 if ($chost =~ /^\./) {
381 next unless $chost eq substr $uhost, -length $chost;
382 } elsif ($chost =~ /\./) {
383 next unless $chost eq $uhost;
384 } else {
385 next;
386 }
387
388 while (my ($cpath, $cookies) = each %$paths) {
389 next unless $cpath eq substr $upath, 0, length $cpath;
390
391 while (my ($cookie, $kv) = each %$cookies) {
392 next if $uscheme ne "https" && exists $kv->{secure};
393
394 if (exists $kv->{expires}) {
395 if (AE::now > parse_date ($kv->{expires})) {
396 delete $cookies->{$cookie};
397 next;
398 }
399 }
400
401 my $value = $kv->{value};
402
403 if ($value =~ /[=;,[:space:]]/) {
404 $value =~ s/([\\"])/\\$1/g;
405 $value = "\"$value\"";
406 }
407
408 push @cookies, "$cookie=$value";
409 }
410 }
411 }
412
413 \@cookies
414}
415
416# parse set_cookie header into jar
417sub cookie_jar_set_cookie($$$) {
418 my ($jar, $set_cookie, $uhost) = @_;
419
420 for ($set_cookie) {
421 # parse NAME=VALUE
422 my @kv;
423
424 while (
425 m{
426 \G\s*
427 (?:
428 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+)
429 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
430 )
431 }gcxsi
432 ) {
433 my $name = $2;
434 my $value = $4;
435
436 unless (defined $name) {
437 # expires
438 $name = "expires";
439 $value = $1;
440 } elsif (!defined $value) {
441 # quoted
442 $value = $3;
443 $value =~ s/\\(.)/$1/gs;
444 }
445
446 push @kv, lc $name, $value;
447
448 last unless /\G\s*;/gc;
449 }
450
451 last unless @kv;
452
453 my $name = shift @kv;
454 my %kv = (value => shift @kv, @kv);
455
456 $kv{expires} ||= format_date (AE::now + $kv{"max-age"})
457 if exists $kv{"max-age"};
458
459 my $cdom;
460 my $cpath = (delete $kv{path}) || "/";
461
462 if (exists $kv{domain}) {
463 $cdom = delete $kv{domain};
464
465 $cdom =~ s/^\.?/./; # make sure it starts with a "."
466
467 next if $cdom =~ /\.$/;
468
469 # this is not rfc-like and not netscape-like. go figure.
470 my $ndots = $cdom =~ y/.//;
471 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
472 } else {
473 $cdom = $uhost;
474 }
475
476 # store it
477 $jar->{version} = 1;
478 $jar->{$cdom}{$cpath}{$name} = \%kv;
479
480 redo if /\G\s*,/gc;
481 }
482}
483
369# continue to parse $_ for headers and place them into the arg 484# continue to parse $_ for headers and place them into the arg
370sub parse_hdr() { 485sub parse_hdr() {
371 my %hdr; 486 my %hdr;
372 487
373 # things seen, not parsed: 488 # things seen, not parsed:
448 563
449 $upath =~ s%^/?%/%; 564 $upath =~ s%^/?%/%;
450 565
451 # cookie processing 566 # cookie processing
452 if (my $jar = $arg{cookie_jar}) { 567 if (my $jar = $arg{cookie_jar}) {
453 %$jar = () if $jar->{version} != 1; 568 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
454 569
455 my @cookie;
456
457 while (my ($chost, $v) = each %$jar) {
458 if ($chost =~ /^\./) {
459 next unless $chost eq substr $uhost, -length $chost;
460 } elsif ($chost =~ /\./) {
461 next unless $chost eq $uhost;
462 } else {
463 next;
464 }
465
466 while (my ($cpath, $v) = each %$v) {
467 next unless $cpath eq substr $upath, 0, length $cpath;
468
469 while (my ($k, $v) = each %$v) {
470 next if $uscheme ne "https" && exists $v->{secure};
471 my $value = $v->{value};
472 $value =~ s/([\\"])/\\$1/g;
473 push @cookie, "$k=\"$value\"";
474 }
475 }
476 }
477
478 $hdr{cookie} = join "; ", @cookie 570 $hdr{cookie} = join "; ", @$cookies
479 if @cookie; 571 if @$cookies;
480 } 572 }
481 573
482 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path 574 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
483 575
484 if ($proxy) { 576 if ($proxy) {
515 or do { 607 or do {
516 my $err = "$!"; 608 my $err = "$!";
517 %state = (); 609 %state = ();
518 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 610 return $cb->(undef, { @pseudo, Status => 599, Reason => $err });
519 }; 611 };
520
521 pop; # free memory, save a tree
522 612
523 return unless delete $state{connect_guard}; 613 return unless delete $state{connect_guard};
524 614
525 # get handle 615 # get handle
526 $state{handle} = new AnyEvent::Handle 616 $state{handle} = new AnyEvent::Handle
571 # status line and headers 661 # status line and headers
572 $state{read_response} = sub { 662 $state{read_response} = sub {
573 for ("$_[1]") { 663 for ("$_[1]") {
574 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 664 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
575 665
576 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc 666 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
577 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 667 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
578 668
579 # 100 Continue handling 669 # 100 Continue handling
580 # should not happen as we don't send expect: 100-continue, 670 # should not happen as we don't send expect: 100-continue,
581 # but we handle it just in case. 671 # but we handle it just in case.
616 706
617 if ($recurse) { 707 if ($recurse) {
618 my $status = $hdr{Status}; 708 my $status = $hdr{Status};
619 709
620 # industry standard is to redirect POST as GET for 710 # industry standard is to redirect POST as GET for
621 # 301, 302 and 303, in contrast to http/1.0 and 1.1. 711 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
622 # also, the UA should ask the user for 301 and 307 and POST, 712 # also, the UA should ask the user for 301 and 307 and POST,
623 # industry standard seems to be to simply follow. 713 # industry standard seems to be to simply follow.
624 # we go with the industry standard. 714 # we go with the industry standard.
625 if ($status == 301 or $status == 302 or $status == 303) { 715 if ($status == 301 or $status == 302 or $status == 303) {
626 # HTTP/1.1 is unclear on how to mutate the method 716 # HTTP/1.1 is unclear on how to mutate the method
630 $redirect = 1; 720 $redirect = 1;
631 } 721 }
632 } 722 }
633 723
634 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 724 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
635 my $keepalive = pop; 725 my $may_keep_alive = $_[3];
636 726
637 $state{handle}->destroy if $state{handle}; 727 $state{handle}->destroy if $state{handle};
638 %state = (); 728 %state = ();
639 729
640 if (defined $_[1]) { 730 if (defined $_[1]) {
642 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; 732 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
643 } 733 }
644 734
645 # set-cookie processing 735 # set-cookie processing
646 if ($arg{cookie_jar}) { 736 if ($arg{cookie_jar}) {
647 for ($hdr{"set-cookie"}) { 737 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost;
648 # parse NAME=VALUE
649 my @kv;
650
651 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
652 my $name = $1;
653 my $value = $3;
654
655 unless ($value) {
656 $value = $2;
657 $value =~ s/\\(.)/$1/gs;
658 }
659
660 push @kv, $name => $value;
661
662 last unless /\G\s*;/gc;
663 }
664
665 last unless @kv;
666
667 my $name = shift @kv;
668 my %kv = (value => shift @kv, @kv);
669
670 my $cdom;
671 my $cpath = (delete $kv{path}) || "/";
672
673 if (exists $kv{domain}) {
674 $cdom = delete $kv{domain};
675
676 $cdom =~ s/^\.?/./; # make sure it starts with a "."
677
678 next if $cdom =~ /\.$/;
679
680 # this is not rfc-like and not netscape-like. go figure.
681 my $ndots = $cdom =~ y/.//;
682 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
683 } else {
684 $cdom = $uhost;
685 }
686
687 # store it
688 $arg{cookie_jar}{version} = 1;
689 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
690
691 redo if /\G\s*,/gc;
692 }
693 } 738 }
694 739
695 if ($redirect && exists $hdr{location}) { 740 if ($redirect && exists $hdr{location}) {
696 # we ignore any errors, as it is very common to receive 741 # we ignore any errors, as it is very common to receive
697 # Content-Length != 0 but no actual body 742 # Content-Length != 0 but no actual body
903Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 948Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
904Date (RFC 2616). 949Date (RFC 2616).
905 950
906=item $timestamp = AnyEvent::HTTP::parse_date $date 951=item $timestamp = AnyEvent::HTTP::parse_date $date
907 952
908Takes a HTTP Date (RFC 2616) and returns the corresponding POSIX 953Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and
909timestamp, or C<undef> if the date cannot be parsed. 954returns the corresponding POSIX timestamp, or C<undef> if the date cannot
955be parsed.
910 956
911=item $AnyEvent::HTTP::MAX_RECURSE 957=item $AnyEvent::HTTP::MAX_RECURSE
912 958
913The default value for the C<recurse> request parameter (default: C<10>). 959The default value for the C<recurse> request parameter (default: C<10>).
914 960
953sub parse_date($) { 999sub parse_date($) {
954 my ($date) = @_; 1000 my ($date) = @_;
955 1001
956 my ($d, $m, $y, $H, $M, $S); 1002 my ($d, $m, $y, $H, $M, $S);
957 1003
958 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1004 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
959 # RFC 822/1123, required by RFC 2616 1005 # RFC 822/1123, required by RFC 2616 (with " ")
1006 # cookie dates (with "-")
1007
960 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1008 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
961 1009
962 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1010 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
963 # RFC 850 1011 # RFC 850
964 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1012 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines