ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.56 by root, Mon Sep 6 05:30:54 2010 UTC vs.
Revision 1.58 by root, Sun Nov 14 20:23:00 2010 UTC

48use AnyEvent::Socket (); 48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 49use AnyEvent::Handle ();
50 50
51use base Exporter::; 51use base Exporter::;
52 52
53our $VERSION = '1.45'; 53our $VERSION = '1.46';
54 54
55our @EXPORT = qw(http_get http_post http_head http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
56 56
57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
92must be an absolute http or https URL. 92must be an absolute http or https URL.
93 93
94When called in void context, nothing is returned. In other contexts, 94When called in void context, nothing is returned. In other contexts,
95C<http_request> returns a "cancellation guard" - you have to keep the 95C<http_request> returns a "cancellation guard" - you have to keep the
96object at least alive until the callback get called. If the object gets 96object at least alive until the callback get called. If the object gets
97destroyed before the callbakc is called, the request will be cancelled. 97destroyed before the callback is called, the request will be cancelled.
98 98
99The callback will be called with the response body data as first argument 99The callback will be called with the response body data as first argument
100(or C<undef> if an error occured), and a hash-ref with response headers as 100(or C<undef> if an error occured), and a hash-ref with response headers as
101second argument. 101second argument.
102 102
338 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 338 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
339 339
340 _slot_schedule $_[0]; 340 _slot_schedule $_[0];
341} 341}
342 342
343our $qr_nl = qr{\015?\012};
344our $qr_nlnl = qr{(?<![^\012])\015?\012}; 343our $qr_nlnl = qr{(?<![^\012])\015?\012};
345 344
346our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 }; 345our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
347our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" }; 346our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
348 347
515 # return if error occured during push_write() 514 # return if error occured during push_write()
516 return unless %state; 515 return unless %state;
517 516
518 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 517 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
519 518
520 # status line 519 # status line and headers
521 $state{handle}->push_read (line => $qr_nl, sub { 520 $state{handle}->push_read (line => $qr_nlnl, sub {
522 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
523 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response ($_[1])", @pseudo }));
524
525 push @pseudo,
526 HTTPVersion => $1,
527 Status => $2,
528 Reason => $3,
529 ;
530
531 # headers, could be optimized a bit
532 $state{handle}->unshift_read (line => $qr_nlnl, sub {
533 for ("$_[1]") { 521 for ("$_[1]") {
534 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 522 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
535 523
524 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc
525 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo }));
526
527 push @pseudo,
528 HTTPVersion => $1,
529 Status => $2,
530 Reason => $3,
531 ;
532
536 # things seen, not parsed: 533 # things seen, not parsed:
537 # p3pP="NON CUR OTPi OUR NOR UNI" 534 # p3pP="NON CUR OTPi OUR NOR UNI"
538 535
539 $hdr{lc $1} .= ",$2" 536 $hdr{lc $1} .= ",$2"
540 while /\G 537 while /\G
541 ([^:\000-\037]*): 538 ([^:\000-\037]*):
542 [\011\040]* 539 [\011\040]*
543 ((?: [^\012]+ | \012[\011\040] )*) 540 ((?: [^\012]+ | \012[\011\040] )*)
544 \012 541 \012
545 /gxc; 542 /gxc;
546 543
547 /\G$/ 544 /\G$/
548 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", @pseudo })); 545 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", @pseudo }));
546 }
547
548 # remove the "," prefix we added to all headers above
549 substr $_, 0, 1, ""
550 for values %hdr;
551
552 # patch in all pseudo headers
553 %hdr = (%hdr, @pseudo);
554
555 # redirect handling
556 # microsoft and other shitheads don't give a shit for following standards,
557 # try to support some common forms of broken Location headers.
558 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
559 $hdr{location} =~ s/^\.\/+//;
560
561 my $url = "$rscheme://$uhost:$uport";
562
563 unless ($hdr{location} =~ s/^\///) {
564 $url .= $upath;
565 $url =~ s/\/[^\/]*$//;
549 } 566 }
550 567
551 # remove the "," prefix we added to all headers above
552 substr $_, 0, 1, ""
553 for values %hdr;
554
555 # patch in all pseudo headers
556 %hdr = (%hdr, @pseudo);
557
558 # redirect handling
559 # microsoft and other shitheads don't give a shit for following standards,
560 # try to support some common forms of broken Location headers.
561 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
562 $hdr{location} =~ s/^\.\/+//;
563
564 my $url = "$rscheme://$uhost:$uport";
565
566 unless ($hdr{location} =~ s/^\///) {
567 $url .= $upath;
568 $url =~ s/\/[^\/]*$//;
569 }
570
571 $hdr{location} = "$url/$hdr{location}"; 568 $hdr{location} = "$url/$hdr{location}";
569 }
570
571 my $redirect;
572
573 if ($recurse) {
574 my $status = $hdr{Status};
575
576 # industry standard is to redirect POST as GET for
577 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
578 # also, the UA should ask the user for 301 and 307 and POST,
579 # industry standard seems to be to simply follow.
580 # we go with the industry standard.
581 if ($status == 301 or $status == 302 or $status == 303) {
582 # HTTP/1.1 is unclear on how to mutate the method
583 $method = "GET" unless $method eq "HEAD";
584 $redirect = 1;
585 } elsif ($status == 307) {
586 $redirect = 1;
572 } 587 }
588 }
573 589
574 my $redirect; 590 my $finish = sub {
591 $state{handle}->destroy if $state{handle};
592 %state = ();
575 593
594 # set-cookie processing
576 if ($recurse) { 595 if ($arg{cookie_jar}) {
577 my $status = $hdr{Status}; 596 for ($_[1]{"set-cookie"}) {
597 # parse NAME=VALUE
598 my @kv;
578 599
579 if (($status == 301 || $status == 302) && $method ne "POST") { 600 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
580 # apparently, mozilla et al. just change POST to GET here
581 # more research is needed before we do the same
582 $redirect = 1; 601 my $name = $1;
583 } elsif ($status == 303) { 602 my $value = $3;
584 # even http/1.1 is unclear on how to mutate the method 603
585 $method = "GET" unless $method eq "HEAD"; 604 unless ($value) {
605 $value = $2;
606 $value =~ s/\\(.)/$1/gs;
607 }
608
609 push @kv, $name => $value;
610
611 last unless /\G\s*;/gc;
612 }
613
614 last unless @kv;
615
616 my $name = shift @kv;
617 my %kv = (value => shift @kv, @kv);
618
619 my $cdom;
620 my $cpath = (delete $kv{path}) || "/";
621
622 if (exists $kv{domain}) {
623 $cdom = delete $kv{domain};
624
625 $cdom =~ s/^\.?/./; # make sure it starts with a "."
626
627 next if $cdom =~ /\.$/;
628
629 # this is not rfc-like and not netscape-like. go figure.
630 my $ndots = $cdom =~ y/.//;
631 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
632 } else {
633 $cdom = $uhost;
634 }
635
586 $redirect = 1; 636 # store it
587 } elsif ($status == 307 && $method =~ /^(?:GET|HEAD)$/) { 637 $arg{cookie_jar}{version} = 1;
588 $redirect = 1; 638 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
639
640 redo if /\G\s*,/gc;
589 } 641 }
590 } 642 }
591 643
592 my $finish = sub { 644 if ($redirect && exists $hdr{location}) {
593 $state{handle}->destroy if $state{handle}; 645 # we ignore any errors, as it is very common to receive
594 %state = (); 646 # Content-Length != 0 but no actual body
595 647 # we also access %hdr, as $_[1] might be an erro
596 # set-cookie processing 648 http_request (
597 if ($arg{cookie_jar}) { 649 $method => $hdr{location},
598 for ($_[1]{"set-cookie"}) {
599 # parse NAME=VALUE
600 my @kv;
601
602 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
603 my $name = $1;
604 my $value = $3;
605
606 unless ($value) {
607 $value = $2;
608 $value =~ s/\\(.)/$1/gs;
609 } 650 %arg,
651 recurse => $recurse - 1,
652 Redirect => \@_,
653 $cb);
654 } else {
655 $cb->($_[0], $_[1]);
656 }
657 };
610 658
611 push @kv, $name => $value; 659 my $len = $hdr{"content-length"};
612 660
613 last unless /\G\s*;/gc; 661 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
662 $finish->(undef, { Status => 598, Reason => "Request cancelled by on_header", @pseudo });
663 } elsif (
664 $hdr{Status} =~ /^(?:1..|[23]04)$/
665 or $method eq "HEAD"
666 or (defined $len && !$len)
667 ) {
668 # no body
669 $finish->("", \%hdr);
670 } else {
671 # body handling, four different code paths
672 # for want_body_handle, on_body (2x), normal (2x)
673 # we might read too much here, but it does not matter yet (no pers. connections)
674 if (!$redirect && $arg{want_body_handle}) {
675 $_[0]->on_eof (undef);
676 $_[0]->on_error (undef);
677 $_[0]->on_read (undef);
678
679 $finish->(delete $state{handle}, \%hdr);
680
681 } elsif ($arg{on_body}) {
682 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
683 if ($len) {
684 $_[0]->on_eof (undef);
685 $_[0]->on_read (sub {
686 $len -= length $_[0]{rbuf};
687
688 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
689 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
690
614 } 691 $len > 0
615 692 or $finish->("", \%hdr);
616 last unless @kv;
617
618 my $name = shift @kv;
619 my %kv = (value => shift @kv, @kv);
620
621 my $cdom;
622 my $cpath = (delete $kv{path}) || "/";
623
624 if (exists $kv{domain}) {
625 $cdom = delete $kv{domain};
626
627 $cdom =~ s/^\.?/./; # make sure it starts with a "."
628
629 next if $cdom =~ /\.$/;
630
631 # this is not rfc-like and not netscape-like. go figure.
632 my $ndots = $cdom =~ y/.//;
633 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
634 } else {
635 $cdom = $uhost;
636 }
637
638 # store it
639 $arg{cookie_jar}{version} = 1;
640 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
641
642 redo if /\G\s*,/gc;
643 } 693 });
694 } else {
695 $_[0]->on_eof (sub {
696 $finish->("", \%hdr);
697 });
698 $_[0]->on_read (sub {
699 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
700 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
701 });
644 } 702 }
703 } else {
704 $_[0]->on_eof (undef);
645 705
646 if ($redirect && exists $hdr{location}) { 706 if ($len) {
647 # we ignore any errors, as it is very common to receive 707 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
648 # Content-Length != 0 but no actual body 708 $_[0]->on_read (sub {
649 # we also access %hdr, as $_[1] might be an erro 709 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
650 http_request ( 710 if $len <= length $_[0]{rbuf};
651 $method => $hdr{location},
652 %arg,
653 recurse => $recurse - 1,
654 Redirect => \@_,
655 $cb); 711 });
656 } else { 712 } else {
657 $cb->($_[0], $_[1]);
658 }
659 };
660
661 my $len = $hdr{"content-length"};
662
663 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
664 $finish->(undef, { Status => 598, Reason => "Request cancelled by on_header", @pseudo });
665 } elsif (
666 $hdr{Status} =~ /^(?:1..|[23]04)$/
667 or $method eq "HEAD"
668 or (defined $len && !$len)
669 ) {
670 # no body
671 $finish->("", \%hdr);
672 } else {
673 # body handling, four different code paths
674 # for want_body_handle, on_body (2x), normal (2x)
675 # we might read too much here, but it does not matter yet (no pers. connections)
676 if (!$redirect && $arg{want_body_handle}) {
677 $_[0]->on_eof (undef);
678 $_[0]->on_error (undef);
679 $_[0]->on_read (undef);
680
681 $finish->(delete $state{handle}, \%hdr);
682
683 } elsif ($arg{on_body}) {
684 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
685 if ($len) {
686 $_[0]->on_eof (undef);
687 $_[0]->on_read (sub {
688 $len -= length $_[0]{rbuf};
689
690 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
691 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
692
693 $len > 0
694 or $finish->("", \%hdr);
695 });
696 } else {
697 $_[0]->on_eof (sub {
698 $finish->("", \%hdr);
699 });
700 $_[0]->on_read (sub {
701 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
702 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
703 });
704 }
705 } else {
706 $_[0]->on_eof (undef);
707
708 if ($len) {
709 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
710 $_[0]->on_read (sub {
711 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
712 if $len <= length $_[0]{rbuf};
713 });
714 } else {
715 $_[0]->on_error (sub { 713 $_[0]->on_error (sub {
716 $! == Errno::EPIPE || !$! 714 ($! == Errno::EPIPE || !$!)
717 ? $finish->(delete $_[0]{rbuf}, \%hdr) 715 ? $finish->(delete $_[0]{rbuf}, \%hdr)
718 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }); 716 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo });
719 }); 717 });
720 $_[0]->on_read (sub { }); 718 $_[0]->on_read (sub { });
721 }
722 } 719 }
723 } 720 }
724 }); 721 }
725 }); 722 });
726 }; 723 };
727 724
728 # now handle proxy-CONNECT method 725 # now handle proxy-CONNECT method
729 if ($proxy && $uscheme eq "https") { 726 if ($proxy && $uscheme eq "https") {

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines