ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.56 by root, Mon Sep 6 05:30:54 2010 UTC vs.
Revision 1.61 by root, Thu Dec 30 03:45:01 2010 UTC

43 43
44use Errno (); 44use Errno ();
45 45
46use AnyEvent 5.0 (); 46use AnyEvent 5.0 ();
47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 48use AnyEvent::Handle ();
50 49
51use base Exporter::; 50use base Exporter::;
52 51
53our $VERSION = '1.45'; 52our $VERSION = '1.46';
54 53
55our @EXPORT = qw(http_get http_post http_head http_request); 54our @EXPORT = qw(http_get http_post http_head http_request);
56 55
57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 56our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 57our $MAX_RECURSE = 10;
92must be an absolute http or https URL. 91must be an absolute http or https URL.
93 92
94When called in void context, nothing is returned. In other contexts, 93When called in void context, nothing is returned. In other contexts,
95C<http_request> returns a "cancellation guard" - you have to keep the 94C<http_request> returns a "cancellation guard" - you have to keep the
96object at least alive until the callback get called. If the object gets 95object at least alive until the callback get called. If the object gets
97destroyed before the callbakc is called, the request will be cancelled. 96destroyed before the callback is called, the request will be cancelled.
98 97
99The callback will be called with the response body data as first argument 98The callback will be called with the response body data as first argument
100(or C<undef> if an error occured), and a hash-ref with response headers as 99(or C<undef> if an error occured), and a hash-ref with response headers as
101second argument. 100second argument.
102 101
211connect (for exmaple, to bind it on a given IP address). This parameter 210connect (for exmaple, to bind it on a given IP address). This parameter
212overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect> 211overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect>
213and behaves exactly the same way (e.g. it has to provide a 212and behaves exactly the same way (e.g. it has to provide a
214timeout). See the description for the C<$prepare_cb> argument of 213timeout). See the description for the C<$prepare_cb> argument of
215C<AnyEvent::Socket::tcp_connect> for details. 214C<AnyEvent::Socket::tcp_connect> for details.
215
216=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
217
218In even rarer cases you want total control over how AnyEvent::HTTP
219establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
220to do this, but you can provide your own C<tcp_connect> function -
221obviously, it has to follow the same calling conventions, except that it
222may always return a connection guard object.
223
224There are probably lots of weird uses for this function, starting from
225tracing the hosts C<http_request> actually tries to connect, to (inexact
226but fast) host => IP address caching or even socks protocol support.
216 227
217=item on_header => $callback->($headers) 228=item on_header => $callback->($headers)
218 229
219When specified, this callback will be called with the header hash as soon 230When specified, this callback will be called with the header hash as soon
220as headers have been successfully received from the remote server (not on 231as headers have been successfully received from the remote server (not on
338 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 349 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
339 350
340 _slot_schedule $_[0]; 351 _slot_schedule $_[0];
341} 352}
342 353
343our $qr_nl = qr{\015?\012};
344our $qr_nlnl = qr{(?<![^\012])\015?\012}; 354our $qr_nlnl = qr{(?<![^\012])\015?\012};
345 355
346our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 }; 356our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
347our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" }; 357our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
348 358
455 _get_slot $uhost, sub { 465 _get_slot $uhost, sub {
456 $state{slot_guard} = shift; 466 $state{slot_guard} = shift;
457 467
458 return unless $state{connect_guard}; 468 return unless $state{connect_guard};
459 469
460 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 470 my $tcp_connect = $arg{tcp_connect}
471 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
472
473 $state{connect_guard} = $tcp_connect->(
474 $rhost,
475 $rport,
476 sub {
461 $state{fh} = shift 477 $state{fh} = shift
462 or do { 478 or do {
463 my $err = "$!"; 479 my $err = "$!";
464 %state = (); 480 %state = ();
465 return $cb->(undef, { Status => 599, Reason => $err, @pseudo }); 481 return $cb->(undef, { Status => 599, Reason => $err, @pseudo });
466 }; 482 };
467 483
468 pop; # free memory, save a tree 484 pop; # free memory, save a tree
469 485
470 return unless delete $state{connect_guard}; 486 return unless delete $state{connect_guard};
471 487
472 # get handle 488 # get handle
473 $state{handle} = new AnyEvent::Handle 489 $state{handle} = new AnyEvent::Handle
474 fh => $state{fh}, 490 fh => $state{fh},
475 peername => $rhost, 491 peername => $rhost,
476 tls_ctx => $arg{tls_ctx}, 492 tls_ctx => $arg{tls_ctx},
477 # these need to be reconfigured on keepalive handles 493 # these need to be reconfigured on keepalive handles
478 timeout => $timeout, 494 timeout => $timeout,
479 on_error => sub { 495 on_error => sub {
480 %state = (); 496 %state = ();
481 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo }); 497 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo });
482 }, 498 },
483 on_eof => sub { 499 on_eof => sub {
484 %state = (); 500 %state = ();
485 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo }); 501 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo });
486 }, 502 },
487 ; 503 ;
488 504
489 # limit the number of persistent connections 505 # limit the number of persistent connections
490 # keepalive not yet supported 506 # keepalive not yet supported
491# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 507# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
492# ++$KA_COUNT{$_[1]}; 508# ++$KA_COUNT{$_[1]};
493# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 509# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
494# --$KA_COUNT{$_[1]} 510# --$KA_COUNT{$_[1]}
495# }; 511# };
496# $hdr{connection} = "keep-alive"; 512# $hdr{connection} = "keep-alive";
497# } else { 513# } else {
498 delete $hdr{connection}; 514 delete $hdr{connection};
499# } 515# }
500 516
501 $state{handle}->starttls ("connect") if $rscheme eq "https"; 517 $state{handle}->starttls ("connect") if $rscheme eq "https";
502 518
503 # handle actual, non-tunneled, request 519 # handle actual, non-tunneled, request
504 my $handle_actual_request = sub { 520 my $handle_actual_request = sub {
505 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 521 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
506 522
507 # send request 523 # send request
508 $state{handle}->push_write ( 524 $state{handle}->push_write (
509 "$method $rpath HTTP/1.0\015\012" 525 "$method $rpath HTTP/1.0\015\012"
510 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 526 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
511 . "\015\012" 527 . "\015\012"
512 . (delete $arg{body}) 528 . (delete $arg{body})
513 ); 529 );
514 530
515 # return if error occured during push_write() 531 # return if error occured during push_write()
516 return unless %state; 532 return unless %state;
517 533
518 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 534 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
519 535
520 # status line 536 # status line and headers
521 $state{handle}->push_read (line => $qr_nl, sub {
522 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
523 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response ($_[1])", @pseudo }));
524
525 push @pseudo,
526 HTTPVersion => $1,
527 Status => $2,
528 Reason => $3,
529 ;
530
531 # headers, could be optimized a bit
532 $state{handle}->unshift_read (line => $qr_nlnl, sub { 537 $state{handle}->push_read (line => $qr_nlnl, sub {
533 for ("$_[1]") { 538 for ("$_[1]") {
534 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 539 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
540
541 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc
542 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo }));
543
544 push @pseudo,
545 HTTPVersion => $1,
546 Status => $2,
547 Reason => $3,
548 ;
535 549
536 # things seen, not parsed: 550 # things seen, not parsed:
537 # p3pP="NON CUR OTPi OUR NOR UNI" 551 # p3pP="NON CUR OTPi OUR NOR UNI"
538 552
539 $hdr{lc $1} .= ",$2" 553 $hdr{lc $1} .= ",$2"
574 my $redirect; 588 my $redirect;
575 589
576 if ($recurse) { 590 if ($recurse) {
577 my $status = $hdr{Status}; 591 my $status = $hdr{Status};
578 592
579 if (($status == 301 || $status == 302) && $method ne "POST") { 593 # industry standard is to redirect POST as GET for
580 # apparently, mozilla et al. just change POST to GET here 594 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
581 # more research is needed before we do the same 595 # also, the UA should ask the user for 301 and 307 and POST,
582 $redirect = 1; 596 # industry standard seems to be to simply follow.
583 } elsif ($status == 303) { 597 # we go with the industry standard.
598 if ($status == 301 or $status == 302 or $status == 303) {
584 # even http/1.1 is unclear on how to mutate the method 599 # HTTP/1.1 is unclear on how to mutate the method
585 $method = "GET" unless $method eq "HEAD"; 600 $method = "GET" unless $method eq "HEAD";
586 $redirect = 1; 601 $redirect = 1;
587 } elsif ($status == 307 && $method =~ /^(?:GET|HEAD)$/) { 602 } elsif ($status == 307) {
588 $redirect = 1; 603 $redirect = 1;
589 } 604 }
590 } 605 }
591 606
592 my $finish = sub { 607 my $finish = sub {
672 } else { 687 } else {
673 # body handling, four different code paths 688 # body handling, four different code paths
674 # for want_body_handle, on_body (2x), normal (2x) 689 # for want_body_handle, on_body (2x), normal (2x)
675 # we might read too much here, but it does not matter yet (no pers. connections) 690 # we might read too much here, but it does not matter yet (no pers. connections)
676 if (!$redirect && $arg{want_body_handle}) { 691 if (!$redirect && $arg{want_body_handle}) {
677 $_[0]->on_eof (undef); 692 $_[0]->on_eof (undef);
678 $_[0]->on_error (undef); 693 $_[0]->on_error (undef);
679 $_[0]->on_read (undef); 694 $_[0]->on_read (undef);
680 695
681 $finish->(delete $state{handle}, \%hdr); 696 $finish->(delete $state{handle}, \%hdr);
682 697
711 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr) 726 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
712 if $len <= length $_[0]{rbuf}; 727 if $len <= length $_[0]{rbuf};
713 }); 728 });
714 } else { 729 } else {
715 $_[0]->on_error (sub { 730 $_[0]->on_error (sub {
716 $! == Errno::EPIPE || !$! 731 ($! == Errno::EPIPE || !$!)
717 ? $finish->(delete $_[0]{rbuf}, \%hdr) 732 ? $finish->(delete $_[0]{rbuf}, \%hdr)
718 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }); 733 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo });
719 }); 734 });
720 $_[0]->on_read (sub { }); 735 $_[0]->on_read (sub { });
721 } 736 }
722 } 737 }
723 } 738 }
724 }); 739 });
725 }); 740 };
726 };
727 741
728 # now handle proxy-CONNECT method 742 # now handle proxy-CONNECT method
729 if ($proxy && $uscheme eq "https") { 743 if ($proxy && $uscheme eq "https") {
730 # oh dear, we have to wrap it into a connect request 744 # oh dear, we have to wrap it into a connect request
731 745
732 # maybe re-use $uauthority with patched port? 746 # maybe re-use $uauthority with patched port?
733 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 747 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
734 $state{handle}->push_read (line => $qr_nlnl, sub { 748 $state{handle}->push_read (line => $qr_nlnl, sub {
735 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 749 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
736 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo })); 750 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo }));
737 751
738 if ($2 == 200) { 752 if ($2 == 200) {
739 $rpath = $upath; 753 $rpath = $upath;
740 &$handle_actual_request; 754 &$handle_actual_request;
741 } else { 755 } else {
742 %state = (); 756 %state = ();
743 $cb->(undef, { Status => $2, Reason => $3, @pseudo }); 757 $cb->(undef, { Status => $2, Reason => $3, @pseudo });
758 }
744 } 759 });
745 });
746 } else { 760 } else {
747 &$handle_actual_request; 761 &$handle_actual_request;
762 }
763
748 } 764 },
749
750 }, $arg{on_prepare} || sub { $timeout }; 765 $arg{on_prepare} || sub { $timeout }
766 );
751 }; 767 };
752 768
753 defined wantarray && AnyEvent::Util::guard { %state = () } 769 defined wantarray && AnyEvent::Util::guard { %state = () }
754} 770}
755 771
790string of the form C<http://host:port> (optionally C<https:...>), croaks 806string of the form C<http://host:port> (optionally C<https:...>), croaks
791otherwise. 807otherwise.
792 808
793To clear an already-set proxy, use C<undef>. 809To clear an already-set proxy, use C<undef>.
794 810
811=item $date = AnyEvent::HTTP::format_date $timestamp
812
813Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
814Date (RFC 2616).
815
816=item $timestamp = AnyEvent::HTTP::parse_date $date
817
818Takes a HTTP Date (RFC 2616) and returns the corresponding POSIX
819timestamp, or C<undef> if the date cannot be parsed.
820
795=item $AnyEvent::HTTP::MAX_RECURSE 821=item $AnyEvent::HTTP::MAX_RECURSE
796 822
797The default value for the C<recurse> request parameter (default: C<10>). 823The default value for the C<recurse> request parameter (default: C<10>).
798 824
799=item $AnyEvent::HTTP::USERAGENT 825=item $AnyEvent::HTTP::USERAGENT
817connections. This number of can be useful for load-leveling. 843connections. This number of can be useful for load-leveling.
818 844
819=back 845=back
820 846
821=cut 847=cut
848
849our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
850our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
851
852sub format_date($) {
853 my ($time) = @_;
854
855 # RFC 822/1123 format
856 my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
857
858 sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
859 $weekday[$wday], $mday, $month[$mon], $year + 1900,
860 $H, $M, $S;
861}
862
863sub parse_date($) {
864 my ($date) = @_;
865
866 my ($d, $m, $y, $H, $M, $S);
867
868 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
869 # RFC 822/1123, required by RFC 2616
870 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
871
872 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
873 # RFC 850
874 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
875
876 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) {
877 # ISO C's asctime
878 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
879 }
880 # other formats fail in the loop below
881
882 for (0..11) {
883 if ($m eq $month[$_]) {
884 require Time::Local;
885 return Time::Local::timegm ($S, $M, $H, $d, $_, $y);
886 }
887 }
888
889 undef
890}
822 891
823sub set_proxy($) { 892sub set_proxy($) {
824 if (length $_[0]) { 893 if (length $_[0]) {
825 $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix 894 $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix
826 or Carp::croak "$_[0]: invalid proxy URL"; 895 or Carp::croak "$_[0]: invalid proxy URL";
833# initialise proxy from environment 902# initialise proxy from environment
834eval { 903eval {
835 set_proxy $ENV{http_proxy}; 904 set_proxy $ENV{http_proxy};
836}; 905};
837 906
907=head2 SOCKS PROXIES
908
909Socks proxies are not directly supported by AnyEvent::HTTP. You can
910compile your perl to support socks, or use an external program such as
911F<socksify> (dante) or F<tsocks> to make your program use a socks proxy
912transparently.
913
914Alternatively, for AnyEvent::HTTP only, you can use your own
915C<tcp_connect> function that does the proxy handshake - here is an example
916that works with socks4a proxies:
917
918 use Errno;
919 use AnyEvent::Util;
920 use AnyEvent::Socket;
921 use AnyEvent::Handle;
922
923 # host, port and username of/for your socks4a proxy
924 my $socks_host = "10.0.0.23";
925 my $socks_port = 9050;
926 my $socks_user = "";
927
928 sub socks4a_connect {
929 my ($host, $port, $connect_cb, $prepare_cb) = @_;
930
931 my $hdl = new AnyEvent::Handle
932 connect => [$socks_host, $socks_port],
933 on_prepare => sub { $prepare_cb->($_[0]{fh}) },
934 on_error => sub { $connect_cb->() },
935 ;
936
937 $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
938
939 $hdl->push_read (chunk => 8, sub {
940 my ($hdl, $chunk) = @_;
941 my ($status, $port, $ipn) = unpack "xCna4", $chunk;
942
943 if ($status == 0x5a) {
944 $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
945 } else {
946 $! = Errno::ENXIO; $connect_cb->();
947 }
948 });
949
950 $hdl
951 }
952
953Use C<socks4a_connect> instead of C<tcp_connect> when doing C<http_request>s,
954possibly after switching off other proxy types:
955
956 AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
957
958 http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
959 my ($data, $headers) = @_;
960 ...
961 };
962
838=head1 SEE ALSO 963=head1 SEE ALSO
839 964
840L<AnyEvent>. 965L<AnyEvent>.
841 966
842=head1 AUTHOR 967=head1 AUTHOR

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines