ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.59 by root, Wed Dec 29 23:59:36 2010 UTC vs.
Revision 1.65 by root, Fri Dec 31 03:47:32 2010 UTC

47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Handle (); 48use AnyEvent::Handle ();
49 49
50use base Exporter::; 50use base Exporter::;
51 51
52our $VERSION = '1.46'; 52our $VERSION = '1.5';
53 53
54our @EXPORT = qw(http_get http_post http_head http_request); 54our @EXPORT = qw(http_get http_post http_head http_request);
55 55
56our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 56our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
57our $MAX_RECURSE = 10; 57our $MAX_RECURSE = 10;
100second argument. 100second argument.
101 101
102All the headers in that hash are lowercased. In addition to the response 102All the headers in that hash are lowercased. In addition to the response
103headers, the "pseudo-headers" (uppercase to avoid clashing with possible 103headers, the "pseudo-headers" (uppercase to avoid clashing with possible
104response headers) C<HTTPVersion>, C<Status> and C<Reason> contain the 104response headers) C<HTTPVersion>, C<Status> and C<Reason> contain the
105three parts of the HTTP Status-Line of the same name. 105three parts of the HTTP Status-Line of the same name. If an error occurs
106during the body phase of a request, then the original C<Status> and
107C<Reason> values from the header are available as C<OrigStatus> and
108C<OrigReason>.
106 109
107The pseudo-header C<URL> contains the actual URL (which can differ from 110The pseudo-header C<URL> contains the actual URL (which can differ from
108the requested URL when following redirects - for example, you might get 111the requested URL when following redirects - for example, you might get
109an error that your URL scheme is not supported even though your URL is a 112an error that your URL scheme is not supported even though your URL is a
110valid http URL because it redirected to an ftp URL, in which case you can 113valid http URL because it redirected to an ftp URL, in which case you can
216=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb) 219=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
217 220
218In even rarer cases you want total control over how AnyEvent::HTTP 221In even rarer cases you want total control over how AnyEvent::HTTP
219establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect> 222establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
220to do this, but you can provide your own C<tcp_connect> function - 223to do this, but you can provide your own C<tcp_connect> function -
221obviously, it has to follow the same calling conventions. 224obviously, it has to follow the same calling conventions, except that it
225may always return a connection guard object.
222 226
223There are probably lots of weird uses for this function, starting from 227There are probably lots of weird uses for this function, starting from
224tracing the hosts C<http_request> actually tries to connect, to (inexact 228tracing the hosts C<http_request> actually tries to connect, to (inexact
225but fast) host => IP address caching or even socks protocol support. 229but fast) host => IP address caching or even socks protocol support.
226 230
376 my @pseudo = (URL => $url); 380 my @pseudo = (URL => $url);
377 push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect}; 381 push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
378 382
379 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; 383 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
380 384
381 return $cb->(undef, { Status => 599, Reason => "Too many redirections", @pseudo }) 385 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
382 if $recurse < 0; 386 if $recurse < 0;
383 387
384 my $proxy = $arg{proxy} || $PROXY; 388 my $proxy = $arg{proxy} || $PROXY;
385 my $timeout = $arg{timeout} || $TIMEOUT; 389 my $timeout = $arg{timeout} || $TIMEOUT;
386 390
389 393
390 $uscheme = lc $uscheme; 394 $uscheme = lc $uscheme;
391 395
392 my $uport = $uscheme eq "http" ? 80 396 my $uport = $uscheme eq "http" ? 80
393 : $uscheme eq "https" ? 443 397 : $uscheme eq "https" ? 443
394 : return $cb->(undef, { Status => 599, Reason => "Only http and https URL schemes supported", @pseudo }); 398 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
395 399
396 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 400 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
397 or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", @pseudo }); 401 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
398 402
399 my $uhost = $1; 403 my $uhost = $1;
400 $uport = $2 if defined $2; 404 $uport = $2 if defined $2;
401 405
402 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" 406 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
464 _get_slot $uhost, sub { 468 _get_slot $uhost, sub {
465 $state{slot_guard} = shift; 469 $state{slot_guard} = shift;
466 470
467 return unless $state{connect_guard}; 471 return unless $state{connect_guard};
468 472
469 my $tcp_connect = $arg{tcp_connect} 473 my $connect_cb = sub {
470 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
471
472 $state{connect_guard} = $tcp_connect->(
473 $rhost,
474 $rport,
475 sub {
476 $state{fh} = shift 474 $state{fh} = shift
477 or do { 475 or do {
478 my $err = "$!"; 476 my $err = "$!";
479 %state = (); 477 %state = ();
480 return $cb->(undef, { Status => 599, Reason => $err, @pseudo }); 478 return $cb->(undef, { @pseudo, Status => 599, Reason => $err });
481 }; 479 };
482 480
483 pop; # free memory, save a tree 481 pop; # free memory, save a tree
484 482
485 return unless delete $state{connect_guard}; 483 return unless delete $state{connect_guard};
486 484
487 # get handle 485 # get handle
488 $state{handle} = new AnyEvent::Handle 486 $state{handle} = new AnyEvent::Handle
489 fh => $state{fh}, 487 fh => $state{fh},
490 peername => $rhost, 488 peername => $rhost,
491 tls_ctx => $arg{tls_ctx}, 489 tls_ctx => $arg{tls_ctx},
492 # these need to be reconfigured on keepalive handles 490 # these need to be reconfigured on keepalive handles
493 timeout => $timeout, 491 timeout => $timeout,
494 on_error => sub { 492 on_error => sub {
495 %state = (); 493 %state = ();
496 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo }); 494 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] });
497 }, 495 },
498 on_eof => sub { 496 on_eof => sub {
499 %state = (); 497 %state = ();
500 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo }); 498 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" });
501 }, 499 },
502 ; 500 ;
503 501
504 # limit the number of persistent connections 502 # limit the number of persistent connections
505 # keepalive not yet supported 503 # keepalive not yet supported
506# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 504# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
507# ++$KA_COUNT{$_[1]}; 505# ++$KA_COUNT{$_[1]};
508# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 506# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
509# --$KA_COUNT{$_[1]} 507# --$KA_COUNT{$_[1]}
510# }; 508# };
511# $hdr{connection} = "keep-alive"; 509# $hdr{connection} = "keep-alive";
512# } else { 510# } else {
513 delete $hdr{connection}; 511 delete $hdr{connection};
514# } 512# }
515 513
516 $state{handle}->starttls ("connect") if $rscheme eq "https"; 514 $state{handle}->starttls ("connect") if $rscheme eq "https";
517 515
518 # handle actual, non-tunneled, request 516 # handle actual, non-tunneled, request
519 my $handle_actual_request = sub { 517 my $handle_actual_request = sub {
520 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 518 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
521 519
522 # send request 520 # send request
523 $state{handle}->push_write ( 521 $state{handle}->push_write (
524 "$method $rpath HTTP/1.0\015\012" 522 "$method $rpath HTTP/1.0\015\012"
525 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 523 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
526 . "\015\012" 524 . "\015\012"
527 . (delete $arg{body}) 525 . (delete $arg{body})
528 ); 526 );
529 527
530 # return if error occured during push_write() 528 # return if error occured during push_write()
531 return unless %state; 529 return unless %state;
532 530
533 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 531 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
534 532
535 # status line and headers 533 # status line and headers
536 $state{handle}->push_read (line => $qr_nlnl, sub { 534 $state{handle}->push_read (line => $qr_nlnl, sub {
535 my $keepalive = pop;
536
537 for ("$_[1]") { 537 for ("$_[1]") {
538 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 538 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
539 539
540 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc 540 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc
541 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo })); 541 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
542 542
543 push @pseudo, 543 push @pseudo,
544 HTTPVersion => $1, 544 HTTPVersion => $1,
545 Status => $2, 545 Status => $2,
546 Reason => $3, 546 Reason => $3,
547 ; 547 ;
548 548
549 # things seen, not parsed: 549 # things seen, not parsed:
550 # p3pP="NON CUR OTPi OUR NOR UNI" 550 # p3pP="NON CUR OTPi OUR NOR UNI"
551 551
552 $hdr{lc $1} .= ",$2" 552 $hdr{lc $1} .= ",$2"
553 while /\G 553 while /\G
554 ([^:\000-\037]*): 554 ([^:\000-\037]*):
555 [\011\040]* 555 [\011\040]*
556 ((?: [^\012]+ | \012[\011\040] )*) 556 ((?: [^\012]+ | \012[\011\040] )*)
557 \012 557 \012
558 /gxc; 558 /gxc;
559 559
560 /\G$/ 560 /\G$/
561 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", @pseudo })); 561 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
562 }
563
564 # remove the "," prefix we added to all headers above
565 substr $_, 0, 1, ""
566 for values %hdr;
567
568 # patch in all pseudo headers
569 %hdr = (%hdr, @pseudo);
570
571 # redirect handling
572 # microsoft and other shitheads don't give a shit for following standards,
573 # try to support some common forms of broken Location headers.
574 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
575 $hdr{location} =~ s/^\.\/+//;
576
577 my $url = "$rscheme://$uhost:$uport";
578
579 unless ($hdr{location} =~ s/^\///) {
580 $url .= $upath;
581 $url =~ s/\/[^\/]*$//;
562 } 582 }
563 583
564 # remove the "," prefix we added to all headers above
565 substr $_, 0, 1, ""
566 for values %hdr;
567
568 # patch in all pseudo headers
569 %hdr = (%hdr, @pseudo);
570
571 # redirect handling
572 # microsoft and other shitheads don't give a shit for following standards,
573 # try to support some common forms of broken Location headers.
574 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
575 $hdr{location} =~ s/^\.\/+//;
576
577 my $url = "$rscheme://$uhost:$uport";
578
579 unless ($hdr{location} =~ s/^\///) {
580 $url .= $upath;
581 $url =~ s/\/[^\/]*$//;
582 }
583
584 $hdr{location} = "$url/$hdr{location}"; 584 $hdr{location} = "$url/$hdr{location}";
585 }
586
587 my $redirect;
588
589 if ($recurse) {
590 my $status = $hdr{Status};
591
592 # industry standard is to redirect POST as GET for
593 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
594 # also, the UA should ask the user for 301 and 307 and POST,
595 # industry standard seems to be to simply follow.
596 # we go with the industry standard.
597 if ($status == 301 or $status == 302 or $status == 303) {
598 # HTTP/1.1 is unclear on how to mutate the method
599 $method = "GET" unless $method eq "HEAD";
600 $redirect = 1;
601 } elsif ($status == 307) {
602 $redirect = 1;
585 } 603 }
604 }
586 605
587 my $redirect; 606 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
607 $state{handle}->destroy if $state{handle};
608 %state = ();
588 609
610 if (defined $_[1]) {
611 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
612 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
613 }
614
615 # set-cookie processing
589 if ($recurse) { 616 if ($arg{cookie_jar}) {
590 my $status = $hdr{Status}; 617 for ($hdr{"set-cookie"}) {
618 # parse NAME=VALUE
619 my @kv;
591 620
592 # industry standard is to redirect POST as GET for 621 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
593 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
594 # also, the UA should ask the user for 301 and 307 and POST,
595 # industry standard seems to be to simply follow.
596 # we go with the industry standard.
597 if ($status == 301 or $status == 302 or $status == 303) {
598 # HTTP/1.1 is unclear on how to mutate the method
599 $method = "GET" unless $method eq "HEAD";
600 $redirect = 1; 622 my $name = $1;
601 } elsif ($status == 307) { 623 my $value = $3;
624
625 unless ($value) {
626 $value = $2;
627 $value =~ s/\\(.)/$1/gs;
628 }
629
630 push @kv, $name => $value;
631
632 last unless /\G\s*;/gc;
633 }
634
635 last unless @kv;
636
637 my $name = shift @kv;
638 my %kv = (value => shift @kv, @kv);
639
640 my $cdom;
641 my $cpath = (delete $kv{path}) || "/";
642
643 if (exists $kv{domain}) {
644 $cdom = delete $kv{domain};
645
646 $cdom =~ s/^\.?/./; # make sure it starts with a "."
647
648 next if $cdom =~ /\.$/;
649
650 # this is not rfc-like and not netscape-like. go figure.
651 my $ndots = $cdom =~ y/.//;
652 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
653 } else {
654 $cdom = $uhost;
655 }
656
602 $redirect = 1; 657 # store it
658 $arg{cookie_jar}{version} = 1;
659 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
660
661 redo if /\G\s*,/gc;
603 } 662 }
604 } 663 }
605 664
606 my $finish = sub { 665 if ($redirect && exists $hdr{location}) {
607 $state{handle}->destroy if $state{handle}; 666 # we ignore any errors, as it is very common to receive
608 %state = (); 667 # Content-Length != 0 but no actual body
609 668 # we also access %hdr, as $_[1] might be an erro
610 # set-cookie processing 669 http_request (
611 if ($arg{cookie_jar}) { 670 $method => $hdr{location},
612 for ($_[1]{"set-cookie"}) {
613 # parse NAME=VALUE
614 my @kv;
615
616 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
617 my $name = $1;
618 my $value = $3;
619
620 unless ($value) {
621 $value = $2;
622 $value =~ s/\\(.)/$1/gs;
623 } 671 %arg,
672 recurse => $recurse - 1,
673 Redirect => [$_[0], \%hdr],
674 $cb);
675 } else {
676 $cb->($_[0], \%hdr);
677 }
678 };
624 679
625 push @kv, $name => $value; 680 my $len = $hdr{"content-length"};
626 681
627 last unless /\G\s*;/gc; 682 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
683 $finish->(undef, 598 => "Request cancelled by on_header");
684 } elsif (
685 $hdr{Status} =~ /^(?:1..|204|205|304)$/
686 or $method eq "HEAD"
687 or (defined $len && !$len)
688 ) {
689 # no body
690 $finish->("", undef, undef, 1);
691 } else {
692 # body handling, four different code paths
693 # for want_body_handle, on_body (2x), normal (2x)
694 # we might read too much here, but it does not matter yet (no pipelining)
695 if (!$redirect && $arg{want_body_handle}) {
696 $_[0]->on_eof (undef);
697 $_[0]->on_error (undef);
698 $_[0]->on_read (undef);
699
700 $finish->(delete $state{handle});
701
702 } elsif ($arg{on_body}) {
703 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
704 if ($len) {
705 $_[0]->on_read (sub {
706 $len -= length $_[0]{rbuf};
707
708 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
709 or $finish->(undef, 598 => "Request cancelled by on_body");
710
628 } 711 $len > 0
629 712 or $finish->("", undef, undef, 1);
630 last unless @kv;
631
632 my $name = shift @kv;
633 my %kv = (value => shift @kv, @kv);
634
635 my $cdom;
636 my $cpath = (delete $kv{path}) || "/";
637
638 if (exists $kv{domain}) {
639 $cdom = delete $kv{domain};
640
641 $cdom =~ s/^\.?/./; # make sure it starts with a "."
642
643 next if $cdom =~ /\.$/;
644
645 # this is not rfc-like and not netscape-like. go figure.
646 my $ndots = $cdom =~ y/.//;
647 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
648 } else {
649 $cdom = $uhost;
650 }
651
652 # store it
653 $arg{cookie_jar}{version} = 1;
654 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
655
656 redo if /\G\s*,/gc;
657 } 713 });
714 } else {
715 $_[0]->on_eof (sub {
716 $finish->("");
717 });
718 $_[0]->on_read (sub {
719 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
720 or $finish->(undef, 598 => "Request cancelled by on_body");
721 });
658 } 722 }
723 } else {
724 $_[0]->on_eof (undef);
659 725
660 if ($redirect && exists $hdr{location}) { 726 if ($len) {
661 # we ignore any errors, as it is very common to receive 727 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
662 # Content-Length != 0 but no actual body 728 $_[0]->on_read (sub {
663 # we also access %hdr, as $_[1] might be an erro 729 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
664 http_request ( 730 if $len <= length $_[0]{rbuf};
665 $method => $hdr{location},
666 %arg,
667 recurse => $recurse - 1,
668 Redirect => \@_,
669 $cb); 731 });
670 } else { 732 } else {
671 $cb->($_[0], $_[1]);
672 }
673 };
674
675 my $len = $hdr{"content-length"};
676
677 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
678 $finish->(undef, { Status => 598, Reason => "Request cancelled by on_header", @pseudo });
679 } elsif (
680 $hdr{Status} =~ /^(?:1..|[23]04)$/
681 or $method eq "HEAD"
682 or (defined $len && !$len)
683 ) {
684 # no body
685 $finish->("", \%hdr);
686 } else {
687 # body handling, four different code paths
688 # for want_body_handle, on_body (2x), normal (2x)
689 # we might read too much here, but it does not matter yet (no pers. connections)
690 if (!$redirect && $arg{want_body_handle}) {
691 $_[0]->on_eof (undef);
692 $_[0]->on_error (undef);
693 $_[0]->on_read (undef);
694
695 $finish->(delete $state{handle}, \%hdr);
696
697 } elsif ($arg{on_body}) {
698 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
699 if ($len) {
700 $_[0]->on_eof (undef);
701 $_[0]->on_read (sub {
702 $len -= length $_[0]{rbuf};
703
704 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
705 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
706
707 $len > 0
708 or $finish->("", \%hdr);
709 });
710 } else {
711 $_[0]->on_eof (sub {
712 $finish->("", \%hdr);
713 });
714 $_[0]->on_read (sub {
715 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
716 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
717 });
718 }
719 } else {
720 $_[0]->on_eof (undef);
721
722 if ($len) {
723 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
724 $_[0]->on_read (sub {
725 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
726 if $len <= length $_[0]{rbuf};
727 });
728 } else {
729 $_[0]->on_error (sub { 733 $_[0]->on_error (sub {
730 ($! == Errno::EPIPE || !$!) 734 ($! == Errno::EPIPE || !$!)
731 ? $finish->(delete $_[0]{rbuf}, \%hdr) 735 ? $finish->(delete $_[0]{rbuf})
732 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }); 736 : $finish->(undef, 599 => $_[2]);
733 }); 737 });
734 $_[0]->on_read (sub { }); 738 $_[0]->on_read (sub { });
735 }
736 } 739 }
737 } 740 }
738 }); 741 }
739 }; 742 });
743 };
740 744
741 # now handle proxy-CONNECT method 745 # now handle proxy-CONNECT method
742 if ($proxy && $uscheme eq "https") { 746 if ($proxy && $uscheme eq "https") {
743 # oh dear, we have to wrap it into a connect request 747 # oh dear, we have to wrap it into a connect request
744 748
745 # maybe re-use $uauthority with patched port? 749 # maybe re-use $uauthority with patched port?
746 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 750 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
747 $state{handle}->push_read (line => $qr_nlnl, sub { 751 $state{handle}->push_read (line => $qr_nlnl, sub {
748 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 752 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
749 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo })); 753 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
750 754
751 if ($2 == 200) { 755 if ($2 == 200) {
752 $rpath = $upath; 756 $rpath = $upath;
753 &$handle_actual_request; 757 &$handle_actual_request;
754 } else { 758 } else {
755 %state = (); 759 %state = ();
756 $cb->(undef, { Status => $2, Reason => $3, @pseudo }); 760 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
757 }
758 }); 761 }
762 });
759 } else { 763 } else {
760 &$handle_actual_request; 764 &$handle_actual_request;
761 }
762
763 }, 765 }
764 $arg{on_prepare} || sub { $timeout }
765 ); 766 };
767
768 my $tcp_connect = $arg{tcp_connect}
769 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
770
771 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
772
766 }; 773 };
767 774
768 defined wantarray && AnyEvent::Util::guard { %state = () } 775 defined wantarray && AnyEvent::Util::guard { %state = () }
769} 776}
770 777
805string of the form C<http://host:port> (optionally C<https:...>), croaks 812string of the form C<http://host:port> (optionally C<https:...>), croaks
806otherwise. 813otherwise.
807 814
808To clear an already-set proxy, use C<undef>. 815To clear an already-set proxy, use C<undef>.
809 816
817=item $date = AnyEvent::HTTP::format_date $timestamp
818
819Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
820Date (RFC 2616).
821
822=item $timestamp = AnyEvent::HTTP::parse_date $date
823
824Takes a HTTP Date (RFC 2616) and returns the corresponding POSIX
825timestamp, or C<undef> if the date cannot be parsed.
826
810=item $AnyEvent::HTTP::MAX_RECURSE 827=item $AnyEvent::HTTP::MAX_RECURSE
811 828
812The default value for the C<recurse> request parameter (default: C<10>). 829The default value for the C<recurse> request parameter (default: C<10>).
813 830
814=item $AnyEvent::HTTP::USERAGENT 831=item $AnyEvent::HTTP::USERAGENT
832connections. This number of can be useful for load-leveling. 849connections. This number of can be useful for load-leveling.
833 850
834=back 851=back
835 852
836=cut 853=cut
854
855our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
856our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
857
858sub format_date($) {
859 my ($time) = @_;
860
861 # RFC 822/1123 format
862 my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
863
864 sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
865 $weekday[$wday], $mday, $month[$mon], $year + 1900,
866 $H, $M, $S;
867}
868
869sub parse_date($) {
870 my ($date) = @_;
871
872 my ($d, $m, $y, $H, $M, $S);
873
874 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
875 # RFC 822/1123, required by RFC 2616
876 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
877
878 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) {
879 # RFC 850
880 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
881
882 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) {
883 # ISO C's asctime
884 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
885 }
886 # other formats fail in the loop below
887
888 for (0..11) {
889 if ($m eq $month[$_]) {
890 require Time::Local;
891 return Time::Local::timegm ($S, $M, $H, $d, $_, $y);
892 }
893 }
894
895 undef
896}
837 897
838sub set_proxy($) { 898sub set_proxy($) {
839 if (length $_[0]) { 899 if (length $_[0]) {
840 $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix 900 $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix
841 or Carp::croak "$_[0]: invalid proxy URL"; 901 or Carp::croak "$_[0]: invalid proxy URL";
848# initialise proxy from environment 908# initialise proxy from environment
849eval { 909eval {
850 set_proxy $ENV{http_proxy}; 910 set_proxy $ENV{http_proxy};
851}; 911};
852 912
913=head2 SOCKS PROXIES
914
915Socks proxies are not directly supported by AnyEvent::HTTP. You can
916compile your perl to support socks, or use an external program such as
917F<socksify> (dante) or F<tsocks> to make your program use a socks proxy
918transparently.
919
920Alternatively, for AnyEvent::HTTP only, you can use your own
921C<tcp_connect> function that does the proxy handshake - here is an example
922that works with socks4a proxies:
923
924 use Errno;
925 use AnyEvent::Util;
926 use AnyEvent::Socket;
927 use AnyEvent::Handle;
928
929 # host, port and username of/for your socks4a proxy
930 my $socks_host = "10.0.0.23";
931 my $socks_port = 9050;
932 my $socks_user = "";
933
934 sub socks4a_connect {
935 my ($host, $port, $connect_cb, $prepare_cb) = @_;
936
937 my $hdl = new AnyEvent::Handle
938 connect => [$socks_host, $socks_port],
939 on_prepare => sub { $prepare_cb->($_[0]{fh}) },
940 on_error => sub { $connect_cb->() },
941 ;
942
943 $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
944
945 $hdl->push_read (chunk => 8, sub {
946 my ($hdl, $chunk) = @_;
947 my ($status, $port, $ipn) = unpack "xCna4", $chunk;
948
949 if ($status == 0x5a) {
950 $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
951 } else {
952 $! = Errno::ENXIO; $connect_cb->();
953 }
954 });
955
956 $hdl
957 }
958
959Use C<socks4a_connect> instead of C<tcp_connect> when doing C<http_request>s,
960possibly after switching off other proxy types:
961
962 AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
963
964 http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
965 my ($data, $headers) = @_;
966 ...
967 };
968
853=head1 SEE ALSO 969=head1 SEE ALSO
854 970
855L<AnyEvent>. 971L<AnyEvent>.
856 972
857=head1 AUTHOR 973=head1 AUTHOR

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines