--- AnyEvent-HTTP/HTTP.pm 2008/06/03 16:37:13 1.1 +++ AnyEvent-HTTP/HTTP.pm 2008/06/04 12:32:30 1.8 @@ -35,16 +35,18 @@ our @EXPORT = qw(http_get http_request); -our $MAX_REDIRECTS = 10; our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; -our $MAX_PERSISTENT = 8; -our $PERSISTENT_TIMEOUT = 15; -our $TIMEOUT = 60; +our $MAX_RECURSE = 10; +our $MAX_PERSISTENT = 8; +our $PERSISTENT_TIMEOUT = 2; +our $TIMEOUT = 300; # changing these is evil our $MAX_PERSISTENT_PER_HOST = 2; our $MAX_PER_HOST = 4; # not respected yet :( +our $PROXY; + my %KA_COUNT; # number of open keep-alive connections per host =item http_get $url, key => value..., $cb->($data, $headers) @@ -52,20 +54,55 @@ Executes an HTTP-GET request. See the http_request function for details on additional parameters. +=item http_head $url, key => value..., $cb->($data, $headers) + +Executes an HTTP-HEAD request. See the http_request function for details on +additional parameters. + +=item http_post $url, $body, key => value..., $cb->($data, $headers) + +Executes an HTTP-POST request with a request body of C<$bod>. See the +http_request function for details on additional parameters. + =item http_request $method => $url, key => value..., $cb->($data, $headers) Executes a HTTP request of type C<$method> (e.g. C, C). The URL must be an absolute http or https URL. +The callback will be called with the response data as first argument +(or C if it wasn't available due to errors), and a hash-ref with +response headers as second argument. + +All the headers in that hash are lowercased. In addition to the response +headers, the three "pseudo-headers" C, C and +C contain the three parts of the HTTP Status-Line of the same +name. + +If an internal error occurs, such as not being able to resolve a hostname, +then C<$data> will be C, C<< $headers->{Status} >> will be C<599> +and the C pseudo-header will contain an error message. + +A typical callback might look like this: + + sub { + my ($body, $hdr) = @_; + + if ($hdr->{Status} =~ /^2/) { + ... everything should be ok + } else { + print "error, $hdr->{Status} $hdr->{Reason}\n"; + } + } + Additional parameters are key-value pairs, and are fully optional. They include: =over 4 -=item recurse => $boolean (default: true) +=item recurse => $count (default: $MAX_RECURSE) Whether to recurse requests or not, e.g. on redirects, authentication -retries and so on. +retries and so on, and how often to do so. =item headers => hashref @@ -74,7 +111,20 @@ =item timeout => $seconds The time-out to use for various stages - each connect attempt will reset -the timeout, as will read or write activity. +the timeout, as will read or write activity. Default timeout is 5 minutes. + +=item proxy => [$host, $port[, $scheme]] or undef + +Use the given http proxy for all requests. If not specified, then the +default proxy (as specified by C<$ENV{http_proxy}>) is used. + +C<$scheme> must be either missing or C for HTTP, or C for +HTTPS. + +=item body => $string + +The request body, usually empty. Will be-sent as-is (future versions of +this module might offer more options). =back @@ -88,41 +138,56 @@ my %hdr; - if (my $hdr = delete $arg{headers}) { + $method = uc $method; + + if (my $hdr = $arg{headers}) { while (my ($k, $v) = each %$hdr) { $hdr{lc $k} = $v; } } + my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; + + return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) + if $recurse < 0; + + my $proxy = $arg{proxy} || $PROXY; my $timeout = $arg{timeout} || $TIMEOUT; $hdr{"user-agent"} ||= $USERAGENT; - my ($scheme, $authority, $path, $query, $fragment) = - $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + my ($host, $port, $path, $scheme); - $scheme = lc $scheme; - my $port = $scheme eq "http" ? 80 + if ($proxy) { + ($host, $port, $scheme) = @$proxy; + $path = $url; + } else { + ($scheme, my $authority, $path, my $query, my $fragment) = + $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; + + $port = $scheme eq "http" ? 80 : $scheme eq "https" ? 443 - : croak "$url: only http and https URLs supported"; + : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" }); - $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x - or croak "$authority: unparsable URL"; + $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x + or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" }); - my $host = $1; - $port = $2 if defined $2; + $host = $1; + $port = $2 if defined $2; - $host =~ s/^\[(.*)\]$/$1/; - $path .= "?$query" if length $query; + $host =~ s/^\[(.*)\]$/$1/; + $path .= "?$query" if length $query; - $hdr{host} = $host = lc $host; + $path = "/" unless $path; - my %state; + $hdr{host} = $host = lc $host; + } + + $scheme = lc $scheme; - my $body = ""; - $state{body} = $body; + my %state; - $hdr{"content-length"} = length $body; + $hdr{"content-length"} = length $arg{body}; $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { $state{fh} = shift @@ -140,6 +205,7 @@ ++$KA_COUNT{$_[1]}; $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; $hdr{connection} = "keep-alive"; + delete $hdr{connection}; # keep-alive not yet supported } else { delete $hdr{connection}; } @@ -157,10 +223,10 @@ # send request $state{handle}->push_write ( - "\U$method\E $path HTTP/1.0\015\012" + "$method $path HTTP/1.0\015\012" . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) . "\015\012" - . (delete $state{body}) + . (delete $arg{body}) ); %hdr = (); # reduce memory usage, save a kitten @@ -179,9 +245,11 @@ # headers, could be optimized a bit $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { for ("$_[1]\012") { + # we support spaces in field names, as lotus domino + # creates them. $hdr{lc $1} .= ",$2" while /\G - ([^:\000-\040]+): + ([^:\000-\037]+): [\011\040]* ((?: [^\015\012]+ | \015?\012[\011\040] )*) \015?\012 @@ -194,24 +262,38 @@ substr $_, 0, 1, "" for values %hdr; - if (exists $hdr{"content-length"}) { - $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { - # could cache persistent connection now - if ($hdr{connection} =~ /\bkeep-alive\b/i) { - }; - - %state = (); - $cb->($_[1], \%hdr); - }); + my $finish = sub { + if ($_[1]{Status} =~ /^30[12]$/ && $recurse) { + http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); + } else { + $cb->($_[0], $_[1]); + } + }; + + if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") { + %state = (); + $finish->(undef, \%hdr); } else { - # too bad, need to read until we get an error or EOF, - # no way to detect winged data. - $_[0]->on_error (sub { - %state = (); - $cb->($_[0]{rbuf}, \%hdr); - }); - $_[0]->on_eof (undef); - $_[0]->on_read (sub { }); + if (exists $hdr{"content-length"}) { + $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { + # could cache persistent connection now + if ($hdr{connection} =~ /\bkeep-alive\b/i) { + # but we don't, due to misdesigns, this is annoyingly complex + }; + + %state = (); + $finish->($_[1], \%hdr); + }); + } else { + # too bad, need to read until we get an error or EOF, + # no way to detect winged data. + $_[0]->on_error (sub { + %state = (); + $finish->($_[0]{rbuf}, \%hdr); + }); + $_[0]->on_eof (undef); + $_[0]->on_read (sub { }); + } } }); }); @@ -227,14 +309,28 @@ &http_request } -=head2 GLOBAL VARIABLES +sub http_head($$;@) { + unshift @_, "HEAD"; + &http_request +} + +sub http_post($$$;@) { + unshift @_, "POST", "body"; + &http_request +} + +=head2 GLOBAL FUNCTIONS AND VARIABLES =over 4 -=item $AnyEvent::HTTP::MAX_REDIRECTS +=item AnyEvent::HTTP::set_proxy "proxy-url" + +Sets the default proxy server to use. The proxy-url must begin with a +string of the form C (optionally C). + +=item $AnyEvent::HTTP::MAX_RECURSE -The default value for the C request parameter -(default: C<10>). +The default value for the C request parameter (default: C<10>). =item $AnyEvent::HTTP::USERAGENT @@ -245,14 +341,25 @@ The maximum number of persistent connections to keep open (default: 8). +Not implemented currently. + =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT -The maximum time to cache a persistent connection, in seconds (default: 15). +The maximum time to cache a persistent connection, in seconds (default: 2). + +Not implemented currently. =back =cut +sub set_proxy($) { + $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix; +} + +# initialise proxy from environment +set_proxy $ENV{http_proxy}; + =head1 SEE ALSO L.