ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.11 by root, Thu Jun 5 15:34:00 2008 UTC vs.
Revision 1.19 by elmex, Mon Jun 9 13:02:13 2008 UTC

3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client 3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4 4
5=head1 SYNOPSIS 5=head1 SYNOPSIS
6 6
7 use AnyEvent::HTTP; 7 use AnyEvent::HTTP;
8
9 http_get "http://www.nethype.de/", sub { print $_[1] };
10
11 # ... do something else here
8 12
9=head1 DESCRIPTION 13=head1 DESCRIPTION
10 14
11This module is an L<AnyEvent> user, you need to make sure that you use and 15This module is an L<AnyEvent> user, you need to make sure that you use and
12run a supported event loop. 16run a supported event loop.
44use AnyEvent::Socket (); 48use AnyEvent::Socket ();
45use AnyEvent::Handle (); 49use AnyEvent::Handle ();
46 50
47use base Exporter::; 51use base Exporter::;
48 52
49our $VERSION = '1.0'; 53our $VERSION = '1.01';
50 54
51our @EXPORT = qw(http_get http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
52 56
53our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
54our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
55our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
56our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
59# changing these is evil 63# changing these is evil
60our $MAX_PERSISTENT_PER_HOST = 2; 64our $MAX_PERSISTENT_PER_HOST = 2;
61our $MAX_PER_HOST = 4; 65our $MAX_PER_HOST = 4;
62 66
63our $PROXY; 67our $PROXY;
68our $ACTIVE = 0;
64 69
65my %KA_COUNT; # number of open keep-alive connections per host 70my %KA_COUNT; # number of open keep-alive connections per host
66my %CO_SLOT; # number of open connections, and wait queue, per host 71my %CO_SLOT; # number of open connections, and wait queue, per host
67 72
68=item http_get $url, key => value..., $cb->($data, $headers) 73=item http_get $url, key => value..., $cb->($data, $headers)
121Whether to recurse requests or not, e.g. on redirects, authentication 126Whether to recurse requests or not, e.g. on redirects, authentication
122retries and so on, and how often to do so. 127retries and so on, and how often to do so.
123 128
124=item headers => hashref 129=item headers => hashref
125 130
126The request headers to use. 131The request headers to use. Currently, C<http_request> may provide its
132own C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers
133and will provide defaults for C<User-Agent:> and C<Referer:>.
127 134
128=item timeout => $seconds 135=item timeout => $seconds
129 136
130The time-out to use for various stages - each connect attempt will reset 137The time-out to use for various stages - each connect attempt will reset
131the timeout, as will read or write activity. Default timeout is 5 minutes. 138the timeout, as will read or write activity. Default timeout is 5 minutes.
181 } 188 }
182 ; 189 ;
183 190
184=cut 191=cut
185 192
193sub _slot_schedule;
186sub _slot_schedule($) { 194sub _slot_schedule($) {
187 my $host = shift; 195 my $host = shift;
188 196
189 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) { 197 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
190 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) { 198 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
191 # somebody wnats that slot 199 # somebody wants that slot
192 ++$CO_SLOT{$host}[0]; 200 ++$CO_SLOT{$host}[0];
201 ++$ACTIVE;
193 202
194 $cb->(AnyEvent::Util::guard { 203 $cb->(AnyEvent::Util::guard {
204 --$ACTIVE;
195 --$CO_SLOT{$host}[0]; 205 --$CO_SLOT{$host}[0];
196 _slot_schedule $host; 206 _slot_schedule $host;
197 }); 207 });
198 } else { 208 } else {
199 # nobody wants the slot, maybe we can forget about it 209 # nobody wants the slot, maybe we can forget about it
200 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0]; 210 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
201 warn "$host deleted" unless $CO_SLOT{$host}[0];#d#
202 last; 211 last;
203 } 212 }
204 } 213 }
205} 214}
206 215
209 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 218 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
210 219
211 _slot_schedule $_[0]; 220 _slot_schedule $_[0];
212} 221}
213 222
214sub http_request($$$;@) { 223sub http_request($$@) {
215 my $cb = pop; 224 my $cb = pop;
216 my ($method, $url, %arg) = @_; 225 my ($method, $url, %arg) = @_;
217 226
218 my %hdr; 227 my %hdr;
219 228
225 } 234 }
226 } 235 }
227 236
228 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; 237 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
229 238
230 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) 239 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url })
231 if $recurse < 0; 240 if $recurse < 0;
232 241
233 my $proxy = $arg{proxy} || $PROXY; 242 my $proxy = $arg{proxy} || $PROXY;
234 my $timeout = $arg{timeout} || $TIMEOUT; 243 my $timeout = $arg{timeout} || $TIMEOUT;
235 244
240 249
241 $scheme = lc $scheme; 250 $scheme = lc $scheme;
242 251
243 my $uport = $scheme eq "http" ? 80 252 my $uport = $scheme eq "http" ? 80
244 : $scheme eq "https" ? 443 253 : $scheme eq "https" ? 443
245 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 254 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
255
256 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
246 257
247 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 258 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
248 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 259 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
249 260
250 my $uhost = $1; 261 my $uhost = $1;
251 $uport = $2 if defined $2; 262 $uport = $2 if defined $2;
252 263
253 $uhost =~ s/^\[(.*)\]$/$1/; 264 $uhost =~ s/^\[(.*)\]$/$1/;
298 309
299 return unless $state{connect_guard}; 310 return unless $state{connect_guard};
300 311
301 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 312 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
302 $state{fh} = shift 313 $state{fh} = shift
303 or return $cb->(undef, { Status => 599, Reason => "$!" }); 314 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url });
304 315
305 delete $state{connect_guard}; # reduce memory usage, save a tree 316 delete $state{connect_guard}; # reduce memory usage, save a tree
306 317
307 # get handle 318 # get handle
308 $state{handle} = new AnyEvent::Handle 319 $state{handle} = new AnyEvent::Handle
320 } 331 }
321 332
322 # (re-)configure handle 333 # (re-)configure handle
323 $state{handle}->timeout ($timeout); 334 $state{handle}->timeout ($timeout);
324 $state{handle}->on_error (sub { 335 $state{handle}->on_error (sub {
336 my $errno = "$!";
325 %state = (); 337 %state = ();
326 $cb->(undef, { Status => 599, Reason => "$!" }); 338 $cb->(undef, { Status => 599, Reason => $errno, URL => $url });
327 }); 339 });
328 $state{handle}->on_eof (sub { 340 $state{handle}->on_eof (sub {
329 %state = (); 341 %state = ();
330 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 342 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
331 }); 343 });
332 344
333 # send request 345 # send request
334 $state{handle}->push_write ( 346 $state{handle}->push_write (
335 "$method $rpath HTTP/1.0\015\012" 347 "$method $rpath HTTP/1.0\015\012"
341 %hdr = (); # reduce memory usage, save a kitten 353 %hdr = (); # reduce memory usage, save a kitten
342 354
343 # status line 355 # status line
344 $state{handle}->push_read (line => qr/\015?\012/, sub { 356 $state{handle}->push_read (line => qr/\015?\012/, sub {
345 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix 357 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
346 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); 358 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
347 359
348 my %hdr = ( # response headers 360 my %hdr = ( # response headers
349 HTTPVersion => "\x00$1", 361 HTTPVersion => "\x00$1",
350 Status => "\x00$2", 362 Status => "\x00$2",
351 Reason => "\x00$3", 363 Reason => "\x00$3",
364 URL => "\x00$url"
352 ); 365 );
353 366
354 # headers, could be optimized a bit 367 # headers, could be optimized a bit
355 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 368 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
356 for ("$_[1]\012") { 369 for ("$_[1]\012") {
363 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 376 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
364 \015?\012 377 \015?\012
365 /gxc; 378 /gxc;
366 379
367 /\G$/ 380 /\G$/
368 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 381 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
369 } 382 }
370 383
371 substr $_, 0, 1, "" 384 substr $_, 0, 1, ""
372 for values %hdr; 385 for values %hdr;
373 386
396 $arg{cookie_jar}{version} = 1; 409 $arg{cookie_jar}{version} = 1;
397 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 410 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
398 } 411 }
399 } 412 }
400 413
401 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) { 414 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
402 # microsoft and other assholes don't give a shit for following standards, 415 # microsoft and other assholes don't give a shit for following standards,
403 # try to support a common form of broken Location header. 416 # try to support a common form of broken Location header.
404 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; 417 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
405 418
406 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 419 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
439 }; 452 };
440 453
441 defined wantarray && AnyEvent::Util::guard { %state = () } 454 defined wantarray && AnyEvent::Util::guard { %state = () }
442} 455}
443 456
444sub http_get($$;@) { 457sub http_get($@) {
445 unshift @_, "GET"; 458 unshift @_, "GET";
446 &http_request 459 &http_request
447} 460}
448 461
449sub http_head($$;@) { 462sub http_head($@) {
450 unshift @_, "HEAD"; 463 unshift @_, "HEAD";
451 &http_request 464 &http_request
452} 465}
453 466
454sub http_post($$$;@) { 467sub http_post($$@) {
455 unshift @_, "POST", "body"; 468 unshift @_, "POST", "body";
456 &http_request 469 &http_request
457} 470}
458 471
459=back 472=back
486 499
487The maximum time to cache a persistent connection, in seconds (default: 2). 500The maximum time to cache a persistent connection, in seconds (default: 2).
488 501
489Not implemented currently. 502Not implemented currently.
490 503
504=item $AnyEvent::HTTP::ACTIVE
505
506The number of active connections. This is not the number of currently
507running requests, but the number of currently open and non-idle TCP
508connections. This number of can be useful for load-leveling.
509
491=back 510=back
492 511
493=cut 512=cut
494 513
495sub set_proxy($) { 514sub set_proxy($) {
503 522
504L<AnyEvent>. 523L<AnyEvent>.
505 524
506=head1 AUTHOR 525=head1 AUTHOR
507 526
508 Marc Lehmann <schmorp@schmorp.de> 527 Marc Lehmann <schmorp@schmorp.de>
509 http://home.schmorp.de/ 528 http://home.schmorp.de/
510 529
511=cut 530=cut
512 531
5131 5321
514 533

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines