ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.10 by root, Thu Jun 5 13:06:43 2008 UTC vs.
Revision 1.11 by root, Thu Jun 5 15:34:00 2008 UTC

8 8
9=head1 DESCRIPTION 9=head1 DESCRIPTION
10 10
11This module is an L<AnyEvent> user, you need to make sure that you use and 11This module is an L<AnyEvent> user, you need to make sure that you use and
12run a supported event loop. 12run a supported event loop.
13
14This module implements a simple, stateless and non-blocking HTTP
15client. It supports GET, POST and other request methods, cookies and more,
16all on a very low level. It can follow redirects, supports proxies and
17automatically limits the number of connections to the values specified in
18the RFC.
19
20It should generally be a "good client" that is enough for most HTTP
21tasks. Simple tasks should be simple, but complex tasks should still be
22possible as the user retains control over request and response headers.
23
24The caller is responsible for authentication management, cookies (if
25the simplistic implementation in this module doesn't suffice), referer
26and other high-level protocol details for which this module offers only
27limited support.
13 28
14=head2 METHODS 29=head2 METHODS
15 30
16=over 4 31=over 4
17 32
41our $PERSISTENT_TIMEOUT = 2; 56our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 300; 57our $TIMEOUT = 300;
43 58
44# changing these is evil 59# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 60our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 61our $MAX_PER_HOST = 4;
47 62
48our $PROXY; 63our $PROXY;
49 64
50my %KA_COUNT; # number of open keep-alive connections per host 65my %KA_COUNT; # number of open keep-alive connections per host
66my %CO_SLOT; # number of open connections, and wait queue, per host
51 67
52=item http_get $url, key => value..., $cb->($data, $headers) 68=item http_get $url, key => value..., $cb->($data, $headers)
53 69
54Executes an HTTP-GET request. See the http_request function for details on 70Executes an HTTP-GET request. See the http_request function for details on
55additional parameters. 71additional parameters.
164 print Dumper $hdr; 180 print Dumper $hdr;
165 } 181 }
166 ; 182 ;
167 183
168=cut 184=cut
185
186sub _slot_schedule($) {
187 my $host = shift;
188
189 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
190 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
191 # somebody wants that slot
192 ++$CO_SLOT{$host}[0];
193
194 $cb->(AnyEvent::Util::guard {
195 --$CO_SLOT{$host}[0];
196 _slot_schedule $host;
197 });
198 } else {
199 # nobody wants the slot, maybe we can forget about it
200 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
201 warn "$host deleted" unless $CO_SLOT{$host}[0];#d#
202 last;
203 }
204 }
205}
206
207# wait for a free slot on host, call callback
208sub _get_slot($$) {
209 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
210
211 _slot_schedule $_[0];
212}
169 213
170sub http_request($$$;@) { 214sub http_request($$$;@) {
171 my $cb = pop; 215 my $cb = pop;
172 my ($method, $url, %arg) = @_; 216 my ($method, $url, %arg) = @_;
173 217
245 $hdr{host} = $uhost; 289 $hdr{host} = $uhost;
246 } 290 }
247 291
248 $hdr{"content-length"} = length $arg{body}; 292 $hdr{"content-length"} = length $arg{body};
249 293
250 my %state; 294 my %state = (connect_guard => 1);
251 295
296 _get_slot $uhost, sub {
297 $state{slot_guard} = shift;
298
299 return unless $state{connect_guard};
300
252 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 301 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
253 $state{fh} = shift 302 $state{fh} = shift
254 or return $cb->(undef, { Status => 599, Reason => "$!" }); 303 or return $cb->(undef, { Status => 599, Reason => "$!" });
255 304
256 delete $state{connect_guard}; # reduce memory usage, save a tree 305 delete $state{connect_guard}; # reduce memory usage, save a tree
257 306
258 # get handle 307 # get handle
259 $state{handle} = new AnyEvent::Handle 308 $state{handle} = new AnyEvent::Handle
260 fh => $state{fh}, 309 fh => $state{fh},
261 ($scheme eq "https" ? (tls => "connect") : ()); 310 ($scheme eq "https" ? (tls => "connect") : ());
262 311
263 # limit the number of persistent connections 312 # limit the number of persistent connections
264 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 313 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
265 ++$KA_COUNT{$_[1]}; 314 ++$KA_COUNT{$_[1]};
266 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 315 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
267 $hdr{connection} = "keep-alive"; 316 $hdr{connection} = "keep-alive";
268 delete $hdr{connection}; # keep-alive not yet supported 317 delete $hdr{connection}; # keep-alive not yet supported
269 } else { 318 } else {
270 delete $hdr{connection}; 319 delete $hdr{connection};
271 } 320 }
272 321
273 # (re-)configure handle 322 # (re-)configure handle
274 $state{handle}->timeout ($timeout); 323 $state{handle}->timeout ($timeout);
275 $state{handle}->on_error (sub { 324 $state{handle}->on_error (sub {
276 %state = (); 325 %state = ();
277 $cb->(undef, { Status => 599, Reason => "$!" }); 326 $cb->(undef, { Status => 599, Reason => "$!" });
278 }); 327 });
279 $state{handle}->on_eof (sub { 328 $state{handle}->on_eof (sub {
280 %state = (); 329 %state = ();
281 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 330 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
282 }); 331 });
283 332
284 # send request 333 # send request
285 $state{handle}->push_write ( 334 $state{handle}->push_write (
286 "$method $rpath HTTP/1.0\015\012" 335 "$method $rpath HTTP/1.0\015\012"
287 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 336 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
288 . "\015\012" 337 . "\015\012"
289 . (delete $arg{body}) 338 . (delete $arg{body})
290 );
291
292 %hdr = (); # reduce memory usage, save a kitten
293
294 # status line
295 $state{handle}->push_read (line => qr/\015?\012/, sub {
296 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
297 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
298
299 my %hdr = ( # response headers
300 HTTPVersion => "\x00$1",
301 Status => "\x00$2",
302 Reason => "\x00$3",
303 ); 339 );
304 340
341 %hdr = (); # reduce memory usage, save a kitten
342
343 # status line
344 $state{handle}->push_read (line => qr/\015?\012/, sub {
345 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
346 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
347
348 my %hdr = ( # response headers
349 HTTPVersion => "\x00$1",
350 Status => "\x00$2",
351 Reason => "\x00$3",
352 );
353
305 # headers, could be optimized a bit 354 # headers, could be optimized a bit
306 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 355 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
307 for ("$_[1]\012") { 356 for ("$_[1]\012") {
308 # we support spaces in field names, as lotus domino 357 # we support spaces in field names, as lotus domino
309 # creates them. 358 # creates them.
310 $hdr{lc $1} .= "\x00$2" 359 $hdr{lc $1} .= "\x00$2"
311 while /\G 360 while /\G
312 ([^:\000-\037]+): 361 ([^:\000-\037]+):
313 [\011\040]* 362 [\011\040]*
314 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 363 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
315 \015?\012 364 \015?\012
316 /gxc; 365 /gxc;
317 366
318 /\G$/ 367 /\G$/
319 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 368 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" }));
320 } 369 }
321 370
322 substr $_, 0, 1, "" 371 substr $_, 0, 1, ""
323 for values %hdr; 372 for values %hdr;
324 373
325 my $finish = sub { 374 my $finish = sub {
326 %state = (); 375 %state = ();
327 376
328 # set-cookie processing 377 # set-cookie processing
329 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 378 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330 for (split /\x00/, $hdr{"set-cookie"}) { 379 for (split /\x00/, $hdr{"set-cookie"}) {
331 my ($cookie, @arg) = split /;\s*/; 380 my ($cookie, @arg) = split /;\s*/;
332 my ($name, $value) = split /=/, $cookie, 2; 381 my ($name, $value) = split /=/, $cookie, 2;
333 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 382 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334 383
335 my $cdom = (delete $kv{domain}) || $uhost; 384 my $cdom = (delete $kv{domain}) || $uhost;
336 my $cpath = (delete $kv{path}) || "/"; 385 my $cpath = (delete $kv{path}) || "/";
337 386
338 $cdom =~ s/^.?/./; # make sure it starts with a "." 387 $cdom =~ s/^.?/./; # make sure it starts with a "."
339 388
389 next if $cdom =~ /\.$/;
390
391 # this is not rfc-like and not netscape-like. go figure.
340 my $ndots = $cdom =~ y/.//; 392 my $ndots = $cdom =~ y/.//;
341 next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3); 393 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
342 394
343 # store it 395 # store it
344 $arg{cookie_jar}{version} = 1; 396 $arg{cookie_jar}{version} = 1;
345 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 397 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
398 }
399 }
400
401 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
402 # microsoft and other assholes don't give a shit for following standards,
403 # try to support a common form of broken Location header.
404 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
405
406 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
407 } else {
408 $cb->($_[0], $_[1]);
409 }
410 };
411
412 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
413 $finish->(undef, \%hdr);
414 } else {
415 if (exists $hdr{"content-length"}) {
416 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
417 # could cache persistent connection now
418 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
419 # but we don't, due to misdesigns, this is annoyingly complex
420 };
421
422 $finish->($_[1], \%hdr);
423 });
424 } else {
425 # too bad, need to read until we get an error or EOF,
426 # no way to detect winged data.
427 $_[0]->on_error (sub {
428 $finish->($_[0]{rbuf}, \%hdr);
429 });
430 $_[0]->on_eof (undef);
431 $_[0]->on_read (sub { });
346 } 432 }
347 } 433 }
348
349 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350 # microsoft and other assholes don't give a shit for following standards,
351 # try to support a common form of broken Location header.
352 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353
354 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
355 } else {
356 $cb->($_[0], $_[1]);
357 }
358 }; 434 });
359
360 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
361 $finish->(undef, \%hdr);
362 } else {
363 if (exists $hdr{"content-length"}) {
364 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
365 # could cache persistent connection now
366 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
367 # but we don't, due to misdesigns, this is annoyingly complex
368 };
369
370 $finish->($_[1], \%hdr);
371 });
372 } else {
373 # too bad, need to read until we get an error or EOF,
374 # no way to detect winged data.
375 $_[0]->on_error (sub {
376 $finish->($_[0]{rbuf}, \%hdr);
377 });
378 $_[0]->on_eof (undef);
379 $_[0]->on_read (sub { });
380 }
381 }
382 }); 435 });
436 }, sub {
437 $timeout
383 }); 438 };
384 }, sub {
385 $timeout
386 }; 439 };
387 440
388 defined wantarray && AnyEvent::Util::guard { %state = () } 441 defined wantarray && AnyEvent::Util::guard { %state = () }
389} 442}
390 443

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines