ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.32 by root, Fri Oct 24 01:27:29 2008 UTC vs.
Revision 1.40 by root, Sun Jul 5 01:45:01 2009 UTC

41use strict; 41use strict;
42no warnings; 42no warnings;
43 43
44use Carp; 44use Carp;
45 45
46use AnyEvent (); 46use AnyEvent 4.452 ();
47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Socket (); 48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 49use AnyEvent::Handle ();
50 50
51use base Exporter::; 51use base Exporter::;
52 52
53our $VERSION = '1.05'; 53our $VERSION = '1.12';
54 54
55our @EXPORT = qw(http_get http_post http_head http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
56 56
57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
59our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
60our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
61our $TIMEOUT = 300; 61our $TIMEOUT = 300;
62 62
165based on the original netscape specification. 165based on the original netscape specification.
166 166
167The C<$hash_ref> must be an (initially empty) hash reference which will 167The C<$hash_ref> must be an (initially empty) hash reference which will
168get updated automatically. It is possible to save the cookie_jar to 168get updated automatically. It is possible to save the cookie_jar to
169persistent storage with something like JSON or Storable, but this is not 169persistent storage with something like JSON or Storable, but this is not
170recommended, as expire times are currently being ignored. 170recommended, as expiry times are currently being ignored.
171 171
172Note that this cookie implementation is not of very high quality, nor 172Note that this cookie implementation is not of very high quality, nor
173meant to be complete. If you want complete cookie management you have to 173meant to be complete. If you want complete cookie management you have to
174do that on your own. C<cookie_jar> is meant as a quick fix to get some 174do that on your own. C<cookie_jar> is meant as a quick fix to get some
175cookie-using sites working. Cookies are a privacy disaster, do not use 175cookie-using sites working. Cookies are a privacy disaster, do not use
176them unless required to. 176them unless required to.
177
178=item tls_ctx => $scheme | $tls_ctx
179
180Specifies the AnyEvent::TLS context to be used for https connections. This
181parameter follows the same rules as the C<tls_ctx> parameter to
182L<AnyEvent::Handle>, but additionally, the two strings C<low> or
183C<high> can be specified, which give you a predefined low-security (no
184verification, highest compatibility) and high-security (CA and common-name
185verification) TLS context.
186
187The default for this option is C<low>, which could be interpreted as "give
188me the page, no matter what".
177 189
178=back 190=back
179 191
180Example: make a simple HTTP GET request for http://www.nethype.de/ 192Example: make a simple HTTP GET request for http://www.nethype.de/
181 193
237 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 249 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
238 250
239 _slot_schedule $_[0]; 251 _slot_schedule $_[0];
240} 252}
241 253
254our $qr_nl = qr<\015?\012>;
255our $qr_nlnl = qr<\015?\012\015?\012>;
256
257our $TLS_CTX_LOW = { cache => 1, dh => undef, sslv2 => 1 };
258our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_cn => "https", dh => "skip4096" };
259
242sub http_request($$@) { 260sub http_request($$@) {
243 my $cb = pop; 261 my $cb = pop;
244 my ($method, $url, %arg) = @_; 262 my ($method, $url, %arg) = @_;
245 263
246 my %hdr; 264 my %hdr;
265
266 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
267 $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
247 268
248 $method = uc $method; 269 $method = uc $method;
249 270
250 if (my $hdr = $arg{headers}) { 271 if (my $hdr = $arg{headers}) {
251 while (my ($k, $v) = each %$hdr) { 272 while (my ($k, $v) = each %$hdr) {
253 } 274 }
254 } 275 }
255 276
256 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; 277 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
257 278
258 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url }) 279 return $cb->(undef, { Status => 599, Reason => "Too many redirections", URL => $url })
259 if $recurse < 0; 280 if $recurse < 0;
260 281
261 my $proxy = $arg{proxy} || $PROXY; 282 my $proxy = $arg{proxy} || $PROXY;
262 my $timeout = $arg{timeout} || $TIMEOUT; 283 my $timeout = $arg{timeout} || $TIMEOUT;
263 284
268 289
269 $uscheme = lc $uscheme; 290 $uscheme = lc $uscheme;
270 291
271 my $uport = $uscheme eq "http" ? 80 292 my $uport = $uscheme eq "http" ? 80
272 : $uscheme eq "https" ? 443 293 : $uscheme eq "https" ? 443
273 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url }); 294 : return $cb->(undef, { Status => 599, Reason => "Only http and https URL schemes supported (not '$uscheme')", URL => $url });
274 295
275 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic 296 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic
276 297
277 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 298 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
278 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url }); 299 or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", URL => $url });
279 300
280 my $uhost = $1; 301 my $uhost = $1;
281 $uport = $2 if defined $2; 302 $uport = $2 if defined $2;
282 303
283 $uhost =~ s/^\[(.*)\]$/$1/; 304 $uhost =~ s/^\[(.*)\]$/$1/;
317 } 338 }
318 339
319 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path 340 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
320 341
321 if ($proxy) { 342 if ($proxy) {
322 ($rhost, $rport, $rscheme, $rpath) = (@$proxy, $url); 343 ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
323 344
324 # don't support https requests over https-proxy transport, 345 # don't support https requests over https-proxy transport,
325 # can't be done with tls as spec'ed. 346 # can't be done with tls as spec'ed, unless you double-encrypt.
326 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; 347 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
327 } else { 348 } else {
328 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); 349 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
329 } 350 }
330 351
338 359
339 return unless $state{connect_guard}; 360 return unless $state{connect_guard};
340 361
341 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 362 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
342 $state{fh} = shift 363 $state{fh} = shift
343 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url }); 364 or return (%state = (), $cb->(undef, { Status => 599, Reason => "$!", URL => $url }));
365 pop; # free memory, save a tree
344 366
345 delete $state{connect_guard}; # reduce memory usage, save a tree 367 return unless delete $state{connect_guard};
346 368
347 # get handle 369 # get handle
348 $state{handle} = new AnyEvent::Handle 370 $state{handle} = new AnyEvent::Handle
349 fh => $state{fh}; 371 fh => $state{fh},
372 timeout => $timeout,
373 peername => $rhost,
374 tls_ctx => $arg{tls_ctx};
350 375
351 # limit the number of persistent connections 376 # limit the number of persistent connections
377 # keepalive not yet supported
352 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 378 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
353 ++$KA_COUNT{$_[1]}; 379 ++$KA_COUNT{$_[1]};
354 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 380 $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
381 --$KA_COUNT{$_[1]}
382 };
355 $hdr{connection} = "keep-alive"; 383 $hdr{connection} = "keep-alive";
356 delete $hdr{connection}; # keep-alive not yet supported
357 } else { 384 } else {
358 delete $hdr{connection}; 385 delete $hdr{connection};
359 } 386 }
360 387
361 # (re-)configure handle 388 # (re-)configure handle
362 $state{handle}->timeout ($timeout);
363 $state{handle}->on_error (sub { 389 $state{handle}->on_error (sub {
364 my $errno = "$!";
365 %state = (); 390 %state = ();
366 $cb->(undef, { Status => 599, Reason => $errno, URL => $url }); 391 $cb->(undef, { Status => 599, Reason => $_[2], URL => $url });
367 }); 392 });
368 $state{handle}->on_eof (sub { 393 $state{handle}->on_eof (sub {
369 %state = (); 394 %state = ();
370 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url }); 395 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", URL => $url });
371 }); 396 });
372 397
373 $state{handle}->starttls ("connect") if $rscheme eq "https"; 398 $state{handle}->starttls ("connect") if $rscheme eq "https";
374 399
375 # handle actual, non-tunneled, request 400 # handle actual, non-tunneled, request
376 my $handle_actual_request = sub { 401 my $handle_actual_request = sub {
377# $state{handle}->starttls ("connect") if $uscheme eq "https"; 402 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
378 403
379 # send request 404 # send request
380 $state{handle}->push_write ( 405 $state{handle}->push_write (
381 "$method $rpath HTTP/1.0\015\012" 406 "$method $rpath HTTP/1.0\015\012"
382 . (join "", map "\u$_: $hdr{$_}\015\012", keys %hdr) 407 . (join "", map "\u$_: $hdr{$_}\015\012", keys %hdr)
385 ); 410 );
386 411
387 %hdr = (); # reduce memory usage, save a kitten 412 %hdr = (); # reduce memory usage, save a kitten
388 413
389 # status line 414 # status line
390 $state{handle}->push_read (line => qr/\015?\012/, sub { 415 $state{handle}->push_read (line => $qr_nl, sub {
391 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 416 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
392 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url })); 417 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response ($_[1])", URL => $url }));
393 418
394 my %hdr = ( # response headers 419 my %hdr = ( # response headers
395 HTTPVersion => ",$1", 420 HTTPVersion => ",$1",
396 Status => ",$2", 421 Status => ",$2",
397 Reason => ",$3", 422 Reason => ",$3",
398 URL => ",$url" 423 URL => ",$url"
399 ); 424 );
400 425
401 # headers, could be optimized a bit 426 # headers, could be optimized a bit
402 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 427 $state{handle}->unshift_read (line => $qr_nlnl, sub {
403 for ("$_[1]\012") { 428 for ("$_[1]\012") {
404 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 429 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
405 430
406 # we support spaces in field names, as lotus domino 431 # things seen, not parsed:
407 # creates them (actually spaces around seperators 432 # p3pP="NON CUR OTPi OUR NOR UNI"
408 # are strictly allowed in http, they are a security issue). 433
409 $hdr{lc $1} .= ",$2" 434 $hdr{lc $1} .= ",$2"
410 while /\G 435 while /\G
411 ([^:\000-\037]+): 436 ([^:\000-\037]+):
412 [\011\040]* 437 [\011\040]*
413 ((?: [^\012]+ | \012[\011\040] )*) 438 ((?: [^\012]+ | \012[\011\040] )*)
414 \012 439 \012
415 /gxc; 440 /gxc;
416 441
417 /\G$/ 442 /\G$/
418 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url })); 443 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", URL => $url }));
419 } 444 }
420 445
421 substr $_, 0, 1, "" 446 substr $_, 0, 1, ""
422 for values %hdr; 447 for values %hdr;
423 448
424 my $finish = sub { 449 my $finish = sub {
425 # TODO: use destroy method, when/if available
426 #$state{handle}->destroy; 450 $state{handle}->destroy;
427 $state{handle}->on_eof (undef);
428 $state{handle}->on_error (undef);
429 %state = (); 451 %state = ();
430 452
431 # set-cookie processing 453 # set-cookie processing
432 if ($arg{cookie_jar}) { 454 if ($arg{cookie_jar}) {
433 for ($hdr{"set-cookie"}) { 455 for ($hdr{"set-cookie"}) {
494 } 516 }
495 517
496 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") { 518 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
497 # apparently, mozilla et al. just change POST to GET here 519 # apparently, mozilla et al. just change POST to GET here
498 # more research is needed before we do the same 520 # more research is needed before we do the same
499 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 521 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
500 } elsif ($_[1]{Status} == 303 && $recurse) { 522 } elsif ($_[1]{Status} == 303 && $recurse) {
501 # even http/1.1 is unclear on how to mutate the method 523 # even http/1.1 is unclear on how to mutate the method
502 $method = "GET" unless $method eq "HEAD"; 524 $method = "GET" unless $method eq "HEAD";
503 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 525 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
504 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) { 526 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
522 }); 544 });
523 } else { 545 } else {
524 # too bad, need to read until we get an error or EOF, 546 # too bad, need to read until we get an error or EOF,
525 # no way to detect winged data. 547 # no way to detect winged data.
526 $_[0]->on_error (sub { 548 $_[0]->on_error (sub {
549 # delete ought to be more efficient, as we would have to make
550 # a copy otherwise as $_[0] gets destroyed.
527 $finish->($_[0]{rbuf}, \%hdr); 551 $finish->(delete $_[0]{rbuf}, \%hdr);
528 }); 552 });
529 $_[0]->on_eof (undef); 553 $_[0]->on_eof (undef);
530 $_[0]->on_read (sub { }); 554 $_[0]->on_read (sub { });
531 } 555 }
532 } 556 }
538 if ($proxy && $uscheme eq "https") { 562 if ($proxy && $uscheme eq "https") {
539 # oh dear, we have to wrap it into a connect request 563 # oh dear, we have to wrap it into a connect request
540 564
541 # maybe re-use $uauthority with patched port? 565 # maybe re-use $uauthority with patched port?
542 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 566 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
543 $state{handle}->push_read (line => qr/\015?\012\015?\012/, sub { 567 $state{handle}->push_read (line => $qr_nlnl, sub {
544 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 568 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
545 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid proxy connect response ($_[1])", URL => $url })); 569 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", URL => $url }));
546 570
547 if ($2 == 200) { 571 if ($2 == 200) {
548 $rpath = $upath; 572 $rpath = $upath;
549 &$handle_actual_request; 573 &$handle_actual_request;
550 } else { 574 } else {
596The default value for the C<recurse> request parameter (default: C<10>). 620The default value for the C<recurse> request parameter (default: C<10>).
597 621
598=item $AnyEvent::HTTP::USERAGENT 622=item $AnyEvent::HTTP::USERAGENT
599 623
600The default value for the C<User-Agent> header (the default is 624The default value for the C<User-Agent> header (the default is
601C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 625C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
602 626
603=item $AnyEvent::HTTP::MAX_PERSISTENT 627=item $AnyEvent::HTTP::MAX_PERSISTENT
604 628
605The maximum number of persistent connections to keep open (default: 8). 629The maximum number of persistent connections to keep open (default: 8).
606 630
636=head1 AUTHOR 660=head1 AUTHOR
637 661
638 Marc Lehmann <schmorp@schmorp.de> 662 Marc Lehmann <schmorp@schmorp.de>
639 http://home.schmorp.de/ 663 http://home.schmorp.de/
640 664
665With many thanks to Дмитрий Шалашов, who provided countless
666testcases and bugreports.
667
641=cut 668=cut
642 669
6431 6701
644 671

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines