ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.39 by root, Thu Jun 11 12:46:01 2009 UTC vs.
Revision 1.40 by root, Sun Jul 5 01:45:01 2009 UTC

41use strict; 41use strict;
42no warnings; 42no warnings;
43 43
44use Carp; 44use Carp;
45 45
46use AnyEvent (); 46use AnyEvent 4.452 ();
47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Socket (); 48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 49use AnyEvent::Handle ();
50 50
51use base Exporter::; 51use base Exporter::;
52 52
53our $VERSION = '1.12'; 53our $VERSION = '1.12';
54 54
55our @EXPORT = qw(http_get http_post http_head http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
56 56
57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
59our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
60our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
61our $TIMEOUT = 300; 61our $TIMEOUT = 300;
62 62
165based on the original netscape specification. 165based on the original netscape specification.
166 166
167The C<$hash_ref> must be an (initially empty) hash reference which will 167The C<$hash_ref> must be an (initially empty) hash reference which will
168get updated automatically. It is possible to save the cookie_jar to 168get updated automatically. It is possible to save the cookie_jar to
169persistent storage with something like JSON or Storable, but this is not 169persistent storage with something like JSON or Storable, but this is not
170recommended, as expire times are currently being ignored. 170recommended, as expiry times are currently being ignored.
171 171
172Note that this cookie implementation is not of very high quality, nor 172Note that this cookie implementation is not of very high quality, nor
173meant to be complete. If you want complete cookie management you have to 173meant to be complete. If you want complete cookie management you have to
174do that on your own. C<cookie_jar> is meant as a quick fix to get some 174do that on your own. C<cookie_jar> is meant as a quick fix to get some
175cookie-using sites working. Cookies are a privacy disaster, do not use 175cookie-using sites working. Cookies are a privacy disaster, do not use
176them unless required to. 176them unless required to.
177
178=item tls_ctx => $scheme | $tls_ctx
179
180Specifies the AnyEvent::TLS context to be used for https connections. This
181parameter follows the same rules as the C<tls_ctx> parameter to
182L<AnyEvent::Handle>, but additionally, the two strings C<low> or
183C<high> can be specified, which give you a predefined low-security (no
184verification, highest compatibility) and high-security (CA and common-name
185verification) TLS context.
186
187The default for this option is C<low>, which could be interpreted as "give
188me the page, no matter what".
177 189
178=back 190=back
179 191
180Example: make a simple HTTP GET request for http://www.nethype.de/ 192Example: make a simple HTTP GET request for http://www.nethype.de/
181 193
240} 252}
241 253
242our $qr_nl = qr<\015?\012>; 254our $qr_nl = qr<\015?\012>;
243our $qr_nlnl = qr<\015?\012\015?\012>; 255our $qr_nlnl = qr<\015?\012\015?\012>;
244 256
257our $TLS_CTX_LOW = { cache => 1, dh => undef, sslv2 => 1 };
258our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_cn => "https", dh => "skip4096" };
259
245sub http_request($$@) { 260sub http_request($$@) {
246 my $cb = pop; 261 my $cb = pop;
247 my ($method, $url, %arg) = @_; 262 my ($method, $url, %arg) = @_;
248 263
249 my %hdr; 264 my %hdr;
265
266 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
267 $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
250 268
251 $method = uc $method; 269 $method = uc $method;
252 270
253 if (my $hdr = $arg{headers}) { 271 if (my $hdr = $arg{headers}) {
254 while (my ($k, $v) = each %$hdr) { 272 while (my ($k, $v) = each %$hdr) {
256 } 274 }
257 } 275 }
258 276
259 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; 277 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
260 278
261 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url }) 279 return $cb->(undef, { Status => 599, Reason => "Too many redirections", URL => $url })
262 if $recurse < 0; 280 if $recurse < 0;
263 281
264 my $proxy = $arg{proxy} || $PROXY; 282 my $proxy = $arg{proxy} || $PROXY;
265 my $timeout = $arg{timeout} || $TIMEOUT; 283 my $timeout = $arg{timeout} || $TIMEOUT;
266 284
271 289
272 $uscheme = lc $uscheme; 290 $uscheme = lc $uscheme;
273 291
274 my $uport = $uscheme eq "http" ? 80 292 my $uport = $uscheme eq "http" ? 80
275 : $uscheme eq "https" ? 443 293 : $uscheme eq "https" ? 443
276 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url }); 294 : return $cb->(undef, { Status => 599, Reason => "Only http and https URL schemes supported (not '$uscheme')", URL => $url });
277 295
278 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic 296 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic
279 297
280 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 298 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
281 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url }); 299 or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", URL => $url });
282 300
283 my $uhost = $1; 301 my $uhost = $1;
284 $uport = $2 if defined $2; 302 $uport = $2 if defined $2;
285 303
286 $uhost =~ s/^\[(.*)\]$/$1/; 304 $uhost =~ s/^\[(.*)\]$/$1/;
341 359
342 return unless $state{connect_guard}; 360 return unless $state{connect_guard};
343 361
344 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 362 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
345 $state{fh} = shift 363 $state{fh} = shift
346 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url }); 364 or return (%state = (), $cb->(undef, { Status => 599, Reason => "$!", URL => $url }));
347 pop; # free memory, save a tree 365 pop; # free memory, save a tree
348 366
349 return unless delete $state{connect_guard}; 367 return unless delete $state{connect_guard};
350 368
351 # get handle 369 # get handle
352 $state{handle} = new AnyEvent::Handle 370 $state{handle} = new AnyEvent::Handle
353 fh => $state{fh}, 371 fh => $state{fh},
354 timeout => $timeout; 372 timeout => $timeout,
373 peername => $rhost,
374 tls_ctx => $arg{tls_ctx};
355 375
356 # limit the number of persistent connections 376 # limit the number of persistent connections
357 # keepalive not yet supported 377 # keepalive not yet supported
358 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 378 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
359 ++$KA_COUNT{$_[1]}; 379 ++$KA_COUNT{$_[1]};
365 delete $hdr{connection}; 385 delete $hdr{connection};
366 } 386 }
367 387
368 # (re-)configure handle 388 # (re-)configure handle
369 $state{handle}->on_error (sub { 389 $state{handle}->on_error (sub {
370 my $errno = "$!";
371 %state = (); 390 %state = ();
372 $cb->(undef, { Status => 599, Reason => $errno, URL => $url }); 391 $cb->(undef, { Status => 599, Reason => $_[2], URL => $url });
373 }); 392 });
374 $state{handle}->on_eof (sub { 393 $state{handle}->on_eof (sub {
375 %state = (); 394 %state = ();
376 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url }); 395 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", URL => $url });
377 }); 396 });
378 397
379 $state{handle}->starttls ("connect") if $rscheme eq "https"; 398 $state{handle}->starttls ("connect") if $rscheme eq "https";
380 399
381 # handle actual, non-tunneled, request 400 # handle actual, non-tunneled, request
393 %hdr = (); # reduce memory usage, save a kitten 412 %hdr = (); # reduce memory usage, save a kitten
394 413
395 # status line 414 # status line
396 $state{handle}->push_read (line => $qr_nl, sub { 415 $state{handle}->push_read (line => $qr_nl, sub {
397 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 416 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
398 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url })); 417 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response ($_[1])", URL => $url }));
399 418
400 my %hdr = ( # response headers 419 my %hdr = ( # response headers
401 HTTPVersion => ",$1", 420 HTTPVersion => ",$1",
402 Status => ",$2", 421 Status => ",$2",
403 Reason => ",$3", 422 Reason => ",$3",
407 # headers, could be optimized a bit 426 # headers, could be optimized a bit
408 $state{handle}->unshift_read (line => $qr_nlnl, sub { 427 $state{handle}->unshift_read (line => $qr_nlnl, sub {
409 for ("$_[1]\012") { 428 for ("$_[1]\012") {
410 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 429 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
411 430
412 # we support spaces in field names, as lotus domino 431 # things seen, not parsed:
413 # creates them (actually spaces around seperators 432 # p3pP="NON CUR OTPi OUR NOR UNI"
414 # are strictly allowed in http, they are a security issue). 433
415 $hdr{lc $1} .= ",$2" 434 $hdr{lc $1} .= ",$2"
416 while /\G 435 while /\G
417 ([^:\000-\037]+): 436 ([^:\000-\037]+):
418 [\011\040]* 437 [\011\040]*
419 ((?: [^\012]+ | \012[\011\040] )*) 438 ((?: [^\012]+ | \012[\011\040] )*)
420 \012 439 \012
421 /gxc; 440 /gxc;
422 441
423 /\G$/ 442 /\G$/
424 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url })); 443 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", URL => $url }));
425 } 444 }
426 445
427 substr $_, 0, 1, "" 446 substr $_, 0, 1, ""
428 for values %hdr; 447 for values %hdr;
429 448
497 } 516 }
498 517
499 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") { 518 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
500 # apparently, mozilla et al. just change POST to GET here 519 # apparently, mozilla et al. just change POST to GET here
501 # more research is needed before we do the same 520 # more research is needed before we do the same
502 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 521 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
503 } elsif ($_[1]{Status} == 303 && $recurse) { 522 } elsif ($_[1]{Status} == 303 && $recurse) {
504 # even http/1.1 is unclear on how to mutate the method 523 # even http/1.1 is unclear on how to mutate the method
505 $method = "GET" unless $method eq "HEAD"; 524 $method = "GET" unless $method eq "HEAD";
506 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 525 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
507 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) { 526 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
545 564
546 # maybe re-use $uauthority with patched port? 565 # maybe re-use $uauthority with patched port?
547 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 566 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
548 $state{handle}->push_read (line => $qr_nlnl, sub { 567 $state{handle}->push_read (line => $qr_nlnl, sub {
549 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 568 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
550 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid proxy connect response ($_[1])", URL => $url })); 569 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", URL => $url }));
551 570
552 if ($2 == 200) { 571 if ($2 == 200) {
553 $rpath = $upath; 572 $rpath = $upath;
554 &$handle_actual_request; 573 &$handle_actual_request;
555 } else { 574 } else {
601The default value for the C<recurse> request parameter (default: C<10>). 620The default value for the C<recurse> request parameter (default: C<10>).
602 621
603=item $AnyEvent::HTTP::USERAGENT 622=item $AnyEvent::HTTP::USERAGENT
604 623
605The default value for the C<User-Agent> header (the default is 624The default value for the C<User-Agent> header (the default is
606C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 625C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
607 626
608=item $AnyEvent::HTTP::MAX_PERSISTENT 627=item $AnyEvent::HTTP::MAX_PERSISTENT
609 628
610The maximum number of persistent connections to keep open (default: 8). 629The maximum number of persistent connections to keep open (default: 8).
611 630

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines