ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.30 by root, Thu Oct 23 02:46:20 2008 UTC vs.
Revision 1.31 by root, Fri Oct 24 01:25:54 2008 UTC

105contain the three parts of the HTTP Status-Line of the same name. The 105contain the three parts of the HTTP Status-Line of the same name. The
106pseudo-header C<URL> contains the original URL (which can differ from the 106pseudo-header C<URL> contains the original URL (which can differ from the
107requested URL when following redirects). 107requested URL when following redirects).
108 108
109If the server sends a header multiple lines, then their contents will be 109If the server sends a header multiple lines, then their contents will be
110joined together with C<\x00>. 110joined together with a command (C<,>).
111 111
112If an internal error occurs, such as not being able to resolve a hostname, 112If an internal error occurs, such as not being able to resolve a hostname,
113then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 113then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
114and the C<Reason> pseudo-header will contain an error message. 114and the C<Reason> pseudo-header will contain an error message.
115 115
261 my $proxy = $arg{proxy} || $PROXY; 261 my $proxy = $arg{proxy} || $PROXY;
262 my $timeout = $arg{timeout} || $TIMEOUT; 262 my $timeout = $arg{timeout} || $TIMEOUT;
263 263
264 $hdr{"user-agent"} ||= $USERAGENT; 264 $hdr{"user-agent"} ||= $USERAGENT;
265 265
266 my ($scheme, $authority, $upath, $query, $fragment) = 266 my ($uscheme, $uauthority, $upath, $query, $fragment) =
267 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 267 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
268 268
269 $scheme = lc $scheme; 269 $uscheme = lc $uscheme;
270 270
271 my $uport = $scheme eq "http" ? 80 271 my $uport = $uscheme eq "http" ? 80
272 : $scheme eq "https" ? 443 272 : $uscheme eq "https" ? 443
273 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url }); 273 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
274 274
275 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic 275 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic
276 276
277 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 277 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
278 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url }); 278 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
279 279
280 my $uhost = $1; 280 my $uhost = $1;
281 $uport = $2 if defined $2; 281 $uport = $2 if defined $2;
282 282
285 285
286 $upath =~ s%^/?%/%; 286 $upath =~ s%^/?%/%;
287 287
288 # cookie processing 288 # cookie processing
289 if (my $jar = $arg{cookie_jar}) { 289 if (my $jar = $arg{cookie_jar}) {
290 %$jar = () if $jar->{version} < 1; 290 %$jar = () if $jar->{version} != 1;
291 291
292 my @cookie; 292 my @cookie;
293 293
294 while (my ($chost, $v) = each %$jar) { 294 while (my ($chost, $v) = each %$jar) {
295 if ($chost =~ /^\./) { 295 if ($chost =~ /^\./) {
302 302
303 while (my ($cpath, $v) = each %$v) { 303 while (my ($cpath, $v) = each %$v) {
304 next unless $cpath eq substr $upath, 0, length $cpath; 304 next unless $cpath eq substr $upath, 0, length $cpath;
305 305
306 while (my ($k, $v) = each %$v) { 306 while (my ($k, $v) = each %$v) {
307 next if $scheme ne "https" && exists $v->{secure}; 307 next if $uscheme ne "https" && exists $v->{secure};
308 my $value = $v->{value};
309 $value =~ s/([\\"])/\\$1/g;
308 push @cookie, "$k=$v->{value}"; 310 push @cookie, "$k=\"$value\"";
309 } 311 }
310 } 312 }
311 } 313 }
312 314
313 $hdr{cookie} = join "; ", @cookie 315 $hdr{cookie} = join "; ", @cookie
314 if @cookie; 316 if @cookie;
315 } 317 }
316 318
317 my ($rhost, $rport, $rpath); # request host, port, path 319 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
318 320
319 if ($proxy) { 321 if ($proxy) {
320 ($rhost, $rport, $scheme) = @$proxy; 322 ($rhost, $rport, $rscheme, $rpath) = (@$proxy, $url);
321 $rpath = $url; 323
324 # don't support https requests over https-proxy transport,
325 # can't be done with tls as spec'ed.
326 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
322 } else { 327 } else {
323 ($rhost, $rport, $rpath) = ($uhost, $uport, $upath); 328 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
324 $hdr{host} = $uhost;
325 } 329 }
326 330
331 $hdr{host} = $uhost;
327 $hdr{"content-length"} = length $arg{body}; 332 $hdr{"content-length"} = length $arg{body};
328 333
329 my %state = (connect_guard => 1); 334 my %state = (connect_guard => 1);
330 335
331 _get_slot $uhost, sub { 336 _get_slot $uhost, sub {
339 344
340 delete $state{connect_guard}; # reduce memory usage, save a tree 345 delete $state{connect_guard}; # reduce memory usage, save a tree
341 346
342 # get handle 347 # get handle
343 $state{handle} = new AnyEvent::Handle 348 $state{handle} = new AnyEvent::Handle
344 fh => $state{fh}, 349 fh => $state{fh};
345 ($scheme eq "https" ? (tls => "connect") : ());
346 350
347 # limit the number of persistent connections 351 # limit the number of persistent connections
348 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 352 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
349 ++$KA_COUNT{$_[1]}; 353 ++$KA_COUNT{$_[1]};
350 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 354 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
364 $state{handle}->on_eof (sub { 368 $state{handle}->on_eof (sub {
365 %state = (); 369 %state = ();
366 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url }); 370 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
367 }); 371 });
368 372
373 $state{handle}->starttls ("connect") if $rscheme eq "https";
374
375 # handle actual, non-tunneled, request
376 my $handle_actual_request = sub {
377# $state{handle}->starttls ("connect") if $uscheme eq "https";
378
369 # send request 379 # send request
370 $state{handle}->push_write ( 380 $state{handle}->push_write (
371 "$method $rpath HTTP/1.0\015\012" 381 "$method $rpath HTTP/1.0\015\012"
372 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 382 . (join "", map "\u$_: $hdr{$_}\015\012", keys %hdr)
373 . "\015\012" 383 . "\015\012"
374 . (delete $arg{body}) 384 . (delete $arg{body})
375 );
376
377 %hdr = (); # reduce memory usage, save a kitten
378
379 # status line
380 $state{handle}->push_read (line => qr/\015?\012/, sub {
381 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
382 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
383
384 my %hdr = ( # response headers
385 HTTPVersion => "\x00$1",
386 Status => "\x00$2",
387 Reason => "\x00$3",
388 URL => "\x00$url"
389 ); 385 );
390 386
387 %hdr = (); # reduce memory usage, save a kitten
388
389 # status line
390 $state{handle}->push_read (line => qr/\015?\012/, sub {
391 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
392 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
393
394 my %hdr = ( # response headers
395 HTTPVersion => ",$1",
396 Status => ",$2",
397 Reason => ",$3",
398 URL => ",$url"
399 );
400
391 # headers, could be optimized a bit 401 # headers, could be optimized a bit
392 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 402 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
393 for ("$_[1]\012") { 403 for ("$_[1]\012") {
404 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
405
394 # we support spaces in field names, as lotus domino 406 # we support spaces in field names, as lotus domino
395 # creates them (actually spaces around seperators 407 # creates them (actually spaces around seperators
396 # are strictly allowed in http, they are a security issue). 408 # are strictly allowed in http, they are a security issue).
397 $hdr{lc $1} .= "\x00$2" 409 $hdr{lc $1} .= ",$2"
398 while /\G 410 while /\G
399 ([^:\000-\037]+): 411 ([^:\000-\037]+):
400 [\011\040]* 412 [\011\040]*
401 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 413 ((?: [^\012]+ | \012[\011\040] )*)
402 \015?\012 414 \012
403 /gxc; 415 /gxc;
404 416
405 /\G$/ 417 /\G$/
406 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url })); 418 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
407 } 419 }
408 420
409 substr $_, 0, 1, "" 421 substr $_, 0, 1, ""
410 for values %hdr; 422 for values %hdr;
411 423
412 my $finish = sub { 424 my $finish = sub {
413 # TODO: use destroy method, when/if available 425 # TODO: use destroy method, when/if available
414 #$state{handle}->destroy; 426 #$state{handle}->destroy;
415 $state{handle}->on_eof (undef); 427 $state{handle}->on_eof (undef);
416 $state{handle}->on_error (undef); 428 $state{handle}->on_error (undef);
417 %state = (); 429 %state = ();
418 430
419 # set-cookie processing 431 # set-cookie processing
420 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 432 if ($arg{cookie_jar}) {
421 for (split /\x00/, $hdr{"set-cookie"}) { 433 for ($hdr{"set-cookie"}) {
422 my ($cookie, @arg) = split /;\s*/; 434 # parse NAME=VALUE
423 my ($name, $value) = split /=/, $cookie, 2; 435 my @kv;
424 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 436
425 437 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
438 my $name = $1;
439 my $value = $3;
440
441 unless ($value) {
442 $value = $2;
443 $value =~ s/\\(.)/$1/gs;
444 }
445
446 push @kv, $name => $value;
447
448 last unless /\G\s*;/gc;
449 }
450
451 last unless @kv;
452
453 my $name = shift @kv;
454 my %kv = (value => shift @kv, @kv);
455
426 my $cdom; 456 my $cdom;
427 my $cpath = (delete $kv{path}) || "/"; 457 my $cpath = (delete $kv{path}) || "/";
428 458
429 if (exists $kv{domain}) { 459 if (exists $kv{domain}) {
430 $cdom = delete $kv{domain}; 460 $cdom = delete $kv{domain};
431
432 $cdom =~ s/^\.?/./; # make sure it starts with a "."
433
434 next if $cdom =~ /\.$/;
435 461
462 $cdom =~ s/^\.?/./; # make sure it starts with a "."
463
464 next if $cdom =~ /\.$/;
465
436 # this is not rfc-like and not netscape-like. go figure. 466 # this is not rfc-like and not netscape-like. go figure.
437 my $ndots = $cdom =~ y/.//; 467 my $ndots = $cdom =~ y/.//;
438 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); 468 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
439 } else { 469 } else {
440 $cdom = $uhost; 470 $cdom = $uhost;
471 }
472
473 # store it
474 $arg{cookie_jar}{version} = 1;
475 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
476
477 redo if /\G\s*,/gc;
441 } 478 }
442 479 }
480
481 # microsoft and other shitheads don't give a shit for following standards,
482 # try to support some common forms of broken Location headers.
483 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
484 $_[1]{location} =~ s/^\.\/+//;
485
486 my $url = "$rscheme://$uhost:$uport";
487
488 unless ($_[1]{location} =~ s/^\///) {
489 $url .= $upath;
490 $url =~ s/\/[^\/]*$//;
443 # store it 491 }
444 $arg{cookie_jar}{version} = 1; 492
445 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 493 $_[1]{location} = "$url/$_[1]{location}";
494 }
495
496 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
497 # apparently, mozilla et al. just change POST to GET here
498 # more research is needed before we do the same
499 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
500 } elsif ($_[1]{Status} == 303 && $recurse) {
501 # even http/1.1 is unclear on how to mutate the method
502 $method = "GET" unless $method eq "HEAD";
503 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
504 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
505 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
506 } else {
507 $cb->($_[0], $_[1]);
508 }
509 };
510
511 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
512 $finish->(undef, \%hdr);
513 } else {
514 if (exists $hdr{"content-length"}) {
515 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
516 # could cache persistent connection now
517 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
518 # but we don't, due to misdesigns, this is annoyingly complex
519 };
520
521 $finish->($_[1], \%hdr);
522 });
523 } else {
524 # too bad, need to read until we get an error or EOF,
525 # no way to detect winged data.
526 $_[0]->on_error (sub {
527 $finish->($_[0]{rbuf}, \%hdr);
528 });
529 $_[0]->on_eof (undef);
530 $_[0]->on_read (sub { });
446 } 531 }
447 } 532 }
448
449 # microsoft and other shitheads don't give a shit for following standards,
450 # try to support some common forms of broken Location headers.
451 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
452 $_[1]{location} =~ s/^\.\/+//;
453
454 my $url = "$scheme://$uhost:$uport";
455
456 unless ($_[1]{location} =~ s/^\///) {
457 $url .= $upath;
458 $url =~ s/\/[^\/]*$//;
459 }
460
461 $_[1]{location} = "$url/$_[1]{location}";
462 }
463
464 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
465 # apparently, mozilla et al. just change POST to GET here
466 # more research is needed before we do the same
467 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
468 } elsif ($_[1]{Status} == 303 && $recurse) {
469 # even http/1.1 is unclear on how to mutate the method
470 $method = "GET" unless $method eq "HEAD";
471 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
472 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
473 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
474 } else {
475 $cb->($_[0], $_[1]);
476 }
477 }; 533 });
534 });
535 };
478 536
479 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") { 537 # now handle proxy-CONNECT method
480 $finish->(undef, \%hdr); 538 if ($proxy && $uscheme eq "https") {
539 # oh dear, we have to wrap it into a connect request
540
541 # maybe re-use $uauthority with patched port?
542 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
543 $state{handle}->push_read (line => qr/\015?\012\015?\012/, sub {
544 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
545 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid proxy connect response ($_[1])", URL => $url }));
546
547 if ($2 == 200) {
548 $rpath = $upath;
549 &$handle_actual_request;
481 } else { 550 } else {
482 if (exists $hdr{"content-length"}) { 551 %state = ();
483 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 552 $cb->(undef, { Status => $2, Reason => $3, URL => $url });
484 # could cache persistent connection now
485 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
486 # but we don't, due to misdesigns, this is annoyingly complex
487 };
488
489 $finish->($_[1], \%hdr);
490 });
491 } else {
492 # too bad, need to read until we get an error or EOF,
493 # no way to detect winged data.
494 $_[0]->on_error (sub {
495 $finish->($_[0]{rbuf}, \%hdr);
496 });
497 $_[0]->on_eof (undef);
498 $_[0]->on_read (sub { });
499 }
500 } 553 }
501 }); 554 });
555 } else {
556 &$handle_actual_request;
502 }); 557 }
558
503 }, sub { 559 }, sub {
504 $timeout 560 $timeout
505 }; 561 };
506 }; 562 };
507 563

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines