ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.29 by root, Wed Oct 22 23:28:11 2008 UTC vs.
Revision 1.32 by root, Fri Oct 24 01:27:29 2008 UTC

104headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason> 104headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason>
105contain the three parts of the HTTP Status-Line of the same name. The 105contain the three parts of the HTTP Status-Line of the same name. The
106pseudo-header C<URL> contains the original URL (which can differ from the 106pseudo-header C<URL> contains the original URL (which can differ from the
107requested URL when following redirects). 107requested URL when following redirects).
108 108
109If the server sends a header multiple lines, then their contents will be 109If the server sends a header multiple times, then their contents will be
110joined together with C<\x00>. 110joined together with a comma (C<,>), as per the HTTP spec.
111 111
112If an internal error occurs, such as not being able to resolve a hostname, 112If an internal error occurs, such as not being able to resolve a hostname,
113then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 113then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
114and the C<Reason> pseudo-header will contain an error message. 114and the C<Reason> pseudo-header will contain an error message.
115 115
261 my $proxy = $arg{proxy} || $PROXY; 261 my $proxy = $arg{proxy} || $PROXY;
262 my $timeout = $arg{timeout} || $TIMEOUT; 262 my $timeout = $arg{timeout} || $TIMEOUT;
263 263
264 $hdr{"user-agent"} ||= $USERAGENT; 264 $hdr{"user-agent"} ||= $USERAGENT;
265 265
266 my ($scheme, $authority, $upath, $query, $fragment) = 266 my ($uscheme, $uauthority, $upath, $query, $fragment) =
267 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 267 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
268 268
269 $scheme = lc $scheme; 269 $uscheme = lc $uscheme;
270 270
271 my $uport = $scheme eq "http" ? 80 271 my $uport = $uscheme eq "http" ? 80
272 : $scheme eq "https" ? 443 272 : $uscheme eq "https" ? 443
273 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url }); 273 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
274 274
275 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic 275 $hdr{referer} ||= "$uscheme://$uauthority$upath"; # leave out fragment and query string, just a heuristic
276 276
277 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 277 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
278 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url }); 278 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
279 279
280 my $uhost = $1; 280 my $uhost = $1;
281 $uport = $2 if defined $2; 281 $uport = $2 if defined $2;
282 282
285 285
286 $upath =~ s%^/?%/%; 286 $upath =~ s%^/?%/%;
287 287
288 # cookie processing 288 # cookie processing
289 if (my $jar = $arg{cookie_jar}) { 289 if (my $jar = $arg{cookie_jar}) {
290 %$jar = () if $jar->{version} < 1; 290 %$jar = () if $jar->{version} != 1;
291 291
292 my @cookie; 292 my @cookie;
293 293
294 while (my ($chost, $v) = each %$jar) { 294 while (my ($chost, $v) = each %$jar) {
295 if ($chost =~ /^\./) {
295 next unless $chost eq substr $uhost, -length $chost; 296 next unless $chost eq substr $uhost, -length $chost;
296 next unless $chost =~ /^\./; 297 } elsif ($chost =~ /\./) {
298 next unless $chost eq $uhost;
299 } else {
300 next;
301 }
297 302
298 while (my ($cpath, $v) = each %$v) { 303 while (my ($cpath, $v) = each %$v) {
299 next unless $cpath eq substr $upath, 0, length $cpath; 304 next unless $cpath eq substr $upath, 0, length $cpath;
300 305
301 while (my ($k, $v) = each %$v) { 306 while (my ($k, $v) = each %$v) {
302 next if $scheme ne "https" && exists $v->{secure}; 307 next if $uscheme ne "https" && exists $v->{secure};
308 my $value = $v->{value};
309 $value =~ s/([\\"])/\\$1/g;
303 push @cookie, "$k=$v->{value}"; 310 push @cookie, "$k=\"$value\"";
304 } 311 }
305 } 312 }
306 } 313 }
307 314
308 $hdr{cookie} = join "; ", @cookie 315 $hdr{cookie} = join "; ", @cookie
309 if @cookie; 316 if @cookie;
310 } 317 }
311 318
312 my ($rhost, $rport, $rpath); # request host, port, path 319 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
313 320
314 if ($proxy) { 321 if ($proxy) {
315 ($rhost, $rport, $scheme) = @$proxy; 322 ($rhost, $rport, $rscheme, $rpath) = (@$proxy, $url);
316 $rpath = $url; 323
324 # don't support https requests over https-proxy transport,
325 # can't be done with tls as spec'ed.
326 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
317 } else { 327 } else {
318 ($rhost, $rport, $rpath) = ($uhost, $uport, $upath); 328 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
319 $hdr{host} = $uhost;
320 } 329 }
321 330
331 $hdr{host} = $uhost;
322 $hdr{"content-length"} = length $arg{body}; 332 $hdr{"content-length"} = length $arg{body};
323 333
324 my %state = (connect_guard => 1); 334 my %state = (connect_guard => 1);
325 335
326 _get_slot $uhost, sub { 336 _get_slot $uhost, sub {
334 344
335 delete $state{connect_guard}; # reduce memory usage, save a tree 345 delete $state{connect_guard}; # reduce memory usage, save a tree
336 346
337 # get handle 347 # get handle
338 $state{handle} = new AnyEvent::Handle 348 $state{handle} = new AnyEvent::Handle
339 fh => $state{fh}, 349 fh => $state{fh};
340 ($scheme eq "https" ? (tls => "connect") : ());
341 350
342 # limit the number of persistent connections 351 # limit the number of persistent connections
343 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 352 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
344 ++$KA_COUNT{$_[1]}; 353 ++$KA_COUNT{$_[1]};
345 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 354 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
359 $state{handle}->on_eof (sub { 368 $state{handle}->on_eof (sub {
360 %state = (); 369 %state = ();
361 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url }); 370 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
362 }); 371 });
363 372
373 $state{handle}->starttls ("connect") if $rscheme eq "https";
374
375 # handle actual, non-tunneled, request
376 my $handle_actual_request = sub {
377# $state{handle}->starttls ("connect") if $uscheme eq "https";
378
364 # send request 379 # send request
365 $state{handle}->push_write ( 380 $state{handle}->push_write (
366 "$method $rpath HTTP/1.0\015\012" 381 "$method $rpath HTTP/1.0\015\012"
367 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 382 . (join "", map "\u$_: $hdr{$_}\015\012", keys %hdr)
368 . "\015\012" 383 . "\015\012"
369 . (delete $arg{body}) 384 . (delete $arg{body})
370 );
371
372 %hdr = (); # reduce memory usage, save a kitten
373
374 # status line
375 $state{handle}->push_read (line => qr/\015?\012/, sub {
376 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
377 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
378
379 my %hdr = ( # response headers
380 HTTPVersion => "\x00$1",
381 Status => "\x00$2",
382 Reason => "\x00$3",
383 URL => "\x00$url"
384 ); 385 );
385 386
387 %hdr = (); # reduce memory usage, save a kitten
388
389 # status line
390 $state{handle}->push_read (line => qr/\015?\012/, sub {
391 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
392 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
393
394 my %hdr = ( # response headers
395 HTTPVersion => ",$1",
396 Status => ",$2",
397 Reason => ",$3",
398 URL => ",$url"
399 );
400
386 # headers, could be optimized a bit 401 # headers, could be optimized a bit
387 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 402 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
388 for ("$_[1]\012") { 403 for ("$_[1]\012") {
404 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
405
389 # we support spaces in field names, as lotus domino 406 # we support spaces in field names, as lotus domino
390 # creates them. 407 # creates them (actually spaces around seperators
408 # are strictly allowed in http, they are a security issue).
391 $hdr{lc $1} .= "\x00$2" 409 $hdr{lc $1} .= ",$2"
392 while /\G 410 while /\G
393 ([^:\000-\037]+): 411 ([^:\000-\037]+):
394 [\011\040]* 412 [\011\040]*
395 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 413 ((?: [^\012]+ | \012[\011\040] )*)
396 \015?\012 414 \012
397 /gxc; 415 /gxc;
398 416
399 /\G$/ 417 /\G$/
400 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url })); 418 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
401 } 419 }
402 420
403 substr $_, 0, 1, "" 421 substr $_, 0, 1, ""
404 for values %hdr; 422 for values %hdr;
405 423
406 my $finish = sub { 424 my $finish = sub {
425 # TODO: use destroy method, when/if available
426 #$state{handle}->destroy;
427 $state{handle}->on_eof (undef);
428 $state{handle}->on_error (undef);
407 %state = (); 429 %state = ();
408 430
409 # set-cookie processing 431 # set-cookie processing
410 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 432 if ($arg{cookie_jar}) {
411 for (split /\x00/, $hdr{"set-cookie"}) { 433 for ($hdr{"set-cookie"}) {
412 my ($cookie, @arg) = split /;\s*/; 434 # parse NAME=VALUE
413 my ($name, $value) = split /=/, $cookie, 2; 435 my @kv;
414 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 436
415 437 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
416 my $cdom = (delete $kv{domain}) || $uhost; 438 my $name = $1;
439 my $value = $3;
440
441 unless ($value) {
442 $value = $2;
443 $value =~ s/\\(.)/$1/gs;
444 }
445
446 push @kv, $name => $value;
447
448 last unless /\G\s*;/gc;
449 }
450
451 last unless @kv;
452
453 my $name = shift @kv;
454 my %kv = (value => shift @kv, @kv);
455
456 my $cdom;
417 my $cpath = (delete $kv{path}) || "/"; 457 my $cpath = (delete $kv{path}) || "/";
418 458
459 if (exists $kv{domain}) {
460 $cdom = delete $kv{domain};
461
419 $cdom =~ s/^\.?/./; # make sure it starts with a "." 462 $cdom =~ s/^\.?/./; # make sure it starts with a "."
420 463
421 next if $cdom =~ /\.$/; 464 next if $cdom =~ /\.$/;
422 465
423 # this is not rfc-like and not netscape-like. go figure. 466 # this is not rfc-like and not netscape-like. go figure.
424 my $ndots = $cdom =~ y/.//; 467 my $ndots = $cdom =~ y/.//;
425 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); 468 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
426 469 } else {
470 $cdom = $uhost;
471 }
472
427 # store it 473 # store it
428 $arg{cookie_jar}{version} = 1; 474 $arg{cookie_jar}{version} = 1;
429 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 475 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
476
477 redo if /\G\s*,/gc;
478 }
479 }
480
481 # microsoft and other shitheads don't give a shit for following standards,
482 # try to support some common forms of broken Location headers.
483 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
484 $_[1]{location} =~ s/^\.\/+//;
485
486 my $url = "$rscheme://$uhost:$uport";
487
488 unless ($_[1]{location} =~ s/^\///) {
489 $url .= $upath;
490 $url =~ s/\/[^\/]*$//;
491 }
492
493 $_[1]{location} = "$url/$_[1]{location}";
494 }
495
496 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
497 # apparently, mozilla et al. just change POST to GET here
498 # more research is needed before we do the same
499 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
500 } elsif ($_[1]{Status} == 303 && $recurse) {
501 # even http/1.1 is unclear on how to mutate the method
502 $method = "GET" unless $method eq "HEAD";
503 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
504 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
505 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
506 } else {
507 $cb->($_[0], $_[1]);
508 }
509 };
510
511 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
512 $finish->(undef, \%hdr);
513 } else {
514 if (exists $hdr{"content-length"}) {
515 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
516 # could cache persistent connection now
517 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
518 # but we don't, due to misdesigns, this is annoyingly complex
519 };
520
521 $finish->($_[1], \%hdr);
522 });
523 } else {
524 # too bad, need to read until we get an error or EOF,
525 # no way to detect winged data.
526 $_[0]->on_error (sub {
527 $finish->($_[0]{rbuf}, \%hdr);
528 });
529 $_[0]->on_eof (undef);
530 $_[0]->on_read (sub { });
430 } 531 }
431 } 532 }
432
433 # microsoft and other shitheads don't give a shit for following standards,
434 # try to support some common forms of broken Location headers.
435 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
436 $_[1]{location} =~ s/^\.\/+//;
437
438 my $url = "$scheme://$uhost:$uport";
439
440 unless ($_[1]{location} =~ s/^\///) {
441 $url .= $upath;
442 $url =~ s/\/[^\/]*$//;
443 }
444
445 $_[1]{location} = "$url/$_[1]{location}";
446 }
447
448 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
449 # apparently, mozilla et al. just change POST to GET here
450 # more research is needed before we do the same
451 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
452 } elsif ($_[1]{Status} == 303 && $recurse) {
453 # even http/1.1 is unlear on how to mutate the method
454 $method = "GET" unless $method eq "HEAD";
455 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
456 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
457 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
458 } else {
459 $cb->($_[0], $_[1]);
460 }
461 }; 533 });
534 });
535 };
462 536
463 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") { 537 # now handle proxy-CONNECT method
464 $finish->(undef, \%hdr); 538 if ($proxy && $uscheme eq "https") {
539 # oh dear, we have to wrap it into a connect request
540
541 # maybe re-use $uauthority with patched port?
542 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
543 $state{handle}->push_read (line => qr/\015?\012\015?\012/, sub {
544 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
545 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid proxy connect response ($_[1])", URL => $url }));
546
547 if ($2 == 200) {
548 $rpath = $upath;
549 &$handle_actual_request;
465 } else { 550 } else {
466 if (exists $hdr{"content-length"}) { 551 %state = ();
467 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 552 $cb->(undef, { Status => $2, Reason => $3, URL => $url });
468 # could cache persistent connection now
469 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
470 # but we don't, due to misdesigns, this is annoyingly complex
471 };
472
473 $finish->($_[1], \%hdr);
474 });
475 } else {
476 # too bad, need to read until we get an error or EOF,
477 # no way to detect winged data.
478 $_[0]->on_error (sub {
479 $finish->($_[0]{rbuf}, \%hdr);
480 });
481 $_[0]->on_eof (undef);
482 $_[0]->on_read (sub { });
483 }
484 } 553 }
485 }); 554 });
555 } else {
556 &$handle_actual_request;
486 }); 557 }
558
487 }, sub { 559 }, sub {
488 $timeout 560 $timeout
489 }; 561 };
490 }; 562 };
491 563

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines