ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.121 by root, Tue Apr 7 01:18:20 2015 UTC vs.
Revision 1.130 by root, Thu Aug 30 01:21:27 2018 UTC

46use AnyEvent::Util (); 46use AnyEvent::Util ();
47use AnyEvent::Handle (); 47use AnyEvent::Handle ();
48 48
49use base Exporter::; 49use base Exporter::;
50 50
51our $VERSION = 2.21; 51our $VERSION = 2.24;
52 52
53our @EXPORT = qw(http_get http_post http_head http_request); 53our @EXPORT = qw(http_get http_post http_head http_request);
54 54
55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56our $MAX_RECURSE = 10; 56our $MAX_RECURSE = 10;
194 194
195C<$scheme> must be either missing or must be C<http> for HTTP. 195C<$scheme> must be either missing or must be C<http> for HTTP.
196 196
197If not specified, then the default proxy is used (see 197If not specified, then the default proxy is used (see
198C<AnyEvent::HTTP::set_proxy>). 198C<AnyEvent::HTTP::set_proxy>).
199
200Currently, if your proxy requires authorization, you have to specify an
201appropriate "Proxy-Authorization" header in every request.
199 202
200=item body => $string 203=item body => $string
201 204
202The request body, usually empty. Will be sent as-is (future versions of 205The request body, usually empty. Will be sent as-is (future versions of
203this module might offer more options). 206this module might offer more options).
451 454
452# expire cookies 455# expire cookies
453sub cookie_jar_expire($;$) { 456sub cookie_jar_expire($;$) {
454 my ($jar, $session_end) = @_; 457 my ($jar, $session_end) = @_;
455 458
456 %$jar = () if $jar->{version} != 1; 459 %$jar = () if $jar->{version} != 2;
457 460
458 my $anow = AE::now; 461 my $anow = AE::now;
459 462
460 while (my ($chost, $paths) = each %$jar) { 463 while (my ($chost, $paths) = each %$jar) {
461 next unless ref $paths; 464 next unless ref $paths;
481 484
482# extract cookies from jar 485# extract cookies from jar
483sub cookie_jar_extract($$$$) { 486sub cookie_jar_extract($$$$) {
484 my ($jar, $scheme, $host, $path) = @_; 487 my ($jar, $scheme, $host, $path) = @_;
485 488
486 %$jar = () if $jar->{version} != 1; 489 %$jar = () if $jar->{version} != 2;
490
491 $host = AnyEvent::Util::idn_to_ascii $host
492 if $host =~ /[^\x00-\x7f]/;
487 493
488 my @cookies; 494 my @cookies;
489 495
490 while (my ($chost, $paths) = each %$jar) { 496 while (my ($chost, $paths) = each %$jar) {
491 next unless ref $paths; 497 next unless ref $paths;
492 498
493 if ($chost =~ /^\./) { 499 # exact match or suffix including . match
494 next unless $chost eq substr $host, -length $chost; 500 $chost eq $host or ".$chost" eq substr $host, -1 - length $chost
495 } elsif ($chost =~ /\./) {
496 next unless $chost eq $host;
497 } else {
498 next; 501 or next;
499 }
500 502
501 while (my ($cpath, $cookies) = each %$paths) { 503 while (my ($cpath, $cookies) = each %$paths) {
502 next unless $cpath eq substr $path, 0, length $cpath; 504 next unless $cpath eq substr $path, 0, length $cpath;
503 505
504 while (my ($cookie, $kv) = each %$cookies) { 506 while (my ($cookie, $kv) = each %$cookies) {
525} 527}
526 528
527# parse set_cookie header into jar 529# parse set_cookie header into jar
528sub cookie_jar_set_cookie($$$$) { 530sub cookie_jar_set_cookie($$$$) {
529 my ($jar, $set_cookie, $host, $date) = @_; 531 my ($jar, $set_cookie, $host, $date) = @_;
532
533 %$jar = () if $jar->{version} != 2;
530 534
531 my $anow = int AE::now; 535 my $anow = int AE::now;
532 my $snow; # server-now 536 my $snow; # server-now
533 537
534 for ($set_cookie) { 538 for ($set_cookie) {
580 584
581 my $cdom; 585 my $cdom;
582 my $cpath = (delete $kv{path}) || "/"; 586 my $cpath = (delete $kv{path}) || "/";
583 587
584 if (exists $kv{domain}) { 588 if (exists $kv{domain}) {
585 $cdom = delete $kv{domain}; 589 $cdom = $kv{domain};
586 590
587 $cdom =~ s/^\.?/./; # make sure it starts with a "." 591 $cdom =~ s/^\.?/./; # make sure it starts with a "."
588 592
589 next if $cdom =~ /\.$/; 593 next if $cdom =~ /\.$/;
590 594
591 # this is not rfc-like and not netscape-like. go figure. 595 # this is not rfc-like and not netscape-like. go figure.
592 my $ndots = $cdom =~ y/.//; 596 my $ndots = $cdom =~ y/.//;
593 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); 597 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
598
599 $cdom = substr $cdom, 1; # remove initial .
594 } else { 600 } else {
595 $cdom = $host; 601 $cdom = $host;
596 } 602 }
597 603
598 # store it 604 # store it
599 $jar->{version} = 1; 605 $jar->{version} = 2;
600 $jar->{lc $cdom}{$cpath}{$name} = \%kv; 606 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
601 607
602 redo if /\G\s*,/gc; 608 redo if /\G\s*,/gc;
603 } 609 }
604} 610}
770 776
771 my $uport = $uscheme eq "http" ? 80 777 my $uport = $uscheme eq "http" ? 80
772 : $uscheme eq "https" ? 443 778 : $uscheme eq "https" ? 443
773 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 779 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
774 780
775 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 781 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
776 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 782 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
777 783
778 my $uhost = lc $1; 784 my $uhost = lc $1;
779 $uport = $2 if defined $2; 785 $uport = $2 if defined $2;
780 786
892 # we give our best and fall back to URI if available. 898 # we give our best and fall back to URI if available.
893 if (exists $hdr{location}) { 899 if (exists $hdr{location}) {
894 my $loc = $hdr{location}; 900 my $loc = $hdr{location};
895 901
896 if ($loc =~ m%^//%) { # // 902 if ($loc =~ m%^//%) { # //
897 $loc = "$rscheme:$loc"; 903 $loc = "$uscheme:$loc";
898 904
899 } elsif ($loc eq "") { 905 } elsif ($loc eq "") {
900 $loc = $url; 906 $loc = $url;
901 907
902 } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple" 908 } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
903 $loc =~ s/^\.\/+//; 909 $loc =~ s/^\.\/+//;
904 910
905 if ($loc !~ m%^[.?#]%) { 911 if ($loc !~ m%^[.?#]%) {
906 my $prefix = "$rscheme://$uhost:$uport"; 912 my $prefix = "$uscheme://$uauthority";
907 913
908 unless ($loc =~ s/^\///) { 914 unless ($loc =~ s/^\///) {
909 $prefix .= $upath; 915 $prefix .= $upath;
910 $prefix =~ s/\/[^\/]*$//; 916 $prefix =~ s/\/[^\/]*$//;
911 } 917 }
936 # also, the UA should ask the user for 301 and 307 and POST, 942 # also, the UA should ask the user for 301 and 307 and POST,
937 # industry standard seems to be to simply follow. 943 # industry standard seems to be to simply follow.
938 # we go with the industry standard. 308 is defined 944 # we go with the industry standard. 308 is defined
939 # by rfc7538 945 # by rfc7538
940 if ($status == 301 or $status == 302 or $status == 303) { 946 if ($status == 301 or $status == 302 or $status == 303) {
947 $redirect = 1;
941 # HTTP/1.1 is unclear on how to mutate the method 948 # HTTP/1.1 is unclear on how to mutate the method
942 $method = "GET" unless $method eq "HEAD"; 949 unless ($method eq "HEAD") {
943 $redirect = 1; 950 $method = "GET";
951 delete $arg{body};
952 }
944 } elsif ($status == 307 or $status == 308) { 953 } elsif ($status == 307 or $status == 308) {
945 $redirect = 1; 954 $redirect = 1;
946 } 955 }
947 } 956 }
948 957
1025 $finish->(delete $state{handle}); 1034 $finish->(delete $state{handle});
1026 1035
1027 } elsif ($chunked) { 1036 } elsif ($chunked) {
1028 my $cl = 0; 1037 my $cl = 0;
1029 my $body = ""; 1038 my $body = "";
1030 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 1039 my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 };
1031 1040
1032 $state{read_chunk} = sub { 1041 $state{read_chunk} = sub {
1033 $_[1] =~ /^([0-9a-fA-F]+)/ 1042 $_[1] =~ /^([0-9a-fA-F]+)/
1034 or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); 1043 or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1035 1044
1068 } 1077 }
1069 }; 1078 };
1070 1079
1071 $_[0]->push_read (line => $state{read_chunk}); 1080 $_[0]->push_read (line => $state{read_chunk});
1072 1081
1073 } elsif ($arg{on_body}) { 1082 } elsif (!$redirect && $arg{on_body}) {
1074 if (defined $len) { 1083 if (defined $len) {
1075 $_[0]->on_read (sub { 1084 $_[0]->on_read (sub {
1076 $len -= length $_[0]{rbuf}; 1085 $len -= length $_[0]{rbuf};
1077 1086
1078 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 1087 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1117 _destroy_state %state; 1126 _destroy_state %state;
1118 1127
1119 %state = (); 1128 %state = ();
1120 $state{recurse} = 1129 $state{recurse} =
1121 http_request ( 1130 http_request (
1122 $method => $url, 1131 $method => $url,
1123 %arg, 1132 %arg,
1124 recurse => $recurse - 1, 1133 recurse => $recurse - 1,
1125 keepalive => 0, 1134 persistent => 0,
1126 sub { 1135 sub {
1127 %state = (); 1136 %state = ();
1128 &$cb 1137 &$cb
1129 } 1138 }
1130 ); 1139 );
1176 1185
1177 # now handle proxy-CONNECT method 1186 # now handle proxy-CONNECT method
1178 if ($proxy && $uscheme eq "https") { 1187 if ($proxy && $uscheme eq "https") {
1179 # oh dear, we have to wrap it into a connect request 1188 # oh dear, we have to wrap it into a connect request
1180 1189
1190 my $auth = exists $hdr{"proxy-authorization"}
1191 ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1192 : "";
1193
1181 # maybe re-use $uauthority with patched port? 1194 # maybe re-use $uauthority with patched port?
1182 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); 1195 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1183 $state{handle}->push_read (line => $qr_nlnl, sub { 1196 $state{handle}->push_read (line => $qr_nlnl, sub {
1184 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 1197 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1185 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; 1198 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1186 1199
1187 if ($2 == 200) { 1200 if ($2 == 200) {
1190 } else { 1203 } else {
1191 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 }; 1204 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1192 } 1205 }
1193 }); 1206 });
1194 } else { 1207 } else {
1208 delete $hdr{"proxy-authorization"} unless $proxy;
1209
1195 $handle_actual_request->(); 1210 $handle_actual_request->();
1196 } 1211 }
1197 }; 1212 };
1198 1213
1199 _get_slot $uhost, sub { 1214 _get_slot $uhost, sub {
1205 # on a keepalive request (in theory, this should be a separate config option). 1220 # on a keepalive request (in theory, this should be a separate config option).
1206 if ($persistent && $KA_CACHE{$ka_key}) { 1221 if ($persistent && $KA_CACHE{$ka_key}) {
1207 $was_persistent = 1; 1222 $was_persistent = 1;
1208 1223
1209 $state{handle} = ka_fetch $ka_key; 1224 $state{handle} = ka_fetch $ka_key;
1210 $state{handle}->destroyed 1225# $state{handle}->destroyed
1211 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d# 1226# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1212 $prepare_handle->(); 1227 $prepare_handle->();
1213 $state{handle}->destroyed 1228# $state{handle}->destroyed
1214 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d# 1229# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1215 $handle_actual_request->(); 1230 $handle_actual_request->();
1216 1231
1217 } else { 1232 } else {
1218 my $tcp_connect = $arg{tcp_connect} 1233 my $tcp_connect = $arg{tcp_connect}
1219 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 1234 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1449 or die "$file: $!"; 1464 or die "$file: $!";
1450 1465
1451 my %hdr; 1466 my %hdr;
1452 my $ofs = 0; 1467 my $ofs = 0;
1453 1468
1454 warn stat $fh;
1455 warn -s _;
1456 if (stat $fh and -s _) { 1469 if (stat $fh and -s _) {
1457 $ofs = -s _; 1470 $ofs = -s _;
1458 warn "-s is ", $ofs; 1471 warn "-s is ", $ofs;
1459 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; 1472 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1460 $hdr{"range"} = "bytes=$ofs-"; 1473 $hdr{"range"} = "bytes=$ofs-";
1488 my (undef, $hdr) = @_; 1501 my (undef, $hdr) = @_;
1489 1502
1490 my $status = $hdr->{Status}; 1503 my $status = $hdr->{Status};
1491 1504
1492 if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { 1505 if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1493 utime $fh, $time, $time; 1506 utime $time, $time, $fh;
1494 } 1507 }
1495 1508
1496 if ($status == 200 || $status == 206 || $status == 416) { 1509 if ($status == 200 || $status == 206 || $status == 416) {
1497 # download ok || resume ok || file already fully downloaded 1510 # download ok || resume ok || file already fully downloaded
1498 $cb->(1, $hdr); 1511 $cb->(1, $hdr);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines