ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.9 by root, Wed Jun 4 13:51:53 2008 UTC vs.
Revision 1.10 by root, Thu Jun 5 13:06:43 2008 UTC

74response headers as second argument. 74response headers as second argument.
75 75
76All the headers in that hash are lowercased. In addition to the response 76All the headers in that hash are lowercased. In addition to the response
77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and 77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78C<Reason> contain the three parts of the HTTP Status-Line of the same 78C<Reason> contain the three parts of the HTTP Status-Line of the same
79name. 79name. If the server sends a header multiple lines, then their contents
80will be joined together with C<\x00>.
80 81
81If an internal error occurs, such as not being able to resolve a hostname, 82If an internal error occurs, such as not being able to resolve a hostname,
82then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 83then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83and the C<Reason> pseudo-header will contain an error message. 84and the C<Reason> pseudo-header will contain an error message.
84 85
123 124
124=item body => $string 125=item body => $string
125 126
126The request body, usually empty. Will be-sent as-is (future versions of 127The request body, usually empty. Will be-sent as-is (future versions of
127this module might offer more options). 128this module might offer more options).
129
130=item cookie_jar => $hash_ref
131
132Passing this parameter enables (simplified) cookie-processing, loosely
133based on the original netscape specification.
134
135The C<$hash_ref> must be an (initially empty) hash reference which will
136get updated automatically. It is possible to save the cookie_jar to
137persistent storage with something like JSON or Storable, but this is not
138recommended, as expire times are currently being ignored.
139
140Note that this cookie implementation is not of very high quality, nor
141meant to be complete. If you want complete cookie management you have to
142do that on your own. C<cookie_jar> is meant as a quick fix to get some
143cookie-using sites working. Cookies are a privacy disaster, do not use
144them unless required to.
128 145
129=back 146=back
130 147
131Example: make a simple HTTP GET request for http://www.nethype.de/ 148Example: make a simple HTTP GET request for http://www.nethype.de/
132 149
172 my $proxy = $arg{proxy} || $PROXY; 189 my $proxy = $arg{proxy} || $PROXY;
173 my $timeout = $arg{timeout} || $TIMEOUT; 190 my $timeout = $arg{timeout} || $TIMEOUT;
174 191
175 $hdr{"user-agent"} ||= $USERAGENT; 192 $hdr{"user-agent"} ||= $USERAGENT;
176 193
177 my ($host, $port, $path, $scheme); 194 my ($scheme, $authority, $upath, $query, $fragment) =
195 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
196
197 $scheme = lc $scheme;
198
199 my $uport = $scheme eq "http" ? 80
200 : $scheme eq "https" ? 443
201 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" });
202
203 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
204 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" });
205
206 my $uhost = $1;
207 $uport = $2 if defined $2;
208
209 $uhost =~ s/^\[(.*)\]$/$1/;
210 $upath .= "?$query" if length $query;
211
212 $upath =~ s%^/?%/%;
213
214 # cookie processing
215 if (my $jar = $arg{cookie_jar}) {
216 %$jar = () if $jar->{version} < 1;
217
218 my @cookie;
219
220 while (my ($chost, $v) = each %$jar) {
221 next unless $chost eq substr $uhost, -length $chost;
222 next unless $chost =~ /^\./;
223
224 while (my ($cpath, $v) = each %$v) {
225 next unless $cpath eq substr $upath, 0, length $cpath;
226
227 while (my ($k, $v) = each %$v) {
228 next if $scheme ne "https" && exists $v->{secure};
229 push @cookie, "$k=$v->{value}";
230 }
231 }
232 }
233
234 $hdr{cookie} = join "; ", @cookie
235 if @cookie;
236 }
237
238 my ($rhost, $rport, $rpath); # request host, port, path
178 239
179 if ($proxy) { 240 if ($proxy) {
180 ($host, $port, $scheme) = @$proxy; 241 ($rhost, $rport, $scheme) = @$proxy;
181 $path = $url; 242 $rpath = $url;
182 } else { 243 } else {
183 ($scheme, my $authority, $path, my $query, my $fragment) = 244 ($rhost, $rport, $rpath) = ($uhost, $uport, $upath);
184 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
185
186 $port = $scheme eq "http" ? 80
187 : $scheme eq "https" ? 443
188 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
189
190 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
191 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
192
193 $host = $1;
194 $port = $2 if defined $2;
195
196 $host =~ s/^\[(.*)\]$/$1/;
197 $path .= "?$query" if length $query;
198
199 $path = "/" unless $path;
200
201 $hdr{host} = $host = lc $host; 245 $hdr{host} = $uhost;
202 } 246 }
203 247
204 $scheme = lc $scheme; 248 $hdr{"content-length"} = length $arg{body};
205 249
206 my %state; 250 my %state;
207 251
208 $hdr{"content-length"} = length $arg{body};
209
210 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 252 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
211 $state{fh} = shift 253 $state{fh} = shift
212 or return $cb->(undef, { Status => 599, Reason => "$!" }); 254 or return $cb->(undef, { Status => 599, Reason => "$!" });
213 255
214 delete $state{connect_guard}; # reduce memory usage, save a tree 256 delete $state{connect_guard}; # reduce memory usage, save a tree
215 257
239 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 281 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
240 }); 282 });
241 283
242 # send request 284 # send request
243 $state{handle}->push_write ( 285 $state{handle}->push_write (
244 "$method $path HTTP/1.0\015\012" 286 "$method $rpath HTTP/1.0\015\012"
245 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 287 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
246 . "\015\012" 288 . "\015\012"
247 . (delete $arg{body}) 289 . (delete $arg{body})
248 ); 290 );
249 291
253 $state{handle}->push_read (line => qr/\015?\012/, sub { 295 $state{handle}->push_read (line => qr/\015?\012/, sub {
254 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix 296 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
255 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); 297 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
256 298
257 my %hdr = ( # response headers 299 my %hdr = ( # response headers
258 HTTPVersion => ",$1", 300 HTTPVersion => "\x00$1",
259 Status => ",$2", 301 Status => "\x00$2",
260 Reason => ",$3", 302 Reason => "\x00$3",
261 ); 303 );
262 304
263 # headers, could be optimized a bit 305 # headers, could be optimized a bit
264 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 306 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
265 for ("$_[1]\012") { 307 for ("$_[1]\012") {
266 # we support spaces in field names, as lotus domino 308 # we support spaces in field names, as lotus domino
267 # creates them. 309 # creates them.
268 $hdr{lc $1} .= ",$2" 310 $hdr{lc $1} .= "\x00$2"
269 while /\G 311 while /\G
270 ([^:\000-\037]+): 312 ([^:\000-\037]+):
271 [\011\040]* 313 [\011\040]*
272 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 314 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
273 \015?\012 315 \015?\012
274 /gxc; 316 /gxc;
275 317
276 /\G$/ 318 /\G$/
277 or return $cb->(undef, { Status => 599, Reason => "garbled response headers" }); 319 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" }));
278 } 320 }
279 321
280 substr $_, 0, 1, "" 322 substr $_, 0, 1, ""
281 for values %hdr; 323 for values %hdr;
282 324
283 my $finish = sub { 325 my $finish = sub {
326 %state = ();
327
328 # set-cookie processing
329 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330 for (split /\x00/, $hdr{"set-cookie"}) {
331 my ($cookie, @arg) = split /;\s*/;
332 my ($name, $value) = split /=/, $cookie, 2;
333 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334
335 my $cdom = (delete $kv{domain}) || $uhost;
336 my $cpath = (delete $kv{path}) || "/";
337
338 $cdom =~ s/^.?/./; # make sure it starts with a "."
339
340 my $ndots = $cdom =~ y/.//;
341 next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3);
342
343 # store it
344 $arg{cookie_jar}{version} = 1;
345 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
346 }
347 }
348
284 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) { 349 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350 # microsoft and other assholes don't give a shit for following standards,
351 # try to support a common form of broken Location header.
352 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353
285 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 354 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
286 } else { 355 } else {
287 $cb->($_[0], $_[1]); 356 $cb->($_[0], $_[1]);
288 } 357 }
289 }; 358 };
290 359
291 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") { 360 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
292 %state = ();
293 $finish->(undef, \%hdr); 361 $finish->(undef, \%hdr);
294 } else { 362 } else {
295 if (exists $hdr{"content-length"}) { 363 if (exists $hdr{"content-length"}) {
296 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 364 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
297 # could cache persistent connection now 365 # could cache persistent connection now
298 if ($hdr{connection} =~ /\bkeep-alive\b/i) { 366 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
299 # but we don't, due to misdesigns, this is annoyingly complex 367 # but we don't, due to misdesigns, this is annoyingly complex
300 }; 368 };
301 369
302 %state = ();
303 $finish->($_[1], \%hdr); 370 $finish->($_[1], \%hdr);
304 }); 371 });
305 } else { 372 } else {
306 # too bad, need to read until we get an error or EOF, 373 # too bad, need to read until we get an error or EOF,
307 # no way to detect winged data. 374 # no way to detect winged data.
308 $_[0]->on_error (sub { 375 $_[0]->on_error (sub {
309 %state = ();
310 $finish->($_[0]{rbuf}, \%hdr); 376 $finish->($_[0]{rbuf}, \%hdr);
311 }); 377 });
312 $_[0]->on_eof (undef); 378 $_[0]->on_eof (undef);
313 $_[0]->on_read (sub { }); 379 $_[0]->on_read (sub { });
314 } 380 }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines