ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.9 by root, Wed Jun 4 13:51:53 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_head $url, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-HEAD request. See the http_request function for details on
60additional parameters.
61
62=item http_post $url, $body, key => value..., $cb->($data, $headers)
63
64Executes an HTTP-POST request with a request body of C<$bod>. See the
65http_request function for details on additional parameters.
66
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 67=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 68
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 69Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 70must be an absolute http or https URL.
59 71
72The callback will be called with the response data as first argument
73(or C<undef> if it wasn't available due to errors), and a hash-ref with
74response headers as second argument.
75
76All the headers in that hash are lowercased. In addition to the response
77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78C<Reason> contain the three parts of the HTTP Status-Line of the same
79name.
80
81If an internal error occurs, such as not being able to resolve a hostname,
82then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83and the C<Reason> pseudo-header will contain an error message.
84
85A typical callback might look like this:
86
87 sub {
88 my ($body, $hdr) = @_;
89
90 if ($hdr->{Status} =~ /^2/) {
91 ... everything should be ok
92 } else {
93 print "error, $hdr->{Status} $hdr->{Reason}\n";
94 }
95 }
96
60Additional parameters are key-value pairs, and are fully optional. They 97Additional parameters are key-value pairs, and are fully optional. They
61include: 98include:
62 99
63=over 4 100=over 4
64 101
65=item recurse => $boolean (default: true) 102=item recurse => $count (default: $MAX_RECURSE)
66 103
67Whether to recurse requests or not, e.g. on redirects, authentication 104Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 105retries and so on, and how often to do so.
69 106
70=item headers => hashref 107=item headers => hashref
71 108
72The request headers to use. 109The request headers to use.
73 110
74=item timeout => $seconds 111=item timeout => $seconds
75 112
76The time-out to use for various stages - each connect attempt will reset 113The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 114the timeout, as will read or write activity. Default timeout is 5 minutes.
115
116=item proxy => [$host, $port[, $scheme]] or undef
117
118Use the given http proxy for all requests. If not specified, then the
119default proxy (as specified by C<$ENV{http_proxy}>) is used.
120
121C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122HTTPS.
123
124=item body => $string
125
126The request body, usually empty. Will be-sent as-is (future versions of
127this module might offer more options).
78 128
79=back 129=back
80 130
81=back 131Example: make a simple HTTP GET request for http://www.nethype.de/
132
133 http_request GET => "http://www.nethype.de/", sub {
134 my ($body, $hdr) = @_;
135 print "$body\n";
136 };
137
138Example: make a HTTP HEAD request on https://www.google.com/, use a
139timeout of 30 seconds.
140
141 http_request
142 GET => "https://www.google.com",
143 timeout => 30,
144 sub {
145 my ($body, $hdr) = @_;
146 use Data::Dumper;
147 print Dumper $hdr;
148 }
149 ;
82 150
83=cut 151=cut
84 152
85sub http_request($$$;@) { 153sub http_request($$$;@) {
86 my $cb = pop; 154 my $cb = pop;
87 my ($method, $url, %arg) = @_; 155 my ($method, $url, %arg) = @_;
88 156
89 my %hdr; 157 my %hdr;
90 158
159 $method = uc $method;
160
91 if (my $hdr = delete $arg{headers}) { 161 if (my $hdr = $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 162 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 163 $hdr{lc $k} = $v;
94 } 164 }
95 } 165 }
96 166
167 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
168
169 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
170 if $recurse < 0;
171
172 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 173 my $timeout = $arg{timeout} || $TIMEOUT;
98 174
99 $hdr{"user-agent"} ||= $USERAGENT; 175 $hdr{"user-agent"} ||= $USERAGENT;
100 176
177 my ($host, $port, $path, $scheme);
178
179 if ($proxy) {
180 ($host, $port, $scheme) = @$proxy;
181 $path = $url;
182 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 183 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 184 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
185
186 $port = $scheme eq "http" ? 80
187 : $scheme eq "https" ? 443
188 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
189
190 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
191 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
192
193 $host = $1;
194 $port = $2 if defined $2;
195
196 $host =~ s/^\[(.*)\]$/$1/;
197 $path .= "?$query" if length $query;
198
199 $path = "/" unless $path;
200
201 $hdr{host} = $host = lc $host;
202 }
103 203
104 $scheme = lc $scheme; 204 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported";
108
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL";
111
112 my $host = $1;
113 $port = $2 if defined $2;
114
115 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query;
117
118 $hdr{host} = $host = lc $host;
119 205
120 my %state; 206 my %state;
121 207
122 my $body = "";
123 $state{body} = $body;
124
125 $hdr{"content-length"} = length $body; 208 $hdr{"content-length"} = length $arg{body};
126 209
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 210 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 211 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 212 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 213
138 # limit the number of persistent connections 221 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 222 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 223 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 224 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 225 $hdr{connection} = "keep-alive";
226 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 227 } else {
144 delete $hdr{connection}; 228 delete $hdr{connection};
145 } 229 }
146 230
147 # (re-)configure handle 231 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 239 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 240 });
157 241
158 # send request 242 # send request
159 $state{handle}->push_write ( 243 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 244 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 245 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 246 . "\015\012"
163 . (delete $state{body}) 247 . (delete $arg{body})
164 ); 248 );
165 249
166 %hdr = (); # reduce memory usage, save a kitten 250 %hdr = (); # reduce memory usage, save a kitten
167 251
168 # status line 252 # status line
177 ); 261 );
178 262
179 # headers, could be optimized a bit 263 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 264 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 265 for ("$_[1]\012") {
266 # we support spaces in field names, as lotus domino
267 # creates them.
182 $hdr{lc $1} .= ",$2" 268 $hdr{lc $1} .= ",$2"
183 while /\G 269 while /\G
184 ([^:\000-\040]+): 270 ([^:\000-\037]+):
185 [\011\040]* 271 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 272 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 273 \015?\012
188 /gxc; 274 /gxc;
189 275
192 } 278 }
193 279
194 substr $_, 0, 1, "" 280 substr $_, 0, 1, ""
195 for values %hdr; 281 for values %hdr;
196 282
197 if (exists $hdr{"content-length"}) { 283 my $finish = sub {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 284 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
199 # could cache persistent connection now 285 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) { 286 } else {
287 $cb->($_[0], $_[1]);
201 }; 288 }
289 };
202 290
291 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
203 %state = (); 292 %state = ();
204 $cb->($_[1], \%hdr); 293 $finish->(undef, \%hdr);
205 });
206 } else { 294 } else {
295 if (exists $hdr{"content-length"}) {
296 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
297 # could cache persistent connection now
298 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
299 # but we don't, due to misdesigns, this is annoyingly complex
300 };
301
302 %state = ();
303 $finish->($_[1], \%hdr);
304 });
305 } else {
207 # too bad, need to read until we get an error or EOF, 306 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 307 # no way to detect winged data.
209 $_[0]->on_error (sub { 308 $_[0]->on_error (sub {
210 %state = (); 309 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 310 $finish->($_[0]{rbuf}, \%hdr);
212 }); 311 });
213 $_[0]->on_eof (undef); 312 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 313 $_[0]->on_read (sub { });
314 }
215 } 315 }
216 }); 316 });
217 }); 317 });
218 }, sub { 318 }, sub {
219 $timeout 319 $timeout
225sub http_get($$;@) { 325sub http_get($$;@) {
226 unshift @_, "GET"; 326 unshift @_, "GET";
227 &http_request 327 &http_request
228} 328}
229 329
330sub http_head($$;@) {
331 unshift @_, "HEAD";
332 &http_request
333}
334
335sub http_post($$$;@) {
336 unshift @_, "POST", "body";
337 &http_request
338}
339
340=back
341
230=head2 GLOBAL VARIABLES 342=head2 GLOBAL FUNCTIONS AND VARIABLES
231 343
232=over 4 344=over 4
233 345
346=item AnyEvent::HTTP::set_proxy "proxy-url"
347
348Sets the default proxy server to use. The proxy-url must begin with a
349string of the form C<http://host:port> (optionally C<https:...>).
350
234=item $AnyEvent::HTTP::MAX_REDIRECTS 351=item $AnyEvent::HTTP::MAX_RECURSE
235 352
236The default value for the C<max_redirects> request parameter 353The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 354
239=item $AnyEvent::HTTP::USERAGENT 355=item $AnyEvent::HTTP::USERAGENT
240 356
241The default value for the C<User-Agent> header (the default is 357The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 358C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 359
244=item $AnyEvent::HTTP::MAX_PERSISTENT 360=item $AnyEvent::HTTP::MAX_PERSISTENT
245 361
246The maximum number of persistent connections to keep open (default: 8). 362The maximum number of persistent connections to keep open (default: 8).
247 363
364Not implemented currently.
365
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 366=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 367
250The maximum time to cache a persistent connection, in seconds (default: 15). 368The maximum time to cache a persistent connection, in seconds (default: 2).
369
370Not implemented currently.
251 371
252=back 372=back
253 373
254=cut 374=cut
375
376sub set_proxy($) {
377 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
378}
379
380# initialise proxy from environment
381set_proxy $ENV{http_proxy};
255 382
256=head1 SEE ALSO 383=head1 SEE ALSO
257 384
258L<AnyEvent>. 385L<AnyEvent>.
259 386

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines