ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.8 by root, Wed Jun 4 12:32:30 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_head $url, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-HEAD request. See the http_request function for details on
60additional parameters.
61
62=item http_post $url, $body, key => value..., $cb->($data, $headers)
63
64Executes an HTTP-POST request with a request body of C<$bod>. See the
65http_request function for details on additional parameters.
66
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 67=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 68
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 69Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 70must be an absolute http or https URL.
59 71
72The callback will be called with the response data as first argument
73(or C<undef> if it wasn't available due to errors), and a hash-ref with
74response headers as second argument.
75
76All the headers in that hash are lowercased. In addition to the response
77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78C<Reason> contain the three parts of the HTTP Status-Line of the same
79name.
80
81If an internal error occurs, such as not being able to resolve a hostname,
82then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83and the C<Reason> pseudo-header will contain an error message.
84
85A typical callback might look like this:
86
87 sub {
88 my ($body, $hdr) = @_;
89
90 if ($hdr->{Status} =~ /^2/) {
91 ... everything should be ok
92 } else {
93 print "error, $hdr->{Status} $hdr->{Reason}\n";
94 }
95 }
96
60Additional parameters are key-value pairs, and are fully optional. They 97Additional parameters are key-value pairs, and are fully optional. They
61include: 98include:
62 99
63=over 4 100=over 4
64 101
65=item recurse => $boolean (default: true) 102=item recurse => $count (default: $MAX_RECURSE)
66 103
67Whether to recurse requests or not, e.g. on redirects, authentication 104Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 105retries and so on, and how often to do so.
69 106
70=item headers => hashref 107=item headers => hashref
71 108
72The request headers to use. 109The request headers to use.
73 110
74=item timeout => $seconds 111=item timeout => $seconds
75 112
76The time-out to use for various stages - each connect attempt will reset 113The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 114the timeout, as will read or write activity. Default timeout is 5 minutes.
115
116=item proxy => [$host, $port[, $scheme]] or undef
117
118Use the given http proxy for all requests. If not specified, then the
119default proxy (as specified by C<$ENV{http_proxy}>) is used.
120
121C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122HTTPS.
123
124=item body => $string
125
126The request body, usually empty. Will be-sent as-is (future versions of
127this module might offer more options).
78 128
79=back 129=back
80 130
81=back 131=back
82 132
86 my $cb = pop; 136 my $cb = pop;
87 my ($method, $url, %arg) = @_; 137 my ($method, $url, %arg) = @_;
88 138
89 my %hdr; 139 my %hdr;
90 140
141 $method = uc $method;
142
91 if (my $hdr = delete $arg{headers}) { 143 if (my $hdr = $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 144 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 145 $hdr{lc $k} = $v;
94 } 146 }
95 } 147 }
96 148
149 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
150
151 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
152 if $recurse < 0;
153
154 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 155 my $timeout = $arg{timeout} || $TIMEOUT;
98 156
99 $hdr{"user-agent"} ||= $USERAGENT; 157 $hdr{"user-agent"} ||= $USERAGENT;
100 158
159 my ($host, $port, $path, $scheme);
160
161 if ($proxy) {
162 ($host, $port, $scheme) = @$proxy;
163 $path = $url;
164 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 165 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 166 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
167
168 $port = $scheme eq "http" ? 80
169 : $scheme eq "https" ? 443
170 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
171
172 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
173 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
174
175 $host = $1;
176 $port = $2 if defined $2;
177
178 $host =~ s/^\[(.*)\]$/$1/;
179 $path .= "?$query" if length $query;
180
181 $path = "/" unless $path;
182
183 $hdr{host} = $host = lc $host;
184 }
103 185
104 $scheme = lc $scheme; 186 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported";
108
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL";
111
112 my $host = $1;
113 $port = $2 if defined $2;
114
115 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query;
117
118 $hdr{host} = $host = lc $host;
119 187
120 my %state; 188 my %state;
121 189
122 my $body = "";
123 $state{body} = $body;
124
125 $hdr{"content-length"} = length $body; 190 $hdr{"content-length"} = length $arg{body};
126 191
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 192 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 193 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 194 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 195
138 # limit the number of persistent connections 203 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 204 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 205 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 206 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 207 $hdr{connection} = "keep-alive";
208 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 209 } else {
144 delete $hdr{connection}; 210 delete $hdr{connection};
145 } 211 }
146 212
147 # (re-)configure handle 213 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 221 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 222 });
157 223
158 # send request 224 # send request
159 $state{handle}->push_write ( 225 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 226 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 227 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 228 . "\015\012"
163 . (delete $state{body}) 229 . (delete $arg{body})
164 ); 230 );
165 231
166 %hdr = (); # reduce memory usage, save a kitten 232 %hdr = (); # reduce memory usage, save a kitten
167 233
168 # status line 234 # status line
177 ); 243 );
178 244
179 # headers, could be optimized a bit 245 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 246 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 247 for ("$_[1]\012") {
248 # we support spaces in field names, as lotus domino
249 # creates them.
182 $hdr{lc $1} .= ",$2" 250 $hdr{lc $1} .= ",$2"
183 while /\G 251 while /\G
184 ([^:\000-\040]+): 252 ([^:\000-\037]+):
185 [\011\040]* 253 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 254 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 255 \015?\012
188 /gxc; 256 /gxc;
189 257
192 } 260 }
193 261
194 substr $_, 0, 1, "" 262 substr $_, 0, 1, ""
195 for values %hdr; 263 for values %hdr;
196 264
197 if (exists $hdr{"content-length"}) { 265 my $finish = sub {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 266 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
199 # could cache persistent connection now 267 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) { 268 } else {
269 $cb->($_[0], $_[1]);
201 }; 270 }
271 };
202 272
273 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
203 %state = (); 274 %state = ();
204 $cb->($_[1], \%hdr); 275 $finish->(undef, \%hdr);
205 });
206 } else { 276 } else {
277 if (exists $hdr{"content-length"}) {
278 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
279 # could cache persistent connection now
280 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
281 # but we don't, due to misdesigns, this is annoyingly complex
282 };
283
284 %state = ();
285 $finish->($_[1], \%hdr);
286 });
287 } else {
207 # too bad, need to read until we get an error or EOF, 288 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 289 # no way to detect winged data.
209 $_[0]->on_error (sub { 290 $_[0]->on_error (sub {
210 %state = (); 291 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 292 $finish->($_[0]{rbuf}, \%hdr);
212 }); 293 });
213 $_[0]->on_eof (undef); 294 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 295 $_[0]->on_read (sub { });
296 }
215 } 297 }
216 }); 298 });
217 }); 299 });
218 }, sub { 300 }, sub {
219 $timeout 301 $timeout
225sub http_get($$;@) { 307sub http_get($$;@) {
226 unshift @_, "GET"; 308 unshift @_, "GET";
227 &http_request 309 &http_request
228} 310}
229 311
312sub http_head($$;@) {
313 unshift @_, "HEAD";
314 &http_request
315}
316
317sub http_post($$$;@) {
318 unshift @_, "POST", "body";
319 &http_request
320}
321
230=head2 GLOBAL VARIABLES 322=head2 GLOBAL FUNCTIONS AND VARIABLES
231 323
232=over 4 324=over 4
233 325
326=item AnyEvent::HTTP::set_proxy "proxy-url"
327
328Sets the default proxy server to use. The proxy-url must begin with a
329string of the form C<http://host:port> (optionally C<https:...>).
330
234=item $AnyEvent::HTTP::MAX_REDIRECTS 331=item $AnyEvent::HTTP::MAX_RECURSE
235 332
236The default value for the C<max_redirects> request parameter 333The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 334
239=item $AnyEvent::HTTP::USERAGENT 335=item $AnyEvent::HTTP::USERAGENT
240 336
241The default value for the C<User-Agent> header (the default is 337The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 338C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 339
244=item $AnyEvent::HTTP::MAX_PERSISTENT 340=item $AnyEvent::HTTP::MAX_PERSISTENT
245 341
246The maximum number of persistent connections to keep open (default: 8). 342The maximum number of persistent connections to keep open (default: 8).
247 343
344Not implemented currently.
345
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 346=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 347
250The maximum time to cache a persistent connection, in seconds (default: 15). 348The maximum time to cache a persistent connection, in seconds (default: 2).
349
350Not implemented currently.
251 351
252=back 352=back
253 353
254=cut 354=cut
355
356sub set_proxy($) {
357 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
358}
359
360# initialise proxy from environment
361set_proxy $ENV{http_proxy};
255 362
256=head1 SEE ALSO 363=head1 SEE ALSO
257 364
258L<AnyEvent>. 365L<AnyEvent>.
259 366

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines