ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.7 by root, Wed Jun 4 12:10:27 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_head $url, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-HEAD request. See the http_request function for details on
60additional parameters.
61
62=item http_post $url, $body, key => value..., $cb->($data, $headers)
63
64Executes an HTTP-POST request with a request body of C<$bod>. See the
65http_request function for details on additional parameters.
66
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 67=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 68
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 69Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 70must be an absolute http or https URL.
59 71
72The callback will be called with the response data as first argument
73(or C<undef> if it wasn't available due to errors), and a hash-ref with
74response headers as second argument.
75
76All the headers in that hash are lowercased. In addition to the response
77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78C<Reason> contain the three parts of the HTTP Status-Line of the same
79name.
80
81If an internal error occurs, such as not being able to resolve a hostname,
82then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83and the C<Reason> pseudo-header will contain an error message.
84
85A typical callback might look like this:
86
87 sub {
88 my ($body, $hdr) = @_;
89
90 if ($hdr->{Status} =~ /^2/) {
91 ... everything should be ok
92 } else {
93 print "error, $hdr->{Status} $hdr->{Reason}\n";
94 }
95 }
96
60Additional parameters are key-value pairs, and are fully optional. They 97Additional parameters are key-value pairs, and are fully optional. They
61include: 98include:
62 99
63=over 4 100=over 4
64 101
65=item recurse => $boolean (default: true) 102=item recurse => $count (default: $MAX_RECURSE)
66 103
67Whether to recurse requests or not, e.g. on redirects, authentication 104Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 105retries and so on, and how often to do so.
69 106
70=item headers => hashref 107=item headers => hashref
71 108
72The request headers to use. 109The request headers to use.
73 110
74=item timeout => $seconds 111=item timeout => $seconds
75 112
76The time-out to use for various stages - each connect attempt will reset 113The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 114the timeout, as will read or write activity. Default timeout is 5 minutes.
115
116=item proxy => [$host, $port[, $scheme]] or undef
117
118Use the given http proxy for all requests. If not specified, then the
119default proxy (as specified by C<$ENV{http_proxy}>) is used.
120
121C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122HTTPS.
123
124=item body => $string
125
126The request body, usually empty. Will be-sent as-is (future versions of
127this module might offer more options).
78 128
79=back 129=back
80 130
81=back 131=back
82 132
85sub http_request($$$;@) { 135sub http_request($$$;@) {
86 my $cb = pop; 136 my $cb = pop;
87 my ($method, $url, %arg) = @_; 137 my ($method, $url, %arg) = @_;
88 138
89 my %hdr; 139 my %hdr;
140
141 $method = uc $method;
90 142
91 if (my $hdr = delete $arg{headers}) { 143 if (my $hdr = delete $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 144 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 145 $hdr{lc $k} = $v;
94 } 146 }
95 } 147 }
96 148
149 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 150 my $timeout = $arg{timeout} || $TIMEOUT;
151 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
98 152
99 $hdr{"user-agent"} ||= $USERAGENT; 153 $hdr{"user-agent"} ||= $USERAGENT;
100 154
155 my ($host, $port, $path, $scheme);
156
157 if ($proxy) {
158 ($host, $port, $scheme) = @$proxy;
159 $path = $url;
160 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 161 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 162 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103 163
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80 164 $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443 165 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported"; 166 : croak "$url: only http and https URLs supported";
108 167
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 168 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL"; 169 or croak "$authority: unparsable URL";
111 170
112 my $host = $1; 171 $host = $1;
113 $port = $2 if defined $2; 172 $port = $2 if defined $2;
114 173
115 $host =~ s/^\[(.*)\]$/$1/; 174 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query; 175 $path .= "?$query" if length $query;
117 176
177 $path = "/" unless $path;
178
118 $hdr{host} = $host = lc $host; 179 $hdr{host} = $host = lc $host;
180 }
181
182 $scheme = lc $scheme;
119 183
120 my %state; 184 my %state;
121 185
122 my $body = "";
123 $state{body} = $body; 186 $state{body} = delete $arg{body};
124 187
125 $hdr{"content-length"} = length $body; 188 $hdr{"content-length"} = length $state{body};
126 189
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 190 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 191 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 192 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 193
138 # limit the number of persistent connections 201 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 202 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 203 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 204 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 205 $hdr{connection} = "keep-alive";
206 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 207 } else {
144 delete $hdr{connection}; 208 delete $hdr{connection};
145 } 209 }
146 210
147 # (re-)configure handle 211 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 219 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 220 });
157 221
158 # send request 222 # send request
159 $state{handle}->push_write ( 223 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 224 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 225 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 226 . "\015\012"
163 . (delete $state{body}) 227 . (delete $state{body})
164 ); 228 );
165 229
177 ); 241 );
178 242
179 # headers, could be optimized a bit 243 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 244 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 245 for ("$_[1]\012") {
246 # we support spaces in field names, as lotus domino
247 # creates them.
182 $hdr{lc $1} .= ",$2" 248 $hdr{lc $1} .= ",$2"
183 while /\G 249 while /\G
184 ([^:\000-\040]+): 250 ([^:\000-\037]+):
185 [\011\040]* 251 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 252 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 253 \015?\012
188 /gxc; 254 /gxc;
189 255
192 } 258 }
193 259
194 substr $_, 0, 1, "" 260 substr $_, 0, 1, ""
195 for values %hdr; 261 for values %hdr;
196 262
197 if (exists $hdr{"content-length"}) { 263 if ($method eq "HEAD") {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
201 };
202
203 %state = (); 264 %state = ();
204 $cb->($_[1], \%hdr); 265 $cb->(undef, \%hdr);
205 });
206 } else { 266 } else {
267 if (exists $hdr{"content-length"}) {
268 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
269 # could cache persistent connection now
270 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
271 # but we don't, due to misdesigns, this is annoyingly complex
272 };
273
274 %state = ();
275 $cb->($_[1], \%hdr);
276 });
277 } else {
207 # too bad, need to read until we get an error or EOF, 278 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 279 # no way to detect winged data.
209 $_[0]->on_error (sub { 280 $_[0]->on_error (sub {
210 %state = (); 281 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 282 $cb->($_[0]{rbuf}, \%hdr);
212 }); 283 });
213 $_[0]->on_eof (undef); 284 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 285 $_[0]->on_read (sub { });
286 }
215 } 287 }
216 }); 288 });
217 }); 289 });
218 }, sub { 290 }, sub {
219 $timeout 291 $timeout
225sub http_get($$;@) { 297sub http_get($$;@) {
226 unshift @_, "GET"; 298 unshift @_, "GET";
227 &http_request 299 &http_request
228} 300}
229 301
302sub http_head($$;@) {
303 unshift @_, "HEAD";
304 &http_request
305}
306
307sub http_post($$$;@) {
308 unshift @_, "POST", "body";
309 &http_request
310}
311
230=head2 GLOBAL VARIABLES 312=head2 GLOBAL FUNCTIONS AND VARIABLES
231 313
232=over 4 314=over 4
233 315
316=item AnyEvent::HTTP::set_proxy "proxy-url"
317
318Sets the default proxy server to use. The proxy-url must begin with a
319string of the form C<http://host:port> (optionally C<https:...>).
320
234=item $AnyEvent::HTTP::MAX_REDIRECTS 321=item $AnyEvent::HTTP::MAX_RECURSE
235 322
236The default value for the C<max_redirects> request parameter 323The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 324
239=item $AnyEvent::HTTP::USERAGENT 325=item $AnyEvent::HTTP::USERAGENT
240 326
241The default value for the C<User-Agent> header (the default is 327The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 328C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 329
244=item $AnyEvent::HTTP::MAX_PERSISTENT 330=item $AnyEvent::HTTP::MAX_PERSISTENT
245 331
246The maximum number of persistent connections to keep open (default: 8). 332The maximum number of persistent connections to keep open (default: 8).
247 333
334Not implemented currently.
335
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 336=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 337
250The maximum time to cache a persistent connection, in seconds (default: 15). 338The maximum time to cache a persistent connection, in seconds (default: 2).
339
340Not implemented currently.
251 341
252=back 342=back
253 343
254=cut 344=cut
345
346sub set_proxy($) {
347 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
348}
349
350# initialise proxy from environment
351set_proxy $ENV{http_proxy};
255 352
256=head1 SEE ALSO 353=head1 SEE ALSO
257 354
258L<AnyEvent>. 355L<AnyEvent>.
259 356

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines