ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.5 by root, Wed Jun 4 12:03:47 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_head $url, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-HEAD request. See the http_request function for details on
60additional parameters.
61
62=item http_post $url, $body, key => value..., $cb->($data, $headers)
63
64Executes an HTTP-POST request with a requets body of C<$bod>. See the
65http_request function for details on additional parameters.
66
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 67=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 68
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 69Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 70must be an absolute http or https URL.
59 71
72The callback will be called with the response data as first argument
73(or C<undef> if it wasn't available due to errors), and a hash-ref with
74response headers as second argument.
75
76All the headers in that has are lowercased. In addition to the response
77headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78C<Reason> contain the three parts of the HTTP Status-Line of the same
79name.
80
81If an internal error occurs, such as not being able to resolve a hostname,
82then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83and the C<Reason> pseudo-header will contain an error message.
84
60Additional parameters are key-value pairs, and are fully optional. They 85Additional parameters are key-value pairs, and are fully optional. They
61include: 86include:
62 87
63=over 4 88=over 4
64 89
65=item recurse => $boolean (default: true) 90=item recurse => $count (default: $MAX_RECURSE)
66 91
67Whether to recurse requests or not, e.g. on redirects, authentication 92Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 93retries and so on, and how often to do so.
69 94
70=item headers => hashref 95=item headers => hashref
71 96
72The request headers to use. 97The request headers to use.
73 98
74=item timeout => $seconds 99=item timeout => $seconds
75 100
76The time-out to use for various stages - each connect attempt will reset 101The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 102the timeout, as will read or write activity. Default timeout is 5 minutes.
103
104=item proxy => [$host, $port[, $scheme]] or undef
105
106Use the given http proxy for all requests. If not specified, then the
107default proxy (as specified by C<$ENV{http_proxy}>) is used.
108
109C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
110HTTPS.
111
112=item body => $string
113
114The request body, usually empty. Will be-sent as-is (future versions of
115this module might offer more options).
78 116
79=back 117=back
80 118
81=back 119=back
82 120
85sub http_request($$$;@) { 123sub http_request($$$;@) {
86 my $cb = pop; 124 my $cb = pop;
87 my ($method, $url, %arg) = @_; 125 my ($method, $url, %arg) = @_;
88 126
89 my %hdr; 127 my %hdr;
128
129 $method = uc $method;
90 130
91 if (my $hdr = delete $arg{headers}) { 131 if (my $hdr = delete $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 132 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 133 $hdr{lc $k} = $v;
94 } 134 }
95 } 135 }
96 136
137 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 138 my $timeout = $arg{timeout} || $TIMEOUT;
139 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
98 140
99 $hdr{"user-agent"} ||= $USERAGENT; 141 $hdr{"user-agent"} ||= $USERAGENT;
100 142
143 my ($host, $port, $path, $scheme);
144
145 if ($proxy) {
146 ($host, $port, $scheme) = @$proxy;
147 $path = $url;
148 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 149 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 150 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103 151
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80 152 $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443 153 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported"; 154 : croak "$url: only http and https URLs supported";
108 155
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 156 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL"; 157 or croak "$authority: unparsable URL";
111 158
112 my $host = $1; 159 $host = $1;
113 $port = $2 if defined $2; 160 $port = $2 if defined $2;
114 161
115 $host =~ s/^\[(.*)\]$/$1/; 162 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query; 163 $path .= "?$query" if length $query;
117 164
165 $path = "/" unless $path;
166
118 $hdr{host} = $host = lc $host; 167 $hdr{host} = $host = lc $host;
168 }
169
170 $scheme = lc $scheme;
119 171
120 my %state; 172 my %state;
121 173
122 my $body = "";
123 $state{body} = $body; 174 $state{body} = delete $arg{body};
124 175
125 $hdr{"content-length"} = length $body; 176 $hdr{"content-length"} = length $state{body};
126 177
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 178 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 179 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 180 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 181
138 # limit the number of persistent connections 189 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 190 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 191 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 192 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 193 $hdr{connection} = "keep-alive";
194 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 195 } else {
144 delete $hdr{connection}; 196 delete $hdr{connection};
145 } 197 }
146 198
147 # (re-)configure handle 199 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 207 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 208 });
157 209
158 # send request 210 # send request
159 $state{handle}->push_write ( 211 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 212 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 213 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 214 . "\015\012"
163 . (delete $state{body}) 215 . (delete $state{body})
164 ); 216 );
165 217
177 ); 229 );
178 230
179 # headers, could be optimized a bit 231 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 232 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 233 for ("$_[1]\012") {
234 # we support spaces in field names, as lotus domino
235 # creates them.
182 $hdr{lc $1} .= ",$2" 236 $hdr{lc $1} .= ",$2"
183 while /\G 237 while /\G
184 ([^:\000-\040]+): 238 ([^:\000-\037]+):
185 [\011\040]* 239 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 240 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 241 \015?\012
188 /gxc; 242 /gxc;
189 243
192 } 246 }
193 247
194 substr $_, 0, 1, "" 248 substr $_, 0, 1, ""
195 for values %hdr; 249 for values %hdr;
196 250
197 if (exists $hdr{"content-length"}) { 251 if ($method eq "HEAD") {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
201 };
202
203 %state = (); 252 %state = ();
204 $cb->($_[1], \%hdr); 253 $cb->(undef, \%hdr);
205 });
206 } else { 254 } else {
255 if (exists $hdr{"content-length"}) {
256 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
257 # could cache persistent connection now
258 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
259 # but we don't, due to misdesigns, this is annoyingly complex
260 };
261
262 %state = ();
263 $cb->($_[1], \%hdr);
264 });
265 } else {
207 # too bad, need to read until we get an error or EOF, 266 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 267 # no way to detect winged data.
209 $_[0]->on_error (sub { 268 $_[0]->on_error (sub {
210 %state = (); 269 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 270 $cb->($_[0]{rbuf}, \%hdr);
212 }); 271 });
213 $_[0]->on_eof (undef); 272 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 273 $_[0]->on_read (sub { });
274 }
215 } 275 }
216 }); 276 });
217 }); 277 });
218 }, sub { 278 }, sub {
219 $timeout 279 $timeout
225sub http_get($$;@) { 285sub http_get($$;@) {
226 unshift @_, "GET"; 286 unshift @_, "GET";
227 &http_request 287 &http_request
228} 288}
229 289
290sub http_head($$;@) {
291 unshift @_, "HEAD";
292 &http_request
293}
294
295sub http_post($$$;@) {
296 unshift @_, "POST", "body";
297 &http_request
298}
299
230=head2 GLOBAL VARIABLES 300=head2 GLOBAL FUNCTIONS AND VARIABLES
231 301
232=over 4 302=over 4
233 303
304=item AnyEvent::HTTP::set_proxy "proxy-url"
305
306Sets the default proxy server to use. The proxy-url must begin with a
307string of the form C<http://host:port> (optionally C<https:...>).
308
234=item $AnyEvent::HTTP::MAX_REDIRECTS 309=item $AnyEvent::HTTP::MAX_RECURSE
235 310
236The default value for the C<max_redirects> request parameter 311The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 312
239=item $AnyEvent::HTTP::USERAGENT 313=item $AnyEvent::HTTP::USERAGENT
240 314
241The default value for the C<User-Agent> header (the default is 315The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 316C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 317
244=item $AnyEvent::HTTP::MAX_PERSISTENT 318=item $AnyEvent::HTTP::MAX_PERSISTENT
245 319
246The maximum number of persistent connections to keep open (default: 8). 320The maximum number of persistent connections to keep open (default: 8).
247 321
322Not implemented currently.
323
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 324=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 325
250The maximum time to cache a persistent connection, in seconds (default: 15). 326The maximum time to cache a persistent connection, in seconds (default: 2).
327
328Not implemented currently.
251 329
252=back 330=back
253 331
254=cut 332=cut
333
334sub set_proxy($) {
335 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
336}
337
338# initialise proxy from environment
339set_proxy $ENV{http_proxy};
255 340
256=head1 SEE ALSO 341=head1 SEE ALSO
257 342
258L<AnyEvent>. 343L<AnyEvent>.
259 344

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines