ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.4 by root, Wed Jun 4 11:59:22 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_get $url, $body, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-POST request with a requets body of C<$bod>. See the
60http_request function for details on additional parameters.
61
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 62=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 63
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 64Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 65must be an absolute http or https URL.
59 66
67The callback will be called with the response data as first argument
68(or C<undef> if it wasn't available due to errors), and a hash-ref with
69response headers as second argument.
70
71All the headers in that has are lowercased. In addition to the response
72headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
73C<Reason> contain the three parts of the HTTP Status-Line of the same
74name.
75
76If an internal error occurs, such as not being able to resolve a hostname,
77then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
78and the C<Reason> pseudo-header will contain an error message.
79
60Additional parameters are key-value pairs, and are fully optional. They 80Additional parameters are key-value pairs, and are fully optional. They
61include: 81include:
62 82
63=over 4 83=over 4
64 84
65=item recurse => $boolean (default: true) 85=item recurse => $count (default: $MAX_RECURSE)
66 86
67Whether to recurse requests or not, e.g. on redirects, authentication 87Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 88retries and so on, and how often to do so.
69 89
70=item headers => hashref 90=item headers => hashref
71 91
72The request headers to use. 92The request headers to use.
73 93
74=item timeout => $seconds 94=item timeout => $seconds
75 95
76The time-out to use for various stages - each connect attempt will reset 96The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 97the timeout, as will read or write activity. Default timeout is 5 minutes.
98
99=item proxy => [$host, $port[, $scheme]] or undef
100
101Use the given http proxy for all requests. If not specified, then the
102default proxy (as specified by C<$ENV{http_proxy}>) is used.
103
104C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
105HTTPS.
106
107=item body => $string
108
109The request body, usually empty. Will be-sent as-is (future versions of
110this module might offer more options).
78 111
79=back 112=back
80 113
81=back 114=back
82 115
85sub http_request($$$;@) { 118sub http_request($$$;@) {
86 my $cb = pop; 119 my $cb = pop;
87 my ($method, $url, %arg) = @_; 120 my ($method, $url, %arg) = @_;
88 121
89 my %hdr; 122 my %hdr;
123
124 $method = uc $method;
90 125
91 if (my $hdr = delete $arg{headers}) { 126 if (my $hdr = delete $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 127 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 128 $hdr{lc $k} = $v;
94 } 129 }
95 } 130 }
96 131
132 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 133 my $timeout = $arg{timeout} || $TIMEOUT;
134 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
98 135
99 $hdr{"user-agent"} ||= $USERAGENT; 136 $hdr{"user-agent"} ||= $USERAGENT;
100 137
138 my ($host, $port, $path, $scheme);
139
140 if ($proxy) {
141 ($host, $port, $scheme) = @$proxy;
142 $path = $url;
143 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 144 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 145 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103 146
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80 147 $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443 148 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported"; 149 : croak "$url: only http and https URLs supported";
108 150
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 151 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL"; 152 or croak "$authority: unparsable URL";
111 153
112 my $host = $1; 154 $host = $1;
113 $port = $2 if defined $2; 155 $port = $2 if defined $2;
114 156
115 $host =~ s/^\[(.*)\]$/$1/; 157 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query; 158 $path .= "?$query" if length $query;
117 159
160 $path = "/" unless $path;
161
118 $hdr{host} = $host = lc $host; 162 $hdr{host} = $host = lc $host;
163 }
164
165 $scheme = lc $scheme;
119 166
120 my %state; 167 my %state;
121 168
122 my $body = "";
123 $state{body} = $body; 169 $state{body} = delete $arg{body};
124 170
125 $hdr{"content-length"} = length $body; 171 $hdr{"content-length"} = length $state{body};
126 172
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 173 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 174 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 175 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 176
138 # limit the number of persistent connections 184 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 185 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 186 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 187 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 188 $hdr{connection} = "keep-alive";
189 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 190 } else {
144 delete $hdr{connection}; 191 delete $hdr{connection};
145 } 192 }
146 193
147 # (re-)configure handle 194 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 202 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 203 });
157 204
158 # send request 205 # send request
159 $state{handle}->push_write ( 206 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 207 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 208 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 209 . "\015\012"
163 . (delete $state{body}) 210 . (delete $state{body})
164 ); 211 );
165 212
177 ); 224 );
178 225
179 # headers, could be optimized a bit 226 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 227 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 228 for ("$_[1]\012") {
229 # we support spaces in field names, as lotus domino
230 # creates them.
182 $hdr{lc $1} .= ",$2" 231 $hdr{lc $1} .= ",$2"
183 while /\G 232 while /\G
184 ([^:\000-\040]+): 233 ([^:\000-\037]+):
185 [\011\040]* 234 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 235 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 236 \015?\012
188 /gxc; 237 /gxc;
189 238
192 } 241 }
193 242
194 substr $_, 0, 1, "" 243 substr $_, 0, 1, ""
195 for values %hdr; 244 for values %hdr;
196 245
197 if (exists $hdr{"content-length"}) { 246 if ($method eq "HEAD") {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
201 };
202
203 %state = (); 247 %state = ();
204 $cb->($_[1], \%hdr); 248 $cb->(undef, \%hdr);
205 });
206 } else { 249 } else {
250 if (exists $hdr{"content-length"}) {
251 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
252 # could cache persistent connection now
253 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
254 # but we don't, due to misdesigns, this is annoyingly complex
255 };
256
257 %state = ();
258 $cb->($_[1], \%hdr);
259 });
260 } else {
207 # too bad, need to read until we get an error or EOF, 261 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 262 # no way to detect winged data.
209 $_[0]->on_error (sub { 263 $_[0]->on_error (sub {
210 %state = (); 264 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 265 $cb->($_[0]{rbuf}, \%hdr);
212 }); 266 });
213 $_[0]->on_eof (undef); 267 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 268 $_[0]->on_read (sub { });
269 }
215 } 270 }
216 }); 271 });
217 }); 272 });
218 }, sub { 273 }, sub {
219 $timeout 274 $timeout
225sub http_get($$;@) { 280sub http_get($$;@) {
226 unshift @_, "GET"; 281 unshift @_, "GET";
227 &http_request 282 &http_request
228} 283}
229 284
285sub http_head($$;@) {
286 unshift @_, "HEAD";
287 &http_request
288}
289
290sub http_post($$$;@) {
291 unshift @_, "POST", "body";
292 &http_request
293}
294
230=head2 GLOBAL VARIABLES 295=head2 GLOBAL FUNCTIONS AND VARIABLES
231 296
232=over 4 297=over 4
233 298
299=item AnyEvent::HTTP::set_proxy "proxy-url"
300
301Sets the default proxy server to use. The proxy-url must begin with a
302string of the form C<http://host:port> (optionally C<https:...>).
303
234=item $AnyEvent::HTTP::MAX_REDIRECTS 304=item $AnyEvent::HTTP::MAX_RECURSE
235 305
236The default value for the C<max_redirects> request parameter 306The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 307
239=item $AnyEvent::HTTP::USERAGENT 308=item $AnyEvent::HTTP::USERAGENT
240 309
241The default value for the C<User-Agent> header (the default is 310The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 311C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 312
244=item $AnyEvent::HTTP::MAX_PERSISTENT 313=item $AnyEvent::HTTP::MAX_PERSISTENT
245 314
246The maximum number of persistent connections to keep open (default: 8). 315The maximum number of persistent connections to keep open (default: 8).
247 316
317Not implemented currently.
318
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 319=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 320
250The maximum time to cache a persistent connection, in seconds (default: 15). 321The maximum time to cache a persistent connection, in seconds (default: 2).
322
323Not implemented currently.
251 324
252=back 325=back
253 326
254=cut 327=cut
328
329sub set_proxy($) {
330 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
331}
332
333# initialise proxy from environment
334set_proxy $ENV{http_proxy};
255 335
256=head1 SEE ALSO 336=head1 SEE ALSO
257 337
258L<AnyEvent>. 338L<AnyEvent>.
259 339

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines