ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.3 by root, Wed Jun 4 11:58:36 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
53additional parameters. 55additional parameters.
54 56
57=item http_get $url, $body, key => value..., $cb->($data, $headers)
58
59Executes an HTTP-POST request with a requets body of C<$bod>. See the
60http_request function for details on additional parameters.
61
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 62=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 63
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 64Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 65must be an absolute http or https URL.
59 66
67The callback will be called with the response data as first argument
68(or C<undef> if it wasn't available due to errors), and a hash-ref with
69response headers as second argument.
70
71All the headers in that has are lowercased. In addition to the response
72headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
73C<Reason> contain the three parts of the HTTP Status-Line of the same
74name.
75
76If an internal error occurs, such as not being able to resolve a hostname,
77then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
78and the C<Reason> pseudo-header will contain an error message.
79
60Additional parameters are key-value pairs, and are fully optional. They 80Additional parameters are key-value pairs, and are fully optional. They
61include: 81include:
62 82
63=over 4 83=over 4
64 84
65=item recurse => $boolean (default: true) 85=item recurse => $count (default: $MAX_RECURSE)
66 86
67Whether to recurse requests or not, e.g. on redirects, authentication 87Whether to recurse requests or not, e.g. on redirects, authentication
68retries and so on. 88retries and so on, and how often to do so.
69 89
70=item headers => hashref 90=item headers => hashref
71 91
72The request headers to use. 92The request headers to use.
73 93
74=item timeout => $seconds 94=item timeout => $seconds
75 95
76The time-out to use for various stages - each connect attempt will reset 96The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 97the timeout, as will read or write activity. Default timeout is 5 minutes.
98
99=item proxy => [$host, $port[, $scheme]] or undef
100
101Use the given http proxy for all requests. If not specified, then the
102default proxy (as specified by C<$ENV{http_proxy}>) is used.
103
104C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
105HTTPS.
106
107=item body => $string
108
109The request body, usually empty. Will be-sent as-is (future versions of
110this module might offer more options).
78 111
79=back 112=back
80 113
81=back 114=back
82 115
85sub http_request($$$;@) { 118sub http_request($$$;@) {
86 my $cb = pop; 119 my $cb = pop;
87 my ($method, $url, %arg) = @_; 120 my ($method, $url, %arg) = @_;
88 121
89 my %hdr; 122 my %hdr;
123
124 $method = uc $method;
90 125
91 if (my $hdr = delete $arg{headers}) { 126 if (my $hdr = delete $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) { 127 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 128 $hdr{lc $k} = $v;
94 } 129 }
95 } 130 }
96 131
132 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 133 my $timeout = $arg{timeout} || $TIMEOUT;
134 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
98 135
99 $hdr{"user-agent"} ||= $USERAGENT; 136 $hdr{"user-agent"} ||= $USERAGENT;
100 137
138 my ($host, $port, $path, $scheme);
139
140 if ($proxy) {
141 ($host, $port, $scheme) = @$proxy;
142 $path = $url;
143 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 144 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 145 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103 146
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80 147 $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443 148 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported"; 149 : croak "$url: only http and https URLs supported";
108 150
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 151 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL"; 152 or croak "$authority: unparsable URL";
111 153
112 my $host = $1; 154 $host = $1;
113 $port = $2 if defined $2; 155 $port = $2 if defined $2;
114 156
115 $host =~ s/^\[(.*)\]$/$1/; 157 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query; 158 $path .= "?$query" if length $query;
117 159
160 $path = "/" unless $path;
161
118 $hdr{host} = $host = lc $host; 162 $hdr{host} = $host = lc $host;
163 }
164
165 $scheme = lc $scheme;
119 166
120 my %state; 167 my %state;
121 168
122 my $body = "";
123 $state{body} = $body; 169 $state{body} = delete $arg{body};
124 170
125 $hdr{"content-length"} = length $body; 171 $hdr{"content-length"} = length $state{body};
126 172
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub { 173 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift 174 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" }); 175 or return $cb->(undef, { Status => 599, Reason => "$!" });
130 176
138 # limit the number of persistent connections 184 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 185 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 186 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 187 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 188 $hdr{connection} = "keep-alive";
189 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 190 } else {
144 delete $hdr{connection}; 191 delete $hdr{connection};
145 } 192 }
146 193
147 # (re-)configure handle 194 # (re-)configure handle
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 202 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 }); 203 });
157 204
158 # send request 205 # send request
159 $state{handle}->push_write ( 206 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012" 207 "$method $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 208 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012" 209 . "\015\012"
163 . (delete $state{body}) 210 . (delete $state{body})
164 ); 211 );
165 212
177 ); 224 );
178 225
179 # headers, could be optimized a bit 226 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 227 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 228 for ("$_[1]\012") {
229 # we support spaces in field names, as lotus domino
230 # creates them.
182 $hdr{lc $1} .= ",$2" 231 $hdr{lc $1} .= ",$2"
183 while /\G 232 while /\G
184 ([^:\000-\040]+): 233 ([^:\000-\037]+):
185 [\011\040]* 234 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 235 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 236 \015?\012
188 /gxc; 237 /gxc;
189 238
192 } 241 }
193 242
194 substr $_, 0, 1, "" 243 substr $_, 0, 1, ""
195 for values %hdr; 244 for values %hdr;
196 245
246 if ($method ne "HEAD") {
197 if (exists $hdr{"content-length"}) { 247 if (exists $hdr{"content-length"}) {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 248 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now 249 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) { 250 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
251 # but we don't, due to misdesigns, this is annoyingly complex
252 };
253
254 %state = ();
255 $cb->($_[1], \%hdr);
201 }; 256 });
202
203 %state = ();
204 $cb->($_[1], \%hdr);
205 });
206 } else { 257 } else {
207 # too bad, need to read until we get an error or EOF, 258 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data. 259 # no way to detect winged data.
209 $_[0]->on_error (sub { 260 $_[0]->on_error (sub {
210 %state = (); 261 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr); 262 $cb->($_[0]{rbuf}, \%hdr);
212 }); 263 });
213 $_[0]->on_eof (undef); 264 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { }); 265 $_[0]->on_read (sub { });
266 }
215 } 267 }
216 }); 268 });
217 }); 269 });
218 }, sub { 270 }, sub {
219 $timeout 271 $timeout
225sub http_get($$;@) { 277sub http_get($$;@) {
226 unshift @_, "GET"; 278 unshift @_, "GET";
227 &http_request 279 &http_request
228} 280}
229 281
282sub http_post($$$;@) {
283 unshift @_, "POST", "body";
284 &http_request
285}
286
230=head2 GLOBAL VARIABLES 287=head2 GLOBAL FUNCTIONS AND VARIABLES
231 288
232=over 4 289=over 4
233 290
291=item AnyEvent::HTTP::set_proxy "proxy-url"
292
293Sets the default proxy server to use. The proxy-url must begin with a
294string of the form C<http://host:port> (optionally C<https:...>).
295
234=item $AnyEvent::HTTP::MAX_REDIRECTS 296=item $AnyEvent::HTTP::MAX_RECURSE
235 297
236The default value for the C<max_redirects> request parameter 298The default value for the C<recurse> request parameter (default: C<10>).
237(default: C<10>).
238 299
239=item $AnyEvent::HTTP::USERAGENT 300=item $AnyEvent::HTTP::USERAGENT
240 301
241The default value for the C<User-Agent> header (the default is 302The default value for the C<User-Agent> header (the default is
242C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 303C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243 304
244=item $AnyEvent::HTTP::MAX_PERSISTENT 305=item $AnyEvent::HTTP::MAX_PERSISTENT
245 306
246The maximum number of persistent connections to keep open (default: 8). 307The maximum number of persistent connections to keep open (default: 8).
247 308
309Not implemented currently.
310
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 311=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 312
250The maximum time to cache a persistent connection, in seconds (default: 15). 313The maximum time to cache a persistent connection, in seconds (default: 2).
314
315Not implemented currently.
251 316
252=back 317=back
253 318
254=cut 319=cut
320
321sub set_proxy($) {
322 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
323}
324
325# initialise proxy from environment
326set_proxy $ENV{http_proxy};
255 327
256=head1 SEE ALSO 328=head1 SEE ALSO
257 329
258L<AnyEvent>. 330L<AnyEvent>.
259 331

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines