ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.1
Committed: Tue Jun 3 16:37:13 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4
5 =head1 SYNOPSIS
6
7 use AnyEvent::HTTP;
8
9 =head1 DESCRIPTION
10
11 This module is an L<AnyEvent> user, you need to make sure that you use and
12 run a supported event loop.
13
14 =head2 METHODS
15
16 =over 4
17
18 =cut
19
20 package AnyEvent::HTTP;
21
22 use strict;
23 no warnings;
24
25 use Carp;
26
27 use AnyEvent ();
28 use AnyEvent::Util ();
29 use AnyEvent::Socket ();
30 use AnyEvent::Handle ();
31
32 use base Exporter::;
33
34 our $VERSION = '1.0';
35
36 our @EXPORT = qw(http_get http_request);
37
38 our $MAX_REDIRECTS = 10;
39 our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
40 our $MAX_PERSISTENT = 8;
41 our $PERSISTENT_TIMEOUT = 15;
42 our $TIMEOUT = 60;
43
44 # changing these is evil
45 our $MAX_PERSISTENT_PER_HOST = 2;
46 our $MAX_PER_HOST = 4; # not respected yet :(
47
48 my %KA_COUNT; # number of open keep-alive connections per host
49
50 =item http_get $url, key => value..., $cb->($data, $headers)
51
52 Executes an HTTP-GET request. See the http_request function for details on
53 additional parameters.
54
55 =item http_request $method => $url, key => value..., $cb->($data, $headers)
56
57 Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58 must be an absolute http or https URL.
59
60 Additional parameters are key-value pairs, and are fully optional. They
61 include:
62
63 =over 4
64
65 =item recurse => $boolean (default: true)
66
67 Whether to recurse requests or not, e.g. on redirects, authentication
68 retries and so on.
69
70 =item headers => hashref
71
72 The request headers to use.
73
74 =item timeout => $seconds
75
76 The time-out to use for various stages - each connect attempt will reset
77 the timeout, as will read or write activity.
78
79 =back
80
81 =back
82
83 =cut
84
85 sub http_request($$$;@) {
86 my $cb = pop;
87 my ($method, $url, %arg) = @_;
88
89 my %hdr;
90
91 if (my $hdr = delete $arg{headers}) {
92 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v;
94 }
95 }
96
97 my $timeout = $arg{timeout} || $TIMEOUT;
98
99 $hdr{"user-agent"} ||= $USERAGENT;
100
101 my ($scheme, $authority, $path, $query, $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported";
108
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL";
111
112 my $host = $1;
113 $port = $2 if defined $2;
114
115 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query;
117
118 $hdr{host} = $host = lc $host;
119
120 my %state;
121
122 my $body = "";
123 $state{body} = $body;
124
125 $hdr{"content-length"} = length $body;
126
127 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128 $state{fh} = shift
129 or return $cb->(undef, { Status => 599, Reason => "$!" });
130
131 delete $state{connect_guard}; # reduce memory usage, save a tree
132
133 # get handle
134 $state{handle} = new AnyEvent::Handle
135 fh => $state{fh},
136 ($scheme eq "https" ? (tls => "connect") : ());
137
138 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive";
143 } else {
144 delete $hdr{connection};
145 }
146
147 # (re-)configure handle
148 $state{handle}->timeout ($timeout);
149 $state{handle}->on_error (sub {
150 %state = ();
151 $cb->(undef, { Status => 599, Reason => "$!" });
152 });
153 $state{handle}->on_eof (sub {
154 %state = ();
155 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156 });
157
158 # send request
159 $state{handle}->push_write (
160 "\U$method\E $path HTTP/1.0\015\012"
161 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162 . "\015\012"
163 . (delete $state{body})
164 );
165
166 %hdr = (); # reduce memory usage, save a kitten
167
168 # status line
169 $state{handle}->push_read (line => qr/\015?\012/, sub {
170 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
171 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
172
173 my %hdr = ( # response headers
174 HTTPVersion => ",$1",
175 Status => ",$2",
176 Reason => ",$3",
177 );
178
179 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") {
182 $hdr{lc $1} .= ",$2"
183 while /\G
184 ([^:\000-\040]+):
185 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012
188 /gxc;
189
190 /\G$/
191 or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
192 }
193
194 substr $_, 0, 1, ""
195 for values %hdr;
196
197 if (exists $hdr{"content-length"}) {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
201 };
202
203 %state = ();
204 $cb->($_[1], \%hdr);
205 });
206 } else {
207 # too bad, need to read until we get an error or EOF,
208 # no way to detect winged data.
209 $_[0]->on_error (sub {
210 %state = ();
211 $cb->($_[0]{rbuf}, \%hdr);
212 });
213 $_[0]->on_eof (undef);
214 $_[0]->on_read (sub { });
215 }
216 });
217 });
218 }, sub {
219 $timeout
220 };
221
222 defined wantarray && AnyEvent::Util::guard { %state = () }
223 }
224
225 sub http_get($$;@) {
226 unshift @_, "GET";
227 &http_request
228 }
229
230 =head2 GLOBAL VARIABLES
231
232 =over 4
233
234 =item $AnyEvent::HTTP::MAX_REDIRECTS
235
236 The default value for the C<max_redirects> request parameter
237 (default: C<10>).
238
239 =item $AnyEvent::HTTP::USERAGENT
240
241 The default value for the C<User-Agent> header (the default is
242 C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243
244 =item $AnyEvent::HTTP::MAX_PERSISTENT
245
246 The maximum number of persistent connections to keep open (default: 8).
247
248 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249
250 The maximum time to cache a persistent connection, in seconds (default: 15).
251
252 =back
253
254 =cut
255
256 =head1 SEE ALSO
257
258 L<AnyEvent>.
259
260 =head1 AUTHOR
261
262 Marc Lehmann <schmorp@schmorp.de>
263 http://home.schmorp.de/
264
265 =cut
266
267 1
268