ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.1
Committed: Tue Jun 3 16:37:13 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $MAX_REDIRECTS = 10;
39     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
40     our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 15;
42     our $TIMEOUT = 60;
43    
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48     my %KA_COUNT; # number of open keep-alive connections per host
49    
50     =item http_get $url, key => value..., $cb->($data, $headers)
51    
52     Executes an HTTP-GET request. See the http_request function for details on
53     additional parameters.
54    
55     =item http_request $method => $url, key => value..., $cb->($data, $headers)
56    
57     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58     must be an absolute http or https URL.
59    
60     Additional parameters are key-value pairs, and are fully optional. They
61     include:
62    
63     =over 4
64    
65     =item recurse => $boolean (default: true)
66    
67     Whether to recurse requests or not, e.g. on redirects, authentication
68     retries and so on.
69    
70     =item headers => hashref
71    
72     The request headers to use.
73    
74     =item timeout => $seconds
75    
76     The time-out to use for various stages - each connect attempt will reset
77     the timeout, as will read or write activity.
78    
79     =back
80    
81     =back
82    
83     =cut
84    
85     sub http_request($$$;@) {
86     my $cb = pop;
87     my ($method, $url, %arg) = @_;
88    
89     my %hdr;
90    
91     if (my $hdr = delete $arg{headers}) {
92     while (my ($k, $v) = each %$hdr) {
93     $hdr{lc $k} = $v;
94     }
95     }
96    
97     my $timeout = $arg{timeout} || $TIMEOUT;
98    
99     $hdr{"user-agent"} ||= $USERAGENT;
100    
101     my ($scheme, $authority, $path, $query, $fragment) =
102     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103    
104     $scheme = lc $scheme;
105     my $port = $scheme eq "http" ? 80
106     : $scheme eq "https" ? 443
107     : croak "$url: only http and https URLs supported";
108    
109     $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110     or croak "$authority: unparsable URL";
111    
112     my $host = $1;
113     $port = $2 if defined $2;
114    
115     $host =~ s/^\[(.*)\]$/$1/;
116     $path .= "?$query" if length $query;
117    
118     $hdr{host} = $host = lc $host;
119    
120     my %state;
121    
122     my $body = "";
123     $state{body} = $body;
124    
125     $hdr{"content-length"} = length $body;
126    
127     $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
128     $state{fh} = shift
129     or return $cb->(undef, { Status => 599, Reason => "$!" });
130    
131     delete $state{connect_guard}; # reduce memory usage, save a tree
132    
133     # get handle
134     $state{handle} = new AnyEvent::Handle
135     fh => $state{fh},
136     ($scheme eq "https" ? (tls => "connect") : ());
137    
138     # limit the number of persistent connections
139     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140     ++$KA_COUNT{$_[1]};
141     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142     $hdr{connection} = "keep-alive";
143     } else {
144     delete $hdr{connection};
145     }
146    
147     # (re-)configure handle
148     $state{handle}->timeout ($timeout);
149     $state{handle}->on_error (sub {
150     %state = ();
151     $cb->(undef, { Status => 599, Reason => "$!" });
152     });
153     $state{handle}->on_eof (sub {
154     %state = ();
155     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
156     });
157    
158     # send request
159     $state{handle}->push_write (
160     "\U$method\E $path HTTP/1.0\015\012"
161     . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
162     . "\015\012"
163     . (delete $state{body})
164     );
165    
166     %hdr = (); # reduce memory usage, save a kitten
167    
168     # status line
169     $state{handle}->push_read (line => qr/\015?\012/, sub {
170     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
171     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
172    
173     my %hdr = ( # response headers
174     HTTPVersion => ",$1",
175     Status => ",$2",
176     Reason => ",$3",
177     );
178    
179     # headers, could be optimized a bit
180     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181     for ("$_[1]\012") {
182     $hdr{lc $1} .= ",$2"
183     while /\G
184     ([^:\000-\040]+):
185     [\011\040]*
186     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187     \015?\012
188     /gxc;
189    
190     /\G$/
191     or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
192     }
193    
194     substr $_, 0, 1, ""
195     for values %hdr;
196    
197     if (exists $hdr{"content-length"}) {
198     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199     # could cache persistent connection now
200     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
201     };
202    
203     %state = ();
204     $cb->($_[1], \%hdr);
205     });
206     } else {
207     # too bad, need to read until we get an error or EOF,
208     # no way to detect winged data.
209     $_[0]->on_error (sub {
210     %state = ();
211     $cb->($_[0]{rbuf}, \%hdr);
212     });
213     $_[0]->on_eof (undef);
214     $_[0]->on_read (sub { });
215     }
216     });
217     });
218     }, sub {
219     $timeout
220     };
221    
222     defined wantarray && AnyEvent::Util::guard { %state = () }
223     }
224    
225     sub http_get($$;@) {
226     unshift @_, "GET";
227     &http_request
228     }
229    
230     =head2 GLOBAL VARIABLES
231    
232     =over 4
233    
234     =item $AnyEvent::HTTP::MAX_REDIRECTS
235    
236     The default value for the C<max_redirects> request parameter
237     (default: C<10>).
238    
239     =item $AnyEvent::HTTP::USERAGENT
240    
241     The default value for the C<User-Agent> header (the default is
242     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
243    
244     =item $AnyEvent::HTTP::MAX_PERSISTENT
245    
246     The maximum number of persistent connections to keep open (default: 8).
247    
248     =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249    
250     The maximum time to cache a persistent connection, in seconds (default: 15).
251    
252     =back
253    
254     =cut
255    
256     =head1 SEE ALSO
257    
258     L<AnyEvent>.
259    
260     =head1 AUTHOR
261    
262     Marc Lehmann <schmorp@schmorp.de>
263     http://home.schmorp.de/
264    
265     =cut
266    
267     1
268