ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.1 by root, Tue Jun 3 16:37:13 2008 UTC vs.
Revision 1.2 by root, Wed Jun 4 11:37:41 2008 UTC

33 33
34our $VERSION = '1.0'; 34our $VERSION = '1.0';
35 35
36our @EXPORT = qw(http_get http_request); 36our @EXPORT = qw(http_get http_request);
37 37
38our $MAX_REDIRECTS = 10;
39our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_REDIRECTS = 10;
40our $MAX_PERSISTENT = 8; 40our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 15; 41our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 60; 42our $TIMEOUT = 300;
43 43
44# changing these is evil 44# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 45our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 46our $MAX_PER_HOST = 4; # not respected yet :(
47 47
48our $PROXY;
49
48my %KA_COUNT; # number of open keep-alive connections per host 50my %KA_COUNT; # number of open keep-alive connections per host
49 51
50=item http_get $url, key => value..., $cb->($data, $headers) 52=item http_get $url, key => value..., $cb->($data, $headers)
51 53
52Executes an HTTP-GET request. See the http_request function for details on 54Executes an HTTP-GET request. See the http_request function for details on
55=item http_request $method => $url, key => value..., $cb->($data, $headers) 57=item http_request $method => $url, key => value..., $cb->($data, $headers)
56 58
57Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL 59Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
58must be an absolute http or https URL. 60must be an absolute http or https URL.
59 61
62The callback will be called with the response data as first argument
63(or C<undef> if it wasn't available due to errors), and a hash-ref with
64response headers as second argument.
65
66All the headers in that has are lowercased. In addition to the response
67headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
68C<Reason> contain the three parts of the HTTP Status-Line of the same
69name.
70
71If an internal error occurs, such as not being able to resolve a hostname,
72then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
73and the C<Reason> pseudo-header will contain an error message.
74
60Additional parameters are key-value pairs, and are fully optional. They 75Additional parameters are key-value pairs, and are fully optional. They
61include: 76include:
62 77
63=over 4 78=over 4
64 79
72The request headers to use. 87The request headers to use.
73 88
74=item timeout => $seconds 89=item timeout => $seconds
75 90
76The time-out to use for various stages - each connect attempt will reset 91The time-out to use for various stages - each connect attempt will reset
77the timeout, as will read or write activity. 92the timeout, as will read or write activity. Default timeout is 5 minutes.
93
94=item proxy => [$host, $port[, $scheme]] or undef
95
96Use the given http proxy for all requests. If not specified, then the
97default proxy (as specified by C<$ENV{http_proxy}>) is used.
98
99C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
100HTTPS.
78 101
79=back 102=back
80 103
81=back 104=back
82 105
92 while (my ($k, $v) = each %$hdr) { 115 while (my ($k, $v) = each %$hdr) {
93 $hdr{lc $k} = $v; 116 $hdr{lc $k} = $v;
94 } 117 }
95 } 118 }
96 119
120 my $proxy = $arg{proxy} || $PROXY;
97 my $timeout = $arg{timeout} || $TIMEOUT; 121 my $timeout = $arg{timeout} || $TIMEOUT;
98 122
99 $hdr{"user-agent"} ||= $USERAGENT; 123 $hdr{"user-agent"} ||= $USERAGENT;
100 124
125 my ($host, $port, $path, $scheme);
126
127 if ($proxy) {
128 ($host, $port, $scheme) = @$proxy;
129 $path = $url;
130 } else {
101 my ($scheme, $authority, $path, $query, $fragment) = 131 ($scheme, my $authority, $path, my $query, my $fragment) =
102 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; 132 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
103 133
104 $scheme = lc $scheme;
105 my $port = $scheme eq "http" ? 80 134 $port = $scheme eq "http" ? 80
106 : $scheme eq "https" ? 443 135 : $scheme eq "https" ? 443
107 : croak "$url: only http and https URLs supported"; 136 : croak "$url: only http and https URLs supported";
108 137
109 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 138 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
110 or croak "$authority: unparsable URL"; 139 or croak "$authority: unparsable URL";
111 140
112 my $host = $1; 141 $host = $1;
113 $port = $2 if defined $2; 142 $port = $2 if defined $2;
114 143
115 $host =~ s/^\[(.*)\]$/$1/; 144 $host =~ s/^\[(.*)\]$/$1/;
116 $path .= "?$query" if length $query; 145 $path .= "?$query" if length $query;
117 146
147 $path = "/" unless $path;
148
118 $hdr{host} = $host = lc $host; 149 $hdr{host} = $host = lc $host;
150 }
151
152 $scheme = lc $scheme;
119 153
120 my %state; 154 my %state;
121 155
122 my $body = ""; 156 my $body = "";
123 $state{body} = $body; 157 $state{body} = $body;
138 # limit the number of persistent connections 172 # limit the number of persistent connections
139 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 173 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
140 ++$KA_COUNT{$_[1]}; 174 ++$KA_COUNT{$_[1]};
141 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 175 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
142 $hdr{connection} = "keep-alive"; 176 $hdr{connection} = "keep-alive";
177 delete $hdr{connection}; # keep-alive not yet supported
143 } else { 178 } else {
144 delete $hdr{connection}; 179 delete $hdr{connection};
145 } 180 }
146 181
147 # (re-)configure handle 182 # (re-)configure handle
177 ); 212 );
178 213
179 # headers, could be optimized a bit 214 # headers, could be optimized a bit
180 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 215 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
181 for ("$_[1]\012") { 216 for ("$_[1]\012") {
217 # we support spaces in field names, as lotus domino
218 # creates them.
182 $hdr{lc $1} .= ",$2" 219 $hdr{lc $1} .= ",$2"
183 while /\G 220 while /\G
184 ([^:\000-\040]+): 221 ([^:\000-\037]+):
185 [\011\040]* 222 [\011\040]*
186 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 223 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
187 \015?\012 224 \015?\012
188 /gxc; 225 /gxc;
189 226
196 233
197 if (exists $hdr{"content-length"}) { 234 if (exists $hdr{"content-length"}) {
198 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub { 235 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
199 # could cache persistent connection now 236 # could cache persistent connection now
200 if ($hdr{connection} =~ /\bkeep-alive\b/i) { 237 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
238 # but we don't, due to misdesigns, this is annoyingly complex
201 }; 239 };
202 240
203 %state = (); 241 %state = ();
204 $cb->($_[1], \%hdr); 242 $cb->($_[1], \%hdr);
205 }); 243 });
225sub http_get($$;@) { 263sub http_get($$;@) {
226 unshift @_, "GET"; 264 unshift @_, "GET";
227 &http_request 265 &http_request
228} 266}
229 267
230=head2 GLOBAL VARIABLES 268=head2 GLOBAL FUNCTIONS AND VARIABLES
231 269
232=over 4 270=over 4
271
272=item AnyEvent::HTTP::set_proxy "proxy-url"
273
274Sets the default proxy server to use. The proxy-url must begin with a
275string of the form C<http://host:port> (optionally C<https:...>).
233 276
234=item $AnyEvent::HTTP::MAX_REDIRECTS 277=item $AnyEvent::HTTP::MAX_REDIRECTS
235 278
236The default value for the C<max_redirects> request parameter 279The default value for the C<max_redirects> request parameter
237(default: C<10>). 280(default: C<10>).
245 288
246The maximum number of persistent connections to keep open (default: 8). 289The maximum number of persistent connections to keep open (default: 8).
247 290
248=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 291=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
249 292
250The maximum time to cache a persistent connection, in seconds (default: 15). 293The maximum time to cache a persistent connection, in seconds (default: 2).
251 294
252=back 295=back
253 296
254=cut 297=cut
298
299sub set_proxy($) {
300 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
301}
302
303# initialise proxy from environment
304set_proxy $ENV{http_proxy};
255 305
256=head1 SEE ALSO 306=head1 SEE ALSO
257 307
258L<AnyEvent>. 308L<AnyEvent>.
259 309

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines