--- AnyEvent/lib/AnyEvent/Handle.pm 2008/06/15 21:44:56 1.69 +++ AnyEvent/lib/AnyEvent/Handle.pm 2008/10/01 08:52:06 1.92 @@ -1,7 +1,7 @@ package AnyEvent::Handle; no warnings; -use strict; +use strict qw(subs vars); use AnyEvent (); use AnyEvent::Util qw(WSAEWOULDBLOCK); @@ -16,7 +16,7 @@ =cut -our $VERSION = 4.151; +our $VERSION = 4.3; =head1 SYNOPSIS @@ -51,6 +51,9 @@ filehandles. For utility functions for doing non-blocking connects and accepts on sockets see L. +The L tutorial contains some well-documented +AnyEvent::Handle examples. + In the following, when the documentation refers to of "bytes" then this means characters. As sysread and syswrite are used for all I/O, their treatment of characters applies to this module as well. @@ -58,6 +61,14 @@ All callbacks will be invoked with the handle object as their first argument. +=head2 SIGPIPE is not handled by this module + +SIGPIPE is not handled by this module, so one of the practical +requirements of using it is to ignore SIGPIPE (C<$SIG{PIPE} = +'IGNORE'>). At least, this is highly recommended in a networked program: If +you use AnyEvent::Handle in a filter program (like sort), exiting on +SIGPIPE is probably the right thing to do. + =head1 METHODS =over 4 @@ -72,19 +83,28 @@ The filehandle this L object will operate on. -NOTE: The filehandle will be set to non-blocking (using -AnyEvent::Util::fh_nonblocking). +NOTE: The filehandle will be set to non-blocking mode (using +C<AnyEvent::Util::fh_nonblocking>) by the constructor and needs to stay in +that mode. =item on_eof => $cb->($handle) -Set the callback to be called when an end-of-file condition is detcted, +Set the callback to be called when an end-of-file condition is detected, i.e. in the case of a socket, when the other side has closed the connection cleanly. 
-While not mandatory, it is highly recommended to set an eof callback, +For sockets, this just means that the other side has stopped sending data, +you can still try to write data, and, in fact, one can return from the eof +callback and continue writing data, as only the read part has been shut +down. + +While not mandatory, it is I<highly> recommended to set an eof callback, otherwise you might end up with a closed socket while you are still waiting for data. +If an EOF condition has been detected but no C<on_eof> callback has been +set, then a fatal error will be raised with C<$!> set to C<0>. + =item on_error => $cb->($handle, $fatal) This is the error callback, which is called when, well, some error @@ -92,10 +112,15 @@ connect or a read error. Some errors are fatal (which is indicated by C<$fatal> being true). On -fatal errors the handle object will be shut down and will not be -usable. Non-fatal errors can be retried by simply returning, but it is -recommended to simply ignore this parameter and instead abondon the handle -object when this callback is invoked. +fatal errors the handle object will be shut down and will not be usable +(but you are free to look at the current C<< ->rbuf >>). Examples of fatal +errors are an EOF condition with active (but unsatisfiable) read watchers +(C) or I/O errors. + +Non-fatal errors can be retried by simply returning, but it is recommended +to simply ignore this parameter and instead abandon the handle object +when this callback is invoked. Examples of non-fatal errors are timeouts +(C) or badly-formatted data (C). On callback entrance, the value of C<$!> contains the operating system error (or C, C, C or C). @@ -137,12 +162,13 @@ If non-zero, then this enables an "inactivity" timeout: whenever this many seconds pass without a successful read or write on the underlying file handle, the C callback will be invoked (and if that one is -missing, an C error will be raised). +missing, a non-fatal C error will be raised). 
Note that timeout processing is also active when you currently do not have any outstanding read or write requests: If you plan to keep the connection idle then you should disable the timout temporarily or ignore the timeout -in the C callback. +in the C callback, in which case AnyEvent::Handle will simply +restart the timeout. Zero (the default) disables this timeout. @@ -156,7 +182,7 @@ If defined, then a fatal error will be raised (with C<$!> set to C) when the read buffer ever (strictly) exceeds this size. This is useful to -avoid denial-of-service attacks. +avoid some forms of denial-of-service attacks. For example, a server accepting connections from untrusted sources should be configured to accept only so-and-so much data that it cannot act on @@ -164,10 +190,37 @@ amount of data without a callback ever being called as long as the line isn't finished). +=item autocork => + +When disabled (the default), then C will try to immediately +write the data to the handle, if possible. This avoids having to register +a write watcher and wait for the next event loop iteration, but can +be inefficient if you write multiple small chunks (on the wire, this +disadvantage is usually avoided by your kernel's Nagle algorithm, see +C<no_delay>, but this option can save costly syscalls). + +When enabled, then writes will always be queued till the next event loop +iteration. This is efficient when you do many small writes per iteration, +but less efficient when you do a single write only per iteration (or when +the write buffer often is full). It also increases write latency. + +=item no_delay => + +When doing small writes on sockets, your operating system kernel might +wait a bit for more data before actually sending it out. This is called +the Nagle algorithm, and usually it is beneficial. + +In some situations you want as low a delay as possible, which can be +accomplished by setting this option to a true value. 
+ +The default is your operating system's default behaviour (most likely +enabled), this option explicitly enables or disables it, if possible. + =item read_size => -The default read block size (the amount of bytes this module will try to read -during each (loop iteration). Default: C<8192>. +The default read block size (the amount of bytes this module will +try to read during each loop iteration, which affects memory +requirements). Default: C<8192>. =item low_water_mark => @@ -175,39 +228,47 @@ buffer: If the write reaches this size or gets even samller it is considered empty. +Sometimes it can be beneficial (for performance reasons) to add data to +the write buffer before it is fully drained, but this is a rare case, as +the operating system kernel usually buffers data as well, so the default +is good in almost all cases. + =item linger => If non-zero (default: C<3600>), then the destructor of the -AnyEvent::Handle object will check wether there is still outstanding write -data and will install a watcher that will write out this data. No errors -will be reported (this mostly matches how the operating system treats -outstanding data at socket close time). +AnyEvent::Handle object will check whether there is still outstanding +write data and will install a watcher that will write this data to the +socket. No errors will be reported (this mostly matches how the operating +system treats outstanding data at socket close time). -This will not work for partial TLS data that could not yet been -encoded. This data will be lost. +This will not work for partial TLS data that could not be encoded +yet. This data will be lost. =item tls => "accept" | "connect" | Net::SSLeay::SSL object -When this parameter is given, it enables TLS (SSL) mode, that means it -will start making tls handshake and will transparently encrypt/decrypt -data. 
+When this parameter is given, it enables TLS (SSL) mode, that means +AnyEvent will start a TLS handshake as soon as the connection has been +established and will transparently encrypt/decrypt data afterwards. TLS mode requires Net::SSLeay to be installed (it will be loaded -automatically when you try to create a TLS handle). - -For the TLS server side, use C, and for the TLS client side of a -connection, use C mode. +automatically when you try to create a TLS handle): this module doesn't +have a dependency on that module, so if your module requires it, you have +to add the dependency yourself. + +Unlike TCP, TLS has a server and client side: for the TLS server side, use +C<accept>, and for the TLS client side of a connection, use C<connect> +mode. You can also provide your own TLS connection object, but you have to make sure that you call either C or C on it before you pass it to AnyEvent::Handle. -See the C method if you need to start TLs negotiation later. +See the C<< ->starttls >> method for when you need to start TLS negotiation later. =item tls_ctx => $ssl_ctx -Use the given Net::SSLeay::CTX object to create the new TLS connection +Use the given C<Net::SSLeay::CTX> object to create the new TLS connection (unless a connection object was specified directly). If this parameter is missing, then AnyEvent::Handle will use C. @@ -216,7 +277,8 @@ This is the json coder object used by the C read and write types. If you don't supply it, then AnyEvent::Handle will create and use a -suitable one, which will write and expect UTF-8 encoded JSON texts. +suitable one (on demand), which will write and expect UTF-8 encoded JSON +texts. Note that you are responsible to depend on the JSON module if you want to use this functionality, as AnyEvent does not have a dependency itself. @@ -225,7 +287,8 @@ =item filter_w => $cb -These exist, but are undocumented at this time. +These exist, but are undocumented at this time. (They are used internally +by the TLS code). 
=back @@ -248,7 +311,8 @@ $self->{_activity} = AnyEvent->now; $self->_timeout; - $self->on_drain (delete $self->{on_drain}) if $self->{on_drain}; + $self->on_drain (delete $self->{on_drain}) if exists $self->{on_drain}; + $self->no_delay (delete $self->{no_delay}) if exists $self->{no_delay}; $self->start_read if $self->{on_read}; @@ -264,7 +328,10 @@ delete $self->{_ww}; delete $self->{fh}; - $self->stoptls; + &_freetls; + + delete $self->{on_read}; + delete $self->{_queue}; } sub _error { @@ -284,7 +351,7 @@ =item $fh = $handle->fh -This method returns the file handle of the L object. +This method returns the file handle used to create the L object. =cut @@ -312,9 +379,9 @@ =item $handle->on_timeout ($cb) -Replace the current C callback, or disables the callback -(but not the timeout) if C<$cb> = C. See C constructor -argument. +Replace the current C callback, or disables the callback (but +not the timeout) if C<$cb> = C. See the C constructor +argument and method. =cut @@ -322,6 +389,29 @@ $_[0]{on_timeout} = $_[1]; } +=item $handle->autocork ($boolean) + +Enables or disables the current autocork behaviour (see C +constructor argument). + +=cut + +=item $handle->no_delay ($boolean) + +Enables or disables the C setting (see constructor argument of +the same name for details). 
+ +=cut + +sub no_delay { + $_[0]{no_delay} = $_[1]; + + eval { + local $SIG{__DIE__}; + setsockopt $_[0]{fh}, &Socket::IPPROTO_TCP, &Socket::TCP_NODELAY, int $_[1]; + }; +} + ############################################################################# =item $handle->timeout ($seconds) @@ -444,7 +534,7 @@ }; # try to write data immediately - $cb->(); + $cb->() unless $self->{autocork}; # if still data left in wbuf, we need to poll $self->{_ww} = AnyEvent->io (fh => $self->{fh}, poll => "w", cb => $cb) @@ -680,19 +770,17 @@ defined $self->{rbuf_max} && $self->{rbuf_max} < length $self->{rbuf} ) { - return $self->_error (&Errno::ENOSPC, 1); + $self->_error (&Errno::ENOSPC, 1), return; } while () { - no strict 'refs'; - my $len = length $self->{rbuf}; if (my $cb = shift @{ $self->{_queue} }) { unless ($cb->($self)) { if ($self->{_eof}) { # no progress can be made (not enough data and no data forthcoming) - $self->_error (&Errno::EPIPE, 1), last; + $self->_error (&Errno::EPIPE, 1), return; } unshift @{ $self->{_queue} }, $cb; @@ -710,7 +798,7 @@ ) { # no further data will arrive # so no progress can be made - $self->_error (&Errno::EPIPE, 1), last + $self->_error (&Errno::EPIPE, 1), return if $self->{_eof}; last; # more data might arrive @@ -722,8 +810,13 @@ } } - $self->{on_eof}($self) - if $self->{_eof} && $self->{on_eof}; + if ($self->{_eof}) { + if ($self->{on_eof}) { + $self->{on_eof}($self) + } else { + $self->_error (0, 1); + } + } # may need to restart read watcher unless ($self->{_rw}) { @@ -859,15 +952,6 @@ } }; -# compatibility with older API -sub push_read_chunk { - $_[0]->push_read (chunk => $_[1], $_[2]); -} - -sub unshift_read_chunk { - $_[0]->unshift_read (chunk => $_[1], $_[2]); -} - =item line => [$eol, ]$cb->($handle, $line, $eol) The callback will be called only once a full line (including the end of @@ -892,29 +976,27 @@ register_read_type line => sub { my ($self, $cb, $eol) = @_; - $eol = qr|(\015?\012)| if @_ < 3; - $eol = quotemeta $eol unless 
ref $eol; - $eol = qr|^(.*?)($eol)|s; + if (@_ < 3) { + # this is more than twice as fast as the generic code below + sub { + $_[0]{rbuf} =~ s/^([^\015\012]*)(\015?\012)// or return; - sub { - $_[0]{rbuf} =~ s/$eol// or return; + $cb->($_[0], $1, $2); + 1 + } + } else { + $eol = quotemeta $eol unless ref $eol; + $eol = qr|^(.*?)($eol)|s; - $cb->($_[0], $1, $2); - 1 + sub { + $_[0]{rbuf} =~ s/$eol// or return; + + $cb->($_[0], $1, $2); + 1 + } } }; -# compatibility with older API -sub push_read_line { - my $self = shift; - $self->push_read (line => @_); -} - -sub unshift_read_line { - my $self = shift; - $self->unshift_read (line => @_); -} - =item regex => $accept[, $reject[, $skip], $cb->($handle, $data) Makes a regex match against the regex object C<$accept> and returns @@ -1044,14 +1126,23 @@ sub { # when we can use 5.10 we can use ".", but for 5.8 we use the re-pack method - defined (my $len = eval { unpack $format, $_[0]->{rbuf} }) + defined (my $len = eval { unpack $format, $_[0]{rbuf} }) or return; - # remove prefix - substr $_[0]->{rbuf}, 0, (length pack $format, $len), ""; + $format = length pack $format, $len; - # read rest - $_[0]->unshift_read (chunk => $len, $cb); + # bypass unshift if we already have the remaining chunk + if ($format + $len <= length $_[0]{rbuf}) { + my $data = substr $_[0]{rbuf}, $format, $len; + substr $_[0]{rbuf}, 0, $format + $len, ""; + $cb->($_[0], $data); + } else { + # remove prefix + substr $_[0]{rbuf}, 0, $format, ""; + + # read remaining chunk + $_[0]->unshift_read (chunk => $len, $cb); + } 1 } @@ -1118,20 +1209,31 @@ sub { # when we can use 5.10 we can use ".", but for 5.8 we use the re-pack method - defined (my $len = eval { unpack "w", $_[0]->{rbuf} }) + defined (my $len = eval { unpack "w", $_[0]{rbuf} }) or return; - # remove prefix - substr $_[0]->{rbuf}, 0, (length pack "w", $len), ""; + my $format = length pack "w", $len; - # read rest - $_[0]->unshift_read (chunk => $len, sub { - if (my $ref = eval { Storable::thaw 
($_[1]) }) { - $cb->($_[0], $ref); - } else { - $self->_error (&Errno::EBADMSG); - } - }); + # bypass unshift if we already have the remaining chunk + if ($format + $len <= length $_[0]{rbuf}) { + my $data = substr $_[0]{rbuf}, $format, $len; + substr $_[0]{rbuf}, 0, $format + $len, ""; + $cb->($_[0], Storable::thaw ($data)); + } else { + # remove prefix + substr $_[0]{rbuf}, 0, $format, ""; + + # read remaining chunk + $_[0]->unshift_read (chunk => $len, sub { + if (my $ref = eval { Storable::thaw ($_[1]) }) { + $cb->($_[0], $ref); + } else { + $self->_error (&Errno::EBADMSG); + } + }); + } + + 1 } }; @@ -1219,21 +1321,17 @@ } } - if (length ($buf = Net::SSLeay::BIO_read ($self->{_wbio}))) { - $self->{wbuf} .= $buf; - $self->_drain_wbuf; - } - while (defined ($buf = Net::SSLeay::read ($self->{tls}))) { - if (length $buf) { - $self->{rbuf} .= $buf; - $self->_drain_rbuf unless $self->{_in_drain}; - } else { + unless (length $buf) { # let's treat SSL-eof as we treat normal EOF + delete $self->{_rw}; $self->{_eof} = 1; - $self->_shutdown; - return; + &_freetls; } + + $self->{rbuf} .= $buf; + $self->_drain_rbuf unless $self->{_in_drain}; + $self->{tls} or return; # tls session might have gone away in callback } my $err = Net::SSLeay::get_error ($self->{tls}, -1); @@ -1247,6 +1345,11 @@ # all others are fine for our purposes } + + if (length ($buf = Net::SSLeay::BIO_read ($self->{_wbio}))) { + $self->{wbuf} .= $buf; + $self->_drain_wbuf; + } } =item $handle->starttls ($tls[, $tls_ctx]) @@ -1265,13 +1368,17 @@ call and can be used or changed to your liking. Note that the handshake might have already started when this function returns. +It is an error to start a TLS handshake more than once per +AnyEvent::Handle object (this is due to bugs in OpenSSL). 
+ =cut sub starttls { my ($self, $ssl, $ctx) = @_; - $self->stoptls; - + Carp::croak "it is an error to call starttls more than once on an Anyevent::Handle object" + if $self->{tls}; + if ($ssl eq "accept") { $ssl = Net::SSLeay::new ($ctx || TLS_CTX ()); Net::SSLeay::set_accept_state ($ssl); @@ -1287,6 +1394,12 @@ # (unfortunately, we have to hardcode constants because the abysmally misdesigned # and mismaintained ssleay-module doesn't even offer them). # http://www.mail-archive.com/openssl-dev@openssl.org/msg22420.html + # + # in short: this is a mess. + # + # note that we do not try to keep the length constant between writes as we are required to do. + # we assume that most (but not all) of this insanity only applies to non-blocking cases, + # and we drive openssl fully in blocking mode here. Net::SSLeay::CTX_set_mode ($self->{tls}, (eval { local $SIG{__DIE__}; Net::SSLeay::MODE_ENABLE_PARTIAL_WRITE () } || 1) | (eval { local $SIG{__DIE__}; Net::SSLeay::MODE_ACCEPT_MOVING_WRITE_BUFFER () } || 2)); @@ -1304,31 +1417,47 @@ Net::SSLeay::BIO_write ($_[0]{_rbio}, ${$_[1]}); &_dotls; }; + + &_dotls; # need to trigger the initial negotiation exchange } =item $handle->stoptls -Destroys the SSL connection, if any. Partial read or write data will be -lost. +Shuts down the SSL connection - this makes a proper EOF handshake by +sending a close notify to the other side, but since OpenSSL doesn't +support non-blocking shut downs, it is not possible to re-use the stream +afterwards. =cut sub stoptls { my ($self) = @_; - Net::SSLeay::free (delete $self->{tls}) if $self->{tls}; + if ($self->{tls}) { + Net::SSLeay::shutdown $self->{tls}; + + &_dotls; + + # we don't give a shit. no, we do, but we can't. no... + # we, we... 
have to use openssl :/ + &_freetls; + } +} + +sub _freetls { + my ($self) = @_; + + return unless $self->{tls}; - delete $self->{_rbio}; - delete $self->{_wbio}; - delete $self->{_tls_wbuf}; - delete $self->{filter_r}; - delete $self->{filter_w}; + Net::SSLeay::free (delete $self->{tls}); + + delete @$self{qw(_rbio filter_w _wbio filter_r)}; } sub DESTROY { my $self = shift; - $self->stoptls; + &_freetls; my $linger = exists $self->{linger} ? $self->{linger} : 3600; @@ -1402,7 +1531,7 @@ =item * all constructor arguments become object members. At least initially, when you pass a C-argument to the constructor it -will end up in C<< $handle->{tls} >>. Those members might be changes or +will end up in C<< $handle->{tls} >>. Those members might be changed or mutated later on (for example C will hold the TLS connection object). =item * other object member names are prefixed with an C<_>.