--- AnyEvent-MP/MP.pm 2009/08/04 18:46:16 1.23 +++ AnyEvent-MP/MP.pm 2009/08/15 04:34:34 1.57 @@ -10,20 +10,44 @@ NODE # returns this node's noderef NODE $port # returns the noderef of the port + $SELF # receiving/own port id in rcv callbacks + + # initialise the node so it can send/receive messages + initialise_node; # -OR- + initialise_node "localhost:4040"; # -OR- + initialise_node "slave/", "localhost:4040" + + # ports are message endpoints + + # sending messages snd $port, type => data...; + snd $port, @msg; + snd @msg_with_first_element_being_a_port; - $SELF # receiving/own port id in rcv callbacks + # creating/using ports, the simple way + my $simple_port = port { my @msg = @_; 0 }; + + # creating/using ports, tagged message matching + my $port = port; + rcv $port, ping => sub { snd $_[0], "pong"; 0 }; + rcv $port, pong => sub { warn "pong received\n"; 0 }; + + # create a port on another node + my $port = spawn $node, $initfunc, @initdata; + + # monitoring + mon $port, $cb->(@msg) # callback is invoked on death + mon $port, $otherport # kill otherport on abnormal death + mon $port, $otherport, @msg # send message on death - rcv $port, smartmatch => $cb->($port, @msg); +=head1 CURRENT STATUS - # examples: - rcv $port2, ping => sub { snd $_[0], "pong"; 0 }; - rcv $port1, pong => sub { warn "pong received\n" }; - snd $port2, ping => $port1; - - # more, smarter, matches (_any_ is exported by this module) - rcv $port, [child_died => $pid] => sub { ... - rcv $port, [_any_, _any_, 3] => sub { .. $_[2] is 3 + AnyEvent::MP - stable API, should work + AnyEvent::MP::Intro - outdated + AnyEvent::MP::Kernel - WIP + AnyEvent::MP::Transport - mostly stable + + stay tuned. =head1 DESCRIPTION @@ -37,7 +61,7 @@ At the moment, this module family is severly broken and underdocumented, so do not use. This was uploaded mainly to reserve the CPAN namespace - -stay tuned! The basic API should be finished, however. +stay tuned! 
=head1 CONCEPTS @@ -45,31 +69,42 @@ =item port -A port is something you can send messages to with the C function, and -you can register C handlers with. All C handlers will receive -messages they match, messages will not be queued. +A port is something you can send messages to (with the C<snd> function). + +Ports allow you to register C<rcv> handlers that can match all or just +some messages. Messages will not be queued. =item port id - C -A port id is always the noderef, a hash-mark (C<#>) as separator, followed -by a port name (a printable string of unspecified format). +A port ID is the concatenation of a noderef, a hash-mark (C<#>) as +separator, and a port name (a printable string of unspecified format). An +exception is the node port, whose ID is identical to its node +reference. =item node -A node is a single process containing at least one port - the node -port. You can send messages to node ports to let them create new ports, -among other things. - -Initially, nodes are either private (single-process only) or hidden -(connected to a master node only). Only when they epxlicitly "become -public" can you send them messages from unrelated other nodes. +A node is a single process containing at least one port - the node port, +which enables nodes to manage each other remotely, and to create new +ports. + +Nodes are either private (single-process only), slaves (connected to a +master node only) or public nodes (connectable from unrelated nodes). =item noderef - C, C, C -A noderef is a string that either uniquely identifies a given node (for -private and hidden nodes), or contains a recipe on how to reach a given +A node reference is a string that either simply identifies the node (for +private and slave nodes), or contains a recipe on how to reach a given node (for public nodes). +This recipe is simply a comma-separated list of C pairs (for +TCP/IP, other protocols might look different). 
+ +Node references come in two flavours: resolved (containing only numerical +addresses) or unresolved (where hostnames are used instead of addresses). + +Before using an unresolved node reference in a message you first have to +resolve it. + =back =head1 VARIABLES/FUNCTIONS @@ -80,7 +115,7 @@ package AnyEvent::MP; -use AnyEvent::MP::Base; +use AnyEvent::MP::Kernel; use common::sense; @@ -90,11 +125,12 @@ use base "Exporter"; -our $VERSION = '0.02'; +our $VERSION = $AnyEvent::MP::Kernel::VERSION; + our @EXPORT = qw( NODE $NODE *SELF node_of _any_ - become_slave become_public - snd rcv mon kil reg psub + resolve_node initialise_node + snd rcv mon kil reg psub spawn port ); @@ -108,14 +144,132 @@ =item $thisnode = NODE / $NODE -The C function returns, and the C<$NODE> variable contains -the noderef of the local node. The value is initialised by a call -to C or C, after which all local port -identifiers become invalid. +The C function returns, and the C<$NODE> variable contains the +noderef of the local node. The value is initialised by a call to +C. + +=item $noderef = node_of $port + +Extracts and returns the noderef from a port ID or a noderef. + +=item initialise_node $noderef, $seednode, $seednode... + +=item initialise_node "slave/", $master, $master... + +Before a node can talk to other nodes on the network it has to initialise +itself - the minimum a node needs to know is it's own name, and optionally +it should know the noderefs of some other nodes in the network. + +This function initialises a node - it must be called exactly once (or +never) before calling other AnyEvent::MP functions. + +All arguments (optionally except for the first) are noderefs, which can be +either resolved or unresolved. + +The first argument will be looked up in the configuration database first +(if it is C then the current nodename will be used instead) to find +the relevant configuration profile (see L). If none is found then +the default configuration is used. 
The configuration supplies additional +seed/master nodes and can override the actual noderef. + +There are two types of networked nodes, public nodes and slave nodes: + +=over 4 + +=item public nodes + +For public nodes, C<$noderef> (supplied either directly to +C<initialise_node> or indirectly via a profile or the nodename) must be a +noderef (possibly unresolved, in which case it will be resolved). + +After resolving, the node will bind itself on all endpoints and try to +connect to all additional C<$seednodes> that are specified. Seednodes are +optional and can be used to quickly bootstrap the node into an existing +network. + +=item slave nodes + +When the C<$noderef> (either as given or overridden by the config file) +is the special string C<slave/>, then the node will become a slave +node. Slave nodes cannot be contacted from outside and will route most of +their traffic to the master node that they attach to. + +At least one additional noderef is required (either by specifying it +directly or because it is part of the configuration profile): The node +will try to connect to all of them and will become a slave attached to the +first node it can successfully connect to. + +Note that slave nodes cannot change their name, and consequently, their +master, so if the master goes down, the slave node will not function well +anymore until it can re-establish the connection to its master. This makes +slave nodes unsuitable for long-term nodes or fault-tolerant networks. + +=back + +This function will block until all nodes have been resolved and, for slave +nodes, until it has successfully established a connection to a master +server. + +All the seednodes will also be specially marked to automatically retry +connecting to them infinitely. + +Example: become a public node listening on the guessed noderef, or the one +specified via C for the current node. This should be the most common +form of invocation for "daemon"-type nodes. 
+ + initialise_node; + +Example: become a slave node to any of the seednodes specified via +C. This form is often used for commandline clients. + + initialise_node "slave/"; + +Example: become a slave node to any of the specified master servers. This +form is also often used for commandline clients. + + initialise_node "slave/", "master1", "192.168.13.17", "mp.example.net"; + +Example: become a public node, and try to contact some well-known master +servers to become part of the network. + + initialise_node undef, "master1", "master2"; + +Example: become a public node listening on port C<4041>. + + initialise_node 4041; + +Example: become a public node, only visible on localhost port 4044. + + initialise_node "localhost:4044"; + +=item $cv = resolve_node $noderef -=item $noderef = node_of $portid +Takes an unresolved node reference that may contain hostnames and +abbreviated IDs, resolves all of them and returns a resolved node +reference. -Extracts and returns the noderef from a portid or a noderef. +In addition to C pairs allowed in resolved noderefs, the +following forms are supported: + +=over 4 + +=item the empty string + +An empty-string component gets resolved as if the default port (4040) was +specified. + +=item naked port numbers (e.g. C<1234>) + +These are resolved by prepending the local nodename and a colon, to be +further resolved. + +=item hostnames (e.g. C, C) + +These are resolved by using AnyEvent::DNS to resolve them, optionally +looking up SRV records for the C port, if no port was +specified. + +=back =item $SELF @@ -128,16 +282,15 @@ just export C<$SELF>, all the symbols called C are exported by this module, but only C<$SELF> is currently used. 
-=item snd $portid, type => @data +=item snd $port, type => @data -=item snd $portid, @msg +=item snd $port, @msg Send the given message to the given port ID, which can identify either -a local or a remote port, and can be either a string or soemthignt hat -stringifies a sa port ID (such as a port object :). +a local or a remote port, and must be a port ID. While the message can be about anything, it is highly recommended to use a -string as first element (a portid, or some word that indicates a request +string as first element (a port ID, or some word that indicates a request type etc.). The message data effectively becomes read-only after a call to this @@ -150,40 +303,232 @@ that Storable can serialise and deserialise is allowed, and for the local node, anything can be passed. -=item kil $portid[, @reason] +=item $local_port = port -Kill the specified port with the given C<@reason>. +Create a new local port object and returns its port ID. Initially it has +no callbacks set and will throw an error when it receives messages. -If no C<@reason> is specified, then the port is killed "normally" (linked -ports will not be kileld, or even notified). +=item $local_port = port { my @msg = @_ } -Otherwise, linked ports get killed with the same reason (second form of -C, see below). +Creates a new local port, and returns its ID. Semantically the same as +creating a port and calling C on it. -Runtime errors while evaluating C callbacks or inside C blocks -will be reported as reason C<< die => $@ >>. +The block will be called for every message received on the port, with the +global variable C<$SELF> set to the port ID. Runtime errors will cause the +port to be Ced. The message will be passed as-is, no extra argument +(i.e. no port ID) will be passed to the callback. -Transport/communication errors are reported as C<< transport_error => -$message >>. +If you want to stop/destroy the port, simply C it: + + my $port = port { + my @msg = @_; + ... 
+ kil $SELF; + }; + +=cut + +sub rcv($@); + +sub _kilme { + die "received message on port without callback"; +} + +sub port(;&) { + my $id = "$UNIQ." . $ID++; + my $port = "$NODE#$id"; + + rcv $port, shift || \&_kilme; + + $port +} + +=item rcv $local_port, $callback->(@msg) + +Replaces the default callback on the specified port. There is no way to +remove the default callback: use C to disable it, or better +C the port when it is no longer needed. + +The global C<$SELF> (exported by this module) contains C<$port> while +executing the callback. Runtime errors during callback execution will +result in the port being Ced. + +The default callback received all messages not matched by a more specific +C match. + +=item rcv $local_port, tag => $callback->(@msg_without_tag), ... + +Register (or replace) callbacks to be called on messages starting with the +given tag on the given port (and return the port), or unregister it (when +C<$callback> is C<$undef> or missing). There can only be one callback +registered for each tag. + +The original message will be passed to the callback, after the first +element (the tag) has been removed. The callback will use the same +environment as the default callback (see above). + +Example: create a port and bind receivers on it in one go. + + my $port = rcv port, + msg1 => sub { ... }, + msg2 => sub { ... }, + ; + +Example: create a port, bind receivers and send it in a message elsewhere +in one go: + + snd $otherport, reply => + rcv port, + msg1 => sub { ... }, + ... + ; + +Example: temporarily register a rcv callback for a tag matching some port +(e.g. for a rpc reply) and unregister it after a message was received. 
+ + rcv $port, $otherport => sub { + my @reply = @_; + + rcv $SELF, $otherport; + }; + +=cut + +sub rcv($@) { + my $port = shift; + my ($noderef, $portid) = split /#/, $port, 2; + + ($NODE{$noderef} || add_node $noderef) == $NODE{""} + or Carp::croak "$port: rcv can only be called on local ports, caught"; + + while (@_) { + if (ref $_[0]) { + if (my $self = $PORT_DATA{$portid}) { + "AnyEvent::MP::Port" eq ref $self + or Carp::croak "$port: rcv can only be called on message matching ports, caught"; + + $self->[2] = shift; + } else { + my $cb = shift; + $PORT{$portid} = sub { + local $SELF = $port; + eval { &$cb }; _self_die if $@; + }; + } + } elsif (defined $_[0]) { + my $self = $PORT_DATA{$portid} ||= do { + my $self = bless [$PORT{$port} || sub { }, { }, $port], "AnyEvent::MP::Port"; + + $PORT{$portid} = sub { + local $SELF = $port; + + if (my $cb = $self->[1]{$_[0]}) { + shift; + eval { &$cb }; _self_die if $@; + } else { + &{ $self->[0] }; + } + }; + + $self + }; + + "AnyEvent::MP::Port" eq ref $self + or Carp::croak "$port: rcv can only be called on message matching ports, caught"; + + my ($tag, $cb) = splice @_, 0, 2; + + if (defined $cb) { + $self->[1]{$tag} = $cb; + } else { + delete $self->[1]{$tag}; + } + } + } + + $port +} + +=item $closure = psub { BLOCK } + +Remembers C<$SELF> and creates a closure out of the BLOCK. When the +closure is executed, sets up the environment in the same way as in C +callbacks, i.e. runtime errors will cause the port to get Ced. 
-=item $guard = mon $portid, $cb->(@reason) +This is useful when you register callbacks from C callbacks: + + rcv delayed_reply => sub { + my ($delay, @reply) = @_; + my $timer = AE::timer $delay, 0, psub { + snd @reply, $SELF; + }; + }; + +=cut + +sub psub(&) { + my $cb = shift; + + my $port = $SELF + or Carp::croak "psub can only be called from within rcv or psub callbacks, not"; + + sub { + local $SELF = $port; + + if (wantarray) { + my @res = eval { &$cb }; + _self_die if $@; + @res + } else { + my $res = eval { &$cb }; + _self_die if $@; + $res + } + } +} + +=item $guard = mon $port, $cb->(@reason) + +=item $guard = mon $port, $rcvport + +=item $guard = mon $port -=item $guard = mon $portid, $otherport +=item $guard = mon $port, $rcvport, @msg -=item $guard = mon $portid, $otherport, @msg +Monitor the given port and do something when the port is killed or +messages to it were lost, and optionally return a guard that can be used +to stop monitoring again. -Monitor the given port and do something when the port is killed. +C effectively guarantees that, in the absence of hardware failures, +that after starting the monitor, either all messages sent to the port +will arrive, or the monitoring action will be invoked after possible +message loss has been detected. No messages will be lost "in between" +(after the first lost message no further messages will be received by the +port). After the monitoring action was invoked, further messages might get +delivered again. -In the first form, the callback is simply called with any number -of C<@reason> elements (no @reason means that the port was deleted +In the first form (callback), the callback is simply called with any +number of C<@reason> elements (no @reason means that the port was deleted "normally"). Note also that I<< the callback B never die >>, so use C if unsure. -In the second form, the other port will be C'ed with C<@reason>, iff -a @reason was specified, i.e. 
on "normal" kils nothing happens, while -under all other conditions, the other port is killed with the same reason. - -In the last form, a message of the form C<@msg, @reason> will be C. +In the second form (another port given), the other port (C<$rcvport>) +will be C'ed with C<@reason>, iff a @reason was specified, i.e. on +"normal" kils nothing happens, while under all other conditions, the other +port is killed with the same reason. + +The third form (kill self) is the same as the second form, except that +C<$rvport> defaults to C<$SELF>. + +In the last form (message), a message of the form C<@msg, @reason> will be +C. + +As a rule of thumb, monitoring requests should always monitor a port from +a local port (or callback). The reason is that kill messages might get +lost, just like any other message. Another less obvious reason is that +even monitoring requests can get lost (for exmaple, when the connection +to the other node goes down permanently). When monitoring a port locally +these problems do not exist. Example: call a given callback when C<$port> is killed. @@ -191,21 +536,22 @@ Example: kill ourselves when C<$port> is killed abnormally. - mon $port, $self; + mon $port; -Example: send us a restart message another C<$port> is killed. +Example: send us a restart message when another C<$port> is killed. mon $port, $self => "restart"; =cut sub mon { - my ($noderef, $port, $cb) = ((split /#/, shift, 2), shift); + my ($noderef, $port) = split /#/, shift, 2; my $node = $NODE{$noderef} || add_node $noderef; - #TODO: ports must not be references - if (!ref $cb or "AnyEvent::MP::Port" eq ref $cb) { + my $cb = @_ ? shift : $SELF || Carp::croak 'mon: called with one argument only, but $SELF not set,'; + + unless (ref $cb) { if (@_) { # send a kill info message my (@msg) = ($cb, @_); @@ -244,259 +590,257 @@ sub mon_guard { my ($port, @refs) = @_; + #TODO: mon-less form? 
+ mon $port, sub { 0 && @refs } } -=item $local_port = port - -Create a new local port object that supports message matching. +=item kil $port[, @reason] -=item $portid = port { my @msg = @_; $finished } - -Creates a "mini port", that is, a very lightweight port without any -pattern matching behind it, and returns its ID. +Kill the specified port with the given C<@reason>. -The block will be called for every message received on the port. When the -callback returns a true value its job is considered "done" and the port -will be destroyed. Otherwise it will stay alive. +If no C<@reason> is specified, then the port is killed "normally" (linked +ports will not be kileld, or even notified). -The message will be passed as-is, no extra argument (i.e. no port id) will -be passed to the callback. +Otherwise, linked ports get killed with the same reason (second form of +C, see below). -If you need the local port id in the callback, this works nicely: +Runtime errors while evaluating C callbacks or inside C blocks +will be reported as reason C<< die => $@ >>. - my $port; $port = miniport { - snd $otherport, reply => $port; - }; +Transport/communication errors are reported as C<< transport_error => +$message >>. =cut -sub port(;&) { - my $id = "$UNIQ." . $ID++; - my $port = "$NODE#$id"; +=item $port = spawn $node, $initfunc[, @initdata] - if (@_) { - my $cb = shift; - $PORT{$id} = sub { - local $SELF = $port; - eval { - &$cb - and kil $id; - }; - _self_die if $@; - }; - } else { - my $self = bless { - id => "$NODE#$id", - }, "AnyEvent::MP::Port"; - - $PORT_DATA{$id} = $self; - $PORT{$id} = sub { - local $SELF = $port; - - eval { - for (@{ $self->{rc0}{$_[0]} }) { - $_ && &{$_->[0]} - && undef $_; - } - - for (@{ $self->{rcv}{$_[0]} }) { - $_ && [@_[1 .. @{$_->[1]}]] ~~ $_->[1] - && &{$_->[0]} - && undef $_; - } - - for (@{ $self->{any} }) { - $_ && [@_[0 .. 
$#{$_->[1]}]] ~~ $_->[1] - && &{$_->[0]} - && undef $_; - } - }; - _self_die if $@; - }; - } +Creates a port on the node C<$node> (which can also be a port ID, in which +case it's the node where that port resides). - $port -} +The port ID of the newly created port is return immediately, and it is +permissible to immediately start sending messages or monitor the port. -=item reg $portid, $name +After the port has been created, the init function is +called. This function must be a fully-qualified function name +(e.g. C). To specify a function in the main +program, use C<::name>. -Registers the given port under the name C<$name>. If the name already -exists it is replaced. +If the function doesn't exist, then the node tries to C +the package, then the package above the package and so on (e.g. +C, C, C) until the function +exists or it runs out of package names. -A port can only be registered under one well known name. +The init function is then called with the newly-created port as context +object (C<$SELF>) and the C<@initdata> values as arguments. -A port automatically becomes unregistered when it is killed. +A common idiom is to pass your own port, monitor the spawned port, and +in the init function, monitor the original port. This two-way monitoring +ensures that both ports get cleaned up when there is a problem. -=cut +Example: spawn a chat server port on C<$othernode>. -sub reg(@) { - my ($portid, $name) = @_; + # this node, executed from within a port context: + my $server = spawn $othernode, "MyApp::Chat::Server::connect", $SELF; + mon $server; - $REG{$name} = $portid; -} + # init function on C<$othernode> + sub connect { + my ($srcport) = @_; -=item rcv $portid, tagstring => $callback->(@msg), ... + mon $srcport; -=item rcv $portid, $smartmatch => $callback->(@msg), ... + rcv $SELF, sub { + ... + }; + } -=item rcv $portid, [$smartmatch...] => $callback->(@msg), ... +=cut -Register callbacks to be called on matching messages on the given port. 
+sub _spawn { + my $port = shift; + my $init = shift; + + local $SELF = "$NODE#$port"; + eval { + &{ load_func $init } + }; + _self_die if $@; +} -The callback has to return a true value when its work is done, after -which is will be removed, or a false value in which case it will stay -registered. +sub spawn(@) { + my ($noderef, undef) = split /#/, shift, 2; -The global C<$SELF> (exported by this module) contains C<$portid> while -executing the callback. + my $id = "$RUNIQ." . $ID++; -Runtime errors wdurign callback execution will result in the port being -Ced. + $_[0] =~ /::/ + or Carp::croak "spawn init function must be a fully-qualified name, caught"; -If the match is an array reference, then it will be matched against the -first elements of the message, otherwise only the first element is being -matched. + snd_to_func $noderef, "AnyEvent::MP::_spawn" => $id, @_; -Any element in the match that is specified as C<_any_> (a function -exported by this module) matches any single element of the message. + "$noderef#$id" +} -While not required, it is highly recommended that the first matching -element is a string identifying the message. The one-string-only match is -also the most efficient match (by far). +=back -=cut +=head1 AnyEvent::MP vs. Distributed Erlang -sub rcv($@) { - my ($noderef, $port) = split /#/, shift, 2; +AnyEvent::MP got lots of its ideas from distributed Erlang (Erlang node +== aemp node, Erlang process == aemp port), so many of the documents and +programming techniques employed by Erlang apply to AnyEvent::MP. 
Here is a +sample: + + http://www.Erlang.se/doc/programming_rules.shtml + http://Erlang.org/doc/getting_started/part_frame.html # chapters 3 and 4 + http://Erlang.org/download/Erlang-book-part1.pdf # chapters 5 and 6 + http://Erlang.org/download/armstrong_thesis_2003.pdf # chapters 4 and 5 - ($NODE{$noderef} || add_node $noderef) == $NODE{""} - or Carp::croak "$noderef#$port: rcv can only be called on local ports, caught"; +Despite the similarities, there are also some important differences: - my $self = $PORT_DATA{$port} - or Carp::croak "$noderef#$port: rcv can only be called on message matching ports, caught"; +=over 4 - "AnyEvent::MP::Port" eq ref $self - or Carp::croak "$noderef#$port: rcv can only be called on message matching ports, caught"; +=item * Node references contain the recipe on how to contact them. - while (@_) { - my ($match, $cb) = splice @_, 0, 2; +Erlang relies on special naming and DNS to work everywhere in the +same way. AEMP relies on each node knowing it's own address(es), with +convenience functionality. - if (!ref $match) { - push @{ $self->{rc0}{$match} }, [$cb]; - } elsif (("ARRAY" eq ref $match && !ref $match->[0])) { - my ($type, @match) = @$match; - @match - ? push @{ $self->{rcv}{$match->[0]} }, [$cb, \@match] - : push @{ $self->{rc0}{$match->[0]} }, [$cb]; - } else { - push @{ $self->{any} }, [$cb, $match]; - } - } -} +This means that AEMP requires a less tightly controlled environment at the +cost of longer node references and a slightly higher management overhead. -=item $closure = psub { BLOCK } +=item * Erlang has a "remote ports are like local ports" philosophy, AEMP +uses "local ports are like remote ports". -Remembers C<$SELF> and creates a closure out of the BLOCK. When the -closure is executed, sets up the environment in the same way as in C -callbacks, i.e. runtime errors will cause the port to get Ced. 
+The failure modes for local ports are quite different (runtime errors +only) then for remote ports - when a local port dies, you I it dies, +when a connection to another node dies, you know nothing about the other +port. -This is useful when you register callbacks from C callbacks: +Erlang pretends remote ports are as reliable as local ports, even when +they are not. - rcv delayed_reply => sub { - my ($delay, @reply) = @_; - my $timer = AE::timer $delay, 0, psub { - snd @reply, $SELF; - }; - }; +AEMP encourages a "treat remote ports differently" philosophy, with local +ports being the special case/exception, where transport errors cannot +occur. -=cut +=item * Erlang uses processes and a mailbox, AEMP does not queue. -sub psub(&) { - my $cb = shift; +Erlang uses processes that selectively receive messages, and therefore +needs a queue. AEMP is event based, queuing messages would serve no +useful purpose. For the same reason the pattern-matching abilities of +AnyEvent::MP are more limited, as there is little need to be able to +filter messages without dequeing them. - my $port = $SELF - or Carp::croak "psub can only be called from within rcv or psub callbacks, not"; +(But see L for a more Erlang-like process model on top of AEMP). - sub { - local $SELF = $port; +=item * Erlang sends are synchronous, AEMP sends are asynchronous. - if (wantarray) { - my @res = eval { &$cb }; - _self_die if $@; - @res - } else { - my $res = eval { &$cb }; - _self_die if $@; - $res - } - } -} +Sending messages in Erlang is synchronous and blocks the process (and +so does not need a queue that can overflow). AEMP sends are immediate, +connection establishment is handled in the background. -=back +=item * Erlang suffers from silent message loss, AEMP does not. -=head1 FUNCTIONS FOR NODES +Erlang makes few guarantees on messages delivery - messages can get lost +without any of the processes realising it (i.e. 
you send messages a, b, +and c, and the other side only receives messages a and c). -=over 4 +AEMP guarantees correct ordering, and the guarantee that there are no +holes in the message sequence. -=item become_public endpoint... +=item * In Erlang, processes can be declared dead and later be found to be +alive. -Tells the node to become a public node, i.e. reachable from other nodes. +In Erlang it can happen that a monitored process is declared dead and +linked processes get killed, but later it turns out that the process is +still alive - and can receive messages. -If no arguments are given, or the first argument is C, then -AnyEvent::MP tries to bind on port C<4040> on all IP addresses that the -local nodename resolves to. - -Otherwise the first argument must be an array-reference with transport -endpoints ("ip:port", "hostname:port") or port numbers (in which case the -local nodename is used as hostname). The endpoints are all resolved and -will become the node reference. +In AEMP, when port monitoring detects a port as dead, then that port will +eventually be killed - it cannot happen that a node detects a port as dead +and then later sends messages to it, finding it is still alive. -=cut +=item * Erlang can send messages to the wrong port, AEMP does not. -=back +In Erlang it is quite likely that a node that restarts reuses a process ID +known to other nodes for a completely different process, causing messages +destined for that process to end up in an unrelated process. -=head1 NODE MESSAGES +AEMP never reuses port IDs, so old messages or old port IDs floating +around in the network will not be sent to an unrelated port. -Nodes understand the following messages sent to them. Many of them take -arguments called C<@reply>, which will simply be used to compose a reply -message - C<$reply[0]> is the port to reply to, C<$reply[1]> the type and -the remaining arguments are simply the message data. 
+=item * Erlang uses unprotected connections, AEMP uses secure +authentication and can use TLS. -=over 4 +AEMP can use a proven protocol - SSL/TLS - to protect connections and +securely authenticate nodes. -=cut +=item * The AEMP protocol is optimised for both text-based and binary +communications. -=item lookup => $name, @reply +The AEMP protocol, unlike the Erlang protocol, supports both +language-independent text-only protocols (good for debugging) and binary, +language-specific serialisers (e.g. Storable). -Replies with the port ID of the specified well-known port, or C. +It has also been carefully designed to be implementable in other languages +with a minimum of work while gracefully degrading functionality to make the +protocol simple. -=item devnull => ... +=item * AEMP has more flexible monitoring options than Erlang. -Generic data sink/CPU heat conversion. +In Erlang, you can choose to receive I<all> exit signals as messages +or I<none>, there is no in-between, so monitoring single processes is +difficult to implement. Monitoring in AEMP is more flexible than in +Erlang, as one can choose between automatic kill, exit message or callback +on a per-process basis. -=item relay => $port, @msg +=item * Erlang tries to hide remote/local connections, AEMP does not. -Simply forwards the message to the given port. +Monitoring in Erlang is not an indicator of process death/crashes, +as linking is (except linking is unreliable in Erlang). -=item eval => $string[ @reply] +In AEMP, you don't "look up" registered port names or send to named ports +that might or might not be persistent. Instead, you normally spawn a port +on the remote node. The init function monitors you, and you monitor +the remote port. Since both monitors are local to the node, they are much +more reliable. -Evaluates the given string. If C<@reply> is given, then a message of the -form C<@reply, $@, @evalres> is sent. 
+This also saves round-trips and avoids sending messages to the wrong port +(hard to do in Erlang). -Example: crash another node. +=back - snd $othernode, eval => "exit"; +=head1 RATIONALE -=item time => @reply +=over 4 -Replies the the current node time to C<@reply>. +=item Why strings for ports and noderefs, why not objects? -Example: tell the current node to send the current time to C<$myport> in a -C message. +We considered "objects", but found that the actual number of methods +that can be called is very low. Since port IDs and noderefs travel over +the network frequently, the serialising/deserialising would add lots of +overhead, as well as having to keep a proxy object. + +Strings can easily be printed, easily serialised etc. and need no special +procedures to be "valid". + +And a miniport consists of a single closure stored in a global hash - it +can't become much cheaper. + +=item Why favour JSON, why not a real serialising format such as Storable? + +In fact, any AnyEvent::MP node will happily accept Storable as framing +format, but currently there is no way to make a node use Storable by +default. + +The default framing protocol is JSON because a) JSON::XS is many times +faster for small messages and b) most importantly, after years of +experience we found that object serialisation is causing more problems +than it gains: Just like function calls, objects simply do not travel +easily over the network, mostly because they will always be a copy, so you +always have to re-think your design. - snd $NODE, time => $myport, timereply => 1, 2; - # => snd $myport, timereply => 1, 2,