=head1 NAME AnyEvent::WebDriver - control browsers using the W3C WebDriver protocol =head1 SYNOPSIS # start geckodriver or any other w3c-compatible webdriver via the shell $ geckdriver -b myfirefox/firefox --log trace --port 4444 # then use it use AnyEvent::WebDriver; # create a new webdriver object my $wd = new AnyEvent::WebDriver; # create a new session with default capabilities. $wd->new_session ({}); $wd->navigate_to ("https://duckduckgo.com/html"); my $searchbox = $wd->find_element ("css selector" => 'input[type="text"]'); $wd->element_send_keys ($searchbox => "free software"); $wd->element_click ($wd->find_element ("css selector" => 'input[type="submit"]')); sleep 10; =head1 DESCRIPTION This module aims to implement the W3C WebDriver specification which is the standardised equivalent to the Selenium WebDriver API., which in turn aims at remotely controlling web browsers such as Firefox or Chromium. At the time of this writing, it was only available as a draft document, so changes will be expected. Also, only F did implement it, or at least, most of it. To make most of this module, or, in fact, to make any reasonable use of this module, you would need to refer tot he W3C WebDriver document, which can be found L: https://w3c.github.io/webdriver/ =cut package AnyEvent::WebDriver; use common::sense; use Carp (); use JSON::XS (); use AnyEvent::HTTP (); our $VERSION = 0; our $WEB_ELEMENT_IDENTIFIER = "element-6066-11e4-a52e-4f735466cecf"; my $json = JSON::XS->new ->utf8 ->boolean_values (0, 1); sub req_ { my ($wdf, $method, $ep, $body, $cb) = @_; AnyEvent::HTTP::http_request $method => "$wdf->{_ep}$ep", body => $body, timeout => $self->{timeout}, headers => { "content-type" => "application/json; charset=utf-8", "cache-control" => "no-cache" }, ($wdf->{proxy} eq "default" ? () : (proxy => $wdf->{proxy})), sub { my ($res, $hdr) = @_; $res = eval { $json->decode ($res) }; $hdr->{Status} = 500 unless exists $res->{value}; $cb->($hdr->{Status}, $res->{value}); } ; } sub get_ { my ($wdf, $ep, $cb) = @_; $wdf->req_ (GET => $ep, undef, $cb) } sub post_ { my ($wdf, $ep, $data, $cb) = @_; $wdf->req_ (POST => $ep, $json->encode ($data || {}), $cb) } sub delete_ { my ($wdf, $ep, $cb) = @_; $wdf->req_ (DELETE => $ep, "", $cb) } sub AUTOLOAD { our $AUTOLOAD; $_[0]->isa (__PACKAGE__) or Carp::croak "$AUTOLOAD: no such function"; (my $name = $AUTOLOAD) =~ s/^.*://; my $name_ = "$name\_"; defined &$name_ or Carp::croak "AUTOLOAD: no such method"; my $func_ = \&$name_; *$name = sub { $func_->(@_, my $cv = AE::cv); my ($status, $res) = $cv->recv; if ($status ne "200") { my $msg; if (exists $res->{error}) { $msg = "AyEvent::WebDriver: $res->{error}: $res->{message}"; $msg .= "\n$res->{stacktrace}" if length $res->{stacktrace}; } else { $msg = "AnyEvent::WebDriver: http status $status (wrong endpoint?), caught"; } Carp::croak $msg; } $res }; goto &$name; } =head2 CREATING WEBDRIVER OBJECTS =over =item new AnyEvent::WebDriver key => value... Create a new WebDriver object. Example for a remote webdriver connection (the only type supported at the moment): my $wd = new AnyEvent::WebDriver host => "localhost", port => 4444; Supported keys are: =over =item endpoint => $string For remote connections, the endpoint to connect to (defaults to C). =item proxy => $proxyspec The proxy to use (same as the C argument used by L). The default is C, which disables proxies. To use the system-provided proxy (e.g. C environment variable), specify a value of C. =item autodelete => $boolean If true (the default), then automatically execute C when the WebDriver object is destroyed with an active session. IF set to a false value, then the session will continue to exist. =item timeout => $seconds The HTTP timeout, in (fractional) seconds (default: C<300>, but this will likely drastically reduce). This timeout is reset on any activity, so it is not an overall requiest timeout. Also, individual requests might extend this timeout if they are known to take longer. =back =cut sub new { my ($class, %kv) = @_; bless { endpoint => "http://localhost:4444", proxy => undef, autodelete => 1, timeout => 300, %kv, }, $class } sub DESTROY { my ($wdf) = @_; $wdf->delete_session if exists $wdf->{sid}; } =back =head2 SIMPLIFIED API This section documents the simplified API, which is really just a very thin wrapper around the WebDriver protocol commands. They all block (using L condvars) the caller until the result is available, so must not be called from an event loop callback - see L for an alternative. The method names are preetty much taken directly from the W3C WebDriver specification, e.g. the request documented in the "Get All Cookies" section is implemented via the C method. The order is the same as in the WebDriver draft at the tiome of this writing, and only minimal massaging is done to request parameters and results. =head3 SESSIONS =over =cut =item $wd->new_session ({ key => value... }) Try to connect to a webdriver and initialize session with a "new session" command, passing the given key-value pairs as value (e.g. C). No session-dependent methods must be called before this function returns successfully. On success, C<< $wd->{sid} >> is set to the session id, and C<< $wd->{capabilities} >> is set to the returned capabilities. my $wd = new AnyEvent::Selenium endpoint => "http://localhost:4545"; $wd->new_session ({ capabilities => { pageLoadStrategy => "normal", }. }); =cut sub new_session_ { my ($wdf, $kv, $cb) = @_; local $wdf->{_ep} = "$wdf->{endpoint}/"; $wdf->post_ (session => $kv, sub { my ($status, $res) = @_; if ($status eq "200") { $wdf->{sid} = $res->{sessionId}; $wdf->{capabilities} = $res->{capabilities}; $wdf->{_ep} = "$wdf->{endpoint}/session/$wdf->{sid}/"; } $cb->($status, $res); }); } =item $wd->delete_session Deletes the session - the WebDriver object must not be used after this call. =cut sub delete_session_ { my ($wdf, $cb) = @_; local $wdf->{_ep} = "$wdf->{endpoint}/session/$wdf->{sid}"; $wdf->delete_ ("" => $cb); } =item $timeouts = $wd->get_timeouts Get the current timeouts, e.g.: my $timeouts = $wd->get_timeouts; # { implicit => 0, pageLoad => 300000, script => 30000 } =item $wd->set_timeouts ($timeouts) Sets one or more timeouts, e.g.: $wd->set_timeouts ({ script => 60000 }); =cut sub get_timeouts_ { $_[0]->get_ (timeouts => $_[1], $_[2]); } sub set_timeouts_ { $_[0]->post_ (timeouts => $_[1], $_[2], $_[3]); } =back =head3 NAVIGATION =over =cut =item $wd->navigate_to ($url) Navigates to the specified URL. =item $url = $wd->get_current_url Queries the czurrent page URL as set by C. =cut sub navigate_to_ { $_[0]->post_ (url => { url => "$_[1]" }, $_[2]); } sub get_current_url_ { $_[0]->get_ (url => $_[1]) } =item $wd->back The equivalent of pressing "back" in the browser. =item $wd->forward The equivalent of pressing "forward" in the browser. =item $wd->refresh The equivalent of pressing "refresh" in the browser. =cut sub back_ { $_[0]->post_ (back => undef, $_[1]); } sub forward_ { $_[0]->post_ (forward => undef, $_[1]); } sub refresh_ { $_[0]->post_ (refresh => undef, $_[1]); } =item $title = $wd->get_title Returns the current document title. =cut sub get_title_ { $_[0]->get_ (title => $_[1]); } =back =head3 COMMAND CONTEXTS =over =cut =item $handle = $wd->get_window_handle Returns the current window handle. =item $wd->close_window Closes the current browsing context. =item $wd->switch_to_window ($handle) Changes the current browsing context to the given window. =cut sub get_window_handle_ { $_[0]->get_ (window => $_[1]); } sub close_window_ { $_[0]->delete_ (window => $_[1]); } sub switch_to_window_ { $_[0]->post_ (window => "$_[1]", $_[2]); } =item $handles = $wd->get_window_handles Return the current window handles as an array-ref of handle IDs. =cut sub get_window_handles_ { $_[0]->get_ ("window/handles" => $_[1]); } =item $handles = $wd->switch_to_frame ($frame) Switch to the given frame. =cut sub switch_to_frame_ { $_[0]->post_ (frame => { id => "$_[1]" }, $_[2]); } =item $handles = $wd->switch_to_parent_frame Switch to the parent frame. =cut sub switch_to_parent_frame_ { $_[0]->post_ ("frame/parent" => undef, $_[1]); } =item $rect = $wd->get_window_rect Return the current window rect, e.g.: $rect = $wd->get_window_rect # { height => 1040, width => 540, x => 0, y => 0 } =item $wd->set_window_rect ($rect) Sets the window rect. =cut sub get_window_rect_ { $_[0]->get_ ("window/rect" => $_[1]); } sub set_window_rect_ { $_[0]->post_ ("window/rect" => $_[1], $_[2]); } =item $wd->maximize_window =item $wd->minimize_window =item $wd->fullscreen_window Changes the window size by either maximising, minimising or making it fullscreen. In my experience, this might timeout if no window manager is running. =cut sub maximize_window_ { $_[0]->post_ ("window/maximize" => undef, $_[1]); } sub minimize_window_ { $_[0]->post_ ("window/minimize" => undef, $_[1]); } sub fullscreen_window_ { $_[0]->post_ ("window/fullscreen" => undef, $_[1]); } =back =head3 ELEMENT RETRIEVAL =over =cut =item $element_id = $wd->find_element ($location_strategy, $selector) Finds the first element specified by the given selector and returns its web element id (the strong, not the object from the protocol). Raises an error when no element was found. $element = $wd->find_element ("css selector" => "body a"); $element = $wd->find_element ("link text" => "Click Here For Porn"); $element = $wd->find_element ("partial link text" => "orn"); $element = $wd->find_element ("tag name" => "input"); $element = $wd->find_element ("xpath" => '//input[@type="text"]'); # "decddca8-5986-4e1d-8c93-efe952505a5f" =item $element_ids = $wd->find_elements ($location_strategy, $selector) As above, but returns an arrayref of all found element IDs. =item $element_id = $wd->find_element_from_element ($element_id, $location_strategy, $selector) Like C, but looks only inside the specified C<$element>. =item $element_ids = $wd->find_elements_from_element ($element_id, $location_strategy, $selector) Like C, but looks only inside the specified C<$element>. my $head = $wd->find_element ("tag name" => "head"); my $links = $wd->find_elements_from_element ($head, "tag name", "link"); =item $element_id = $wd->get_active_element Returns the active element. =cut sub find_element_ { my $cb = pop; $_[0]->post_ (element => { using => "$_[1]", value => "$_[2]" }, sub { $cb->($_[0], $_[0] ne "200" ? $_[1] : $_[1]{$WEB_ELEMENT_IDENTIFIER}) }); } sub find_elements_ { my $cb = pop; $_[0]->post_ (elements => { using => "$_[1]", value => "$_[2]" }, sub { $cb->($_[0], $_[0] ne "200" ? $_[1] : [ map $_->{$WEB_ELEMENT_IDENTIFIER}, @{$_[1]} ]); }); } sub find_element_from_element_ { my $cb = pop; $_[0]->post_ ("element/$_[1]/element" => { using => "$_[2]", value => "$_[3]" }, sub { $cb->($_[0], $_[0] ne "200" ? $_[1] : $_[1]{$WEB_ELEMENT_IDENTIFIER}) }); } sub find_elements_from_element_ { my $cb = pop; $_[0]->post_ ("element/$_[1]/elements" => { using => "$_[2]", value => "$_[3]" }, sub { $cb->($_[0], $_[0] ne "200" ? $_[1] : [ map $_->{$WEB_ELEMENT_IDENTIFIER}, @{$_[1]} ]); }); } sub get_active_element_ { my $cb = pop; $_[0]->get_ ("element/active" => sub { $cb->($_[0], $_[0] ne "200" ? $_[1] : $_[1]{$WEB_ELEMENT_IDENTIFIER}) }); } =back =head3 ELEMENT STATE =over =cut =item $bool = $wd->is_element_selected Returns whether the given input or option element is selected or not. =item $string = $wd->get_element_attribute ($element_id, $name) Returns the value of the given attribute. =item $string = $wd->get_element_property ($element_id, $name) Returns the value of the given property. =item $string = $wd->get_element_css_value ($element_id, $name) Returns the value of the given css value. =item $string = $wd->get_element_text ($element_id) Returns the (rendered) text content of the given element. =item $string = $wd->get_element_tag_name ($element_id) Returns the tag of the given element. =item $rect = $wd->get_element_rect ($element_id) Returns the element rect of the given element. =item $bool = $wd->is_element_enabled Returns whether the element is enabled or not. =cut sub is_element_selected_ { $_[0]->get_ ("element/$_[1]/selected" => $_[2]); } sub get_element_attribute_ { $_[0]->get_ ("element/$_[1]/attribute/$_[2]" => $_[3]); } sub get_element_property_ { $_[0]->get_ ("element/$_[1]/property/$_[2]" => $_[3]); } sub get_element_css_value_ { $_[0]->get_ ("element/$_[1]/css/$_[2]" => $_[3]); } sub get_element_text_ { $_[0]->get_ ("element/$_[1]/text" => $_[2]); } sub get_element_tag_name_ { $_[0]->get_ ("element/$_[1]/name" => $_[2]); } sub get_element_rect_ { $_[0]->get_ ("element/$_[1]/rect" => $_[2]); } sub is_element_enabled_ { $_[0]->get_ ("element/$_[1]/enabled" => $_[2]); } =back =head3 ELEMENT INTERACTION =over =cut =item $wd->element_click ($element_id) Clicks the given element. =item $wd->element_clear ($element_id) Clear the contents of the given element. =item $wd->element_send_keys ($element_id, $text) Sends the given text as key events to the given element. =cut sub element_click_ { $_[0]->post_ ("element/$_[1]/click" => undef, $_[2]); } sub element_clear_ { $_[0]->post_ ("element/$_[1]/clear" => undef, $_[2]); } sub element_send_keys_ { $_[0]->post_ ("element/$_[1]/value" => { text => "$_[2]" }, $_[3]); } =back =head3 DOCUMENT HANDLING =over =cut =item $source = $wd->get_page_source Returns the (HTML/XML) page source of the current document. =item $results = $wd->execute_script ($javascript, $args) Synchronously execute the given script with given arguments and return its results (C<$args> can be C if no arguments are wanted/needed). $ten = $wd->execute_script ("return arguments[0]+arguments[1]", [3, 7]); =item $results = $wd->execute_async_script ($javascript, $args) Similar to C, but doesn't wait for script to return, but instead waits for the script to call its last argument, which is added to C<$args> automatically. $twenty = $wd->execute_async_script ("arguments[0](20)", undef); =cut sub get_page_source_ { $_[0]->get_ (source => $_[1]); } sub execute_script_ { $_[0]->post_ ("execute/sync" => { script => "$_[1]", args => $_[2] || [] }, $_[3]); } sub execute_async_script_ { $_[0]->post_ ("execute/async" => { script => "$_[1]", args => $_[2] || [] }, $_[3]); } =back =head3 COOKIES =over =cut =item $cookies = $wd->get_all_cookies Returns all cookies, as an arrayref of hashrefs. # google surely sets a lot of cookies without my consent $wd->navigate_to ("http://google.com"); use Data::Dump; ddx $wd->get_all_cookies; =item $cookie = $wd->get_named_cookie ($name) Returns a single cookie as a hashref. =item $wd->add_cookie ($cookie) Adds the given cookie hashref. =item $wd->delete_cookie ($name) Delete the named cookie. =item $wd->delete_all_cookies Delete all cookies. =cut sub get_all_cookies_ { $_[0]->get_ (cookie => $_[1]); } sub get_named_cookie_ { $_[0]->get_ ("cookie/$_[1]" => $_[2]); } sub add_cookie_ { $_[0]->post_ (cookie => { cookie => $_[1] }, $_[2]); } sub delete_cookie_ { $_[0]->delete_ ("cookie/$_[1]" => $_[2]); } sub delete_all_cookies_ { $_[0]->delete_ (cookie => $_[2]); } =back =head3 ACTIONS =over =cut =item $wd->perform_actions ($actions) Perform the given actions (an arrayref of action specifications simulating user activity). For further details, read the spec. An example to get you started: $wd->navigate_to ("https://duckduckgo.com/html"); $wd->set_timeouts ({ implicit => 10000 }); my $input = $wd->find_element ("css selector", 'input[type="text"]'); $wd->perform_actions ([ { id => "myfatfinger", type => "pointer", pointerType => "touch", actions => [ { type => "pointerMove", duration => 100, origin => $wd->element_object ($input), x => 40, y => 5 }, { type => "pointerDown", button => 1 }, { type => "pause", duration => 40 }, { type => "pointerUp", button => 1 }, ], }, { id => "mykeyboard", type => "key", actions => [ { type => "pause" }, { type => "pause" }, { type => "pause" }, { type => "pause" }, { type => "keyDown", value => "a" }, { type => "pause", duration => 100 }, { type => "keyUp", value => "a" }, { type => "pause", duration => 100 }, { type => "keyDown", value => "b" }, { type => "pause", duration => 100 }, { type => "keyUp", value => "b" }, { type => "pause", duration => 2000 }, { type => "keyDown", value => "\x{E007}" }, # enter { type => "pause", duration => 100 }, { type => "keyUp", value => "\x{E007}" }, # enter { type => "pause", duration => 5000 }, ], }, ]); =item $wd->release_actions Release all keys and pointer buttons currently depressed. =cut sub perform_actions_ { $_[0]->post_ (actions => { actions => $_[1] }, $_[2]); } sub release_actions_ { $_[0]->delete_ (actions => $_[1]); } =back =head3 USER PROMPTS =over =cut =item $wd->dismiss_alert Dismiss a simple dialog, if present. =item $wd->accept_alert Accept a simple dialog, if present. =item $text = $wd->get_alert_text Returns the text of any simple dialog. =item $text = $wd->send_alert_text Fills in the user prompt with the given text. =cut sub dismiss_alert_ { $_[0]->post_ ("alert/dismiss" => undef, $_[1]); } sub accept_alert_ { $_[0]->post_ ("alert/accept" => undef, $_[1]); } sub get_alert_text_ { $_[0]->get_ ("alert/text" => $_[1]); } sub send_alert_text_ { $_[0]->post_ ("alert/text" => { text => "$_[1]" }, $_[2]); } =back =head3 SCREEN CAPTURE =over =cut =item $wd->take_screenshot Create a screenshot, returning it as a PNG image in a data url. =item $wd->take_element_screenshot ($element_id) Accept a simple dialog, if present. =cut sub take_screenshot_ { $_[0]->get_ (screenshot => $_[1]); } sub take_element_screenshot_ { $_[0]->get_ ("element/$_[1]/screenshot" => $_[2]); } =back =head2 HELPER METHODS =over =cut =item $object = $wd->element_object ($element_id) Encoding element ids in data structures is done by represetning them as an object with a special key and the element id as value. This helper method does this for you. =cut sub element_object { +{ $WEB_ELEMENT_IDENTIFIER => $_[1] } } =back =head2 EVENT BASED API This module wouldn't be a good AnyEvent citizen if it didn't have a true event-based API. In fact, the simplified API, as documented above, is emulated via the event-based API and an C function that automatically provides blocking wrappers around the callback-based API. Every method documented in the L section has an equivalent event-based method that is formed by appending a underscore (C<_>) to the method name, and appending a callback to the argument list (mnemonic: the underscore indicates the "the action is not yet finished" after the call returns). For example, instead of a blocking calls to C, C and C, you can make a callback-based ones: my $cv = AE::cv; $wd->new_session ({}, sub { my ($status, $value) = @_, die "error $value->{error}" if $status ne "200"; $wd->navigate_to_ ("http://www.nethype.de", sub { $wd->back_ (sub { print "all done\n"; $cv->send; }); }); }); $cv->recv; While the blocking methods C on errors, the callback-based ones all pass two values to the callback, C<$status> and C<$res>, where C<$status> is the HTTP status code (200 for successful requests, typically 4xx ot 5xx for errors), and C<$res> is the value of the C key in the JSON response object. Other than that, the underscore variants and the blocking variants are identical. =head2 LOW LEVEL API All the simplfiied API methods are very thin wrappers around WebDriver commands of the same name. Theyx are all implemented in terms of the low-level methods (C, C, C and C), which exists in blocking and callback-based variants (C, C, C and C). Examples are after the function descriptions. =over =item $wd->req_ ($method, $uri, $body, $cb->($status, $value)) =item $value = $wd->req ($method, $uri, $body) Appends the C<$uri> to the C URL and makes a HTTP C<$method> request (C, C etc.). C requests can provide a UTF-8-encoded JSON text as HTTP request body, or the empty string to indicate no body is used. For the callback version, the callback gets passed the HTTP status code (200 for every successful request), and the value of the C key in the JSON response object as second argument. =item $wd->get_ ($uri, $cb->($status, $value)) =item $value = $wd->get ($uri) Simply a call to C with C<$method> set to C and an empty body. =item $wd->post_ ($uri, $data, $cb->($status, $value)) =item $value = $wd->post ($uri, $data) Simply a call to C with C<$method> set to C - if C<$body> is C, then an empty object is send, otherwise, C<$data> must be a valid request object, which gets encoded into JSON for you. =item $wd->delete_ ($uri, $cb->($status, $value)) =item $value = $wd->delete ($uri) Simply a call to C with C<$method> set to C and an empty body. =cut =back Example: implement C, which is a simple C request without any parameters: $cookies = $wd->get ("cookie"); Example: implement C, which needs some parameters: $results = $wd->post ("execute/sync" => { script => "$javascript", args => [] }); Example: call C to find all C elements, stripping the returned element objects to only return the element ID strings: my $elems = $wd->post (elements => { using => "css selector", value => "img" }); # yes, the W3C found an interetsing way around the typelessness of JSON $_ = $_->{"element-6066-11e4-a52e-4f735466cecf"} for @$elems; =cut =head1 HISTORY This module was unintentionally created (it started inside some quickly hacked-together script) simply because I couldn't get the existing C module to work, ever, despite multiple attempts over the years and trying to report multiple bugs, which have been completely ignored. It's also not event-based, so, yeah... =head1 AUTHOR Marc Lehmann http://anyevent.schmorp.de =cut 1