--- rxvt-unicode/src/perl/matcher 2014/05/17 13:38:23 1.15 +++ rxvt-unicode/src/perl/matcher 2021/11/21 19:33:32 1.39 @@ -2,12 +2,13 @@ # Author: Tim Pope # Bob Farrell +# Emanuele Giaquinta #:META:RESOURCE:%.launcher:string:default launcher command -#:META:RESOURCE:%.button:string:the button, yeah +#:META:RESOURCE:%.button:string:the mouse button used to activate a match #:META:RESOURCE:%.pattern.:string:extra pattern to match #:META:RESOURCE:%.launcher.:string:custom launcher for pattern -#:META:RESOURCE:%.rend.:string:custom rednition for pattern +#:META:RESOURCE:%.rend.:string:custom rendition for pattern =head1 NAME @@ -19,7 +20,7 @@ matching a certain pattern and make it clickable. When clicked with the mouse button specified in the C<matcher.button> resource (default 2, or middle), the program specified in the C<matcher.launcher> resource -(default, the C<urlLauncher> resource, C<sensible-browser>) will be started +(default, the C<url-launcher> resource, C<sensible-browser>) will be started with the matched text as first argument. The default configuration is suitable for matching URLs and launching a web browser, like the former "mark-urls" extension. @@ -30,8 +31,48 @@ The launcher can also be overridden on a per-pattern basis. It is possible to activate the most recently seen match or a list of matches -from the keyboard. Simply bind a keysym to "perl:matcher:last" or -"perl:matcher:list" as seen in the example below. +from the keyboard. Simply bind a keysym to "matcher:last" or +"matcher:list" as seen in the example below. + +The C<matcher:select> action enables a mode in which it is possible to +iterate over the matches using the keyboard and either activate them +or copy them to the clipboard. While the mode is active, normal terminal +input/output is suspended and the following bindings are recognized: + +=over + +=item C<Up> + +Search for a match upwards. + +=item C<Down> + +Search for a match downwards. + +=item C<Home> + +Jump to the topmost match. + +=item C<End> + +Jump to the bottommost match. + +=item C<Escape> + +Leave the mode and return to the point where search was started. 
+ +=item C<Enter> + +Activate the current match. + +=item C<y> + +Copy the current match to the clipboard. + +=back + +It is also possible to cycle through the matches using a key +combination bound to the C<matcher:select> action. Example: load and use the matcher extension with defaults. @@ -40,57 +81,97 @@ Example: use a custom configuration. URxvt.url-launcher: sensible-browser - URxvt.keysym.C-Delete: perl:matcher:last - URxvt.keysym.M-Delete: perl:matcher:list + URxvt.keysym.C-Delete: matcher:last + URxvt.keysym.M-Delete: matcher:list URxvt.matcher.button: 1 URxvt.matcher.pattern.1: \\bwww\\.[\\w-]+\\.[\\w./?&@#-]*[\\w/-] URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$) URxvt.matcher.launcher.2: gvim +$2 $1 +=head2 Regex encoding/wide character matching + +Urxvt stores all text as unicode, in a special encoding that uses +one character/code point per column. For various reasons, the regular +expressions are matched directly against this encoding, which means there are a few things +you need to keep in mind: + +=over + +=item X resources/command line arguments are locale-encoded + +The regexes taken from the command line or resources will be converted +from locale encoding to unicode. This can change the number of code points +per character. + +=item Wide characters are column-padded with C<$urxvt::NOCHAR> + +Wide characters (such as kanji and sometimes tabs) are padded with +a special character value (C<$urxvt::NOCHAR>). That means that +constructs such as C<\w> or C<.> will only match part of a character, as +C<$urxvt::NOCHAR> is not matched by C<\w> and both only match the first +"column" of a wide character. + +That means you have to incorporate C<$urxvt::NOCHAR> into parts of regexes +that may match wide characters. For example, to match C<\w+> you might +want to use C<[\w$urxvt::NOCHAR]+> instead, and to match a single character +(C<.>) you might want to use C<.$urxvt::NOCHAR*> instead. + +=back + =cut my $url = qr{ (?:https?://|ftp://|news://|mailto:|file://|\bwww\.) 
- [\w\-\@;\/?:&=%\$.+!*\x27,~#]* + [\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]* ( - \([\w\-\@;\/?:&=%\$.+!*\x27,~#]*\)| # Allow a pair of matched parentheses + \([\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]*\)| # Allow a pair of matched parentheses [\w\-\@;\/?:&=%\$+*~] # exclude some trailing characters (heuristic) )+ }x; -sub on_key_press { +sub matchlist_key_press { my ($self, $event, $keysym, $octets) = @_; - if (! $self->{showing} ) { - return; - } + delete $self->{overlay}; + $self->disable ("key_press"); my $i = ($keysym == 96 ? 0 : $keysym - 48); - if (($i > scalar(@{$self->{urls}})) || ($i < 0)) { - $self->matchlist(); - return; + if ($i >= 0 && $i < @{ $self->{matches} }) { + my @exec = @{ $self->{matches}[$i] }; + $self->exec_async (@exec[5 .. $#exec]); } - my @args = ($self->{urls}[ -$i-1 ]); - $self->matchlist(); - - $self->exec_async( $self->{launcher}, @args ); + 1 } # backwards compat sub on_user_command { my ($self, $cmd) = @_; - if ($cmd =~ s/^matcher:list\b//) { + if ($cmd eq "matcher:list") { $self->matchlist; - } else { - if ($cmd =~ s/^matcher:last\b//) { - $self->most_recent; - } elsif ($cmd =~ s/^matcher\b//) { - # for backward compatibility - $self->most_recent; - } + } elsif ($cmd eq "matcher:last") { + $self->most_recent; + } elsif ($cmd eq "matcher:select") { + $self->select_enter; + } elsif ($cmd eq "matcher") { + # for backward compatibility + $self->most_recent; + } + + () +} + +sub on_action { + my ($self, $action) = @_; + + if ($action eq "list") { + $self->matchlist; + } elsif ($action eq "last") { + $self->most_recent; + } elsif ($action eq "select") { + $self->select_enter; } () @@ -98,69 +179,58 @@ sub matchlist { my ($self) = @_; - if ( $self->{showing} ) { - $self->{url_overlay}->hide; - $self->{showing} = 0; - return; - } - @{$self->{urls}} = (); - my $line; - for (my $i = 0; $i < $self->nrow; $i ++) { - $line = $self->line($i); - next if ($line->beg != $i); - for my $url ($self->get_urls_from_line($line->t)) { - if 
(scalar(@{$self->{urls}}) == 10) { - shift @{$self->{urls}}; - } - push @{$self->{urls}}, $url; - } - } - - if (! scalar(@{$self->{urls}})) { - return; - } - - my $max = 0; - my $i = scalar( @{$self->{urls}} ) - 1 ;; - - my @temp = (); - - for my $url (@{$self->{urls}}) { - my $url = "$i-$url"; - my $xpos = 0; - - if ($self->ncol + (length $url) >= $self->ncol) { - $url = substr( $url, 0, $self->ncol ); - } - - push @temp, $url; - - if( length $url > $max ) { - $max = length $url; - } - - $i--; - } - - @temp = reverse @temp; - - $self->{url_overlay} = $self->overlay(0, 0, $max, scalar( @temp ), urxvt::OVERLAY_RSTYLE, 2); - my $i = 0; - for my $url (@temp) { - $self->{url_overlay}->set( 0, $i, $url, [(urxvt::OVERLAY_RSTYLE) x length $url]); - $self->{showing} = 1; - $i++; - } + $self->{matches} = []; + my $row = $self->nrow - 1; + while ($row >= 0 && @{ $self->{matches} } < 10) { + my $line = $self->line ($row); + my @matches = $self->find_matches ($row); + + for (sort { $b->[0] <=> $a->[0] or $b->[1] <=> $a->[1] } @matches) { + push @{ $self->{matches} }, $_; + last if @{ $self->{matches} } == 10; + } + + $row = $line->beg - 1; + } + + return unless @{ $self->{matches} }; + + my $width = 0; + + my $i = 0; + for my $match (@{ $self->{matches} }) { + my $text = $match->[4]; + my $w = $self->strwidth ("$i-$text"); + + $width = $w if $w > $width; + $i++; + } + + $width = $self->ncol - 2 if $width > $self->ncol - 2; + + $self->{overlay} = $self->overlay (0, 0, $width, scalar (@{ $self->{matches} }), urxvt::OVERLAY_RSTYLE, 2); + my $i = 0; + for my $match (@{ $self->{matches} }) { + my $text = $match->[4]; + + $self->{overlay}->set (0, $i, "$i-$text"); + $i++; + } + + $self->enable (key_press => \&matchlist_key_press); } sub most_recent { my ($self) = shift; - my $row = $self->nrow; + my $row = $self->nrow - 1; my @exec; - while($row-- > $self->top_row) { + while ($row >= $self->top_row) { + my $line = $self->line ($row); @exec = $self->command_for($row); last if(@exec); 
+ + $row = $line->beg - 1; } if(@exec) { return $self->exec_async (@exec); @@ -192,8 +262,6 @@ $self->{launcher} = $self->my_resource ("launcher") || $self->x_resource("url-launcher") || "sensible-browser"; - $self->{urls} = []; - $self->{showing} = 0; $self->{button} = 2; $self->{state} = 0; if($self->{argv}[0] || $self->my_resource ("button")) { @@ -215,9 +283,7 @@ my @defaults = ($url); my @matchers; - for (my $idx = 0; defined (my $res = $self->my_resource ("pattern.$idx") || $defaults[$idx]); $idx++) { - $res = $self->locale_decode ($res); - utf8::encode $res; + for (my $idx = 0; defined (my $res = $self->locale_decode ($self->my_resource ("pattern.$idx")) || $defaults[$idx]); $idx++) { my $launcher = $self->my_resource ("launcher.$idx"); $launcher =~ s/\$&|\$\{&\}/\${0}/g if $launcher; my $rend = $self->parse_rend($self->my_resource ("rend.$idx")); @@ -228,40 +294,29 @@ () } -sub get_urls_from_line { - my ($self, $line) = @_; - my @urls; - for my $matcher (@{$self->{matchers}}) { - while ($line =~ /$matcher->[0]/g) { - push @urls, substr( $line, $-[0], $+[0] - $-[0] ); - } - } - return @urls; -} - sub on_line_update { my ($self, $row) = @_; # fetch the line that has changed my $line = $self->line ($row); my $text = $line->t; - my $i = 0; + my $rend; # find all urls (if any) for my $matcher (@{$self->{matchers}}) { while ($text =~ /$matcher->[0]/g) { #print "$&\n"; - my $rend = $line->r; + $rend ||= $line->r; # mark all characters as underlined. we _must_ not toggle underline, # as we might get called on an already-marked url. &{$matcher->[2]} - for @{$rend}[ $-[0] .. $+[0] - 1]; - - $line->r ($rend); + for @{$rend}[$-[0] .. 
$+[0] - 1]; } } + $line->r ($rend) if $rend; + () } @@ -273,32 +328,49 @@ ($event->{state} & $mask) == $self->{state}); } -sub command_for { +sub find_matches { my ($self, $row, $col) = @_; my $line = $self->line ($row); my $text = $line->t; + my $off = $line->offset_of ($row, $col) if defined $col; + my @matches; for my $matcher (@{$self->{matchers}}) { my $launcher = $matcher->[1] || $self->{launcher}; - while (($text =~ /$matcher->[0]/g)) { - my $match = $&; + while ($text =~ /$matcher->[0]/g) { + my $match = substr $text, $-[0], $+[0] - $-[0]; my @begin = @-; my @end = @+; - if (!defined($col) || ($-[0] <= $col && $+[0] >= $col)) { + my @exec; + + if (!defined($off) || ($-[0] <= $off && $+[0] >= $off)) { if ($launcher !~ /\$/) { - return ($launcher,$match); + @exec = ($launcher, $match); } else { # It'd be nice to just access a list like ($&,$1,$2...), # but alas, m//g behaves differently in list context. - my @exec = map { s/\$(\d+)|\$\{(\d+)\}/ - substr($text,$begin[$1||$2],$end[$1||$2]-$begin[$1||$2]) - /egx; $_ } split(/\s+/, $launcher); - return @exec; + @exec = map { s/\$(\d+)|\$\{(\d+)\}/ + substr $text, $begin[$1 || $2], $end[$1 || $2] - $begin[$1 || $2] + /egx; $_ } split /\s+/, $launcher; } + + push @matches, [ $line->coord_of ($begin[0]), $line->coord_of ($end[0]), $match, @exec ]; } } } + @matches; +} + +sub command_for { + my ($self, $row, $col) = @_; + + my @matches = $self->find_matches ($row, $col); + if (@matches) { + my @match = @{ $matches[0] }; + return @match[5 .. 
$#match]; + } + () } @@ -332,7 +404,7 @@ && join("\x00", @$cmd) eq join("\x00", $self->command_for($row,$col))) { if($self->valid_button($event)) { - $self->exec_async (@$cmd); + $self->exec_async (@$cmd); } } @@ -340,4 +412,123 @@ 1; } +sub select_enter { + my ($self) = @_; + + $self->{view_start} = $self->view_start; + $self->{pty_ev_events} = $self->pty_ev_events (urxvt::EV_NONE); + $self->{cur_row} = $self->nrow - 1; + + $self->enable ( + key_press => \&select_key_press, + refresh_begin => \&select_refresh, + refresh_end => \&select_refresh, + ); + + $self->{overlay} = $self->overlay (0, -1, $self->ncol, 1, urxvt::OVERLAY_RSTYLE, 0); + $self->{overlay}->set (0, 0, "match-select"); +} + +sub select_leave { + my ($self) = @_; + + $self->disable ("key_press", "refresh_begin", "refresh_end"); + $self->pty_ev_events ($self->{pty_ev_events}); + + delete $self->{overlay}; + delete $self->{matches}; + delete $self->{id}; +} + +sub select_search { + my ($self, $dir, $row) = @_; + + while ($self->nrow > $row && $row >= $self->top_row) { + my $line = $self->line ($row) + or last; + + my @matches = $self->find_matches ($row); + if (@matches) { + @matches = sort { $a->[0] <=> $b->[0] or $a->[1] <=> $b->[1] } @matches; + $self->{matches} = \@matches; + $self->{cur_row} = $row; + $self->{id} = $dir < 0 ? @{ $self->{matches} } - 1 : 0; + $self->view_start ($row - ($self->nrow >> 1)); + $self->want_refresh; + return 1; + } + + $row = $dir < 0 ? $line->beg - 1 : $line->end + 1; + } + + $self->scr_bell; + + () +} + +sub select_refresh { + my ($self) = @_; + + return unless $self->{matches}; + + my $cur = $self->{matches}[$self->{id}]; + $self->scr_xor_span (@$cur[0 .. 3], urxvt::RS_RVid); + + () +} + +sub select_key_press { + my ($self, $event, $keysym, $string) = @_; + + if ($keysym == 0xff0d || $keysym == 0xff8d) { # enter + if ($self->{matches}) { + my @match = @{ $self->{matches}[$self->{id}] }; + $self->exec_async (@match[5 .. 
$#match]); + } + $self->select_leave; + } elsif ($keysym == 0x79) { # y + if ($self->{matches}) { + $self->selection ($self->{matches}[$self->{id}][4], 1); + $self->selection_grab (urxvt::CurrentTime, 1); + } + $self->select_leave; + } elsif ($keysym == 0xff1b) { # escape + $self->view_start ($self->{view_start}); + $self->select_leave; + } elsif ($keysym == 0xff50) { # home + $self->select_search (+1, $self->top_row) + } elsif ($keysym == 0xff57) { # end + $self->select_search (-1, $self->nrow - 1) + } elsif ($keysym == 0xff52) { # up + if ($self->{id} > 0) { + $self->{id}--; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (-1, $line->beg - 1) + if $line->beg > $self->top_row; + } + } elsif ($keysym == 0xff54) { # down + if ($self->{id} < @{ $self->{matches} } - 1) { + $self->{id}++; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (+1, $line->end + 1) + if $line->end < $self->nrow; + } + } elsif ($self->lookup_keysym ($keysym, $event->{state}) eq "matcher:select") { + if ($self->{id} > 0) { + $self->{id}--; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (-1, $self->nrow - 1) + unless $self->select_search (-1, $line->beg - 1); + } + } + + 1 +} + # vim:set sw=3 sts=3 et: