--- rxvt-unicode/src/perl/matcher 2014/06/07 20:07:38 1.19 +++ rxvt-unicode/src/perl/matcher 2022/12/09 05:06:46 1.40 @@ -2,12 +2,13 @@ # Author: Tim Pope # Bob Farrell +# Emanuele Giaquinta #:META:RESOURCE:%.launcher:string:default launcher command -#:META:RESOURCE:%.button:string:the button, yeah +#:META:RESOURCE:%.button:string:the mouse button used to activate a match #:META:RESOURCE:%.pattern.:string:extra pattern to match #:META:RESOURCE:%.launcher.:string:custom launcher for pattern -#:META:RESOURCE:%.rend.:string:custom rednition for pattern +#:META:RESOURCE:%.rend.:string:custom rendition for pattern =head1 NAME @@ -20,7 +21,7 @@ mouse button specified in the C resource (default 2, or middle), the program specified in the C resource (default, the C resource, C) will be started -with the matched text as first argument. The default configuration is +with the matched text as first argument. The default configuration is suitable for matching URLs and launching a web browser, like the former "mark-urls" extension. @@ -30,8 +31,48 @@ The launcher can also be overridden on a per-pattern basis. It is possible to activate the most recently seen match or a list of matches -from the keyboard. Simply bind a keysym to "perl:matcher:last" or -"perl:matcher:list" as seen in the example below. +from the keyboard. Simply bind a keysym to "matcher:last" or +"matcher:list" as seen in the example below. + +The C action enables a mode in which it is possible to +iterate over the matches using the keyboard and either activate them +or copy them to the clipboard. While the mode is active, normal terminal +input/output is suspended and the following bindings are recognized: + +=over + +=item C + +Search for a match upwards. + +=item C + +Search for a match downwards. + +=item C + +Jump to the topmost match. + +=item C + +Jump to the bottommost match. + +=item C + +Leave the mode and return to the point where search was started. + +=item C + +Activate the current match. + +=item C + +Copy the current match to the clipboard. + +=back + +It is also possible to cycle through the matches using a key +combination bound to the C action. Example: load and use the matcher extension with defaults. @@ -40,37 +81,65 @@ Example: use a custom configuration. URxvt.url-launcher: sensible-browser - URxvt.keysym.C-Delete: perl:matcher:last - URxvt.keysym.M-Delete: perl:matcher:list + URxvt.keysym.C-Delete: matcher:last + URxvt.keysym.M-Delete: matcher:list URxvt.matcher.button: 1 URxvt.matcher.pattern.1: \\bwww\\.[\\w-]+\\.[\\w./?&@#-]*[\\w/-] URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$) URxvt.matcher.launcher.2: gvim +$2 $1 +=head2 Regex encoding/wide character matching + +Urxvt stores all text as unicode, in a special encoding that uses +one character/code point per column. For various reasons, the regular +expressions are matched directly against this encoding, which means there are a few things +you need to keep in mind: + +=over + +=item X resources/command line arguments are locale-encoded + +The regexes taken from the command line or resources will be converted +from locale encoding to unicode. This can change the number of code points +per character. + +=item Wide characters are column-padded with C<$urxvt::NOCHAR> + +Wide characters (such as kanji and sometimes tabs) are padded with +a special character value (C<$urxvt::NOCHAR>). That means that +constructs such as C<\w> or C<.> will only match part of a character, as +C<$urxvt::NOCHAR> is not matched by C<\w> and both only match the first +"column" of a wide character. + +That means you have to incorporate C<$urxvt::NOCHAR> into parts of regexes +that may match wide characters. For example, to match C<\w+> you might +want to use C<[\w$urxvt::NOCHAR]+> instead, and to match a single character +(C<.>) you might want to use C<.$urxvt::NOCHAR*> instead. + +=back + =cut my $url = qr{ (?:https?://|ftp://|news://|mailto:|file://|\bwww\.) - [\w\-\@;\/?:&=%\$.+!*\x27,~#]* + [\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]* ( - \([\w\-\@;\/?:&=%\$.+!*\x27,~#]*\)| # Allow a pair of matched parentheses + \([\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]*\)| # Allow a pair of matched parentheses [\w\-\@;\/?:&=%\$+*~] # exclude some trailing characters (heuristic) )+ }x; -sub on_key_press { +sub matchlist_key_press { my ($self, $event, $keysym, $octets) = @_; - return unless $self->{overlay}; - delete $self->{overlay}; + $self->disable ("key_press"); my $i = ($keysym == 96 ? 0 : $keysym - 48); if ($i >= 0 && $i < @{ $self->{matches} }) { my @exec = @{ $self->{matches}[$i] }; - shift @exec; - $self->exec_async (@exec); + $self->exec_async (@exec[5 .. $#exec]); } 1 @@ -80,15 +149,29 @@ sub on_user_command { my ($self, $cmd) = @_; - if ($cmd =~ s/^matcher:list\b//) { + if ($cmd eq "matcher:list") { $self->matchlist; - } else { - if ($cmd =~ s/^matcher:last\b//) { - $self->most_recent; - } elsif ($cmd =~ s/^matcher\b//) { - # for backward compatibility - $self->most_recent; - } + } elsif ($cmd eq "matcher:last") { + $self->most_recent; + } elsif ($cmd eq "matcher:select") { + $self->select_enter; + } elsif ($cmd eq "matcher") { + # for backward compatibility + $self->most_recent; + } + + () +} + +sub on_action { + my ($self, $action) = @_; + + if ($action eq "list") { + $self->matchlist; + } elsif ($action eq "last") { + $self->most_recent; + } elsif ($action eq "select") { + $self->select_enter; } () @@ -97,36 +180,13 @@ sub matchlist { my ($self) = @_; - @{ $self->{matches} } = (); + $self->{matches} = []; my $row = $self->nrow - 1; while ($row >= 0 && @{ $self->{matches} } < 10) { my $line = $self->line ($row); - my $text = $line->t; + my @matches = $self->find_matches ($row); - # FIXME: code duplicated from 'command_for' - my @matches; - for my $matcher (@{$self->{matchers}}) { - my $launcher = $matcher->[1] || $self->{launcher}; - while ($text =~ /$matcher->[0]/g) { - my $match = substr ($text, $-[0], $+[0] - $-[0]); - my @beg = @-; - my @end = @+; - my @exec; - - if ($launcher !~ /\$/) { - @exec = ($launcher, $match); - } else { - @exec = map { s/\$(\d+)|\$\{(\d+)\}/ - substr ($text, $beg[$1 || $2], $end[$1 || $2] - $beg[$1 || $2]) - /egx; $_ } split /\s+/, $launcher; - } - - push @matches, [ $beg[0], $match, @exec ]; - } - } - - for (sort { $b->[0] <=> $a->[0] } @matches) { - shift @$_; + for (sort { $b->[0] <=> $a->[0] or $b->[1] <=> $a->[1] } @matches) { push @{ $self->{matches} }, $_; last if @{ $self->{matches} } == 10; } @@ -140,7 +200,7 @@ my $i = 0; for my $match (@{ $self->{matches} }) { - my $text = $match->[0]; + my $text = $match->[4]; my $w = $self->strwidth ("$i-$text"); $width = $w if $w > $width; @@ -152,25 +212,32 @@ $self->{overlay} = $self->overlay (0, 0, $width, scalar (@{ $self->{matches} }), urxvt::OVERLAY_RSTYLE, 2); my $i = 0; for my $match (@{ $self->{matches} }) { - my $text = $match->[0]; + my $text = $match->[4]; $self->{overlay}->set (0, $i, "$i-$text"); $i++; } + $self->enable (key_press => \&matchlist_key_press); } sub most_recent { my ($self) = shift; - my $row = $self->nrow; + my $row = $self->nrow - 1; my @exec; - while($row-- > $self->top_row) { - @exec = $self->command_for($row); - last if(@exec); + + while ($row >= $self->top_row) { + my $line = $self->line ($row); + @exec = $self->command_for ($row); + last if @exec; + + $row = $line->beg - 1; } - if(@exec) { + + if (@exec) { return $self->exec_async (@exec); } + () } @@ -198,7 +265,6 @@ $self->{launcher} = $self->my_resource ("launcher") || $self->x_resource("url-launcher") || "sensible-browser"; - $self->{matches} = []; $self->{button} = 2; $self->{state} = 0; if($self->{argv}[0] || $self->my_resource ("button")) { @@ -220,9 +286,7 @@ my @defaults = ($url); my @matchers; - for (my $idx = 0; defined (my $res = $self->my_resource ("pattern.$idx") || $defaults[$idx]); $idx++) { - $res = $self->locale_decode ($res); - utf8::encode $res; + for (my $idx = 0; defined (my $res = $self->locale_decode ($self->my_resource ("pattern.$idx")) || $defaults[$idx]); $idx++) { my $launcher = $self->my_resource ("launcher.$idx"); $launcher =~ s/\$&|\$\{&\}/\${0}/g if $launcher; my $rend = $self->parse_rend($self->my_resource ("rend.$idx")); @@ -239,23 +303,23 @@ # fetch the line that has changed my $line = $self->line ($row); my $text = $line->t; - my $i = 0; + my $rend; # find all urls (if any) for my $matcher (@{$self->{matchers}}) { while ($text =~ /$matcher->[0]/g) { #print "$&\n"; - my $rend = $line->r; + $rend ||= $line->r; # mark all characters as underlined. we _must_ not toggle underline, # as we might get called on an already-marked url. &{$matcher->[2]} - for @{$rend}[ $-[0] .. $+[0] - 1]; - - $line->r ($rend); + for @{$rend}[$-[0] .. $+[0] - 1]; } } + $line->r ($rend) if $rend; + () } @@ -267,39 +331,62 @@ ($event->{state} & $mask) == $self->{state}); } -sub command_for { +sub find_matches { my ($self, $row, $col) = @_; my $line = $self->line ($row); my $text = $line->t; + my $off = $line->offset_of ($row, $col) if defined $col; + my @matches; for my $matcher (@{$self->{matchers}}) { my $launcher = $matcher->[1] || $self->{launcher}; - while (($text =~ /$matcher->[0]/g)) { - my $match = $&; + while ($text =~ /$matcher->[0]/g) { + my $match = substr $text, $-[0], $+[0] - $-[0]; my @begin = @-; my @end = @+; - if (!defined($col) || ($-[0] <= $col && $+[0] >= $col)) { + my @exec; + + if (!(defined $off) || ($-[0] <= $off && $+[0] >= $off)) { if ($launcher !~ /\$/) { - return ($launcher,$match); + @exec = ($launcher, $match); } else { # It'd be nice to just access a list like ($&,$1,$2...), # but alas, m//g behaves differently in list context. - my @exec = map { s/\$(\d+)|\$\{(\d+)\}/ - substr($text,$begin[$1||$2],$end[$1||$2]-$begin[$1||$2]) - /egx; $_ } split(/\s+/, $launcher); - return @exec; + @exec = map { + s{\$(\d+)|\$\{(\d+)\}}{ + substr $text, $begin[$1 || $2], $end[$1 || $2] - $begin[$1 || $2] + }egx; + $_ + } split /\s+/, $launcher; } + + push @matches, [ $line->coord_of ($begin[0]), $line->coord_of ($end[0]), $match, @exec ]; } } } + @matches +} + +sub command_for { + my ($self, $row, $col) = @_; + + my @matches = $self->find_matches ($row, $col); + if (@matches) { + my @match = @{ $matches[0] }; + return @match[5 .. $#match]; + } + () } sub on_button_press { my ($self, $event) = @_; - if($self->valid_button($event) - && (my @exec = $self->command_for($event->{row},$event->{col}))) { + + if ( + $self->valid_button ($event) + && (my @exec = $self->command_for ($event->{row}, $event->{col})) + ) { $self->{row} = $event->{row}; $self->{col} = $event->{col}; $self->{cmd} = \@exec; @@ -322,16 +409,136 @@ return if !defined $row; - if($row == $event->{row} && abs($col-$event->{col}) < 2 - && join("\x00", @$cmd) eq join("\x00", $self->command_for($row,$col))) { - if($self->valid_button($event)) { - + if ( + $row == $event->{row} + && (abs $col-$event->{col}) < 2 + && (join "\x00", @$cmd) eq (join "\x00", $self->command_for ($row, $col)) + ) { + if ($self->valid_button ($event)) { $self->exec_async (@$cmd); - } } 1; } +sub select_enter { + my ($self) = @_; + + $self->{view_start} = $self->view_start; + $self->{pty_ev_events} = $self->pty_ev_events (urxvt::EV_NONE); + $self->{cur_row} = $self->nrow - 1; + + $self->enable ( + key_press => \&select_key_press, + refresh_begin => \&select_refresh, + refresh_end => \&select_refresh, + ); + + $self->{overlay} = $self->overlay (0, -1, $self->ncol, 1, urxvt::OVERLAY_RSTYLE, 0); + $self->{overlay}->set (0, 0, "match-select"); +} + +sub select_leave { + my ($self) = @_; + + $self->disable ("key_press", "refresh_begin", "refresh_end"); + $self->pty_ev_events ($self->{pty_ev_events}); + + delete $self->{overlay}; + delete $self->{matches}; + delete $self->{id}; +} + +sub select_search { + my ($self, $dir, $row) = @_; + + while ($self->nrow > $row && $row >= $self->top_row) { + my $line = $self->line ($row) + or last; + + my @matches = $self->find_matches ($row); + if (@matches) { + @matches = sort { $a->[0] <=> $b->[0] or $a->[1] <=> $b->[1] } @matches; + $self->{matches} = \@matches; + $self->{cur_row} = $row; + $self->{id} = $dir < 0 ? @{ $self->{matches} } - 1 : 0; + $self->view_start ($row - ($self->nrow >> 1)); + $self->want_refresh; + return 1; + } + + $row = $dir < 0 ? $line->beg - 1 : $line->end + 1; + } + + $self->scr_bell; + + () +} + +sub select_refresh { + my ($self) = @_; + + return unless $self->{matches}; + + my $cur = $self->{matches}[$self->{id}]; + $self->scr_xor_span (@$cur[0 .. 3], urxvt::RS_RVid); + + () +} + +sub select_key_press { + my ($self, $event, $keysym, $string) = @_; + + if ($keysym == 0xff0d || $keysym == 0xff8d) { # enter + if ($self->{matches}) { + my @match = @{ $self->{matches}[$self->{id}] }; + $self->exec_async (@match[5 .. $#match]); + } + $self->select_leave; + } elsif ($keysym == 0x79) { # y + if ($self->{matches}) { + $self->selection ($self->{matches}[$self->{id}][4], 1); + $self->selection_grab (urxvt::CurrentTime, 1); + } + $self->select_leave; + } elsif ($keysym == 0xff1b) { # escape + $self->view_start ($self->{view_start}); + $self->select_leave; + } elsif ($keysym == 0xff50) { # home + $self->select_search (+1, $self->top_row) + } elsif ($keysym == 0xff57) { # end + $self->select_search (-1, $self->nrow - 1) + } elsif ($keysym == 0xff52) { # up + if ($self->{id} > 0) { + $self->{id}--; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (-1, $line->beg - 1) + if $line->beg > $self->top_row; + } + } elsif ($keysym == 0xff54) { # down + if ($self->{id} < @{ $self->{matches} } - 1) { + $self->{id}++; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (+1, $line->end + 1) + if $line->end < $self->nrow; + } + } elsif ($self->lookup_keysym ($keysym, $event->{state}) eq "matcher:select") { + if ($self->{id} > 0) { + $self->{id}--; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (-1, $self->nrow - 1) + unless $self->select_search (-1, $line->beg - 1); + } + } + + 1 +} + # vim:set sw=3 sts=3 et: