--- rxvt-unicode/src/perl/matcher 2014/10/14 09:00:43 1.34 +++ rxvt-unicode/src/perl/matcher 2022/12/09 05:06:46 1.40 @@ -21,7 +21,7 @@ mouse button specified in the C<matcher.button> resource (default 2, or middle), the program specified in the C<matcher.launcher> resource (default, the C<url-launcher> resource, C<sensible-browser>) will be started -with the matched text as first argument. The default configuration is +with the matched text as first argument. The default configuration is suitable for matching URLs and launching a web browser, like the former "mark-urls" extension. @@ -31,15 +31,15 @@ The launcher can also be overridden on a per-pattern basis. It is possible to activate the most recently seen match or a list of matches -from the keyboard. Simply bind a keysym to "matcher:last" or +from the keyboard. Simply bind a keysym to "matcher:last" or "matcher:list" as seen in the example below. -The 'matcher:select' action enables a mode in which it is possible to +The C<matcher:select> action enables a mode in which it is possible to iterate over the matches using the keyboard and either activate them or copy them to the clipboard. While the mode is active, normal terminal input/output is suspended and the following bindings are recognized: -=over 4 +=over =item C<Up> @@ -71,6 +71,9 @@ =back +It is also possible to cycle through the matches using a key +combination bound to the C<matcher:select> action. + Example: load and use the matcher extension with defaults. URxvt.perl-ext: default,matcher @@ -85,14 +88,44 @@ URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$) URxvt.matcher.launcher.2: gvim +$2 $1 +=head2 Regex encoding/wide character matching + +Urxvt stores all text as unicode, in a special encoding that uses +one character/code point per column. 
For various reasons, the regular +expressions are matched directly against this encoding, which means there are a few things +you need to keep in mind: + +=over + +=item X resources/command line arguments are locale-encoded + +The regexes taken from the command line or resources will be converted +from locale encoding to unicode. This can change the number of code points +per character. + +=item Wide characters are column-padded with C<$urxvt::NOCHAR> + +Wide characters (such as kanji and sometimes tabs) are padded with +a special character value (C<$urxvt::NOCHAR>). That means that +constructs such as C<\w> or C<.> will only match part of a character, as +C<$urxvt::NOCHAR> is not matched by C<\w> and both only match the first +"column" of a wide character. + +That means you have to incorporate C<$urxvt::NOCHAR> into parts of regexes +that may match wide characters. For example, to match C<\w+> you might +want to use C<[\w$urxvt::NOCHAR]+> instead, and to match a single character +(C<.>) you might want to use C<.$urxvt::NOCHAR*> instead. + +=back + =cut my $url = qr{ (?:https?://|ftp://|news://|mailto:|file://|\bwww\.) 
- [\w\-\@;\/?:&=%\$.+!*\x27,~#]* + [\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]* ( - \([\w\-\@;\/?:&=%\$.+!*\x27,~#]*\)| # Allow a pair of matched parentheses + \([\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]*\)| # Allow a pair of matched parentheses [\w\-\@;\/?:&=%\$+*~] # exclude some trailing characters (heuristic) )+ }x; @@ -192,16 +225,19 @@ my ($self) = shift; my $row = $self->nrow - 1; my @exec; + while ($row >= $self->top_row) { my $line = $self->line ($row); - @exec = $self->command_for($row); - last if(@exec); + @exec = $self->command_for ($row); + last if @exec; $row = $line->beg - 1; } - if(@exec) { + + if (@exec) { return $self->exec_async (@exec); } + () } @@ -250,9 +286,7 @@ my @defaults = ($url); my @matchers; - for (my $idx = 0; defined (my $res = $self->my_resource ("pattern.$idx") || $defaults[$idx]); $idx++) { - $res = $self->locale_decode ($res); - utf8::encode $res; + for (my $idx = 0; defined (my $res = $self->locale_decode ($self->my_resource ("pattern.$idx")) || $defaults[$idx]); $idx++) { my $launcher = $self->my_resource ("launcher.$idx"); $launcher =~ s/\$&|\$\{&\}/\${0}/g if $launcher; my $rend = $self->parse_rend($self->my_resource ("rend.$idx")); @@ -312,15 +346,18 @@ my @end = @+; my @exec; - if (!defined($off) || ($-[0] <= $off && $+[0] >= $off)) { + if (!(defined $off) || ($-[0] <= $off && $+[0] >= $off)) { if ($launcher !~ /\$/) { @exec = ($launcher, $match); } else { # It'd be nice to just access a list like ($&,$1,$2...), # but alas, m//g behaves differently in list context. 
- @exec = map { s/\$(\d+)|\$\{(\d+)\}/ - substr $text, $begin[$1 || $2], $end[$1 || $2] - $begin[$1 || $2] - /egx; $_ } split /\s+/, $launcher; + @exec = map { + s{\$(\d+)|\$\{(\d+)\}}{ + substr $text, $begin[$1 || $2], $end[$1 || $2] - $begin[$1 || $2] + }egx; + $_ + } split /\s+/, $launcher; } push @matches, [ $line->coord_of ($begin[0]), $line->coord_of ($end[0]), $match, @exec ]; @@ -328,7 +365,7 @@ } } - @matches; + @matches } sub command_for { @@ -345,8 +382,11 @@ sub on_button_press { my ($self, $event) = @_; - if($self->valid_button($event) - && (my @exec = $self->command_for($event->{row},$event->{col}))) { + + if ( + $self->valid_button ($event) + && (my @exec = $self->command_for ($event->{row}, $event->{col})) + ) { $self->{row} = $event->{row}; $self->{col} = $event->{col}; $self->{cmd} = \@exec; @@ -369,12 +409,13 @@ return if !defined $row; - if($row == $event->{row} && abs($col-$event->{col}) < 2 - && join("\x00", @$cmd) eq join("\x00", $self->command_for($row,$col))) { - if($self->valid_button($event)) { - + if ( + $row == $event->{row} + && (abs $col-$event->{col}) < 2 + && (join "\x00", @$cmd) eq (join "\x00", $self->command_for ($row, $col)) + ) { + if ($self->valid_button ($event)) { $self->exec_async (@$cmd); - } } @@ -422,15 +463,17 @@ $self->{matches} = \@matches; $self->{cur_row} = $row; $self->{id} = $dir < 0 ? @{ $self->{matches} } - 1 : 0; - $self->view_start (List::Util::min 0, $row - ($self->nrow >> 1)); + $self->view_start ($row - ($self->nrow >> 1)); $self->want_refresh; - return; + return 1; } $row = $dir < 0 ? 
$line->beg - 1 : $line->end + 1; } $self->scr_bell; + + () } sub select_refresh { @@ -484,6 +527,15 @@ $self->select_search (+1, $line->end + 1) if $line->end < $self->nrow; } + } elsif ($self->lookup_keysym ($keysym, $event->{state}) eq "matcher:select") { + if ($self->{id} > 0) { + $self->{id}--; + $self->want_refresh; + } else { + my $line = $self->line ($self->{cur_row}); + $self->select_search (-1, $self->nrow - 1) + unless $self->select_search (-1, $line->beg - 1); + } } 1