--- rxvt-unicode/src/perl/selection	2007/11/19 09:26:41	1.43
+++ rxvt-unicode/src/perl/selection	2014/05/17 13:38:23	1.58
@@ -1,5 +1,60 @@
 #! perl
 
+#:META:RESOURCE:%.pattern-0:string:first selection pattern
+
+=head1 NAME
+
+selection - more intelligent selection (enabled by default)
+
+=head1 DESCRIPTION
+
+This extension tries to be more intelligent when the user extends
+selections (double-click and further clicks). Right now, it tries to
+select words, urls and complete shell-quoted arguments, which is very
+convenient, too, if your F<ls> supports C<--quoting-style=shell>.
+
+A double-click usually selects the word under the cursor, further clicks
+will enlarge the selection.
+
+The selection works by trying to match a number of regexes and displaying
+them in increasing order of length. You can add your own regexes by
+specifying resources of the form:
+
+   URxvt.selection.pattern-0: perl-regex
+   URxvt.selection.pattern-1: perl-regex
+   ...
+
+The index number (0, 1...) must not have any holes, and each regex must
+contain at least one pair of capturing parentheses, which will be used for
+the match. For example, the following adds a regex that matches everything
+between two vertical bars:
+
+   URxvt.selection.pattern-0: \\|([^|]+)\\|
+
+Another example: Programs I use often output "absolute path: " at the
+beginning of a line when they process multiple files. The following
+pattern matches the filename (note, there is a single space at the very
+end):
+
+   URxvt.selection.pattern-0: ^(/[^:]+):\ 
+
+You can look at the source of the selection extension to see more
+interesting uses, such as parsing a line from beginning to end.
+
+This extension also offers following bindable keyboard commands:
+
+=over 4
+
+=item rot13
+
+Rot-13 the selection when activated. Used via keyboard trigger:
+
+   URxvt.keysym.C-M-r: perl:selection:rot13
+
+=back
+
+=cut
+
 sub on_user_command {
    my ($self, $cmd) = @_;
 
@@ -9,6 +64,10 @@
    ()
 }
 
+sub on_keyboard_dispatch {
+   &on_user_command;
+}
+
 sub on_init {
    my ($self) = @_;
 
@@ -19,7 +78,6 @@
 
    for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) {
       $res = $self->locale_decode ($res);
-      utf8::encode $res;
       push @{ $self->{patterns} }, qr/$res/;
    }
 
@@ -34,26 +92,25 @@
 
 # "find interesting things"-patterns
 my @mark_patterns = (
-   qr{ ([[:word:]]+) }x,
+#   qr{ ([[:word:]]+) }x,
    qr{ ([^[:space:]]+) }x,
 
    # common types of "parentheses"
-   qr{ (?<![^[:space:]])  ` ([^‘’]+)  ` (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]])  [`'] ([^`']+)  [`'] (?![^[:space:]]) }x,
    qr{ (?<![^[:space:]])  ‘ ([^‘’]+)  ’ (?![^[:space:]]) }x,
-   qr{ (?<![^[:space:]])  ` ([^`']+)  ' (?![^[:space:]]) }x,
-
-   qr{ (?<![^[:space:]])  ‘ ([^`']+)  ’ (?![^[:space:]]) }x,
-   qr{ (?<![^[:space:]])  “ ([^`']+)  ” (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]])  “ ([^“”]+)  ” (?![^[:space:]]) }x,
 
-   qr{ (?<![^[:space:]]) (' [^[:space:]] [^']* ')                 }x,
+   qr{ (?<![^[:space:]]) (' [^[:space:]] [^']* ')                  }x,
    qr{                   (' [^']* [^[:space:]] ') (?![^[:space:]]) }x,
-   qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ")                 }x,
+   qr{ (?<![^[:space:]]) (` [^[:space:]] [^']* ')                  }x,
+   qr{                   (` [^']* [^[:space:]] ') (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ")                  }x,
    qr{                   (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x,
 
-   qr{ \< ([^`']+) \> }x,
-   qr{ \( ([^`']+) \) }x,
-   qr{ \{ ([^`']+) \} }x,
-   qr{ \[ ([^`']+) \] }x,
+   qr{ \{ ([^\{\}]+) \} }x,
+   qr{ \( ([^\(\)]+) \) }x,
+   qr{ \[ ([^\[\]]+) \] }x,
+   qr{ \< ([^\<\>]+) \> }x,
 
    # urls, just a heuristic
    qr{(
@@ -94,17 +151,6 @@
    my @matches;
 
    if ($markofs < $line->l) {
-      # convert markofs from character to UTF-8 offset space
-      {
-         my $prefix = substr $text, 0, $markofs;
-         utf8::encode $prefix;
-         $markofs = length $prefix;
-      }
-
-      # not doing matches in unicode mode helps speed
-      # enourmously here. working in utf-8 should be
-      # equivalent due to the magic of utf-8 encoding.
-      utf8::encode $text;
       study $text; # _really_ helps, too :)
 
       for my $regex (@mark_patterns, @{ $self->{patterns} }) {
@@ -134,18 +180,6 @@
 
       next if $len <= $curlen;
 
-      # convert back from UTF-8 offset space to character space
-      {
-         my $length = substr "$text ", $ofs, $len;
-         utf8::decode $length;
-         $len = length $length;
-      }
-      {
-         my $prefix = substr $text, 0, $ofs;
-         utf8::decode $prefix;
-         $ofs = length $prefix;
-      }
-
       $self->selection_beg ($line->coord_of ($ofs));
       $self->selection_end ($line->coord_of ($ofs + $len));
       return 1;