[ViewVC] Diff of: cvs/rxvt-unicode/src/perl/selection

Comparing rxvt-unicode/src/perl/selection (file contents):
Revision 1.22 by root, Thu Jan 12 10:21:50 2006 UTC vs.
Revision 1.34 by root, Mon Jan 23 23:13:03 2006 UTC

       and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection);
    ()
 }
+sub on_init {   # collects user-defined patterns from the "selection.pattern-N" resources into $self->{patterns}
+   my ($self) = @_;
+   for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) {   # N = 0, 1, 2, ...; stops at the first index whose resource is undefined
+      $res = $self->locale_decode ($res);   # presumably converts the locale-encoded resource value to Perl characters - confirm against the urxvt perl API
+      utf8::encode $res;   # turn the pattern into UTF-8 octets in place; the matching code also utf8::encode's the line text before applying these patterns
+      push @{ $self->{patterns} }, qr/$res/;   # compile once and append; the list is created on first push
+   }
+   ()   # last evaluated expression: the sub yields an empty list
+}
-# "find interetsing things"-patterns
+# "find interesting things"-patterns
 my @mark_patterns = (
-   qr{([[:word:]]+)},
-   # common "parentheses"
+   # common types of "parentheses"
-   qr{(?:^|\s) ‘  ([^‘’]+?) ’ (?:\s|\)|$)}x,
+   qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x,
-   qr{(?:^|\s) `  ([^`']+?) ' (?:\s|\)|$)}x,
+   qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x,
-   qr{         \{ ([^{}]+?) \} }x,
+   qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ")                 }x,
-   qr{         \[ ([^{}]+?) \] }x,
+   qr{                   (" [^"]* [^[:space:]] ") (?![^[:space]]) }x,
-   qr{         \( ([^()]+?) \) }x,
+   qr{ \< ([^<>[:space:]]+) \> }x,
+   qr{ \{ ([^{}[:space:]]+) \} }x,
+   qr{ \[ ([^{}[:space:]]+) \] }x,
+   qr{ \( ([^()[:space:]]+) \) }x,
    # urls, just a heuristic
    qr{(
       (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+
       [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~]   # exclude some trailing characters (heuristic)
    )}x,
    # shell-like argument quoting, basically always matches
-   qr{\G [\ \t|&;<>()] *(
+   qr{\G [\ \t|&;<>()]* (
       (?:
          [^\\"'\ \t|&;<>()]+
          | \\.
          | " (?: [^\\"]+ | \\. )* "
          | ' [^']* '
    my $curlen  = $line->offset_of ($self->selection_end)
                - $line->offset_of ($self->selection_beg);
    my @matches;
-   for my $regex (@mark_patterns) {
+   if ($markofs < $line->l) {
-      while ($text =~ /$regex/g) {
+      # convert markofs from character to UTF-8 offset space
-         if ($-[1] <= $markofs and $markofs <= $+[1]) {
+      {
-            my $ofs = $-[1];
+         my $prefix = substr $text, 0, $markofs;
-            my $match = $1;
+         utf8::encode $prefix;
+         $markofs = length $prefix;
+      }
-            push @matches, [$ofs, length $match];
+      # not doing matches in unicode mode helps speed
+      # enormously here. working in utf-8 should be
+      # equivalent due to the magic of utf-8 encoding.
+      utf8::encode $text;
+      study $text; # _really_ helps, too :)
+      for my $regex (@mark_patterns, @{ $self->{patterns} }) {
+         while ($text =~ /$regex/g) {
+            if ($-[1] <= $markofs and $markofs <= $+[1]) {
+               my $ofs = $-[1];
+               my $match = $1;
-            for my $regex (@simplify_patterns) {
+               for my $regex (@simplify_patterns) {
-               if ($match =~ $regex) {
+                  if ($match =~ $regex) {
-                  $match = $1;
+                     $match = $1;
-                  $ofs += $-[1];
+                     $ofs += $-[1];
+                  }
                }
+               push @matches, [$ofs, length $match];
             }
-            push @matches, [$ofs, length $match];
          }
       }
    }
    # whole line
    for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) {
       my ($ofs, $len) = @$_;
       next if $len <= $curlen;
+      # convert back from UTF-8 offset space to character space
+      {
+         my $length = substr "$text ", $ofs, $len;
+         utf8::decode $length;
+         $len = length $length;
+      }
+      {
+         my $prefix = substr $text, 0, $ofs;
+         utf8::decode $prefix;
+         $ofs = length $prefix;
+      }
       $self->selection_beg ($line->coord_of ($ofs));
       $self->selection_end ($line->coord_of ($ofs + $len));
       return 1;
    }

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing rxvt-unicode/src/perl/selection (file contents): Revision 1.22 by root, Thu Jan 12 10:21:50 2006 UTC vs. Revision 1.34 by root, Mon Jan 23 23:13:03 2006 UTC

Diff Legend

Comparing rxvt-unicode/src/perl/selection (file contents):
Revision 1.22 by root, Thu Jan 12 10:21:50 2006 UTC vs.
Revision 1.34 by root, Mon Jan 23 23:13:03 2006 UTC