--- rxvt-unicode/src/perl/selection	2006/01/17 13:07:16	1.27
+++ rxvt-unicode/src/perl/selection	2007/11/19 09:27:08	1.44
@@ -1,102 +1,68 @@
 #! perl
-use Digest::MD5 qw/md5_hex/;
 
-my $timers = {};
-my $pastebin_cmd;
-my $pastebin_url;
-
-sub on_start {
-   my ($self) = @_;
-   $pastebin_cmd =
-      (urxvt::untaint $self->x_resource ("selection-pastebin-cmd"))
-      or "scp -p % ruth:/var/www/www.ta-sa.org/files/txt/";
-
-   $pastebin_url = 
-      (urxvt::untaint $self->x_resource ("selection-pastebin-url"))
-      or "http://www.ta-sa.org/files/txt/";
-   ();
-}
-
-sub upload_paste {
-   my ($self) = @_;
-
-   my $txt = $self->selection;
-   my $h = md5_hex ($txt);
-
-   my $fn = "/tmp/$h.txt";
-
-   my $msg = "uploaded $h.txt";
-
-   if (open my $o, ">" . $fn) {
-      print $o $txt;
-      close $o;
-   } else {
-      $msg = "couldn't write $fn: $!";
-   }
-
-   my $cmd = $pastebin_cmd;
-   $cmd =~ s/%/$fn/;
-   unless (system ($cmd) == 0) {
-      $msg = "couldn't upload, '$cmd' failed";
-   }
-
-   $self->selection ($pastebin_url . "$h.txt");
-
-   my $ov = $timers->{ov} = $self->overlay (-1, 0, length ($msg), 1, urxvt::OVERLAY_RSTYLE, 0);
-   $ov->set (0, 0, $msg);
-
-   $timers->{t1} =
-      urxvt::timer
-              ->new
-              ->start ((int urxvt::NOW) + 5) # make sure we update "on" the second
-              ->interval (1)
-              ->cb (sub { delete $timers->{ov}; delete $timers->{t1}; });
-}
-
-sub on_keyboard_command {
+sub on_user_command { # "selection:rot13" replaces the selection with its ROT13 form; always returns ()
    my ($self, $cmd) = @_;
 
    $cmd eq "selection:rot13"
       and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection);
 
-   $cmd eq "selection:remote-pastebin"
-      and upload_paste ($self);
-
    ()
 }
 
 sub on_init {
    my ($self) = @_;
 
+   if (defined (my $res = $self->resource ("cutchars"))) { # only when a "cutchars" resource is defined
+      $res = $self->locale_decode ($res);
+      push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; # skip cutchars/whitespace, capture the following run of other characters
+   }
+
    for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) {
-      no re 'eval'; # just to be sure
-      $res = utf8::encode $self->locale_decode ($res);
+      $res = $self->locale_decode ($res);
+      utf8::encode $res; # encodes $res in place; utf8::encode has no useful return value
       push @{ $self->{patterns} }, qr/$res/;
    }
 
+   $self->{enabled} = 1; # on by default; on_sel_extend returns early when this is false
+
+   push @{ $self->{term}{option_popup_hook} }, sub { # hook returns: label, current state, setter for the state
+      ("new selection" => $self->{enabled}, sub { $self->{enabled} = shift })
+   };
+
    ()
 }
 
-# "find interetsing things"-patterns
+# "find interesting things"-patterns
 my @mark_patterns = (
+   qr{ ([[:word:]]+) }x,
+   qr{ ([^[:space:]]+) }x,
+
    # common types of "parentheses"
-   qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x,
-   qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x,
+   qr{ (?<![^[:space:]])  ` ([^‘’]+)  ` (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]])  ‘ ([^‘’]+)  ’ (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]])  ` ([^`']+)  ' (?![^[:space:]]) }x,
+
+   qr{ (?<![^[:space:]])  ‘ ([^`']+)  ’ (?![^[:space:]]) }x,
+   qr{ (?<![^[:space:]])  “ ([^`']+)  ” (?![^[:space:]]) }x,
+
+   qr{ (?<![^[:space:]]) (' [^[:space:]] [^']* ')                 }x,
+   qr{                   (' [^']* [^[:space:]] ') (?![^[:space:]]) }x,
    qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ")                 }x,
-   qr{                   (" [^"]* [^[:space:]] ") (?![^[:space]]) }x,
-   qr{ \< ([^<>[:space:]]+) \> }x,
-   qr{ \{ ([^{}[:space:]]+) \} }x,
-   qr{ \[ ([^{}[:space:]]+) \] }x,
-   qr{ \( ([^()[:space:]]+) \) }x,
+   qr{                   (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x,
+
+   qr{ \< ([^>]+) \> }x,
+   qr{ \( ([^)]+) \) }x,
+   qr{ \{ ([^}]+) \} }x,
+   qr{ \[ ([^]]+) \] }x,
 
    # urls, just a heuristic
    qr{(
-      (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+
-      [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~]   # exclude some trailing characters (heuristic)
+      (?:https?://|ftp://|news://|mailto:|file://|\bwww\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+
+      [ab-zA-Z0-9\-\@;\/?:&=%\$_+*()~]   # exclude some trailing characters (heuristic)
    )}x,
 
    # shell-like argument quoting, basically always matches
-   qr{\G [\ \t|&;<>()] *(
+   qr{\G [\ \t|&;<>()]* (
       (?:
          [^\\"'\ \t|&;<>()]+
          | \\.
@@ -115,6 +81,9 @@
 sub on_sel_extend {
    my ($self, $time) = @_;
 
+   $self->{enabled}
+      or return;
+
    my ($row, $col) = $self->selection_mark;
    my $line = $self->line ($row);
    my $text = $line->t;
@@ -124,26 +93,35 @@
 
    my @matches;
 
-   # not doing matches in unicode mode helps speed
-   # enourmously here. working in utf-8 should be
-   # equivalent due to the magic of utf-8 encoding.
-   utf8::encode $text;
-   study $text; # _really_ helps, too :)
-
-   for my $regex (@mark_patterns, @{ $self->{patterns} }) {
-      while ($text =~ /$regex/g) {
-         if ($-[1] <= $markofs and $markofs <= $+[1]) {
-            my $ofs = $-[1];
-            my $match = $1;
-
-            for my $regex (@simplify_patterns) {
-               if ($match =~ $regex) {
-                  $match = $1;
-                  $ofs += $-[1];
+   if ($markofs < $line->l) {
+      # convert markofs from character to UTF-8 offset space
+      {
+         my $prefix = substr $text, 0, $markofs;
+         utf8::encode $prefix;
+         $markofs = length $prefix;
+      }
+
+      # not doing matches in unicode mode helps speed
+      # enormously here. working in utf-8 should be
+      # equivalent due to the magic of utf-8 encoding.
+      utf8::encode $text;
+      study $text; # _really_ helps, too :)
+
+      for my $regex (@mark_patterns, @{ $self->{patterns} }) {
+         while ($text =~ /$regex/g) {
+            if ($-[1] <= $markofs and $markofs <= $+[1]) {
+               my $ofs = $-[1];
+               my $match = $1;
+
+               for my $regex (@simplify_patterns) {
+                  if ($match =~ $regex) {
+                     $match = $1;
+                     $ofs += $-[1];
+                  }
                }
-            }
 
-            push @matches, [$ofs, length $match];
+               push @matches, [$ofs, length $match];
+            }
          }
       }
    }
@@ -156,6 +134,18 @@
 
       next if $len <= $curlen;
 
+      # convert back from UTF-8 offset space to character space
+      {
+         my $length = substr "$text ", $ofs, $len;
+         utf8::decode $length;
+         $len = length $length;
+      }
+      {
+         my $prefix = substr $text, 0, $ofs;
+         utf8::decode $prefix;
+         $ofs = length $prefix;
+      }
+
       $self->selection_beg ($line->coord_of ($ofs));
       $self->selection_end ($line->coord_of ($ofs + $len));
       return 1;