--- rxvt-unicode/src/perl/selection 2006/01/17 13:07:16 1.27 +++ rxvt-unicode/src/perl/selection 2007/11/19 09:27:08 1.44 @@ -1,102 +1,68 @@ #! perl -use Digest::MD5 qw/md5_hex/; -my $timers = {}; -my $pastebin_cmd; -my $pastebin_url; - -sub on_start { - my ($self) = @_; - $pastebin_cmd = - (urxvt::untaint $self->x_resource ("selection-pastebin-cmd")) - or "scp -p % ruth:/var/www/www.ta-sa.org/files/txt/"; - - $pastebin_url = - (urxvt::untaint $self->x_resource ("selection-pastebin-url")) - or "http://www.ta-sa.org/files/txt/"; - (); -} - -sub upload_paste { - my ($self) = @_; - - my $txt = $self->selection; - my $h = md5_hex ($txt); - - my $fn = "/tmp/$h.txt"; - - my $msg = "uploaded $h.txt"; - - if (open my $o, ">" . $fn) { - print $o $txt; - close $o; - } else { - $msg = "couldn't write $fn: $!"; - } - - my $cmd = $pastebin_cmd; - $cmd =~ s/%/$fn/; - unless (system ($cmd) == 0) { - $msg = "couldn't upload, '$cmd' failed"; - } - - $self->selection ($pastebin_url . "$h.txt"); - - my $ov = $timers->{ov} = $self->overlay (-1, 0, length ($msg), 1, urxvt::OVERLAY_RSTYLE, 0); - $ov->set (0, 0, $msg); - - $timers->{t1} = - urxvt::timer - ->new - ->start ((int urxvt::NOW) + 5) # make sure we update "on" the second - ->interval (1) - ->cb (sub { delete $timers->{ov}; delete $timers->{t1}; }); -} - -sub on_keyboard_command { +sub on_user_command { my ($self, $cmd) = @_; $cmd eq "selection:rot13" and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection); - $cmd eq "selection:remote-pastebin" - and upload_paste ($self); - () } sub on_init { my ($self) = @_; + if (defined (my $res = $self->resource ("cutchars"))) { + $res = $self->locale_decode ($res); + push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; + } + for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { - no re 'eval'; # just to be sure - $res = utf8::encode $self->locale_decode ($res); + $res = $self->locale_decode ($res); + utf8::encode $res; push @{ $self->{patterns} }, qr/$res/; } + $self->{enabled} = 1; + + push @{ $self->{term}{option_popup_hook} }, sub { + ("new selection" => $self->{enabled}, sub { $self->{enabled} = shift }) + }; + () } -# "find interetsing things"-patterns +# "find interesting things"-patterns my @mark_patterns = ( + qr{ ([[:word:]]+) }x, + qr{ ([^[:space:]]+) }x, + # common types of "parentheses" - qr{ (?[:space:]]+) \> }x, - qr{ \{ ([^{}[:space:]]+) \} }x, - qr{ \[ ([^{}[:space:]]+) \] }x, - qr{ \( ([^()[:space:]]+) \) }x, + qr{ (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x, + + qr{ \< ([^>]+) \> }x, + qr{ \( ([^)]+) \) }x, + qr{ \{ ([^}]+) \} }x, + qr{ \[ ([^]]+) \] }x, # urls, just a heuristic qr{( - (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ - [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) + (?:https?://|ftp://|news://|mailto:|file://|\bwww\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+ + [ab-zA-Z0-9\-\@;\/?:&=%\$_+*()~] # exclude some trailing characters (heuristic) )}x, # shell-like argument quoting, basically always matches - qr{\G [\ \t|&;<>()] *( + qr{\G [\ \t|&;<>()]* ( (?: [^\\"'\ \t|&;<>()]+ | \\. @@ -115,6 +81,9 @@ sub on_sel_extend { my ($self, $time) = @_; + $self->{enabled} + or return; + my ($row, $col) = $self->selection_mark; my $line = $self->line ($row); my $text = $line->t; @@ -124,26 +93,35 @@ my @matches; - # not doing matches in unicode mode helps speed - # enourmously here. working in utf-8 should be - # equivalent due to the magic of utf-8 encoding. - utf8::encode $text; - study $text; # _really_ helps, too :) - - for my $regex (@mark_patterns, @{ $self->{patterns} }) { - while ($text =~ /$regex/g) { - if ($-[1] <= $markofs and $markofs <= $+[1]) { - my $ofs = $-[1]; - my $match = $1; - - for my $regex (@simplify_patterns) { - if ($match =~ $regex) { - $match = $1; - $ofs += $-[1]; + if ($markofs < $line->l) { + # convert markofs from character to UTF-8 offset space + { + my $prefix = substr $text, 0, $markofs; + utf8::encode $prefix; + $markofs = length $prefix; + } + + # not doing matches in unicode mode helps speed + # enourmously here. working in utf-8 should be + # equivalent due to the magic of utf-8 encoding. + utf8::encode $text; + study $text; # _really_ helps, too :) + + for my $regex (@mark_patterns, @{ $self->{patterns} }) { + while ($text =~ /$regex/g) { + if ($-[1] <= $markofs and $markofs <= $+[1]) { + my $ofs = $-[1]; + my $match = $1; + + for my $regex (@simplify_patterns) { + if ($match =~ $regex) { + $match = $1; + $ofs += $-[1]; + } } - } - push @matches, [$ofs, length $match]; + push @matches, [$ofs, length $match]; + } } } } @@ -156,6 +134,18 @@ next if $len <= $curlen; + # convert back from UTF-8 offset space to character space + { + my $length = substr "$text ", $ofs, $len; + utf8::decode $length; + $len = length $length; + } + { + my $prefix = substr $text, 0, $ofs; + utf8::decode $prefix; + $ofs = length $prefix; + } + $self->selection_beg ($line->coord_of ($ofs)); $self->selection_end ($line->coord_of ($ofs + $len)); return 1;