--- rxvt-unicode/src/perl/selection 2006/05/20 18:17:38 1.38 +++ rxvt-unicode/src/perl/selection 2011/03/13 17:07:36 1.53 @@ -19,7 +19,6 @@ for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { $res = $self->locale_decode ($res); - utf8::encode $res; push @{ $self->{patterns} }, qr/$res/; } @@ -34,20 +33,30 @@ # "find interesting things"-patterns my @mark_patterns = ( +# qr{ ([[:word:]]+) }x, + qr{ ([^[:space:]]+) }x, + # common types of "parentheses" - qr{ (?[:space:]]+) \> }x, - qr{ \{ ([^{}[:space:]]+) \} }x, - qr{ \[ ([^{}[:space:]]+) \] }x, - qr{ \( ([^()[:space:]]+) \) }x, + qr{ (?]+) \> }x, # urls, just a heuristic qr{( - (?:https?://|ftp://|news://|mailto:|file://)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ - [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) + (?:https?://|ftp://|news://|mailto:|file://|\bwww\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+ + [ab-zA-Z0-9\-\@;\/?:&=%\$_+*()~] # exclude some trailing characters (heuristic) )}x, # shell-like argument quoting, basically always matches @@ -83,17 +92,6 @@ my @matches; if ($markofs < $line->l) { - # convert markofs form character to UTF-8 offset space - { - my $prefix = substr $text, 0, $markofs; - utf8::encode $prefix; - $markofs = length $prefix; - } - - # not doing matches in unicode mode helps speed - # enourmously here. working in utf-8 should be - # equivalent due to the magic of utf-8 encoding. - utf8::encode $text; study $text; # _really_ helps, too :) for my $regex (@mark_patterns, @{ $self->{patterns} }) { @@ -123,18 +121,6 @@ next if $len <= $curlen; - # convert back from UTF-8 offset space to character space - { - my $length = substr "$text ", $ofs, $len; - utf8::decode $length; - $len = length $length; - } - { - my $prefix = substr $text, 0, $ofs; - utf8::decode $prefix; - $ofs = length $prefix; - } - $self->selection_beg ($line->coord_of ($ofs)); $self->selection_end ($line->coord_of ($ofs + $len)); return 1;