… | |
… | |
17 | push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; |
17 | push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; |
18 | } |
18 | } |
19 | |
19 | |
20 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
20 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
21 | $res = $self->locale_decode ($res); |
21 | $res = $self->locale_decode ($res); |
22 | utf8::encode $res; |
|
|
23 | push @{ $self->{patterns} }, qr/$res/; |
22 | push @{ $self->{patterns} }, qr/$res/; |
24 | } |
23 | } |
25 | |
24 | |
26 | $self->{enabled} = 1; |
25 | $self->{enabled} = 1; |
27 | |
26 | |
… | |
… | |
32 | () |
31 | () |
33 | } |
32 | } |
34 | |
33 | |
35 | # "find interesting things"-patterns |
34 | # "find interesting things"-patterns |
36 | my @mark_patterns = ( |
35 | my @mark_patterns = ( |
|
|
36 | # qr{ ([[:word:]]+) }x, |
|
|
37 | qr{ ([^[:space:]]+) }x, |
|
|
38 | |
37 | # common types of "parentheses" |
39 | # common types of "parentheses" |
|
|
40 | qr{ (?<![^[:space:]]) [`'] ([^`']+) [`'] (?![^[:space:]]) }x, |
38 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
41 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space:]]) }x, |
39 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
42 | qr{ (?<![^[:space:]]) “ ([^“”]+) ” (?![^[:space:]]) }x, |
|
|
43 | |
|
|
44 | qr{ (?<![^[:space:]]) (' [^[:space:]] [^']* ') }x, |
|
|
45 | qr{ (' [^']* [^[:space:]] ') (?![^[:space:]]) }x, |
|
|
46 | qr{ (?<![^[:space:]]) (` [^[:space:]] [^']* ') }x, |
|
|
47 | qr{ (` [^']* [^[:space:]] ') (?![^[:space:]]) }x, |
40 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
48 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
41 | qr{ (" [^"]* [^[:space:]] ") (?![^[:space]]) }x, |
49 | qr{ (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x, |
42 | qr{ \< ([^<>[:space:]]+) \> }x, |
50 | |
43 | qr{ \{ ([^{}[:space:]]+) \} }x, |
51 | qr{ \{ ([^\{\}]+) \} }x, |
44 | qr{ \[ ([^{}[:space:]]+) \] }x, |
|
|
45 | qr{ \( ([^()[:space:]]+) \) }x, |
52 | qr{ \( ([^\(\)]+) \) }x, |
|
|
53 | qr{ \[ ([^\[\]]+) \] }x, |
|
|
54 | qr{ \< ([^\<\>]+) \> }x, |
46 | |
55 | |
47 | # urls, just a heuristic |
56 | # urls, just a heuristic |
48 | qr{( |
57 | qr{( |
49 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
58 | (?:https?://|ftp://|news://|mailto:|file://|\bwww\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+ |
50 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
59 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+*()~] # exclude some trailing characters (heuristic) |
51 | )}x, |
60 | )}x, |
52 | |
61 | |
53 | # shell-like argument quoting, basically always matches |
62 | # shell-like argument quoting, basically always matches |
54 | qr{\G [\ \t|&;<>()]* ( |
63 | qr{\G [\ \t|&;<>()]* ( |
55 | (?: |
64 | (?: |
… | |
… | |
81 | - $line->offset_of ($self->selection_beg); |
90 | - $line->offset_of ($self->selection_beg); |
82 | |
91 | |
83 | my @matches; |
92 | my @matches; |
84 | |
93 | |
85 | if ($markofs < $line->l) { |
94 | if ($markofs < $line->l) { |
86 | # convert markofs from character to UTF-8 offset space |
|
|
87 | { |
|
|
88 | my $prefix = substr $text, 0, $markofs; |
|
|
89 | utf8::encode $prefix; |
|
|
90 | $markofs = length $prefix; |
|
|
91 | } |
|
|
92 | |
|
|
93 | # not doing matches in unicode mode helps speed |
|
|
94 | # enormously here. working in utf-8 should be |
|
|
95 | # equivalent due to the magic of utf-8 encoding. |
|
|
96 | utf8::encode $text; |
|
|
97 | study $text; # _really_ helps, too :) |
95 | study $text; # _really_ helps, too :) |
98 | |
96 | |
99 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
97 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
100 | while ($text =~ /$regex/g) { |
98 | while ($text =~ /$regex/g) { |
101 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
99 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
… | |
… | |
121 | for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) { |
119 | for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) { |
122 | my ($ofs, $len) = @$_; |
120 | my ($ofs, $len) = @$_; |
123 | |
121 | |
124 | next if $len <= $curlen; |
122 | next if $len <= $curlen; |
125 | |
123 | |
126 | # convert back from UTF-8 offset space to character space |
|
|
127 | { |
|
|
128 | my $length = substr "$text ", $ofs, $len; |
|
|
129 | utf8::decode $length; |
|
|
130 | $len = length $length; |
|
|
131 | } |
|
|
132 | { |
|
|
133 | my $prefix = substr $text, 0, $ofs; |
|
|
134 | utf8::decode $prefix; |
|
|
135 | $ofs = length $prefix; |
|
|
136 | } |
|
|
137 | |
|
|
138 | $self->selection_beg ($line->coord_of ($ofs)); |
124 | $self->selection_beg ($line->coord_of ($ofs)); |
139 | $self->selection_end ($line->coord_of ($ofs + $len)); |
125 | $self->selection_end ($line->coord_of ($ofs + $len)); |
140 | return 1; |
126 | return 1; |
141 | } |
127 | } |
142 | |
128 | |