… | |
… | |
10 | } |
10 | } |
11 | |
11 | |
12 | sub on_init { |
12 | sub on_init { |
13 | my ($self) = @_; |
13 | my ($self) = @_; |
14 | |
14 | |
|
|
15 | if (defined (my $res = $self->resource ("cutchars"))) { |
|
|
16 | $res = $self->locale_decode ($res); |
|
|
17 | push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; |
|
|
18 | } |
|
|
19 | |
15 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
20 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
16 | no re 'eval'; # just to be sure |
|
|
17 | $res = $self->locale_decode ($res); |
21 | $res = $self->locale_decode ($res); |
18 | utf8::encode $res; |
22 | utf8::encode $res; |
19 | push @{ $self->{patterns} }, qr/$res/; |
23 | push @{ $self->{patterns} }, qr/$res/; |
20 | } |
24 | } |
21 | |
25 | |
22 | () |
26 | () |
23 | } |
27 | } |
24 | |
28 | |
25 | # "find interetsing things"-patterns |
29 | # "find interesting things"-patterns |
26 | my @mark_patterns = ( |
30 | my @mark_patterns = ( |
27 | # common types of "parentheses" |
31 | # common types of "parentheses" |
28 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
32 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
29 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
33 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
30 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
34 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
… | |
… | |
39 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
43 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
40 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
44 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
41 | )}x, |
45 | )}x, |
42 | |
46 | |
43 | # shell-like argument quoting, basically always matches |
47 | # shell-like argument quoting, basically always matches |
44 | qr{\G [\ \t|&;<>()] *( |
48 | qr{\G [\ \t|&;<>()]* ( |
45 | (?: |
49 | (?: |
46 | [^\\"'\ \t|&;<>()]+ |
50 | [^\\"'\ \t|&;<>()]+ |
47 | | \\. |
51 | | \\. |
48 | | " (?: [^\\"]+ | \\. )* " |
52 | | " (?: [^\\"]+ | \\. )* " |
49 | | ' [^']* ' |
53 | | ' [^']* ' |
… | |
… | |
67 | my $curlen = $line->offset_of ($self->selection_end) |
71 | my $curlen = $line->offset_of ($self->selection_end) |
68 | - $line->offset_of ($self->selection_beg); |
72 | - $line->offset_of ($self->selection_beg); |
69 | |
73 | |
70 | my @matches; |
74 | my @matches; |
71 | |
75 | |
72 | # not doing matches in unicode mode helps speed |
76 | if ($markofs < $line->l) { |
73 | # enormously here. working in utf-8 should be |
77 | # convert markofs from character to UTF-8 offset space |
74 | # equivalent due to the magic of utf-8 encoding. |
78 | { |
|
|
79 | my $prefix = substr $text, 0, $markofs; |
75 | utf8::encode $text; |
80 | utf8::encode $prefix; |
76 | study $text; # _really_ helps, too :) |
81 | $markofs = length $prefix; |
|
|
82 | } |
77 | |
83 | |
78 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
84 | # not doing matches in unicode mode helps speed |
79 | while ($text =~ /$regex/g) { |
85 | # enormously here. working in utf-8 should be |
80 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
86 | # equivalent due to the magic of utf-8 encoding. |
81 | my $ofs = $-[1]; |
87 | utf8::encode $text; |
82 | my $match = $1; |
88 | study $text; # _really_ helps, too :) |
83 | |
89 | |
|
|
90 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
|
|
91 | while ($text =~ /$regex/g) { |
|
|
92 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
|
|
93 | my $ofs = $-[1]; |
|
|
94 | my $match = $1; |
|
|
95 | |
84 | for my $regex (@simplify_patterns) { |
96 | for my $regex (@simplify_patterns) { |
85 | if ($match =~ $regex) { |
97 | if ($match =~ $regex) { |
86 | $match = $1; |
98 | $match = $1; |
87 | $ofs += $-[1]; |
99 | $ofs += $-[1]; |
|
|
100 | } |
88 | } |
101 | } |
|
|
102 | |
|
|
103 | push @matches, [$ofs, length $match]; |
89 | } |
104 | } |
90 | |
|
|
91 | push @matches, [$ofs, length $match]; |
|
|
92 | } |
105 | } |
93 | } |
106 | } |
94 | } |
107 | } |
95 | |
108 | |
96 | # whole line |
109 | # whole line |
… | |
… | |
99 | for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) { |
112 | for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) { |
100 | my ($ofs, $len) = @$_; |
113 | my ($ofs, $len) = @$_; |
101 | |
114 | |
102 | next if $len <= $curlen; |
115 | next if $len <= $curlen; |
103 | |
116 | |
|
|
117 | # convert back from UTF-8 offset space to character space |
|
|
118 | { |
|
|
119 | my $length = substr "$text ", $ofs, $len; |
|
|
120 | utf8::decode $length; |
|
|
121 | $len = length $length; |
|
|
122 | } |
|
|
123 | { |
|
|
124 | my $prefix = substr $text, 0, $ofs; |
|
|
125 | utf8::decode $prefix; |
|
|
126 | $ofs = length $prefix; |
|
|
127 | } |
|
|
128 | |
104 | $self->selection_beg ($line->coord_of ($ofs)); |
129 | $self->selection_beg ($line->coord_of ($ofs)); |
105 | $self->selection_end ($line->coord_of ($ofs + $len)); |
130 | $self->selection_end ($line->coord_of ($ofs + $len)); |
106 | return 1; |
131 | return 1; |
107 | } |
132 | } |
108 | |
133 | |