… | |
… | |
12 | sub on_init { |
12 | sub on_init { |
13 | my ($self) = @_; |
13 | my ($self) = @_; |
14 | |
14 | |
15 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
15 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
16 | no re 'eval'; # just to be sure |
16 | no re 'eval'; # just to be sure |
|
|
17 | $res = utf8::encode $self->locale_decode ($res); |
17 | push @{ $self->{patterns} }, qr/$res/; |
18 | push @{ $self->{patterns} }, qr/$res/; |
18 | } |
19 | } |
19 | |
20 | |
20 | () |
21 | () |
21 | } |
22 | } |
22 | |
23 | |
23 | # "find interesting things"-patterns |
24 | # "find interesting things"-patterns |
24 | my @mark_patterns = ( |
25 | my @mark_patterns = ( |
25 | # common types of "parentheses" |
26 | # common types of "parentheses" |
26 | qr{(?:^|\s) ‘ ([^‘’]+?) ’ (?:\s|\)|$)}x, |
27 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
27 | qr{(?:^|\s) ` ([^`']+?) ' (?:\s|\)|$)}x, |
28 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
28 | qr{ \{ ([^{}]+?) \} }x, |
29 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
29 | qr{ \[ ([^{}]+?) \] }x, |
30 | qr{ (" [^"]* [^[:space:]] ") (?![^[:space]]) }x, |
30 | qr{ \( ([^()]+?) \) }x, |
31 | qr{ \< ([^<>[:space:]]+) \> }x, |
|
|
32 | qr{ \{ ([^{}[:space:]]+) \} }x, |
|
|
33 | qr{ \[ ([^{}[:space:]]+) \] }x, |
|
|
34 | qr{ \( ([^()[:space:]]+) \) }x, |
31 | |
35 | |
32 | # urls, just a heuristic |
36 | # urls, just a heuristic |
33 | qr{( |
37 | qr{( |
34 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
38 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
35 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
39 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
… | |
… | |
62 | my $curlen = $line->offset_of ($self->selection_end) |
66 | my $curlen = $line->offset_of ($self->selection_end) |
63 | - $line->offset_of ($self->selection_beg); |
67 | - $line->offset_of ($self->selection_beg); |
64 | |
68 | |
65 | my @matches; |
69 | my @matches; |
66 | |
70 | |
|
|
71 | # not doing matches in unicode mode helps speed |
|
|
72 | # enormously here. working in utf-8 should be |
|
|
73 | # equivalent due to the magic of utf-8 encoding. |
|
|
74 | utf8::encode $text; |
|
|
75 | study $text; # _really_ helps, too :) |
|
|
76 | |
67 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
77 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
68 | while ($text =~ /$regex/g) { |
78 | while ($text =~ /$regex/g) { |
69 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
79 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
70 | my $ofs = $-[1]; |
80 | my $ofs = $-[1]; |
71 | my $match = $1; |
81 | my $match = $1; |
72 | |
|
|
73 | push @matches, [$ofs, length $match]; |
|
|
74 | |
82 | |
75 | for my $regex (@simplify_patterns) { |
83 | for my $regex (@simplify_patterns) { |
76 | if ($match =~ $regex) { |
84 | if ($match =~ $regex) { |
77 | $match = $1; |
85 | $match = $1; |
78 | $ofs += $-[1]; |
86 | $ofs += $-[1]; |