… | |
… | |
11 | |
11 | |
12 | sub on_init { |
12 | sub on_init { |
13 | my ($self) = @_; |
13 | my ($self) = @_; |
14 | |
14 | |
15 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
15 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
16 | no re 'eval'; # just to be sure |
|
|
17 | $res = $self->locale_decode ($res); |
16 | $res = $self->locale_decode ($res); |
18 | utf8::encode $res; |
17 | utf8::encode $res; |
19 | push @{ $self->{patterns} }, qr/$res/; |
18 | push @{ $self->{patterns} }, qr/$res/; |
20 | } |
19 | } |
21 | |
20 | |
22 | () |
21 | () |
23 | } |
22 | } |
24 | |
23 | |
25 | # "find interetsing things"-patterns |
24 | # "find interesting things"-patterns |
26 | my @mark_patterns = ( |
25 | my @mark_patterns = ( |
27 | # common types of "parentheses" |
26 | # common types of "parentheses" |
28 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
27 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x, |
29 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
28 | qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x, |
30 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
29 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
… | |
… | |
39 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
38 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
40 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
39 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) |
41 | )}x, |
40 | )}x, |
42 | |
41 | |
43 | # shell-like argument quoting, basically always matches |
42 | # shell-like argument quoting, basically always matches |
44 | qr{\G [\ \t|&;<>()] *( |
43 | qr{\G [\ \t|&;<>()]* ( |
45 | (?: |
44 | (?: |
46 | [^\\"'\ \t|&;<>()]+ |
45 | [^\\"'\ \t|&;<>()]+ |
47 | | \\. |
46 | | \\. |
48 | | " (?: [^\\"]+ | \\. )* " |
47 | | " (?: [^\\"]+ | \\. )* " |
49 | | ' [^']* ' |
48 | | ' [^']* ' |
… | |
… | |
110 | |
109 | |
111 | next if $len <= $curlen; |
110 | next if $len <= $curlen; |
112 | |
111 | |
113 | # convert back from UTF-8 offset space to character space |
112 | # convert back from UTF-8 offset space to character space |
114 | { |
113 | { |
115 | my $length = substr $text, $ofs, $len; |
114 | my $length = substr "$text ", $ofs, $len; |
116 | utf8::decode $length; |
115 | utf8::decode $length; |
117 | $len = length $length; |
116 | $len = length $length; |
118 | } |
117 | } |
119 | { |
118 | { |
120 | my $prefix = substr $text, 0, $ofs; |
119 | my $prefix = substr $text, 0, $ofs; |