1 | #! perl |
1 | #! perl |
2 | |
2 | |
3 | sub on_keyboard_command { |
3 | sub on_user_command { |
4 | my ($self, $cmd) = @_; |
4 | my ($self, $cmd) = @_; |
5 | |
5 | |
6 | $cmd eq "selection:rot13" |
6 | $cmd eq "selection:rot13" |
7 | and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection); |
7 | and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection); |
8 | |
8 | |
9 | () |
9 | () |
10 | } |
10 | } |
11 | |
11 | |
|
|
12 | sub on_init { |
|
|
13 | my ($self) = @_; |
|
|
14 | |
|
|
15 | if (defined (my $res = $self->resource ("cutchars"))) { |
|
|
16 | $res = $self->locale_decode ($res); |
|
|
17 | push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x; |
|
|
18 | } |
|
|
19 | |
|
|
20 | for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) { |
|
|
21 | $res = $self->locale_decode ($res); |
|
|
22 | utf8::encode $res; |
|
|
23 | push @{ $self->{patterns} }, qr/$res/; |
|
|
24 | } |
|
|
25 | |
|
|
26 | $self->{enabled} = 1; |
|
|
27 | |
|
|
28 | push @{ $self->{term}{option_popup_hook} }, sub { |
|
|
29 | ("new selection" => $self->{enabled}, sub { $self->{enabled} = shift }) |
|
|
30 | }; |
|
|
31 | |
|
|
32 | () |
|
|
33 | } |
|
|
34 | |
|
|
35 | # "find interesting things"-patterns |
12 | my @mark_patterns = ( |
36 | my @mark_patterns = ( |
|
|
37 | # qr{ ([[:word:]]+) }x, |
|
|
38 | qr{ ([^[:space:]]+) }x, |
|
|
39 | |
|
|
40 | # common types of "parentheses" |
|
|
41 | qr{ (?<![^[:space:]]) [`'] ([^`']+) [`'] (?![^[:space:]]) }x, |
|
|
42 | qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space:]]) }x, |
|
|
43 | qr{ (?<![^[:space:]]) “ ([^“”]+) ” (?![^[:space:]]) }x, |
|
|
44 | |
|
|
45 | qr{ (?<![^[:space:]]) (' [^[:space:]] [^']* ') }x, |
|
|
46 | qr{ (' [^']* [^[:space:]] ') (?![^[:space:]]) }x, |
|
|
47 | qr{ (?<![^[:space:]]) (` [^[:space:]] [^']* ') }x, |
|
|
48 | qr{ (` [^']* [^[:space:]] ') (?![^[:space:]]) }x, |
|
|
49 | qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x, |
|
|
50 | qr{ (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x, |
|
|
51 | |
|
|
52 | qr{ \{ ([^\{\}]+) \} }x, |
|
|
53 | qr{ \( ([^\(\)]+) \) }x, |
|
|
54 | qr{ \[ ([^\[\]]+) \] }x, |
|
|
55 | qr{ \< ([^\<\>]+) \> }x, |
|
|
56 | |
13 | # urls, just a heuristic |
57 | # urls, just a heuristic |
14 | qr{( |
58 | qr{( |
15 | (?:https?|ftp|news|mailto|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+ |
59 | (?:https?://|ftp://|news://|mailto:|file://|\bwww\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+ |
16 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27(),~] # do not include a trailing dot, its wrong too often |
60 | [ab-zA-Z0-9\-\@;\/?:&=%\$_+*()~] # exclude some trailing characters (heuristic) |
17 | )}x, |
61 | )}x, |
18 | |
62 | |
19 | # common forms of quoting |
63 | # shell-like argument quoting, basically always matches |
20 | qr{(?:^|\s) [‘`] ([^‘`’']+) [’'] (?:\s|$)}x, |
|
|
21 | |
|
|
22 | # shell-like argument quoting |
|
|
23 | qr{\G [\ \t|&;<>()] *( |
64 | qr{\G [\ \t|&;<>()]* ( |
24 | (?: |
65 | (?: |
25 | [^\\"'\ \t|&;<>()]+ |
66 | [^\\"'\ \t|&;<>()]+ |
26 | | \\. |
67 | | \\. |
27 | | " (?: [^\\"]+ | \\. )* " |
68 | | " (?: [^\\"]+ | \\. )* " |
28 | | ' [^']* ' |
69 | | ' [^']* ' |
29 | )+ |
70 | )+ |
30 | )}x, |
71 | )}x, |
31 | ); |
72 | ); |
32 | |
73 | |
|
|
74 | # "correct obvious? crap"-patterns |
33 | my @simplify_patterns = ( |
75 | my @simplify_patterns = ( |
34 | qr{^"([^\\"'\ \t|&;<>()*?]+)"$}, # "simple" => simple |
76 | qr{^"([^\\"'\ \t|&;<>()*?]+)"$}, # "simple" => simple |
35 | qr{^(.*)[,\-]$}, |
77 | qr{^(.*)[,\-]$}, # strip off trailing , and - |
36 | ); |
78 | ); |
37 | |
79 | |
38 | sub on_sel_extend { |
80 | sub on_sel_extend { |
39 | my ($self) = @_; |
81 | my ($self, $time) = @_; |
|
|
82 | |
|
|
83 | $self->{enabled} |
|
|
84 | or return; |
40 | |
85 | |
41 | my ($row, $col) = $self->selection_mark; |
86 | my ($row, $col) = $self->selection_mark; |
42 | my $line = $self->line ($row); |
87 | my $line = $self->line ($row); |
43 | my $offset = $line->offset_of ($row, $col); |
|
|
44 | my $text = $line->t; |
88 | my $text = $line->t; |
|
|
89 | my $markofs = $line->offset_of ($row, $col); |
|
|
90 | my $curlen = $line->offset_of ($self->selection_end) |
|
|
91 | - $line->offset_of ($self->selection_beg); |
45 | |
92 | |
46 | for my $regex (@mark_patterns) { |
93 | my @matches; |
47 | while ($text =~ /$regex/g) { |
|
|
48 | if ($-[1] <= $offset and $offset <= $+[1]) { |
|
|
49 | my $match = $1; |
|
|
50 | my ($ofs1, $ofs2) = ($-[1], $+[1]); |
|
|
51 | |
94 | |
|
|
95 | if ($markofs < $line->l) { |
|
|
96 | # convert markofs from character to UTF-8 offset space |
|
|
97 | { |
|
|
98 | my $prefix = substr $text, 0, $markofs; |
|
|
99 | utf8::encode $prefix; |
|
|
100 | $markofs = length $prefix; |
|
|
101 | } |
|
|
102 | |
|
|
103 | # not doing matches in unicode mode helps speed |
|
|
104 | # enourmously here. working in utf-8 should be |
|
|
105 | # equivalent due to the magic of utf-8 encoding. |
|
|
106 | utf8::encode $text; |
|
|
107 | study $text; # _really_ helps, too :) |
|
|
108 | |
|
|
109 | for my $regex (@mark_patterns, @{ $self->{patterns} }) { |
|
|
110 | while ($text =~ /$regex/g) { |
|
|
111 | if ($-[1] <= $markofs and $markofs <= $+[1]) { |
|
|
112 | my $ofs = $-[1]; |
|
|
113 | my $match = $1; |
|
|
114 | |
52 | for my $regex (@simplify_patterns) { |
115 | for my $regex (@simplify_patterns) { |
53 | if ($match =~ $regex) { |
116 | if ($match =~ $regex) { |
54 | $match = $1; |
117 | $match = $1; |
55 | $ofs1 += $-[1]; |
118 | $ofs += $-[1]; |
56 | $ofs2 = $ofs1 + length $match; |
119 | } |
57 | } |
120 | } |
|
|
121 | |
|
|
122 | push @matches, [$ofs, length $match]; |
58 | } |
123 | } |
59 | |
|
|
60 | $self->selection_beg ($line->coord_of ($ofs1)); |
|
|
61 | $self->selection_end ($line->coord_of ($ofs2)); |
|
|
62 | return 1; |
|
|
63 | } |
124 | } |
64 | } |
125 | } |
65 | } |
126 | } |
66 | |
127 | |
|
|
128 | # whole line |
|
|
129 | push @matches, [0, ($line->end - $line->beg + 1) * $self->ncol]; |
|
|
130 | |
|
|
131 | for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) { |
|
|
132 | my ($ofs, $len) = @$_; |
|
|
133 | |
|
|
134 | next if $len <= $curlen; |
|
|
135 | |
|
|
136 | # convert back from UTF-8 offset space to character space |
|
|
137 | { |
|
|
138 | my $length = substr "$text ", $ofs, $len; |
|
|
139 | utf8::decode $length; |
|
|
140 | $len = length $length; |
|
|
141 | } |
|
|
142 | { |
|
|
143 | my $prefix = substr $text, 0, $ofs; |
|
|
144 | utf8::decode $prefix; |
|
|
145 | $ofs = length $prefix; |
|
|
146 | } |
|
|
147 | |
|
|
148 | $self->selection_beg ($line->coord_of ($ofs)); |
|
|
149 | $self->selection_end ($line->coord_of ($ofs + $len)); |
|
|
150 | return 1; |
|
|
151 | } |
|
|
152 | |
67 | () |
153 | () |
68 | } |
154 | } |