… | |
… | |
86 | URxvt.matcher.button: 1 |
86 | URxvt.matcher.button: 1 |
87 | URxvt.matcher.pattern.1: \\bwww\\.[\\w-]+\\.[\\w./?&@#-]*[\\w/-] |
87 | URxvt.matcher.pattern.1: \\bwww\\.[\\w-]+\\.[\\w./?&@#-]*[\\w/-] |
88 | URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$) |
88 | URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$) |
89 | URxvt.matcher.launcher.2: gvim +$2 $1 |
89 | URxvt.matcher.launcher.2: gvim +$2 $1 |
90 | |
90 | |
|
|
91 | =head2 Regex encoding/wide character matching |
|
|
92 | |
|
|
93 | Urxvt stores all text as Unicode, in a special encoding that uses |
|
|
94 | one character/code point per column. For various reasons, the regular |
|
|
95 | expressions are matched directly against this encoding, which means there are a few things |
|
|
96 | you need to keep in mind: |
|
|
97 | |
|
|
98 | =over |
|
|
99 | |
|
|
100 | =item X resources/command line arguments are locale-encoded |
|
|
101 | |
|
|
102 | The regexes taken from the command line or resources will be converted |
|
|
103 | from locale encoding to Unicode. This can change the number of code points |
|
|
104 | per character. |
|
|
105 | |
|
|
106 | =item Wide characters are column-padded with C<$urxvt::NOCHAR> |
|
|
107 | |
|
|
108 | Wide characters (such as kanji and sometimes tabs) are padded with |
|
|
109 | a special character value (C<$urxvt::NOCHAR>). That means that |
|
|
110 | constructs such as C<\w> or C<.> will only match part of a character, as |
|
|
111 | C<$urxvt::NOCHAR> is not matched by C<\w>, and both constructs match only the first |
|
|
112 | "column" of a wide character. |
|
|
113 | |
|
|
114 | That means you have to incorporate C<$urxvt::NOCHAR> into parts of regexes |
|
|
115 | that may match wide characters. For example, to match C<\w+> you might |
|
|
116 | want to use C<[\w$urxvt::NOCHAR]+> instead, and to match a single character |
|
|
117 | (C<.>) you might want to use C<.$urxvt::NOCHAR*> instead. |
|
|
118 | |
|
|
119 | =back |
|
|
120 | |
91 | =cut |
121 | =cut |
92 | |
122 | |
93 | my $url = |
123 | my $url = |
94 | qr{ |
124 | qr{ |
95 | (?:https?://|ftp://|news://|mailto:|file://|\bwww\.) |
125 | (?:https?://|ftp://|news://|mailto:|file://|\bwww\.) |
96 | [\w\-\@;\/?:&=%\$.+!*\x27,~#]* |
126 | [\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]* |
97 | ( |
127 | ( |
98 | \([\w\-\@;\/?:&=%\$.+!*\x27,~#]*\)| # Allow a pair of matched parentheses |
128 | \([\w\-\@;\/?:&=%\$.+!*\x27,~#$urxvt::NOCHAR]*\)| # Allow a pair of matched parentheses |
99 | [\w\-\@;\/?:&=%\$+*~] # exclude some trailing characters (heuristic) |
129 | [\w\-\@;\/?:&=%\$+*~] # exclude some trailing characters (heuristic) |
100 | )+ |
130 | )+ |
101 | }x; |
131 | }x; |
102 | |
132 | |
103 | sub matchlist_key_press { |
133 | sub matchlist_key_press { |
… | |
… | |
251 | } |
281 | } |
252 | } |
282 | } |
253 | |
283 | |
254 | my @defaults = ($url); |
284 | my @defaults = ($url); |
255 | my @matchers; |
285 | my @matchers; |
256 | for (my $idx = 0; defined (my $res = $self->my_resource ("pattern.$idx") || $defaults[$idx]); $idx++) { |
286 | for (my $idx = 0; defined (my $res = $self->locale_decode ($self->my_resource ("pattern.$idx")) || $defaults[$idx]); $idx++) { |
257 | $res = $self->locale_decode ($res); |
|
|
258 | utf8::encode $res; |
|
|
259 | my $launcher = $self->my_resource ("launcher.$idx"); |
287 | my $launcher = $self->my_resource ("launcher.$idx"); |
260 | $launcher =~ s/\$&|\$\{&\}/\${0}/g if $launcher; |
288 | $launcher =~ s/\$&|\$\{&\}/\${0}/g if $launcher; |
261 | my $rend = $self->parse_rend($self->my_resource ("rend.$idx")); |
289 | my $rend = $self->parse_rend($self->my_resource ("rend.$idx")); |
262 | unshift @matchers, [qr($res)x,$launcher,$rend]; |
290 | unshift @matchers, [qr($res)x,$launcher,$rend]; |
263 | } |
291 | } |