[ViewVC] Annotation of: cvs/rxvt-unicode/src/perl/selection

#! perl

# User-command hook.
#
# Handles the "selection:rot13" command by reading the current selection,
# applying ROT13 to its ASCII letters and storing the result back as the
# selection. Any other command is ignored.
#
# Parameters:
#    $self - the terminal object
#    $cmd  - the user command string
#
# Always returns the empty list.
sub on_user_command {
   my ($self, $cmd) = @_;

   if ($cmd eq "selection:rot13") {
      my @rotated = $self->selection;

      # rotate A-Z and a-z by 13 places, leave everything else alone
      tr/A-Za-z/N-ZA-Mn-za-m/ for @rotated;

      $self->selection (@rotated);
   }

   ()
}

# Initialisation hook: builds the per-terminal list of extra selection
# patterns in $self->{patterns}.
#
#  * If the "cutchars" resource is defined, one pattern is added whose
#    capture group 1 is a run of characters that are neither cutchars nor
#    whitespace (the resource value is interpolated quoted, via \Q...\E).
#  * The X resources "selection.pattern-0", "selection.pattern-1", ... are
#    read in order until the first undefined one; each is locale-decoded,
#    UTF-8 encoded and compiled as a regex.
#
# Always returns the empty list.
sub on_init {
   my ($self) = @_;

   my $res = $self->resource ("cutchars");

   if (defined $res) {
      $res = $self->locale_decode ($res);
      push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x;
   }

   my $idx = 0;

   while (1) {
      my $res = $self->x_resource ("selection.pattern-$idx");
      last unless defined $res;

      $res = $self->locale_decode ($res);
      utf8::encode $res; # patterns are stored in UTF-8 encoded form
      push @{ $self->{patterns} }, qr/$res/;

      $idx++;
   }

   ()
}

# "find interesting things"-patterns
#
# Candidate patterns that on_sel_extend runs against the line under the
# selection mark. Capture group 1 of each pattern is the span that gets
# offered for selection.
my @mark_patterns = (
   # common types of "parentheses"
   # the quoted forms must have whitespace (or the line boundary) on the
   # outer side of the quote, hence the negative look-behind/look-ahead
   qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space:]]) }x,
   qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space:]]) }x,
   qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ")                  }x,
   qr{                   (" [^"]* [^[:space:]] ") (?![^[:space:]]) }x,
   qr{ \< ([^<>[:space:]]+) \> }x,
   qr{ \{ ([^{}[:space:]]+) \} }x,
   qr{ \[ ([^\[\][:space:]]+) \] }x,
   qr{ \( ([^()[:space:]]+) \) }x,

   # urls, just a heuristic
   qr{(
      (?:https?|ftp|news|mailto|file)://[a-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+
      [a-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~]   # exclude some trailing characters (heuristic)
   )}x,

   # shell-like argument quoting, basically always matches
   # \G makes each match start where the previous one ended, so the line is
   # split into consecutive shell-style words
   qr{\G [\ \t|&;<>()]* (
      (?:
         [^\\"'\ \t|&;<>()]+
         | \\.
         | " (?: [^\\"]+ | \\. )* "
         | ' [^']* '
      )+
   )}x,
);

# "correct obvious? crap"-patterns
#
# Clean-up patterns that on_sel_extend applies, in this order, to the text
# captured by a mark pattern. When one matches, its capture group 1 replaces
# the candidate text and the candidate's start offset is advanced by the
# offset of that group.
my @simplify_patterns = (
   qr{^"([^\\"'\ \t|&;<>()*?]+)"$}, # "simple" => simple (quoted text without backslashes, quotes, blanks or |&;<>()*?)
   qr{^(.*)[,\-]$},                 # strip off one trailing , or -
);

# Selection-extension hook: grows the selection to the next larger
# "interesting" span around the selection mark.
#
# Every capture-group-1 match of @mark_patterns and of the per-terminal
# $self->{patterns} that covers the mark is collected, tidied with
# @simplify_patterns, and recorded as [offset, length]. The whole line is
# added as a fallback. The shortest candidate that is longer than the current
# selection (ties: the one starting furthest right) becomes the new selection.
#
# Matching is done on the UTF-8 encoded line, but candidates are recorded in
# character offsets, so that they can be compared with the current selection
# length and with the whole-line entry, which are character based as well.
#
# Parameters:
#    $self - the terminal object
#    $time - not used here
#
# Returns 1 after changing the selection, the empty list otherwise.
sub on_sel_extend {
   my ($self, $time) = @_;

   my ($row, $col) = $self->selection_mark;
   my $line = $self->line ($row);
   my $text = $line->t;
   my $textlen = length $text; # in characters, taken before $text is encoded below
   my $markofs = $line->offset_of ($row, $col);
   my $curlen  = $line->offset_of ($self->selection_end)
               - $line->offset_of ($self->selection_beg);

   # candidates as [offset, length], both in character space
   my @matches;

   if ($markofs < $line->l) {
      # convert markofs from character to UTF-8 offset space
      {
         my $prefix = substr $text, 0, $markofs;
         utf8::encode $prefix;
         $markofs = length $prefix;
      }

      # not doing matches in unicode mode helps speed
      # enormously here. working in utf-8 should be
      # equivalent due to the magic of utf-8 encoding.
      utf8::encode $text;
      study $text; # _really_ helps, too :)

      for my $regex (@mark_patterns, @{ $self->{patterns} }) {
         while ($text =~ /$regex/g) {
            # ignore matches in which group 1 did not take part
            next unless defined $-[1];
            # only matches whose group 1 covers the mark are candidates
            next unless ($-[1] <= $markofs and $markofs <= $+[1]);

            my $ofs   = $-[1];
            my $match = $1;

            for my $simplify (@simplify_patterns) {
               if ($match =~ $simplify) {
                  $match = $1;
                  $ofs += $-[1]; # group 1 offset is relative to the old $match
               }
            }

            # convert back from UTF-8 offset space to character space
            my $prefix = substr $text, 0, $ofs;
            utf8::decode $prefix;
            utf8::decode $match;

            push @matches, [length $prefix, length $match];
         }
      }
   }

   # whole line
   push @matches, [0, ($line->end - $line->beg + 1) * $self->ncol];

   for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) {
      my ($ofs, $len) = @$_;

      next if $len <= $curlen;

      # never reach further than one position past the end of the text
      # (only the whole-line entry can be longer than that)
      my $maxlen = $textlen + 1 - $ofs;
      $len = $maxlen if $len > $maxlen;

      $self->selection_beg ($line->coord_of ($ofs));
      $self->selection_end ($line->coord_of ($ofs + $len));
      return 1;
   }

   ()
}
Revision:	1.36
Committed:	Wed Jan 25 15:33:43 2006 UTC (18 years, 4 months ago) by root
Branch:	MAIN
CVS Tags:	rel-7_3, rel-7_5, rel-7_4, rel-7_3a
Changes since 1.35:	+1 -1 lines
Log Message:	* empty log message *
#	User	Rev	Content
1	root	1.4	#! perl
2
3	root	1.36	sub on_user_command {
4	root	1.7	my ($self, $cmd) = @_;
5	elmex	1.2
6			$cmd eq "selection:rot13"
7	root	1.7	and $self->selection (map { y/A-Za-z/N-ZA-Mn-za-m/; $_ } $self->selection);
8	elmex	1.3
9			()
10	elmex	1.1	}
11	root	1.5
12	root	1.23	sub on_init {
13			my ($self) = @_;
14
15	root	1.35	if (defined (my $res = $self->resource ("cutchars"))) {
16			$res = $self->locale_decode ($res);
17			push @{ $self->{patterns} }, qr{\G [\Q$res\E[:space:]]* ([^\Q$res\E[:space:]]+) }x;
18			}
19
20	root	1.23	for (my $idx = 0; defined (my $res = $self->x_resource ("selection.pattern-$idx")); $idx++) {
21	root	1.31	$res = $self->locale_decode ($res);
22			utf8::encode $res;
23	root	1.23	push @{ $self->{patterns} }, qr/$res/;
24			}
25
26			()
27			}
28
29	root	1.34	# "find interesting things"-patterns
30	root	1.15	my @mark_patterns = (
31	root	1.24	# common types of "parentheses"
32	root	1.25	qr{ (?<![^[:space:]]) ‘ ([^‘’]+) ’ (?![^[:space]]) }x,
33			qr{ (?<![^[:space:]]) ` ([^`']+) ' (?![^[:space]]) }x,
34			qr{ (?<![^[:space:]]) (" [^[:space:]] [^"]* ") }x,
35			qr{ (" [^"]* [^[:space:]] ") (?![^[:space]]) }x,
36			qr{ \< ([^<>[:space:]]+) \> }x,
37			qr{ \{ ([^{}[:space:]]+) \} }x,
38			qr{ \[ ([^{}[:space:]]+) \] }x,
39			qr{ \( ([^()[:space:]]+) \) }x,
40	root	1.14
41	root	1.20	# urls, just a heuristic
42			qr{(
43			(?:https?\|ftp\|news\|mailto\|file)://[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~]+
44			[ab-zA-Z0-9\-\@;\/?:&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic)
45			)}x,
46
47	root	1.17	# shell-like argument quoting, basically always matches
48	root	1.34	qr{\G [\ \t\|&;<>()]* (
49	root	1.9	(?:
50	root	1.11	[^\\"'\ \t\|&;<>()]+
51	root	1.9	\| \\.
52	root	1.13	\| " (?: [^\\"]+ \| \\. )* "
53	root	1.9	\| ' [^']* '
54			)+
55	root	1.14	)}x,
56	root	1.8	);
57
58	root	1.16	# "correct obvious? crap"-patterns
59	root	1.15	my @simplify_patterns = (
60			qr{^"([^\\"'\ \t\|&;<>()*?]+)"$}, # "simple" => simple
61	root	1.16	qr{^(.*)[,\-]$}, # strip off trailing , and -
62	root	1.15	);
63	root	1.14
64	root	1.6	sub on_sel_extend {
65	root	1.21	my ($self, $time) = @_;
66	root	1.8
67			my ($row, $col) = $self->selection_mark;
68			my $line = $self->line ($row);
69			my $text = $line->t;
70	root	1.20	my $markofs = $line->offset_of ($row, $col);
71			my $curlen = $line->offset_of ($self->selection_end)
72			- $line->offset_of ($self->selection_beg);
73
74			my @matches;
75	root	1.8
76	root	1.32	if ($markofs < $line->l) {
77			# convert markofs from character to UTF-8 offset space
78			{
79			my $prefix = substr $text, 0, $markofs;
80			utf8::encode $prefix;
81			$markofs = length $prefix;
82			}
83
84			# not doing matches in unicode mode helps speed
85			# enormously here. working in utf-8 should be
86			# equivalent due to the magic of utf-8 encoding.
87			utf8::encode $text;
88			study $text; # _really_ helps, too :)
89
90			for my $regex (@mark_patterns, @{ $self->{patterns} }) {
91			while ($text =~ /$regex/g) {
92			if ($-[1] <= $markofs and $markofs <= $+[1]) {
93			my $ofs = $-[1];
94			my $match = $1;
95
96			for my $regex (@simplify_patterns) {
97			if ($match =~ $regex) {
98			$match = $1;
99			$ofs += $-[1];
100			}
101	root	1.15	}
102	root	1.32
103			push @matches, [$ofs, length $match];
104	root	1.15	}
105	root	1.8	}
106			}
107			}
108
109	root	1.21	# whole line
110			push @matches, [0, ($line->end - $line->beg + 1) * $self->ncol];
111
112	root	1.20	for (sort { $a->[1] <=> $b->[1] or $b->[0] <=> $a->[0] } @matches) {
113			my ($ofs, $len) = @$_;
114
115			next if $len <= $curlen;
116
117	root	1.32	# convert back from UTF-8 offset space to character space
118			{
119	root	1.33	my $length = substr "$text ", $ofs, $len;
120	root	1.32	utf8::decode $length;
121			$len = length $length;
122			}
123			{
124			my $prefix = substr $text, 0, $ofs;
125			utf8::decode $prefix;
126			$ofs = length $prefix;
127			}
128
129	root	1.20	$self->selection_beg ($line->coord_of ($ofs));
130			$self->selection_end ($line->coord_of ($ofs + $len));
131			return 1;
132			}
133
134	root	1.21	()
135	root	1.5	}