1 |
root |
1.4 |
|
2 |
elmex |
1.1 |
/* |
3 |
|
|
* static char *rcsid_player_c = |
4 |
root |
1.4 |
* "$Id: re-cmp.C,v 1.3 2006-09-08 04:51:08 pippijn Exp $"; |
5 |
elmex |
1.1 |
*/ |
6 |
|
|
|
7 |
|
|
|
8 |
|
|
/* re-cmp.c |
9 |
|
|
* Pattern match a string, parsing some of the common RE-metacharacters. |
10 |
|
|
* |
11 |
|
|
* This code is public domain, but I would appreciate to hear of |
12 |
|
|
* improvements or even the fact that you use it in your program. |
13 |
|
|
* |
14 |
|
|
* Deliberate BUGS: |
15 |
|
|
* - These tokens are not supported: | ( ) |
16 |
|
|
* - You will get the longest expansion of the _first_ string which |
17 |
|
|
* matches the RE, not the longest string which would be the proper |
18 |
|
|
* behaviour for a RE-matcher. |
19 |
|
|
* |
20 |
|
|
* Author: Kjetil T. Homme <kjetilho@ifi.uio.no> May 1993 |
21 |
|
|
*/ |
22 |
|
|
|
23 |
|
|
#include <stdio.h> |
24 |
|
|
#include <stdlib.h> |
25 |
|
|
#include <memory.h> |
26 |
|
|
#include <limits.h> |
27 |
|
|
#include <re-cmp.h> |
28 |
|
|
#include <ctype.h> |
29 |
|
|
|
30 |
|
|
/* Get prototype functions to prevent warnings. */ |
31 |
|
|
#if defined (__sun__) && defined(StupidSunHeaders) |
32 |
root |
1.4 |
# include <sys/types.h> |
33 |
|
|
# include <sys/time.h> |
34 |
|
|
# include "sunos.h" /* Prototypes for standard libraries, sunos lack those */ |
35 |
elmex |
1.1 |
#endif |
36 |
|
|
|
37 |
|
|
|
38 |
|
|
/* P r o t o t y p e s |
39 |
|
|
*/ |
40 |
root |
1.4 |
const char *re_cmp (const char *, const char *); |
41 |
|
|
static bool re_cmp_step (const char *, const char *, unsigned, int); |
42 |
|
|
static void re_init (void); |
43 |
|
|
static bool re_match_token (unsigned char, selection *); |
44 |
|
|
static const char *re_get_token (selection *, const char *); |
45 |
|
|
|
46 |
elmex |
1.1 |
#ifdef DEBUG2 |
47 |
root |
1.4 |
static void re_dump_sel (selection *); |
48 |
elmex |
1.1 |
#endif |
49 |
|
|
|
50 |
|
|
/* G l o b a l v a r i a b l e s |
51 |
|
|
*/ |
52 |
root |
1.4 |
static bool re_init_done = false; |
53 |
|
|
static selection *re_token[RE_TOKEN_MAX]; |
54 |
|
|
static const char *re_substr[RE_TOKEN_MAX]; |
55 |
|
|
static unsigned int re_token_depth; |
56 |
elmex |
1.1 |
|
57 |
|
|
/* E x t e r n a l f u n c t i o n |
58 |
|
|
*/ |
59 |
|
|
|
60 |
|
|
/* re-cmp - get regular expression match. |
61 |
|
|
* Return values: NULL - no match or error in regexp. |
62 |
|
|
* pointer to beginning of matching string |
63 |
|
|
*/ |
64 |
|
|
const char * |
65 |
root |
1.4 |
re_cmp (const char *str, const char *regexp) |
66 |
|
|
{ |
67 |
|
|
const char *next_regexp; |
68 |
|
|
bool once = false; |
69 |
|
|
bool matched; |
70 |
elmex |
1.1 |
|
71 |
root |
1.4 |
if (re_init_done == false) |
72 |
|
|
re_init (); |
73 |
elmex |
1.1 |
|
74 |
|
|
#ifdef SAFE_CHECKS |
75 |
root |
1.4 |
if (regexp == NULL || str == NULL) |
76 |
|
|
return NULL; |
77 |
elmex |
1.1 |
#endif |
78 |
root |
1.4 |
if (*regexp == '^') |
79 |
|
|
{ |
80 |
|
|
once = true; |
81 |
|
|
++regexp; |
82 |
|
|
} |
83 |
|
|
if (*regexp == 0) |
84 |
|
|
{ |
85 |
|
|
/* // or /^/ matches any string */ |
86 |
|
|
return str; |
87 |
|
|
} |
88 |
|
|
|
89 |
|
|
next_regexp = re_get_token (re_token[0], regexp); |
90 |
|
|
re_token_depth = 0; |
91 |
|
|
re_substr[0] = next_regexp; |
92 |
|
|
|
93 |
|
|
matched = false; |
94 |
|
|
while (*str != '\0' && !(matched = re_match_token (*str, re_token[0]))) |
95 |
|
|
str++; |
96 |
|
|
|
97 |
|
|
if (matched && *next_regexp == 0) |
98 |
|
|
return str; |
99 |
|
|
|
100 |
|
|
/* Apologies for the nearly duplicated code below, hopefully it |
101 |
|
|
* speeds things up. |
102 |
|
|
*/ |
103 |
|
|
if (once) |
104 |
|
|
{ |
105 |
|
|
switch (re_token[0]->repeat) |
106 |
|
|
{ |
107 |
root |
1.2 |
case rep_once: |
108 |
root |
1.4 |
if (matched == false) |
109 |
|
|
return NULL; |
110 |
|
|
break; |
111 |
root |
1.2 |
case rep_once_or_more: |
112 |
root |
1.4 |
if (matched == false) |
113 |
|
|
return NULL; |
114 |
root |
1.2 |
|
115 |
root |
1.4 |
if (re_cmp_step (str + 1, regexp, 0, 1)) |
116 |
|
|
return str; |
117 |
|
|
break; |
118 |
root |
1.2 |
case rep_null_or_once: |
119 |
root |
1.4 |
if (matched == false) |
120 |
|
|
return re_cmp_step (str, next_regexp, 1, 0) ? str : NULL; |
121 |
|
|
break; |
122 |
root |
1.2 |
case rep_null_or_more: |
123 |
root |
1.4 |
if (matched) |
124 |
|
|
{ |
125 |
|
|
if (re_cmp_step (str + 1, regexp, 0, 1)) |
126 |
|
|
return str; |
127 |
|
|
} |
128 |
|
|
else |
129 |
|
|
{ |
130 |
|
|
return re_cmp_step (str, next_regexp, 1, 0) ? str : NULL; |
131 |
root |
1.2 |
} |
132 |
root |
1.4 |
break; |
133 |
root |
1.2 |
} |
134 |
root |
1.4 |
return re_cmp_step (str + 1, next_regexp, 1, 0) ? str : NULL; |
135 |
elmex |
1.1 |
} |
136 |
|
|
|
137 |
root |
1.4 |
if (matched) |
138 |
|
|
{ |
139 |
|
|
switch (re_token[0]->repeat) |
140 |
|
|
{ |
141 |
root |
1.2 |
case rep_once: |
142 |
|
|
case rep_null_or_once: |
143 |
root |
1.4 |
break; |
144 |
root |
1.2 |
case rep_once_or_more: |
145 |
|
|
case rep_null_or_more: |
146 |
root |
1.4 |
if (re_cmp_step (str + 1, regexp, 0, 1)) |
147 |
|
|
return str; |
148 |
|
|
break; |
149 |
root |
1.2 |
} |
150 |
root |
1.4 |
/* The logic here is that re_match_token only sees |
151 |
|
|
* if the one letter matches. Thus, if the |
152 |
|
|
* regex is like '@match eureca', and the |
153 |
|
|
* the user enters anything with an e, re_match_token |
154 |
|
|
* returns true, but they really need to match the |
155 |
|
|
* entire regexp, which re_cmp_step will do. |
156 |
|
|
* However, what happens is that there can be a case |
157 |
|
|
* where the string being match is something like |
158 |
|
|
* 'where is eureca'. In this case, the re_match_token |
159 |
|
|
* matches that first e, but the re_cmp_step below, |
160 |
|
|
* fails because the next character (r) doesn't match |
161 |
|
|
* the u. So we call re_cmp with the string |
162 |
|
|
* after the first r, so that it should hopefully match |
163 |
|
|
* up properly. |
164 |
|
|
*/ |
165 |
|
|
if (re_cmp_step (str + 1, next_regexp, 1, 0)) |
166 |
|
|
return str; |
167 |
|
|
else if (*(str + 1) != 0) |
168 |
|
|
return re_cmp (str + 1, regexp); |
169 |
elmex |
1.1 |
} |
170 |
root |
1.4 |
return NULL; |
171 |
elmex |
1.1 |
} |
172 |
|
|
|
173 |
|
|
/* A u x i l l i a r y f u n c t i o n s |
174 |
|
|
*/ |
175 |
|
|
|
176 |
pippijn |
1.3 |
static bool |
177 |
root |
1.4 |
re_cmp_step (const char *str, const char *regexp, unsigned slot, int matches) |
178 |
|
|
{ |
179 |
|
|
/* str - string to match |
180 |
|
|
* regexp - pattern |
181 |
|
|
* slot - number of the token which under consideration |
182 |
|
|
* matches - how many times the token has matched |
183 |
|
|
*/ |
184 |
|
|
const char *next_regexp; |
185 |
|
|
bool matched; |
186 |
elmex |
1.1 |
|
187 |
|
|
#ifdef DEBUG |
188 |
root |
1.4 |
|
189 |
elmex |
1.1 |
/* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/ |
190 |
|
|
#endif |
191 |
|
|
|
192 |
root |
1.4 |
if (*regexp == 0) |
193 |
|
|
{ |
194 |
|
|
/* When we reach the end of the regexp, the match is a success |
195 |
|
|
*/ |
196 |
|
|
return true; |
197 |
|
|
} |
198 |
|
|
|
199 |
|
|
/* This chunk of code makes sure that the regexp-tokenising happens |
200 |
|
|
* only once. We only tokenise as much as we need. |
201 |
|
|
*/ |
202 |
|
|
if (slot > re_token_depth) |
203 |
|
|
{ |
204 |
|
|
re_token_depth = slot; |
205 |
|
|
if (re_token[slot] == NULL) |
206 |
|
|
re_token[slot] = (selection *) malloc (sizeof (selection)); |
207 |
|
|
next_regexp = re_get_token (re_token[slot], regexp); |
208 |
|
|
if (next_regexp == NULL) |
209 |
|
|
{ |
210 |
|
|
/* Syntax error, what else can we do? */ |
211 |
|
|
return false; |
212 |
root |
1.2 |
} |
213 |
root |
1.4 |
re_substr[slot] = next_regexp; |
214 |
|
|
} |
215 |
|
|
else |
216 |
|
|
{ |
217 |
|
|
next_regexp = re_substr[slot]; |
218 |
elmex |
1.1 |
} |
219 |
|
|
|
220 |
root |
1.4 |
matched = re_match_token (*str, re_token[slot]); |
221 |
|
|
if (matched) |
222 |
|
|
++matches; |
223 |
elmex |
1.1 |
|
224 |
root |
1.4 |
if (*str == 0) |
225 |
|
|
return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; |
226 |
elmex |
1.1 |
|
227 |
root |
1.4 |
switch (re_token[slot]->repeat) |
228 |
|
|
{ |
229 |
root |
1.2 |
case rep_once: |
230 |
root |
1.4 |
if (matches == 1) |
231 |
|
|
{ /* (matches == 1) => (matched == true) */ |
232 |
|
|
return re_cmp_step (str + 1, next_regexp, slot + 1, 0); |
233 |
root |
1.2 |
} |
234 |
root |
1.4 |
return false; |
235 |
root |
1.2 |
case rep_once_or_more: |
236 |
root |
1.4 |
if (matched) |
237 |
|
|
{ /* (matched == true) => (matches >= 1) */ |
238 |
|
|
/* First check if the current token repeats more */ |
239 |
|
|
if (re_cmp_step (str + 1, regexp, slot, matches)) |
240 |
|
|
return true; |
241 |
|
|
return re_cmp_step (str + 1, next_regexp, slot + 1, 0); |
242 |
root |
1.2 |
} |
243 |
root |
1.4 |
return false; |
244 |
root |
1.2 |
case rep_null_or_once: |
245 |
root |
1.4 |
/* We must go on to the next token, but should we advance str? */ |
246 |
|
|
if (matches == 0) |
247 |
|
|
{ |
248 |
|
|
return re_cmp_step (str, next_regexp, slot + 1, 0); |
249 |
|
|
} |
250 |
|
|
else if (matches == 1) |
251 |
|
|
{ |
252 |
|
|
return re_cmp_step (str + 1, next_regexp, slot + 1, 0); |
253 |
root |
1.2 |
} |
254 |
root |
1.4 |
return false; /* Not reached */ |
255 |
root |
1.2 |
case rep_null_or_more: |
256 |
root |
1.4 |
if (matched) |
257 |
|
|
{ |
258 |
|
|
/* Look for further repeats, advance str */ |
259 |
|
|
if (re_cmp_step (str + 1, regexp, slot, matches)) |
260 |
|
|
return true; |
261 |
|
|
return re_cmp_step (str, next_regexp, slot + 1, 0); |
262 |
root |
1.2 |
} |
263 |
root |
1.4 |
return re_cmp_step (str, next_regexp, slot + 1, 0); |
264 |
elmex |
1.1 |
} |
265 |
root |
1.4 |
return false; |
266 |
elmex |
1.1 |
} |
267 |
|
|
|
268 |
|
|
static void |
269 |
root |
1.4 |
re_init (void) |
270 |
|
|
{ |
271 |
|
|
int i; |
272 |
|
|
|
273 |
|
|
re_token[0] = (selection *) malloc (sizeof (selection)); |
274 |
|
|
for (i = 1; i < RE_TOKEN_MAX; i++) |
275 |
|
|
re_token[i] = NULL; |
276 |
elmex |
1.1 |
|
277 |
root |
1.4 |
re_init_done = true; |
278 |
elmex |
1.1 |
} |
279 |
|
|
|
280 |
pippijn |
1.3 |
static bool |
281 |
root |
1.4 |
re_match_token (unsigned char c, selection * sel) |
282 |
|
|
{ |
283 |
|
|
switch (sel->type) |
284 |
|
|
{ |
285 |
root |
1.2 |
case sel_any: |
286 |
root |
1.4 |
return true; |
287 |
root |
1.2 |
case sel_end: |
288 |
root |
1.4 |
return (c == 0); |
289 |
root |
1.2 |
case sel_single: |
290 |
root |
1.4 |
return (tolower (c) == tolower (sel->u.single)); |
291 |
root |
1.2 |
case sel_range: |
292 |
root |
1.4 |
return (c >= sel->u.range.low && c <= sel->u.range.high); |
293 |
root |
1.2 |
case sel_array: |
294 |
root |
1.4 |
return (sel->u.array[c]); |
295 |
root |
1.2 |
case sel_not_single: |
296 |
root |
1.4 |
return (tolower (c) != tolower (sel->u.single)); |
297 |
root |
1.2 |
case sel_not_range: |
298 |
root |
1.4 |
return (c < sel->u.range.low && c > sel->u.range.high); |
299 |
elmex |
1.1 |
} |
300 |
root |
1.4 |
return false; |
301 |
elmex |
1.1 |
} |
302 |
|
|
|
303 |
|
|
/* re_get_token - get regular expression token |
304 |
|
|
* Returns the first token found in <regexp> in <sel> |
305 |
|
|
* Return values: NULL syntax error |
306 |
|
|
* pointer to first character past token. |
307 |
|
|
*/ |
308 |
|
|
static const char * |
309 |
root |
1.4 |
re_get_token (selection * sel, const char *regexp) |
310 |
|
|
{ |
311 |
elmex |
1.1 |
|
312 |
|
|
#ifdef SAFE_CHECKS |
313 |
root |
1.4 |
# define exit_if_null if (*regexp == 0) return NULL |
314 |
elmex |
1.1 |
#else |
315 |
root |
1.4 |
# define exit_if_null |
316 |
elmex |
1.1 |
#endif |
317 |
|
|
|
318 |
root |
1.4 |
bool quoted = false; |
319 |
|
|
unsigned char looking_at; |
320 |
elmex |
1.1 |
|
321 |
|
|
#ifdef SAFE_CHECKS |
322 |
root |
1.4 |
if (sel == NULL || regexp == NULL || *regexp == 0) |
323 |
|
|
return NULL; |
324 |
elmex |
1.1 |
#endif |
325 |
|
|
|
326 |
root |
1.4 |
do |
327 |
|
|
{ |
328 |
|
|
looking_at = *regexp++; |
329 |
|
|
switch (looking_at) |
330 |
|
|
{ |
331 |
root |
1.2 |
case '$': |
332 |
root |
1.4 |
if (quoted) |
333 |
|
|
{ |
334 |
|
|
quoted = false; |
335 |
|
|
sel->type = sel_single; |
336 |
|
|
sel->u.single = looking_at; |
337 |
|
|
} |
338 |
|
|
else |
339 |
|
|
{ |
340 |
|
|
sel->type = sel_end; |
341 |
root |
1.2 |
} |
342 |
root |
1.4 |
break; |
343 |
root |
1.2 |
case '.': |
344 |
root |
1.4 |
if (quoted) |
345 |
|
|
{ |
346 |
|
|
quoted = false; |
347 |
|
|
sel->type = sel_single; |
348 |
|
|
sel->u.single = looking_at; |
349 |
root |
1.2 |
} |
350 |
root |
1.4 |
else |
351 |
|
|
{ |
352 |
|
|
sel->type = sel_any; |
353 |
|
|
} |
354 |
|
|
break; |
355 |
root |
1.2 |
case '[': |
356 |
root |
1.4 |
/* The fun stuff... perhaps a little obfuscated since I |
357 |
|
|
* don't trust the compiler to analyse liveness. |
358 |
|
|
*/ |
359 |
|
|
if (quoted) |
360 |
|
|
{ |
361 |
|
|
quoted = false; |
362 |
|
|
sel->type = sel_single; |
363 |
|
|
sel->u.single = looking_at; |
364 |
|
|
} |
365 |
|
|
else |
366 |
|
|
{ |
367 |
|
|
bool neg = false; |
368 |
|
|
unsigned char first, last = 0; |
369 |
root |
1.2 |
|
370 |
root |
1.4 |
exit_if_null; |
371 |
|
|
looking_at = *regexp++; |
372 |
root |
1.2 |
|
373 |
root |
1.4 |
if (looking_at == '^') |
374 |
|
|
{ |
375 |
|
|
neg = true; |
376 |
|
|
exit_if_null; |
377 |
|
|
looking_at = *regexp++; |
378 |
|
|
} |
379 |
|
|
first = looking_at; |
380 |
|
|
exit_if_null; |
381 |
|
|
looking_at = *regexp++; |
382 |
|
|
if (looking_at == ']') |
383 |
|
|
{ |
384 |
|
|
/* On the form [q] or [^q] */ |
385 |
|
|
sel->type = neg ? sel_not_single : sel_single; |
386 |
|
|
sel->u.single = first; |
387 |
|
|
break; |
388 |
root |
1.2 |
} |
389 |
root |
1.4 |
else if (looking_at == '-') |
390 |
|
|
{ |
391 |
|
|
exit_if_null; |
392 |
|
|
last = *regexp++; |
393 |
|
|
if (last == ']') |
394 |
|
|
{ |
395 |
|
|
/* On the form [A-] or [^A-]. Checking for |
396 |
|
|
* [,-] and making it a range is probably not |
397 |
|
|
* worth it :-) |
398 |
|
|
*/ |
399 |
|
|
sel->type = sel_array; |
400 |
|
|
memset (sel->u.array, neg, sizeof (sel->u.array)); |
401 |
|
|
sel->u.array[first] = sel->u.array['-'] = !neg; |
402 |
|
|
break; |
403 |
|
|
} |
404 |
|
|
else |
405 |
|
|
{ |
406 |
|
|
exit_if_null; |
407 |
|
|
looking_at = *regexp++; |
408 |
|
|
if (looking_at == ']') |
409 |
|
|
{ |
410 |
|
|
/* On the form [A-G] or [^A-G]. Note that [G-A] |
411 |
|
|
* is a syntax error. Fair enough, I think. |
412 |
|
|
*/ |
413 |
elmex |
1.1 |
#ifdef SAFE_CHECK |
414 |
root |
1.4 |
if (first > last) |
415 |
|
|
return NULL; |
416 |
elmex |
1.1 |
#endif |
417 |
root |
1.4 |
sel->type = neg ? sel_not_range : sel_range; |
418 |
|
|
sel->u.range.low = first; |
419 |
|
|
sel->u.range.high = last; |
420 |
|
|
break; |
421 |
root |
1.2 |
} |
422 |
|
|
} |
423 |
|
|
} |
424 |
root |
1.4 |
{ |
425 |
|
|
/* The datastructure can only represent a RE this |
426 |
|
|
* complex with an array. |
427 |
|
|
*/ |
428 |
|
|
int i; |
429 |
|
|
unsigned char previous; |
430 |
|
|
|
431 |
|
|
sel->type = sel_array; |
432 |
|
|
memset (sel->u.array, neg, sizeof (sel->u.array)); |
433 |
|
|
if (last) |
434 |
|
|
{ |
435 |
|
|
/* It starts with a range */ |
436 |
elmex |
1.1 |
#ifdef SAFE_CHECK |
437 |
root |
1.4 |
if (first > last) |
438 |
|
|
return NULL; |
439 |
elmex |
1.1 |
#endif |
440 |
root |
1.4 |
for (i = first; i <= last; i++) |
441 |
|
|
{ |
442 |
|
|
sel->u.array[i] = !neg; |
443 |
|
|
} |
444 |
|
|
} |
445 |
|
|
else |
446 |
|
|
{ |
447 |
|
|
/* It begins with a "random" character */ |
448 |
|
|
sel->u.array[first] = !neg; |
449 |
|
|
} |
450 |
|
|
sel->u.array[looking_at] = !neg; |
451 |
root |
1.2 |
|
452 |
root |
1.4 |
exit_if_null; |
453 |
|
|
previous = looking_at; |
454 |
|
|
looking_at = *regexp++; |
455 |
root |
1.2 |
|
456 |
root |
1.4 |
/* Add more characters to the array until we reach |
457 |
|
|
* ]. Quoting doesn't and shouldn't work in here. |
458 |
|
|
* ("]" should be put first, and "-" last if they |
459 |
|
|
* are needed inside this construct.) |
460 |
|
|
* Look for ranges as we go along. |
461 |
|
|
*/ |
462 |
|
|
while (looking_at != ']') |
463 |
|
|
{ |
464 |
|
|
if (looking_at == '-') |
465 |
|
|
{ |
466 |
|
|
exit_if_null; |
467 |
|
|
looking_at = *regexp++; |
468 |
|
|
if (looking_at != ']') |
469 |
|
|
{ |
470 |
elmex |
1.1 |
#ifdef SAFE_CHECK |
471 |
root |
1.4 |
if (previous > looking_at) |
472 |
|
|
return NULL; |
473 |
elmex |
1.1 |
#endif |
474 |
root |
1.4 |
for (i = previous + 1; i < looking_at; i++) |
475 |
|
|
{ |
476 |
|
|
/* previous has already been set and |
477 |
|
|
* looking_at is set below. |
478 |
|
|
*/ |
479 |
|
|
sel->u.array[i] = !neg; |
480 |
|
|
} |
481 |
|
|
exit_if_null; |
482 |
|
|
} |
483 |
|
|
else |
484 |
|
|
{ |
485 |
|
|
sel->u.array['-'] = !neg; |
486 |
|
|
break; |
487 |
|
|
} |
488 |
|
|
} |
489 |
|
|
sel->u.array[looking_at] = !neg; |
490 |
|
|
previous = looking_at; |
491 |
|
|
exit_if_null; |
492 |
|
|
looking_at = *regexp++; |
493 |
|
|
} |
494 |
|
|
} |
495 |
root |
1.2 |
} |
496 |
root |
1.4 |
break; |
497 |
root |
1.2 |
case '\\': |
498 |
root |
1.4 |
if (quoted) |
499 |
|
|
{ |
500 |
|
|
quoted = false; |
501 |
|
|
sel->type = sel_single; |
502 |
|
|
sel->u.single = looking_at; |
503 |
|
|
} |
504 |
|
|
else |
505 |
|
|
{ |
506 |
|
|
quoted = true; |
507 |
root |
1.2 |
} |
508 |
root |
1.4 |
break; |
509 |
root |
1.2 |
default: |
510 |
root |
1.4 |
quoted = false; |
511 |
|
|
sel->type = sel_single; |
512 |
|
|
sel->u.single = looking_at; |
513 |
|
|
break; |
514 |
root |
1.2 |
} |
515 |
root |
1.4 |
} |
516 |
|
|
while (quoted); |
517 |
elmex |
1.1 |
|
518 |
root |
1.4 |
if (*regexp == '*') |
519 |
|
|
{ |
520 |
|
|
sel->repeat = rep_null_or_more; |
521 |
|
|
++regexp; |
522 |
|
|
} |
523 |
|
|
else if (*regexp == '?') |
524 |
|
|
{ |
525 |
|
|
sel->repeat = rep_null_or_once; |
526 |
|
|
++regexp; |
527 |
|
|
} |
528 |
|
|
else if (*regexp == '+') |
529 |
|
|
{ |
530 |
|
|
sel->repeat = rep_once_or_more; |
531 |
|
|
++regexp; |
532 |
|
|
} |
533 |
|
|
else |
534 |
|
|
{ |
535 |
|
|
sel->repeat = rep_once; |
536 |
elmex |
1.1 |
} |
537 |
|
|
|
538 |
root |
1.4 |
return regexp; |
539 |
elmex |
1.1 |
} |
540 |
|
|
|
541 |
|
|
/* D e b u g c o d e |
542 |
|
|
*/ |
543 |
root |
1.4 |
#ifdef DEBUG2 /* compile all with DEBUG also ? hevi@lut.fi */ |
544 |
elmex |
1.1 |
static void |
545 |
root |
1.4 |
re_dump_sel (selection * sel) |
546 |
|
|
{ |
547 |
|
|
switch (sel->type) |
548 |
|
|
{ |
549 |
root |
1.2 |
case sel_any: |
550 |
root |
1.4 |
printf ("."); |
551 |
|
|
break; |
552 |
root |
1.2 |
case sel_end: |
553 |
root |
1.4 |
printf ("$"); |
554 |
|
|
break; |
555 |
root |
1.2 |
case sel_single: |
556 |
root |
1.4 |
printf ("<%c>", sel->u.single); |
557 |
|
|
break; |
558 |
root |
1.2 |
case sel_range: |
559 |
root |
1.4 |
printf ("[%c-%c]", sel->u.range.low, sel->u.range.high); |
560 |
|
|
break; |
561 |
root |
1.2 |
case sel_array: |
562 |
root |
1.4 |
{ |
563 |
|
|
int i; |
564 |
|
|
|
565 |
|
|
printf ("["); |
566 |
|
|
for (i = 0; i < uchar_MAX; i++) |
567 |
|
|
{ |
568 |
|
|
if (sel->u.array[i]) |
569 |
|
|
{ |
570 |
|
|
printf ("%c", i); |
571 |
|
|
} |
572 |
|
|
} |
573 |
|
|
printf ("]"); |
574 |
|
|
} |
575 |
|
|
break; |
576 |
root |
1.2 |
case sel_not_single: |
577 |
root |
1.4 |
printf ("[^%c]", sel->u.single); |
578 |
|
|
break; |
579 |
root |
1.2 |
case sel_not_range: |
580 |
root |
1.4 |
printf ("[^%c-%c]", sel->u.range.low, sel->u.range.high); |
581 |
|
|
break; |
582 |
root |
1.2 |
default: |
583 |
root |
1.4 |
printf ("<UNKNOWN TOKEN!>"); |
584 |
|
|
break; |
585 |
elmex |
1.1 |
} |
586 |
root |
1.4 |
switch (sel->repeat) |
587 |
|
|
{ |
588 |
root |
1.2 |
case rep_once: |
589 |
root |
1.4 |
break; |
590 |
root |
1.2 |
case rep_null_or_once: |
591 |
root |
1.4 |
printf ("?"); |
592 |
|
|
break; |
593 |
root |
1.2 |
case rep_null_or_more: |
594 |
root |
1.4 |
printf ("*"); |
595 |
|
|
break; |
596 |
root |
1.2 |
case rep_once_or_more: |
597 |
root |
1.4 |
printf ("+"); |
598 |
|
|
break; |
599 |
root |
1.2 |
default: |
600 |
root |
1.4 |
printf ("<UNKNOWN REP-TOKEN!>"); |
601 |
|
|
break; |
602 |
elmex |
1.1 |
} |
603 |
|
|
} |
604 |
|
|
|
605 |
|
|
int |
606 |
root |
1.4 |
main (int argc, char *argv[]) |
607 |
|
|
{ |
608 |
|
|
char *re, *m; |
609 |
|
|
selection sel; |
610 |
|
|
|
611 |
|
|
re = re_get_token (&sel, argv[1]); |
612 |
|
|
|
613 |
|
|
printf ("'%s' -> '%s'\n", argv[1], re); |
614 |
|
|
re_dump_sel (&sel); |
615 |
|
|
printf ("\n"); |
616 |
|
|
m = re_cmp (argv[2], argv[1]); |
617 |
|
|
if (m) |
618 |
|
|
printf ("MATCH! -> '%s'\n", m); |
619 |
|
|
return 0; |
620 |
elmex |
1.1 |
} |
621 |
|
|
#endif |