1 | /* |
1 | /* |
2 | * static char *rcsid_player_c = |
2 | * static char *rcsid_player_c = |
3 | * "$Id: re-cmp.C,v 1.2 2006/08/29 08:01:35 root Exp $"; |
3 | * "$Id: re-cmp.C,v 1.3 2006/09/08 04:51:08 pippijn Exp $"; |
4 | */ |
4 | */ |
5 | |
5 | |
6 | |
6 | |
7 | /* re-cmp.c |
7 | /* re-cmp.c |
8 | * Pattern match a string, parsing some of the common RE-metacharacters. |
8 | * Pattern match a string, parsing some of the common RE-metacharacters. |
… | |
… | |
35 | |
35 | |
36 | |
36 | |
37 | /* P r o t o t y p e s |
37 | /* P r o t o t y p e s |
38 | */ |
38 | */ |
39 | const char *re_cmp(const char *, const char *); |
39 | const char *re_cmp(const char *, const char *); |
40 | static Boolean re_cmp_step(const char *, const char *, unsigned, int); |
40 | static bool re_cmp_step(const char *, const char *, unsigned, int); |
41 | static void re_init(void); |
41 | static void re_init(void); |
42 | static Boolean re_match_token(uchar, selection *); |
42 | static bool re_match_token(unsigned char, selection *); |
43 | static const char *re_get_token(selection *, const char *); |
43 | static const char *re_get_token(selection *, const char *); |
44 | #ifdef DEBUG2 |
44 | #ifdef DEBUG2 |
45 | static void re_dump_sel(selection *); |
45 | static void re_dump_sel(selection *); |
46 | #endif |
46 | #endif |
47 | |
47 | |
48 | /* G l o b a l v a r i a b l e s |
48 | /* G l o b a l v a r i a b l e s |
49 | */ |
49 | */ |
50 | static Boolean re_init_done = False; |
50 | static bool re_init_done = false; |
51 | static selection *re_token[RE_TOKEN_MAX]; |
51 | static selection *re_token[RE_TOKEN_MAX]; |
52 | static const char *re_substr[RE_TOKEN_MAX]; |
52 | static const char *re_substr[RE_TOKEN_MAX]; |
53 | static unsigned int re_token_depth; |
53 | static unsigned int re_token_depth; |
54 | |
54 | |
55 | /* E x t e r n a l f u n c t i o n |
55 | /* E x t e r n a l f u n c t i o n |
… | |
… | |
60 | * pointer to beginning of matching string |
60 | * pointer to beginning of matching string |
61 | */ |
61 | */ |
62 | const char * |
62 | const char * |
63 | re_cmp(const char *str, const char *regexp) { |
63 | re_cmp(const char *str, const char *regexp) { |
64 | const char *next_regexp; |
64 | const char *next_regexp; |
65 | Boolean once = False; |
65 | bool once = false; |
66 | Boolean matched; |
66 | bool matched; |
67 | |
67 | |
68 | if (re_init_done == False) |
68 | if (re_init_done == false) |
69 | re_init(); |
69 | re_init(); |
70 | |
70 | |
71 | #ifdef SAFE_CHECKS |
71 | #ifdef SAFE_CHECKS |
72 | if (regexp == NULL || str == NULL) |
72 | if (regexp == NULL || str == NULL) |
73 | return NULL; |
73 | return NULL; |
74 | #endif |
74 | #endif |
75 | if (*regexp == '^') { |
75 | if (*regexp == '^') { |
76 | once = True; |
76 | once = true; |
77 | ++regexp; |
77 | ++regexp; |
78 | } |
78 | } |
79 | if (*regexp == 0) { |
79 | if (*regexp == 0) { |
80 | /* // or /^/ matches any string */ |
80 | /* // or /^/ matches any string */ |
81 | return str; |
81 | return str; |
… | |
… | |
83 | |
83 | |
84 | next_regexp = re_get_token(re_token[0], regexp); |
84 | next_regexp = re_get_token(re_token[0], regexp); |
85 | re_token_depth = 0; |
85 | re_token_depth = 0; |
86 | re_substr[0] = next_regexp; |
86 | re_substr[0] = next_regexp; |
87 | |
87 | |
88 | matched = False; |
88 | matched = false; |
89 | while (*str != '\0' && !(matched = re_match_token(*str, re_token[0]))) |
89 | while (*str != '\0' && !(matched = re_match_token(*str, re_token[0]))) |
90 | str++; |
90 | str++; |
91 | |
91 | |
92 | if (matched && *next_regexp == 0) |
92 | if (matched && *next_regexp == 0) |
93 | return str; |
93 | return str; |
… | |
… | |
96 | * speeds things up. |
96 | * speeds things up. |
97 | */ |
97 | */ |
98 | if (once) { |
98 | if (once) { |
99 | switch (re_token[0]->repeat) { |
99 | switch (re_token[0]->repeat) { |
100 | case rep_once: |
100 | case rep_once: |
101 | if (matched == False) |
101 | if (matched == false) |
102 | return NULL; |
102 | return NULL; |
103 | break; |
103 | break; |
104 | case rep_once_or_more: |
104 | case rep_once_or_more: |
105 | if (matched == False) |
105 | if (matched == false) |
106 | return NULL; |
106 | return NULL; |
107 | |
107 | |
108 | if (re_cmp_step(str+1, regexp, 0, 1)) |
108 | if (re_cmp_step(str+1, regexp, 0, 1)) |
109 | return str; |
109 | return str; |
110 | break; |
110 | break; |
111 | case rep_null_or_once: |
111 | case rep_null_or_once: |
112 | if (matched == False) |
112 | if (matched == false) |
113 | return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; |
113 | return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; |
114 | break; |
114 | break; |
115 | case rep_null_or_more: |
115 | case rep_null_or_more: |
116 | if (matched) { |
116 | if (matched) { |
117 | if (re_cmp_step(str+1, regexp, 0, 1)) |
117 | if (re_cmp_step(str+1, regexp, 0, 1)) |
… | |
… | |
159 | } |
159 | } |
160 | |
160 | |
161 | /* A u x i l i a r y f u n c t i o n s |
161 | /* A u x i l i a r y f u n c t i o n s |
162 | */ |
162 | */ |
163 | |
163 | |
/* re_cmp_step - recursive matcher for one token of the pattern.
 *
 * Each row below appears twice: the old revision (Boolean/True/False)
 * followed by the new revision (bool/true/false).
 *
 * Returns true as soon as <regexp> is exhausted.  Otherwise the token for
 * <slot> is taken from re_token[slot] (tokenised with re_get_token() and the
 * remainder cached in re_substr[slot], or the cached remainder reused), and
 * the function recurses according to that token's repeat kind.
 *
 * NOTE(review): the condition selecting "tokenise" vs. "reuse cache" and the
 * assignment of <matched> are in elided lines and are not visible here.
 * NOTE(review): <slot> indexes re_token[]/re_substr[] (both RE_TOKEN_MAX
 * long) and is passed on as slot+1; no bound check is visible in this view -
 * confirm the elided lines contain one.
 */
164 | static Boolean |
164 | static bool |
165 | re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) { |
165 | re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) { |
166 | /* str - string to match |
166 | /* str - string to match |
167 | * regexp - pattern |
167 | * regexp - pattern |
168 | * slot - number of the token which is under consideration |
168 | * slot - number of the token which is under consideration |
169 | * matches - how many times the token has matched |
169 | * matches - how many times the token has matched |
170 | */ |
170 | */ |
171 | const char *next_regexp; |
171 | const char *next_regexp; |
172 | Boolean matched; |
172 | bool matched; |
173 | |
173 | |
174 | #ifdef DEBUG |
174 | #ifdef DEBUG |
175 | /* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/ |
175 | /* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/ |
176 | #endif |
176 | #endif |
177 | |
177 | |
178 | if (*regexp == 0) { |
178 | if (*regexp == 0) { |
179 | /* When we reach the end of the regexp, the match is a success |
179 | /* When we reach the end of the regexp, the match is a success |
180 | */ |
180 | */ |
181 | return True; |
181 | return true; |
182 | } |
182 | } |
183 | |
183 | |
184 | /* This chunk of code makes sure that the regexp-tokenising happens |
184 | /* This chunk of code makes sure that the regexp-tokenising happens |
185 | * only once. We only tokenise as much as we need. |
185 | * only once. We only tokenise as much as we need. |
186 | */ |
186 | */ |
… | |
… | |
/* Slots are allocated on first use and kept in the file-scope table.
 * NOTE(review): the malloc() result is handed to re_get_token() without a
 * visible NULL check.
 */
189 | if (re_token[slot] == NULL) |
189 | if (re_token[slot] == NULL) |
190 | re_token[slot] = (selection *) malloc(sizeof(selection)); |
190 | re_token[slot] = (selection *) malloc(sizeof(selection)); |
191 | next_regexp = re_get_token(re_token[slot], regexp); |
191 | next_regexp = re_get_token(re_token[slot], regexp); |
192 | if (next_regexp == NULL) { |
192 | if (next_regexp == NULL) { |
193 | /* Syntax error, what else can we do? */ |
193 | /* Syntax error, what else can we do? */ |
194 | return False; |
194 | return false; |
195 | } |
195 | } |
196 | re_substr[slot] = next_regexp; |
196 | re_substr[slot] = next_regexp; |
197 | } else { |
197 | } else { |
198 | next_regexp = re_substr[slot]; |
198 | next_regexp = re_substr[slot]; |
199 | } |
199 | } |
… | |
… | |
/* End of input: succeed only if this token matched and it is either the
 * last token of the pattern or an end-of-string ('$') token.
 */
205 | if (*str == 0) |
205 | if (*str == 0) |
206 | return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; |
206 | return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; |
207 | |
207 | |
/* Dispatch on the repeat kind.  Recursing with (regexp, slot) retries the
 * same token; recursing with (next_regexp, slot+1, 0) moves to the next one.
 */
208 | switch (re_token[slot]->repeat) { |
208 | switch (re_token[slot]->repeat) { |
209 | case rep_once: |
209 | case rep_once: |
210 | if (matches == 1) { /* (matches == 1) => (matched == True) */ |
210 | if (matches == 1) { /* (matches == 1) => (matched == true) */ |
211 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
211 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
212 | } |
212 | } |
213 | return False; |
213 | return false; |
214 | case rep_once_or_more: |
214 | case rep_once_or_more: |
215 | if (matched) { /* (matched == True) => (matches >= 1) */ |
215 | if (matched) { /* (matched == true) => (matches >= 1) */ |
216 | /* First check if the current token repeats more */ |
216 | /* First check if the current token repeats more */ |
217 | if (re_cmp_step(str+1, regexp, slot, matches)) |
217 | if (re_cmp_step(str+1, regexp, slot, matches)) |
218 | return True; |
218 | return true; |
219 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
219 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
220 | } |
220 | } |
221 | return False; |
221 | return false; |
222 | case rep_null_or_once: |
222 | case rep_null_or_once: |
223 | /* We must go on to the next token, but should we advance str? */ |
223 | /* We must go on to the next token, but should we advance str? */ |
224 | if (matches == 0) { |
224 | if (matches == 0) { |
225 | return re_cmp_step(str, next_regexp, slot+1, 0); |
225 | return re_cmp_step(str, next_regexp, slot+1, 0); |
226 | } else if (matches == 1) { |
226 | } else if (matches == 1) { |
227 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
227 | return re_cmp_step(str+1, next_regexp, slot+1, 0); |
228 | } |
228 | } |
229 | return False; /* Not reached */ |
229 | return false; /* Not reached */ |
230 | case rep_null_or_more: |
230 | case rep_null_or_more: |
231 | if (matched) { |
231 | if (matched) { |
232 | /* Look for further repeats, advance str */ |
232 | /* Look for further repeats, advance str */ |
233 | if (re_cmp_step(str+1, regexp, slot, matches)) |
233 | if (re_cmp_step(str+1, regexp, slot, matches)) |
234 | return True; |
234 | return true; |
235 | return re_cmp_step(str, next_regexp, slot+1, 0); |
235 | return re_cmp_step(str, next_regexp, slot+1, 0); |
236 | } |
236 | } |
237 | return re_cmp_step(str, next_regexp, slot+1, 0); |
237 | return re_cmp_step(str, next_regexp, slot+1, 0); |
238 | } |
238 | } |
239 | return False; |
239 | return false; |
240 | } |
240 | } |
241 | |
241 | |
/* re_init - one-time set-up of the file-scope token table.
 *
 * Each row below appears twice: the old revision (True) followed by the new
 * revision (true).
 *
 * Allocates the selection for slot 0, clears slots 1..RE_TOKEN_MAX-1 to
 * NULL, and sets re_init_done so that re_cmp() does not call this again.
 *
 * NOTE(review): the malloc() result is not checked; re_cmp() passes
 * re_token[0] to re_get_token() right after calling this function.
 */
242 | static void |
242 | static void |
243 | re_init(void) { |
243 | re_init(void) { |
244 | int i; |
244 | int i; |
245 | |
245 | |
246 | re_token[0] = (selection *) malloc(sizeof(selection)); |
246 | re_token[0] = (selection *) malloc(sizeof(selection)); |
247 | for (i = 1; i < RE_TOKEN_MAX; i++) |
247 | for (i = 1; i < RE_TOKEN_MAX; i++) |
248 | re_token[i] = NULL; |
248 | re_token[i] = NULL; |
249 | |
249 | |
250 | re_init_done = True; |
250 | re_init_done = true; |
251 | } |
251 | } |
252 | |
252 | |
/* re_match_token - test a single character against a single token.
 *
 * Each row below appears twice: the old revision (Boolean/uchar) followed by
 * the new revision (bool/unsigned char).
 *
 * c   - character to test
 * sel - token to test it against; the result depends on sel->type
 *
 * Returns true if <c> satisfies the token.  Falls through to false for any
 * type that does not return from the switch.
 */
253 | static Boolean |
253 | static bool |
254 | re_match_token(uchar c, selection *sel) { |
254 | re_match_token(unsigned char c, selection *sel) { |
255 | switch (sel->type) { |
255 | switch (sel->type) { |
256 | case sel_any: |
256 | case sel_any: |
257 | return True; |
257 | return true; |
/* The end-of-string token matches only the terminating NUL. */
258 | case sel_end: |
258 | case sel_end: |
259 | return (c == 0); |
259 | return (c == 0); |
/* Single-character tokens compare case-insensitively via tolower(). */
260 | case sel_single: |
260 | case sel_single: |
261 | return (tolower(c) == tolower(sel->u.single)); |
261 | return (tolower(c) == tolower(sel->u.single)); |
262 | case sel_range: |
262 | case sel_range: |
… | |
… | |
266 | case sel_not_single: |
266 | case sel_not_single: |
267 | return (tolower(c) != tolower(sel->u.single)); |
267 | return (tolower(c) != tolower(sel->u.single)); |
/* NOTE(review): `c < low && c > high` can only be true when low > high, so
 * for a well-formed range this never matches.  A negated range presumably
 * wants `c < low || c > high` - confirm against the elided sel_range case
 * and fix in both revisions.
 */
268 | case sel_not_range: |
268 | case sel_not_range: |
269 | return (c < sel->u.range.low && c > sel->u.range.high); |
269 | return (c < sel->u.range.low && c > sel->u.range.high); |
270 | } |
270 | } |
271 | return False; |
271 | return false; |
272 | } |
272 | } |
273 | |
273 | |
274 | /* re_get_token - get regular expression token |
274 | /* re_get_token - get regular expression token |
275 | * Returns the first token found in <regexp> in <sel> |
275 | * Returns the first token found in <regexp> in <sel> |
276 | * Return values: NULL syntax error |
276 | * Return values: NULL syntax error |
… | |
… | |
283 | # define exit_if_null if (*regexp == 0) return NULL |
283 | # define exit_if_null if (*regexp == 0) return NULL |
284 | #else |
284 | #else |
285 | # define exit_if_null |
285 | # define exit_if_null |
286 | #endif |
286 | #endif |
287 | |
287 | |
288 | Boolean quoted = False; |
288 | bool quoted = false; |
289 | uchar looking_at; |
289 | unsigned char looking_at; |
290 | |
290 | |
291 | #ifdef SAFE_CHECKS |
291 | #ifdef SAFE_CHECKS |
292 | if (sel == NULL || regexp == NULL || *regexp == 0) |
292 | if (sel == NULL || regexp == NULL || *regexp == 0) |
293 | return NULL; |
293 | return NULL; |
294 | #endif |
294 | #endif |
… | |
… | |
296 | do { |
296 | do { |
297 | looking_at = *regexp++; |
297 | looking_at = *regexp++; |
298 | switch (looking_at) { |
298 | switch (looking_at) { |
299 | case '$': |
299 | case '$': |
300 | if (quoted) { |
300 | if (quoted) { |
301 | quoted = False; |
301 | quoted = false; |
302 | sel->type = sel_single; |
302 | sel->type = sel_single; |
303 | sel->u.single = looking_at; |
303 | sel->u.single = looking_at; |
304 | } else { |
304 | } else { |
305 | sel->type = sel_end; |
305 | sel->type = sel_end; |
306 | } |
306 | } |
307 | break; |
307 | break; |
308 | case '.': |
308 | case '.': |
309 | if (quoted) { |
309 | if (quoted) { |
310 | quoted = False; |
310 | quoted = false; |
311 | sel->type = sel_single; |
311 | sel->type = sel_single; |
312 | sel->u.single = looking_at; |
312 | sel->u.single = looking_at; |
313 | } else { |
313 | } else { |
314 | sel->type = sel_any; |
314 | sel->type = sel_any; |
315 | } |
315 | } |
… | |
… | |
317 | case '[': |
317 | case '[': |
318 | /* The fun stuff... perhaps a little obfuscated since I |
318 | /* The fun stuff... perhaps a little obfuscated since I |
319 | * don't trust the compiler to analyse liveness. |
319 | * don't trust the compiler to analyse liveness. |
320 | */ |
320 | */ |
321 | if (quoted) { |
321 | if (quoted) { |
322 | quoted = False; |
322 | quoted = false; |
323 | sel->type = sel_single; |
323 | sel->type = sel_single; |
324 | sel->u.single = looking_at; |
324 | sel->u.single = looking_at; |
325 | } else { |
325 | } else { |
326 | Boolean neg = False; |
326 | bool neg = false; |
327 | uchar first, last = 0; |
327 | unsigned char first, last = 0; |
328 | |
328 | |
329 | exit_if_null; |
329 | exit_if_null; |
330 | looking_at = *regexp++; |
330 | looking_at = *regexp++; |
331 | |
331 | |
332 | if (looking_at == '^') { |
332 | if (looking_at == '^') { |
333 | neg = True; |
333 | neg = true; |
334 | exit_if_null; |
334 | exit_if_null; |
335 | looking_at = *regexp++; |
335 | looking_at = *regexp++; |
336 | } |
336 | } |
337 | first = looking_at; |
337 | first = looking_at; |
338 | exit_if_null; |
338 | exit_if_null; |
… | |
… | |
375 | { |
375 | { |
376 | /* The datastructure can only represent a RE this |
376 | /* The datastructure can only represent a RE this |
377 | * complex with an array. |
377 | * complex with an array. |
378 | */ |
378 | */ |
379 | int i; |
379 | int i; |
380 | uchar previous; |
380 | unsigned char previous; |
381 | |
381 | |
382 | sel->type = sel_array; |
382 | sel->type = sel_array; |
383 | memset(sel->u.array, neg, sizeof(sel->u.array)); |
383 | memset(sel->u.array, neg, sizeof(sel->u.array)); |
384 | if (last) { |
384 | if (last) { |
385 | /* It starts with a range */ |
385 | /* It starts with a range */ |
… | |
… | |
435 | } |
435 | } |
436 | } |
436 | } |
437 | break; |
437 | break; |
438 | case '\\': |
438 | case '\\': |
439 | if (quoted) { |
439 | if (quoted) { |
440 | quoted = False; |
440 | quoted = false; |
441 | sel->type = sel_single; |
441 | sel->type = sel_single; |
442 | sel->u.single = looking_at; |
442 | sel->u.single = looking_at; |
443 | } else { |
443 | } else { |
444 | quoted = True; |
444 | quoted = true; |
445 | } |
445 | } |
446 | break; |
446 | break; |
447 | default: |
447 | default: |
448 | quoted = False; |
448 | quoted = false; |
449 | sel->type = sel_single; |
449 | sel->type = sel_single; |
450 | sel->u.single = looking_at; |
450 | sel->u.single = looking_at; |
451 | break; |
451 | break; |
452 | } |
452 | } |
453 | } while (quoted); |
453 | } while (quoted); |
… | |
… | |
488 | break; |
488 | break; |
489 | case sel_array: |
489 | case sel_array: |
490 | { |
490 | { |
491 | int i; |
491 | int i; |
492 | printf("["); |
492 | printf("["); |
493 | for (i = 0; i < UCHAR_MAX; i++) { |
493 | for (i = 0; i < uchar_MAX; i++) { |
494 | if (sel->u.array[i]) { |
494 | if (sel->u.array[i]) { |
495 | printf("%c", i); |
495 | printf("%c", i); |
496 | } |
496 | } |
497 | } |
497 | } |
498 | printf("]"); |
498 | printf("]"); |