ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/deliantra/server/common/re-cmp.C
(Generate patch)

Comparing deliantra/server/common/re-cmp.C (file contents):
Revision 1.1 by elmex, Sun Aug 13 17:16:00 2006 UTC vs.
Revision 1.5 by root, Thu Sep 14 22:34:00 2006 UTC

1/*
2 * static char *rcsid_player_c =
3 * "$Id: re-cmp.C,v 1.1 2006/08/13 17:16:00 elmex Exp $";
4 */
5
6
7/* re-cmp.c 1/* re-cmp.c
8 * Pattern match a string, parsing some of the common RE-metacharacters. 2 * Pattern match a string, parsing some of the common RE-metacharacters.
9 * 3 *
10 * This code is public domain, but I would appreciate to hear of 4 * This code is public domain, but I would appreciate to hear of
11 * improvements or even the fact that you use it in your program. 5 * improvements or even the fact that you use it in your program.
26#include <re-cmp.h> 20#include <re-cmp.h>
27#include <ctype.h> 21#include <ctype.h>
28 22
29/* Get prototype functions to prevent warnings. */ 23/* Get prototype functions to prevent warnings. */
30#if defined (__sun__) && defined(StupidSunHeaders) 24#if defined (__sun__) && defined(StupidSunHeaders)
31# include <sys/types.h> 25# include <sys/types.h>
32# include <sys/time.h> 26# include <sys/time.h>
33# include "sunos.h" /* Prototypes for standard libraries, sunos lack those */ 27# include "sunos.h" /* Prototypes for standard libraries, sunos lack those */
34#endif 28#endif
35 29
36 30
37/* P r o t o t y p e s 31/* P r o t o t y p e s
38 */ 32 */
39const char *re_cmp(const char *, const char *); 33const char *re_cmp (const char *, const char *);
40static Boolean re_cmp_step(const char *, const char *, unsigned, int); 34static bool re_cmp_step (const char *, const char *, unsigned, int);
41static void re_init(void); 35static void re_init (void);
42static Boolean re_match_token(uchar, selection *); 36static bool re_match_token (unsigned char, selection *);
43static const char *re_get_token(selection *, const char *); 37static const char *re_get_token (selection *, const char *);
38
44#ifdef DEBUG2 39#ifdef DEBUG2
45static void re_dump_sel(selection *); 40static void re_dump_sel (selection *);
46#endif 41#endif
47 42
48/* G l o b a l v a r i a b l e s 43/* G l o b a l v a r i a b l e s
49 */ 44 */
50static Boolean re_init_done = False; 45static bool re_init_done = false;
51static selection *re_token[RE_TOKEN_MAX]; 46static selection *re_token[RE_TOKEN_MAX];
52static const char *re_substr[RE_TOKEN_MAX]; 47static const char *re_substr[RE_TOKEN_MAX];
53static unsigned int re_token_depth; 48static unsigned int re_token_depth;
54 49
55/* E x t e r n a l f u n c t i o n 50/* E x t e r n a l f u n c t i o n
56 */ 51 */
57 52
58/* re-cmp - get regular expression match. 53/* re-cmp - get regular expression match.
59 * Return values: NULL - no match or error in regexp. 54 * Return values: NULL - no match or error in regexp.
60 * pointer to beginning of matching string 55 * pointer to beginning of matching string
61 */ 56 */
62const char * 57const char *
63re_cmp(const char *str, const char *regexp) { 58re_cmp (const char *str, const char *regexp)
59{
64 const char *next_regexp; 60 const char *next_regexp;
65 Boolean once = False; 61 bool once = false;
66 Boolean matched; 62 bool matched;
67 63
68 if (re_init_done == False) 64 if (re_init_done == false)
69 re_init(); 65 re_init ();
70 66
71#ifdef SAFE_CHECKS 67#ifdef SAFE_CHECKS
72 if (regexp == NULL || str == NULL) 68 if (regexp == NULL || str == NULL)
73 return NULL;
74#endif
75 if (*regexp == '^') {
76 once = True;
77 ++regexp;
78 }
79 if (*regexp == 0) {
80 /* // or /^/ matches any string */
81 return str;
82 }
83
84 next_regexp = re_get_token(re_token[0], regexp);
85 re_token_depth = 0;
86 re_substr[0] = next_regexp;
87
88 matched = False;
89 while (*str != '\0' && !(matched = re_match_token(*str, re_token[0])))
90 str++;
91
92 if (matched && *next_regexp == 0)
93 return str;
94
95 /* Apologies for the nearly duplicated code below, hopefully it
96 * speeds things up.
97 */
98 if (once) {
99 switch (re_token[0]->repeat) {
100 case rep_once:
101 if (matched == False)
102 return NULL;
103 break;
104 case rep_once_or_more:
105 if (matched == False)
106 return NULL;
107
108 if (re_cmp_step(str+1, regexp, 0, 1))
109 return str;
110 break;
111 case rep_null_or_once:
112 if (matched == False)
113 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
114 break;
115 case rep_null_or_more:
116 if (matched) {
117 if (re_cmp_step(str+1, regexp, 0, 1))
118 return str;
119 } else {
120 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
121 }
122 break;
123 }
124 return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL;
125 }
126
127 if (matched) {
128 switch (re_token[0]->repeat) {
129 case rep_once:
130 case rep_null_or_once:
131 break;
132 case rep_once_or_more:
133 case rep_null_or_more:
134 if (re_cmp_step(str+1, regexp, 0, 1))
135 return str;
136 break;
137 }
138 /* The logic here is that re_match_token only checks
139 * whether a single letter matches. Thus, if the
140 * regex is like '@match eureca' and the
141 * user enters anything with an e, re_match_token
142 * returns true, but they really need to match the
143 * entire regexp, which re_cmp_step will check.
144 * However, there can be a case
145 * where the string being matched is something like
146 * 'where is eureca'. In this case, re_match_token
147 * matches that first e, but the re_cmp_step below
148 * fails because the next character (r) doesn't match
149 * the u. So we call re_cmp again with the string
150 * starting just after that first e, so that a later
151 * occurrence can still match.
152 */
153 if (re_cmp_step(str+1, next_regexp, 1, 0))
154 return str;
155 else if (*(str+1) != 0)
156 return re_cmp(str+1, regexp);
157 }
158 return NULL; 69 return NULL;
70#endif
71 if (*regexp == '^')
72 {
73 once = true;
74 ++regexp;
75 }
76 if (*regexp == 0)
77 {
78 /* // or /^/ matches any string */
79 return str;
80 }
81
82 next_regexp = re_get_token (re_token[0], regexp);
83 re_token_depth = 0;
84 re_substr[0] = next_regexp;
85
86 matched = false;
87 while (*str != '\0' && !(matched = re_match_token (*str, re_token[0])))
88 str++;
89
90 if (matched && *next_regexp == 0)
91 return str;
92
93 /* Apologies for the nearly duplicated code below, hopefully it
94 * speeds things up.
95 */
96 if (once)
97 {
98 switch (re_token[0]->repeat)
99 {
100 case rep_once:
101 if (matched == false)
102 return NULL;
103 break;
104 case rep_once_or_more:
105 if (matched == false)
106 return NULL;
107
108 if (re_cmp_step (str + 1, regexp, 0, 1))
109 return str;
110 break;
111 case rep_null_or_once:
112 if (matched == false)
113 return re_cmp_step (str, next_regexp, 1, 0) ? str : NULL;
114 break;
115 case rep_null_or_more:
116 if (matched)
117 {
118 if (re_cmp_step (str + 1, regexp, 0, 1))
119 return str;
120 }
121 else
122 {
123 return re_cmp_step (str, next_regexp, 1, 0) ? str : NULL;
124 }
125 break;
126 }
127 return re_cmp_step (str + 1, next_regexp, 1, 0) ? str : NULL;
128 }
129
130 if (matched)
131 {
132 switch (re_token[0]->repeat)
133 {
134 case rep_once:
135 case rep_null_or_once:
136 break;
137 case rep_once_or_more:
138 case rep_null_or_more:
139 if (re_cmp_step (str + 1, regexp, 0, 1))
140 return str;
141 break;
142 }
143 /* The logic here is that re_match_token only checks
144 * whether a single letter matches. Thus, if the
145 * regex is like '@match eureca' and the
146 * user enters anything with an e, re_match_token
147 * returns true, but they really need to match the
148 * entire regexp, which re_cmp_step will check.
149 * However, there can be a case
150 * where the string being matched is something like
151 * 'where is eureca'. In this case, re_match_token
152 * matches that first e, but the re_cmp_step below
153 * fails because the next character (r) doesn't match
154 * the u. So we call re_cmp again with the string
155 * starting just after that first e, so that a later
156 * occurrence can still match.
157 */
158 if (re_cmp_step (str + 1, next_regexp, 1, 0))
159 return str;
160 else if (*(str + 1) != 0)
161 return re_cmp (str + 1, regexp);
162 }
163 return NULL;
159} 164}
160 165
161/* A u x i l i a r y f u n c t i o n s 166/* A u x i l i a r y f u n c t i o n s
162 */ 167 */
163 168
164static Boolean 169static bool
165re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) { 170re_cmp_step (const char *str, const char *regexp, unsigned slot, int matches)
171{
166 /* str - string to match 172 /* str - string to match
167 * regexp - pattern 173 * regexp - pattern
168 * slot - number of the token which is under consideration 174 * slot - number of the token which is under consideration
169 * matches - how many times the token has matched 175 * matches - how many times the token has matched
170 */ 176 */
171 const char *next_regexp; 177 const char *next_regexp;
172 Boolean matched; 178 bool matched;
173 179
174#ifdef DEBUG 180#ifdef DEBUG
181
175/* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/ 182/* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/
176#endif 183#endif
177 184
178 if (*regexp == 0) { 185 if (*regexp == 0)
186 {
179 /* When we reach the end of the regexp, the match is a success 187 /* When we reach the end of the regexp, the match is a success
180 */ 188 */
181 return True; 189 return true;
182 } 190 }
183 191
184 /* This chunk of code makes sure that the regexp-tokenising happens 192 /* This chunk of code makes sure that the regexp-tokenising happens
185 * only once. We only tokenise as much as we need. 193 * only once. We only tokenise as much as we need.
186 */ 194 */
187 if (slot > re_token_depth) { 195 if (slot > re_token_depth)
196 {
188 re_token_depth = slot; 197 re_token_depth = slot;
189 if (re_token[slot] == NULL) 198 if (re_token[slot] == NULL)
190 re_token[slot] = (selection *) malloc(sizeof(selection)); 199 re_token[slot] = (selection *) malloc (sizeof (selection));
191 next_regexp = re_get_token(re_token[slot], regexp); 200 next_regexp = re_get_token (re_token[slot], regexp);
192 if (next_regexp == NULL) { 201 if (next_regexp == NULL)
202 {
193 /* Syntax error, what else can we do? */ 203 /* Syntax error, what else can we do? */
194 return False; 204 return false;
195 } 205 }
196 re_substr[slot] = next_regexp; 206 re_substr[slot] = next_regexp;
197 } else { 207 }
208 else
209 {
198 next_regexp = re_substr[slot]; 210 next_regexp = re_substr[slot];
199 } 211 }
200 212
201 matched = re_match_token(*str, re_token[slot]); 213 matched = re_match_token (*str, re_token[slot]);
202 if (matched) 214 if (matched)
203 ++matches; 215 ++matches;
204 216
205 if (*str == 0) 217 if (*str == 0)
206 return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; 218 return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched;
207 219
208 switch (re_token[slot]->repeat) { 220 switch (re_token[slot]->repeat)
221 {
209 case rep_once: 222 case rep_once:
210 if (matches == 1) { /* (matches == 1) => (matched == True) */ 223 if (matches == 1)
224 { /* (matches == 1) => (matched == true) */
211 return re_cmp_step(str+1, next_regexp, slot+1, 0); 225 return re_cmp_step (str + 1, next_regexp, slot + 1, 0);
212 } 226 }
213 return False; 227 return false;
214 case rep_once_or_more: 228 case rep_once_or_more:
215 if (matched) { /* (matched == True) => (matches >= 1) */ 229 if (matched)
230 { /* (matched == true) => (matches >= 1) */
216 /* First check if the current token repeats more */ 231 /* First check if the current token repeats more */
217 if (re_cmp_step(str+1, regexp, slot, matches)) 232 if (re_cmp_step (str + 1, regexp, slot, matches))
218 return True; 233 return true;
219 return re_cmp_step(str+1, next_regexp, slot+1, 0); 234 return re_cmp_step (str + 1, next_regexp, slot + 1, 0);
220 } 235 }
221 return False; 236 return false;
222 case rep_null_or_once: 237 case rep_null_or_once:
223 /* We must go on to the next token, but should we advance str? */ 238 /* We must go on to the next token, but should we advance str? */
224 if (matches == 0) { 239 if (matches == 0)
240 {
225 return re_cmp_step(str, next_regexp, slot+1, 0); 241 return re_cmp_step (str, next_regexp, slot + 1, 0);
226 } else if (matches == 1) { 242 }
243 else if (matches == 1)
244 {
227 return re_cmp_step(str+1, next_regexp, slot+1, 0); 245 return re_cmp_step (str + 1, next_regexp, slot + 1, 0);
228 } 246 }
229 return False; /* Not reached */ 247 return false; /* Not reached */
230 case rep_null_or_more: 248 case rep_null_or_more:
231 if (matched) { 249 if (matched)
250 {
232 /* Look for further repeats, advance str */ 251 /* Look for further repeats, advance str */
233 if (re_cmp_step(str+1, regexp, slot, matches)) 252 if (re_cmp_step (str + 1, regexp, slot, matches))
234 return True; 253 return true;
235 return re_cmp_step(str, next_regexp, slot+1, 0); 254 return re_cmp_step (str, next_regexp, slot + 1, 0);
236 } 255 }
237 return re_cmp_step(str, next_regexp, slot+1, 0); 256 return re_cmp_step (str, next_regexp, slot + 1, 0);
238 } 257 }
239 return False; 258 return false;
240} 259}
241 260
242static void 261static void
243re_init(void) { 262re_init (void)
263{
244 int i; 264 int i;
245 265
246 re_token[0] = (selection *) malloc(sizeof(selection)); 266 re_token[0] = (selection *) malloc (sizeof (selection));
247 for (i = 1; i < RE_TOKEN_MAX; i++) 267 for (i = 1; i < RE_TOKEN_MAX; i++)
248 re_token[i] = NULL; 268 re_token[i] = NULL;
249 269
250 re_init_done = True; 270 re_init_done = true;
251} 271}
252 272
253static Boolean 273static bool
254re_match_token(uchar c, selection *sel) { 274re_match_token (unsigned char c, selection * sel)
275{
255 switch (sel->type) { 276 switch (sel->type)
256 case sel_any: 277 {
278 case sel_any:
257 return True; 279 return true;
258 case sel_end: 280 case sel_end:
259 return (c == 0); 281 return (c == 0);
260 case sel_single: 282 case sel_single:
261 return (tolower(c) == tolower(sel->u.single)); 283 return (tolower (c) == tolower (sel->u.single));
262 case sel_range: 284 case sel_range:
263 return (c >= sel->u.range.low && c <= sel->u.range.high); 285 return (c >= sel->u.range.low && c <= sel->u.range.high);
264 case sel_array: 286 case sel_array:
265 return (sel->u.array[c]); 287 return (sel->u.array[c]);
266 case sel_not_single: 288 case sel_not_single:
267 return (tolower(c) != tolower(sel->u.single)); 289 return (tolower (c) != tolower (sel->u.single));
268 case sel_not_range: 290 case sel_not_range:
269 return (c < sel->u.range.low && c > sel->u.range.high); 291 return (c < sel->u.range.low && c > sel->u.range.high);
270 } 292 }
271 return False; 293 return false;
272} 294}
273 295
274/* re_get_token - get regular expression token 296/* re_get_token - get regular expression token
275 * Returns the first token found in <regexp> in <sel> 297 * Returns the first token found in <regexp> in <sel>
276 * Return values: NULL syntax error 298 * Return values: NULL syntax error
277 * pointer to first character past token. 299 * pointer to first character past token.
278 */ 300 */
279static const char * 301static const char *
280re_get_token(selection *sel, const char *regexp) { 302re_get_token (selection * sel, const char *regexp)
303{
281 304
282#ifdef SAFE_CHECKS 305#ifdef SAFE_CHECKS
283# define exit_if_null if (*regexp == 0) return NULL 306# define exit_if_null if (*regexp == 0) return NULL
284#else 307#else
285# define exit_if_null 308# define exit_if_null
286#endif 309#endif
287 310
288 Boolean quoted = False; 311 bool quoted = false;
289 uchar looking_at; 312 unsigned char looking_at;
290 313
291#ifdef SAFE_CHECKS 314#ifdef SAFE_CHECKS
292 if (sel == NULL || regexp == NULL || *regexp == 0) 315 if (sel == NULL || regexp == NULL || *regexp == 0)
293 return NULL; 316 return NULL;
294#endif 317#endif
295 318
319 do
296 do { 320 {
297 looking_at = *regexp++; 321 looking_at = *regexp++;
298 switch (looking_at) { 322 switch (looking_at)
299 case '$': 323 {
324 case '$':
300 if (quoted) { 325 if (quoted)
301 quoted = False; 326 {
327 quoted = false;
302 sel->type = sel_single; 328 sel->type = sel_single;
303 sel->u.single = looking_at; 329 sel->u.single = looking_at;
304 } else { 330 }
331 else
332 {
305 sel->type = sel_end; 333 sel->type = sel_end;
306 } 334 }
307 break; 335 break;
308 case '.': 336 case '.':
309 if (quoted) { 337 if (quoted)
310 quoted = False; 338 {
339 quoted = false;
311 sel->type = sel_single; 340 sel->type = sel_single;
312 sel->u.single = looking_at; 341 sel->u.single = looking_at;
313 } else { 342 }
343 else
344 {
314 sel->type = sel_any; 345 sel->type = sel_any;
315 } 346 }
316 break; 347 break;
317 case '[': 348 case '[':
318 /* The fun stuff... perhaps a little obfuscated since I 349 /* The fun stuff... perhaps a little obfuscated since I
319 * don't trust the compiler to analyse liveness. 350 * don't trust the compiler to analyse liveness.
320 */ 351 */
321 if (quoted) { 352 if (quoted)
322 quoted = False; 353 {
354 quoted = false;
323 sel->type = sel_single; 355 sel->type = sel_single;
324 sel->u.single = looking_at; 356 sel->u.single = looking_at;
325 } else { 357 }
326 Boolean neg = False; 358 else
327 uchar first, last = 0; 359 {
360 bool neg = false;
361 unsigned char first, last = 0;
328 362
329 exit_if_null; 363 exit_if_null;
330 looking_at = *regexp++; 364 looking_at = *regexp++;
331 365
332 if (looking_at == '^') { 366 if (looking_at == '^')
333 neg = True; 367 {
334 exit_if_null; 368 neg = true;
335 looking_at = *regexp++; 369 exit_if_null;
336 } 370 looking_at = *regexp++;
337 first = looking_at; 371 }
338 exit_if_null; 372 first = looking_at;
373 exit_if_null;
339 looking_at = *regexp++; 374 looking_at = *regexp++;
340 if (looking_at == ']') { 375 if (looking_at == ']')
341 /* On the form [q] or [^q] */ 376 {
377 /* On the form [q] or [^q] */
342 sel->type = neg ? sel_not_single : sel_single; 378 sel->type = neg ? sel_not_single : sel_single;
343 sel->u.single = first; 379 sel->u.single = first;
344 break; 380 break;
345 } else if (looking_at == '-') { 381 }
346 exit_if_null; 382 else if (looking_at == '-')
347 last = *regexp++; 383 {
348 if (last == ']') { 384 exit_if_null;
385 last = *regexp++;
386 if (last == ']')
387 {
349 /* On the form [A-] or [^A-]. Checking for 388 /* On the form [A-] or [^A-]. Checking for
350 * [,-] and making it a range is probably not 389 * [,-] and making it a range is probably not
351 * worth it :-) 390 * worth it :-)
352 */ 391 */
353 sel->type = sel_array; 392 sel->type = sel_array;
354 memset(sel->u.array, neg, sizeof(sel->u.array)); 393 memset (sel->u.array, neg, sizeof (sel->u.array));
355 sel->u.array[first] = sel->u.array['-'] = !neg; 394 sel->u.array[first] = sel->u.array['-'] = !neg;
356 break; 395 break;
357 } else { 396 }
358 exit_if_null; 397 else
359 looking_at = *regexp++; 398 {
360 if (looking_at == ']') { 399 exit_if_null;
361 /* On the form [A-G] or [^A-G]. Note that [G-A] 400 looking_at = *regexp++;
362 * is a syntax error. Fair enough, I think. 401 if (looking_at == ']')
363 */ 402 {
403 /* On the form [A-G] or [^A-G]. Note that [G-A]
404 * is a syntax error. Fair enough, I think.
405 */
364#ifdef SAFE_CHECK 406#ifdef SAFE_CHECK
365 if (first > last) 407 if (first > last)
366 return NULL; 408 return NULL;
367#endif 409#endif
368 sel->type = neg ? sel_not_range : sel_range; 410 sel->type = neg ? sel_not_range : sel_range;
369 sel->u.range.low = first; 411 sel->u.range.low = first;
370 sel->u.range.high = last; 412 sel->u.range.high = last;
371 break; 413 break;
372 } 414 }
373 } 415 }
374 } 416 }
375 { 417 {
376 /* The datastructure can only represent a RE this 418 /* The datastructure can only represent a RE this
377 * complex with an array. 419 * complex with an array.
378 */ 420 */
379 int i; 421 int i;
380 uchar previous; 422 unsigned char previous;
381 423
382 sel->type = sel_array; 424 sel->type = sel_array;
383 memset(sel->u.array, neg, sizeof(sel->u.array)); 425 memset (sel->u.array, neg, sizeof (sel->u.array));
384 if (last) { 426 if (last)
385 /* It starts with a range */ 427 {
428 /* It starts with a range */
386#ifdef SAFE_CHECK 429#ifdef SAFE_CHECK
387 if (first > last) 430 if (first > last)
388 return NULL; 431 return NULL;
389#endif 432#endif
390 for (i = first; i <= last; i++) { 433 for (i = first; i <= last; i++)
391 sel->u.array[i] = !neg; 434 {
392 } 435 sel->u.array[i] = !neg;
393 } else { 436 }
437 }
438 else
439 {
394 /* It begins with a "random" character */ 440 /* It begins with a "random" character */
395 sel->u.array[first] = !neg; 441 sel->u.array[first] = !neg;
396 } 442 }
397 sel->u.array[looking_at] = !neg; 443 sel->u.array[looking_at] = !neg;
398 444
399 exit_if_null; 445 exit_if_null;
400 previous = looking_at; 446 previous = looking_at;
401 looking_at = *regexp++; 447 looking_at = *regexp++;
402 448
403 /* Add more characters to the array until we reach 449 /* Add more characters to the array until we reach
404 * ]. Quoting doesn't and shouldn't work in here. 450 * ]. Quoting doesn't and shouldn't work in here.
405 * ("]" should be put first, and "-" last if they 451 * ("]" should be put first, and "-" last if they
406 * are needed inside this construct.) 452 * are needed inside this construct.)
407 * Look for ranges as we go along. 453 * Look for ranges as we go along.
408 */ 454 */
409 while (looking_at != ']') { 455 while (looking_at != ']')
410 if (looking_at == '-') { 456 {
411 exit_if_null; 457 if (looking_at == '-')
412 looking_at = *regexp++; 458 {
413 if (looking_at != ']') { 459 exit_if_null;
460 looking_at = *regexp++;
461 if (looking_at != ']')
462 {
414#ifdef SAFE_CHECK 463#ifdef SAFE_CHECK
415 if (previous > looking_at) 464 if (previous > looking_at)
416 return NULL; 465 return NULL;
417#endif 466#endif
418 for (i = previous+1; i < looking_at; i++) { 467 for (i = previous + 1; i < looking_at; i++)
419 /* previous has already been set and 468 {
420 * looking_at is set below. 469 /* previous has already been set and
421 */ 470 * looking_at is set below.
422 sel->u.array[i] = !neg; 471 */
423 } 472 sel->u.array[i] = !neg;
424 exit_if_null; 473 }
425 } else { 474 exit_if_null;
426 sel->u.array['-'] = !neg; 475 }
427 break; 476 else
428 } 477 {
429 } 478 sel->u.array['-'] = !neg;
479 break;
480 }
481 }
430 sel->u.array[looking_at] = !neg; 482 sel->u.array[looking_at] = !neg;
431 previous = looking_at; 483 previous = looking_at;
432 exit_if_null; 484 exit_if_null;
433 looking_at = *regexp++; 485 looking_at = *regexp++;
434 } 486 }
435 } 487 }
436 } 488 }
437 break; 489 break;
438 case '\\': 490 case '\\':
439 if (quoted) { 491 if (quoted)
440 quoted = False; 492 {
493 quoted = false;
494 sel->type = sel_single;
495 sel->u.single = looking_at;
496 }
497 else
498 {
499 quoted = true;
500 }
501 break;
502 default:
503 quoted = false;
441 sel->type = sel_single; 504 sel->type = sel_single;
442 sel->u.single = looking_at; 505 sel->u.single = looking_at;
443 } else { 506 break;
444 quoted = True; 507 }
445 } 508 }
446 break;
447 default:
448 quoted = False;
449 sel->type = sel_single;
450 sel->u.single = looking_at;
451 break;
452 }
453 } while (quoted); 509 while (quoted);
454 510
455 if (*regexp == '*') { 511 if (*regexp == '*')
512 {
456 sel->repeat = rep_null_or_more; 513 sel->repeat = rep_null_or_more;
457 ++regexp; 514 ++regexp;
515 }
458 } else if (*regexp == '?') { 516 else if (*regexp == '?')
517 {
459 sel->repeat = rep_null_or_once; 518 sel->repeat = rep_null_or_once;
460 ++regexp; 519 ++regexp;
520 }
461 } else if (*regexp == '+') { 521 else if (*regexp == '+')
522 {
462 sel->repeat = rep_once_or_more; 523 sel->repeat = rep_once_or_more;
463 ++regexp; 524 ++regexp;
464 } else { 525 }
526 else
527 {
465 sel->repeat = rep_once; 528 sel->repeat = rep_once;
466 } 529 }
467 530
468 return regexp; 531 return regexp;
469} 532}
470 533
471/* D e b u g c o d e 534/* D e b u g c o d e
472 */ 535 */
473#ifdef DEBUG2 /* compile all with DEBUG also ? hevi@lut.fi */ 536#ifdef DEBUG2 /* compile all with DEBUG also ? hevi@lut.fi */
474static void 537static void
475re_dump_sel(selection *sel) { 538re_dump_sel (selection * sel)
539{
476 switch(sel->type) { 540 switch (sel->type)
477 case sel_any: 541 {
542 case sel_any:
478 printf("."); 543 printf (".");
479 break; 544 break;
480 case sel_end: 545 case sel_end:
481 printf("$"); 546 printf ("$");
482 break; 547 break;
483 case sel_single: 548 case sel_single:
484 printf("<%c>", sel->u.single); 549 printf ("<%c>", sel->u.single);
485 break; 550 break;
486 case sel_range: 551 case sel_range:
487 printf("[%c-%c]", sel->u.range.low, sel->u.range.high); 552 printf ("[%c-%c]", sel->u.range.low, sel->u.range.high);
488 break; 553 break;
489 case sel_array: 554 case sel_array:
490 { 555 {
491 int i; 556 int i;
492 printf("["); 557
493 for (i = 0; i < UCHAR_MAX; i++) { 558 printf ("[");
559 for (i = 0; i < uchar_MAX; i++)
560 {
494 if (sel->u.array[i]) { 561 if (sel->u.array[i])
495 printf("%c", i); 562 {
496 } 563 printf ("%c", i);
497 } 564 }
498 printf("]"); 565 }
499 } 566 printf ("]");
500 break; 567 }
568 break;
501 case sel_not_single: 569 case sel_not_single:
502 printf("[^%c]", sel->u.single); 570 printf ("[^%c]", sel->u.single);
503 break; 571 break;
504 case sel_not_range: 572 case sel_not_range:
505 printf("[^%c-%c]", sel->u.range.low, sel->u.range.high); 573 printf ("[^%c-%c]", sel->u.range.low, sel->u.range.high);
506 break; 574 break;
507 default: 575 default:
508 printf("<UNKNOWN TOKEN!>"); 576 printf ("<UNKNOWN TOKEN!>");
509 break; 577 break;
510 } 578 }
511 switch(sel->repeat) { 579 switch (sel->repeat)
580 {
512 case rep_once: 581 case rep_once:
513 break; 582 break;
514 case rep_null_or_once: 583 case rep_null_or_once:
515 printf("?"); 584 printf ("?");
516 break; 585 break;
517 case rep_null_or_more: 586 case rep_null_or_more:
518 printf("*"); 587 printf ("*");
519 break; 588 break;
520 case rep_once_or_more: 589 case rep_once_or_more:
521 printf("+"); 590 printf ("+");
522 break; 591 break;
523 default: 592 default:
524 printf("<UNKNOWN REP-TOKEN!>"); 593 printf ("<UNKNOWN REP-TOKEN!>");
525 break; 594 break;
526 } 595 }
527} 596}
528 597
529int 598int
530main(int argc, char *argv[]) { 599main (int argc, char *argv[])
600{
531 char *re, *m; 601 char *re, *m;
532 selection sel; 602 selection sel;
533 603
534 re = re_get_token(&sel, argv[1]); 604 re = re_get_token (&sel, argv[1]);
535 605
536 printf("'%s' -> '%s'\n", argv[1], re); 606 printf ("'%s' -> '%s'\n", argv[1], re);
537 re_dump_sel(&sel); 607 re_dump_sel (&sel);
538 printf("\n"); 608 printf ("\n");
539 m = re_cmp(argv[2], argv[1]); 609 m = re_cmp (argv[2], argv[1]);
540 if (m) 610 if (m)
541 printf("MATCH! -> '%s'\n", m); 611 printf ("MATCH! -> '%s'\n", m);
542 return 0; 612 return 0;
543} 613}
544#endif 614#endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines