ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/deliantra/server/common/re-cmp.C
(Generate patch)

Comparing deliantra/server/common/re-cmp.C (file contents):
Revision 1.2 by root, Tue Aug 29 08:01:35 2006 UTC vs.
Revision 1.3 by pippijn, Fri Sep 8 04:51:08 2006 UTC

1/* 1/*
2 * static char *rcsid_player_c = 2 * static char *rcsid_player_c =
3 * "$Id: re-cmp.C,v 1.2 2006/08/29 08:01:35 root Exp $"; 3 * "$Id: re-cmp.C,v 1.3 2006/09/08 04:51:08 pippijn Exp $";
4 */ 4 */
5 5
6 6
7/* re-cmp.c 7/* re-cmp.c
8 * Pattern match a string, parsing some of the common RE-metacharacters. 8 * Pattern match a string, parsing some of the common RE-metacharacters.
35 35
36 36
37/* P r o t o t y p e s 37/* P r o t o t y p e s
38 */ 38 */
39const char *re_cmp(const char *, const char *); 39const char *re_cmp(const char *, const char *);
40static Boolean re_cmp_step(const char *, const char *, unsigned, int); 40static bool re_cmp_step(const char *, const char *, unsigned, int);
41static void re_init(void); 41static void re_init(void);
42static Boolean re_match_token(uchar, selection *); 42static bool re_match_token(unsigned char, selection *);
43static const char *re_get_token(selection *, const char *); 43static const char *re_get_token(selection *, const char *);
44#ifdef DEBUG2 44#ifdef DEBUG2
45static void re_dump_sel(selection *); 45static void re_dump_sel(selection *);
46#endif 46#endif
47 47
48/* G l o b a l v a r i a b l e s 48/* G l o b a l v a r i a b l e s
49 */ 49 */
50static Boolean re_init_done = False; 50static bool re_init_done = false;
51static selection *re_token[RE_TOKEN_MAX]; 51static selection *re_token[RE_TOKEN_MAX];
52static const char *re_substr[RE_TOKEN_MAX]; 52static const char *re_substr[RE_TOKEN_MAX];
53static unsigned int re_token_depth; 53static unsigned int re_token_depth;
54 54
55/* E x t e r n a l f u n c t i o n 55/* E x t e r n a l f u n c t i o n
60 * pointer to beginning of matching string 60 * pointer to beginning of matching string
61 */ 61 */
62const char * 62const char *
63re_cmp(const char *str, const char *regexp) { 63re_cmp(const char *str, const char *regexp) {
64 const char *next_regexp; 64 const char *next_regexp;
65 Boolean once = False; 65 bool once = false;
66 Boolean matched; 66 bool matched;
67 67
68 if (re_init_done == False) 68 if (re_init_done == false)
69 re_init(); 69 re_init();
70 70
71#ifdef SAFE_CHECKS 71#ifdef SAFE_CHECKS
72 if (regexp == NULL || str == NULL) 72 if (regexp == NULL || str == NULL)
73 return NULL; 73 return NULL;
74#endif 74#endif
75 if (*regexp == '^') { 75 if (*regexp == '^') {
76 once = True; 76 once = true;
77 ++regexp; 77 ++regexp;
78 } 78 }
79 if (*regexp == 0) { 79 if (*regexp == 0) {
80 /* // or /^/ matches any string */ 80 /* // or /^/ matches any string */
81 return str; 81 return str;
83 83
84 next_regexp = re_get_token(re_token[0], regexp); 84 next_regexp = re_get_token(re_token[0], regexp);
85 re_token_depth = 0; 85 re_token_depth = 0;
86 re_substr[0] = next_regexp; 86 re_substr[0] = next_regexp;
87 87
88 matched = False; 88 matched = false;
89 while (*str != '\0' && !(matched = re_match_token(*str, re_token[0]))) 89 while (*str != '\0' && !(matched = re_match_token(*str, re_token[0])))
90 str++; 90 str++;
91 91
92 if (matched && *next_regexp == 0) 92 if (matched && *next_regexp == 0)
93 return str; 93 return str;
96 * speeds things up. 96 * speeds things up.
97 */ 97 */
98 if (once) { 98 if (once) {
99 switch (re_token[0]->repeat) { 99 switch (re_token[0]->repeat) {
100 case rep_once: 100 case rep_once:
101 if (matched == False) 101 if (matched == false)
102 return NULL; 102 return NULL;
103 break; 103 break;
104 case rep_once_or_more: 104 case rep_once_or_more:
105 if (matched == False) 105 if (matched == false)
106 return NULL; 106 return NULL;
107 107
108 if (re_cmp_step(str+1, regexp, 0, 1)) 108 if (re_cmp_step(str+1, regexp, 0, 1))
109 return str; 109 return str;
110 break; 110 break;
111 case rep_null_or_once: 111 case rep_null_or_once:
112 if (matched == False) 112 if (matched == false)
113 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; 113 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
114 break; 114 break;
115 case rep_null_or_more: 115 case rep_null_or_more:
116 if (matched) { 116 if (matched) {
117 if (re_cmp_step(str+1, regexp, 0, 1)) 117 if (re_cmp_step(str+1, regexp, 0, 1))
159} 159}
160 160
161/* A u x i l l i a r y f u n c t i o n s 161/* A u x i l l i a r y f u n c t i o n s
162 */ 162 */
163 163
164static Boolean 164static bool
165re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) { 165re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) {
166 /* str - string to match 166 /* str - string to match
167 * regexp - pattern 167 * regexp - pattern
168 * slot - number of the token which under consideration 168 * slot - number of the token which under consideration
169 * matches - how many times the token has matched 169 * matches - how many times the token has matched
170 */ 170 */
171 const char *next_regexp; 171 const char *next_regexp;
172 Boolean matched; 172 bool matched;
173 173
174#ifdef DEBUG 174#ifdef DEBUG
175/* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/ 175/* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/
176#endif 176#endif
177 177
178 if (*regexp == 0) { 178 if (*regexp == 0) {
179 /* When we reach the end of the regexp, the match is a success 179 /* When we reach the end of the regexp, the match is a success
180 */ 180 */
181 return True; 181 return true;
182 } 182 }
183 183
184 /* This chunk of code makes sure that the regexp-tokenising happens 184 /* This chunk of code makes sure that the regexp-tokenising happens
185 * only once. We only tokenise as much as we need. 185 * only once. We only tokenise as much as we need.
186 */ 186 */
189 if (re_token[slot] == NULL) 189 if (re_token[slot] == NULL)
190 re_token[slot] = (selection *) malloc(sizeof(selection)); 190 re_token[slot] = (selection *) malloc(sizeof(selection));
191 next_regexp = re_get_token(re_token[slot], regexp); 191 next_regexp = re_get_token(re_token[slot], regexp);
192 if (next_regexp == NULL) { 192 if (next_regexp == NULL) {
193 /* Syntax error, what else can we do? */ 193 /* Syntax error, what else can we do? */
194 return False; 194 return false;
195 } 195 }
196 re_substr[slot] = next_regexp; 196 re_substr[slot] = next_regexp;
197 } else { 197 } else {
198 next_regexp = re_substr[slot]; 198 next_regexp = re_substr[slot];
199 } 199 }
205 if (*str == 0) 205 if (*str == 0)
206 return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; 206 return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched;
207 207
208 switch (re_token[slot]->repeat) { 208 switch (re_token[slot]->repeat) {
209 case rep_once: 209 case rep_once:
210 if (matches == 1) { /* (matches == 1) => (matched == True) */ 210 if (matches == 1) { /* (matches == 1) => (matched == true) */
211 return re_cmp_step(str+1, next_regexp, slot+1, 0); 211 return re_cmp_step(str+1, next_regexp, slot+1, 0);
212 } 212 }
213 return False; 213 return false;
214 case rep_once_or_more: 214 case rep_once_or_more:
215 if (matched) { /* (matched == True) => (matches >= 1) */ 215 if (matched) { /* (matched == true) => (matches >= 1) */
216 /* First check if the current token repeats more */ 216 /* First check if the current token repeats more */
217 if (re_cmp_step(str+1, regexp, slot, matches)) 217 if (re_cmp_step(str+1, regexp, slot, matches))
218 return True; 218 return true;
219 return re_cmp_step(str+1, next_regexp, slot+1, 0); 219 return re_cmp_step(str+1, next_regexp, slot+1, 0);
220 } 220 }
221 return False; 221 return false;
222 case rep_null_or_once: 222 case rep_null_or_once:
223 /* We must go on to the next token, but should we advance str? */ 223 /* We must go on to the next token, but should we advance str? */
224 if (matches == 0) { 224 if (matches == 0) {
225 return re_cmp_step(str, next_regexp, slot+1, 0); 225 return re_cmp_step(str, next_regexp, slot+1, 0);
226 } else if (matches == 1) { 226 } else if (matches == 1) {
227 return re_cmp_step(str+1, next_regexp, slot+1, 0); 227 return re_cmp_step(str+1, next_regexp, slot+1, 0);
228 } 228 }
229 return False; /* Not reached */ 229 return false; /* Not reached */
230 case rep_null_or_more: 230 case rep_null_or_more:
231 if (matched) { 231 if (matched) {
232 /* Look for further repeats, advance str */ 232 /* Look for further repeats, advance str */
233 if (re_cmp_step(str+1, regexp, slot, matches)) 233 if (re_cmp_step(str+1, regexp, slot, matches))
234 return True; 234 return true;
235 return re_cmp_step(str, next_regexp, slot+1, 0); 235 return re_cmp_step(str, next_regexp, slot+1, 0);
236 } 236 }
237 return re_cmp_step(str, next_regexp, slot+1, 0); 237 return re_cmp_step(str, next_regexp, slot+1, 0);
238 } 238 }
239 return False; 239 return false;
240} 240}
241 241
242static void 242static void
243re_init(void) { 243re_init(void) {
244 int i; 244 int i;
245 245
246 re_token[0] = (selection *) malloc(sizeof(selection)); 246 re_token[0] = (selection *) malloc(sizeof(selection));
247 for (i = 1; i < RE_TOKEN_MAX; i++) 247 for (i = 1; i < RE_TOKEN_MAX; i++)
248 re_token[i] = NULL; 248 re_token[i] = NULL;
249 249
250 re_init_done = True; 250 re_init_done = true;
251} 251}
252 252
253static Boolean 253static bool
254re_match_token(uchar c, selection *sel) { 254re_match_token(unsigned char c, selection *sel) {
255 switch (sel->type) { 255 switch (sel->type) {
256 case sel_any: 256 case sel_any:
257 return True; 257 return true;
258 case sel_end: 258 case sel_end:
259 return (c == 0); 259 return (c == 0);
260 case sel_single: 260 case sel_single:
261 return (tolower(c) == tolower(sel->u.single)); 261 return (tolower(c) == tolower(sel->u.single));
262 case sel_range: 262 case sel_range:
266 case sel_not_single: 266 case sel_not_single:
267 return (tolower(c) != tolower(sel->u.single)); 267 return (tolower(c) != tolower(sel->u.single));
268 case sel_not_range: 268 case sel_not_range:
269 return (c < sel->u.range.low && c > sel->u.range.high); 269 return (c < sel->u.range.low && c > sel->u.range.high);
270 } 270 }
271 return False; 271 return false;
272} 272}
273 273
274/* re_get_token - get regular expression token 274/* re_get_token - get regular expression token
275 * Returns the first token found in <regexp> in <sel> 275 * Returns the first token found in <regexp> in <sel>
276 * Return values: NULL syntax error 276 * Return values: NULL syntax error
283# define exit_if_null if (*regexp == 0) return NULL 283# define exit_if_null if (*regexp == 0) return NULL
284#else 284#else
285# define exit_if_null 285# define exit_if_null
286#endif 286#endif
287 287
288 Boolean quoted = False; 288 bool quoted = false;
289 uchar looking_at; 289 unsigned char looking_at;
290 290
291#ifdef SAFE_CHECKS 291#ifdef SAFE_CHECKS
292 if (sel == NULL || regexp == NULL || *regexp == 0) 292 if (sel == NULL || regexp == NULL || *regexp == 0)
293 return NULL; 293 return NULL;
294#endif 294#endif
296 do { 296 do {
297 looking_at = *regexp++; 297 looking_at = *regexp++;
298 switch (looking_at) { 298 switch (looking_at) {
299 case '$': 299 case '$':
300 if (quoted) { 300 if (quoted) {
301 quoted = False; 301 quoted = false;
302 sel->type = sel_single; 302 sel->type = sel_single;
303 sel->u.single = looking_at; 303 sel->u.single = looking_at;
304 } else { 304 } else {
305 sel->type = sel_end; 305 sel->type = sel_end;
306 } 306 }
307 break; 307 break;
308 case '.': 308 case '.':
309 if (quoted) { 309 if (quoted) {
310 quoted = False; 310 quoted = false;
311 sel->type = sel_single; 311 sel->type = sel_single;
312 sel->u.single = looking_at; 312 sel->u.single = looking_at;
313 } else { 313 } else {
314 sel->type = sel_any; 314 sel->type = sel_any;
315 } 315 }
317 case '[': 317 case '[':
318 /* The fun stuff... perhaps a little obfuscated since I 318 /* The fun stuff... perhaps a little obfuscated since I
319 * don't trust the compiler to analyse liveness. 319 * don't trust the compiler to analyse liveness.
320 */ 320 */
321 if (quoted) { 321 if (quoted) {
322 quoted = False; 322 quoted = false;
323 sel->type = sel_single; 323 sel->type = sel_single;
324 sel->u.single = looking_at; 324 sel->u.single = looking_at;
325 } else { 325 } else {
326 Boolean neg = False; 326 bool neg = false;
327 uchar first, last = 0; 327 unsigned char first, last = 0;
328 328
329 exit_if_null; 329 exit_if_null;
330 looking_at = *regexp++; 330 looking_at = *regexp++;
331 331
332 if (looking_at == '^') { 332 if (looking_at == '^') {
333 neg = True; 333 neg = true;
334 exit_if_null; 334 exit_if_null;
335 looking_at = *regexp++; 335 looking_at = *regexp++;
336 } 336 }
337 first = looking_at; 337 first = looking_at;
338 exit_if_null; 338 exit_if_null;
375 { 375 {
376 /* The datastructure can only represent a RE this 376 /* The datastructure can only represent a RE this
377 * complex with an array. 377 * complex with an array.
378 */ 378 */
379 int i; 379 int i;
380 uchar previous; 380 unsigned char previous;
381 381
382 sel->type = sel_array; 382 sel->type = sel_array;
383 memset(sel->u.array, neg, sizeof(sel->u.array)); 383 memset(sel->u.array, neg, sizeof(sel->u.array));
384 if (last) { 384 if (last) {
385 /* It starts with a range */ 385 /* It starts with a range */
435 } 435 }
436 } 436 }
437 break; 437 break;
438 case '\\': 438 case '\\':
439 if (quoted) { 439 if (quoted) {
440 quoted = False; 440 quoted = false;
441 sel->type = sel_single; 441 sel->type = sel_single;
442 sel->u.single = looking_at; 442 sel->u.single = looking_at;
443 } else { 443 } else {
444 quoted = True; 444 quoted = true;
445 } 445 }
446 break; 446 break;
447 default: 447 default:
448 quoted = False; 448 quoted = false;
449 sel->type = sel_single; 449 sel->type = sel_single;
450 sel->u.single = looking_at; 450 sel->u.single = looking_at;
451 break; 451 break;
452 } 452 }
453 } while (quoted); 453 } while (quoted);
488 break; 488 break;
489 case sel_array: 489 case sel_array:
490 { 490 {
491 int i; 491 int i;
492 printf("["); 492 printf("[");
493 for (i = 0; i < UCHAR_MAX; i++) { 493 for (i = 0; i < uchar_MAX; i++) {
494 if (sel->u.array[i]) { 494 if (sel->u.array[i]) {
495 printf("%c", i); 495 printf("%c", i);
496 } 496 }
497 } 497 }
498 printf("]"); 498 printf("]");

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines