--- deliantra/server/common/re-cmp.C 2006/08/13 17:16:00 1.1 +++ deliantra/server/common/re-cmp.C 2006/08/29 08:01:35 1.2 @@ -1,6 +1,6 @@ /* * static char *rcsid_player_c = - * "$Id: re-cmp.C,v 1.1 2006/08/13 17:16:00 elmex Exp $"; + * "$Id: re-cmp.C,v 1.2 2006/08/29 08:01:35 root Exp $"; */ @@ -66,19 +66,19 @@ Boolean matched; if (re_init_done == False) - re_init(); + re_init(); #ifdef SAFE_CHECKS if (regexp == NULL || str == NULL) - return NULL; + return NULL; #endif if (*regexp == '^') { - once = True; - ++regexp; + once = True; + ++regexp; } if (*regexp == 0) { - /* // or /^/ matches any string */ - return str; + /* // or /^/ matches any string */ + return str; } next_regexp = re_get_token(re_token[0], regexp); @@ -87,73 +87,73 @@ matched = False; while (*str != '\0' && !(matched = re_match_token(*str, re_token[0]))) - str++; + str++; if (matched && *next_regexp == 0) - return str; + return str; /* Apologies for the nearly duplicated code below, hopefully it * speeds things up. */ if (once) { - switch (re_token[0]->repeat) { - case rep_once: - if (matched == False) - return NULL; - break; - case rep_once_or_more: - if (matched == False) - return NULL; - - if (re_cmp_step(str+1, regexp, 0, 1)) - return str; - break; - case rep_null_or_once: - if (matched == False) - return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; - break; - case rep_null_or_more: - if (matched) { - if (re_cmp_step(str+1, regexp, 0, 1)) - return str; - } else { - return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; - } - break; - } - return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL; + switch (re_token[0]->repeat) { + case rep_once: + if (matched == False) + return NULL; + break; + case rep_once_or_more: + if (matched == False) + return NULL; + + if (re_cmp_step(str+1, regexp, 0, 1)) + return str; + break; + case rep_null_or_once: + if (matched == False) + return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; + break; + case rep_null_or_more: + if (matched) { + if (re_cmp_step(str+1, regexp, 0, 1)) + return str; + } else { + return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; + } + break; + } + return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL; } if (matched) { - switch (re_token[0]->repeat) { - case rep_once: - case rep_null_or_once: - break; - case rep_once_or_more: - case rep_null_or_more: - if (re_cmp_step(str+1, regexp, 0, 1)) - return str; - break; - } - /* The logic here is that re_match_token only sees - * if the one letter matches. Thus, if the - * regex is like '@match eureca', and the - * the user enters anything with an e, re_match_token - * returns true, but they really need to match the - * entire regexp, which re_cmp_step will do. - * However, what happens is that there can be a case - * where the string being match is something like - * 'where is eureca'. In this case, the re_match_token - * matches that first e, but the re_cmp_step below, - * fails because the next character (r) doesn't match - * the u. So we call re_cmp with the string - * after the first r, so that it should hopefully match - * up properly. - */ - if (re_cmp_step(str+1, next_regexp, 1, 0)) - return str; - else if (*(str+1) != 0) - return re_cmp(str+1, regexp); + switch (re_token[0]->repeat) { + case rep_once: + case rep_null_or_once: + break; + case rep_once_or_more: + case rep_null_or_more: + if (re_cmp_step(str+1, regexp, 0, 1)) + return str; + break; + } + /* The logic here is that re_match_token only sees + * if the one letter matches. Thus, if the + * regex is like '@match eureca', and the + * the user enters anything with an e, re_match_token + * returns true, but they really need to match the + * entire regexp, which re_cmp_step will do. + * However, what happens is that there can be a case + * where the string being match is something like + * 'where is eureca'. In this case, the re_match_token + * matches that first e, but the re_cmp_step below, + * fails because the next character (r) doesn't match + * the u. So we call re_cmp with the string + * after the first r, so that it should hopefully match + * up properly. + */ + if (re_cmp_step(str+1, next_regexp, 1, 0)) + return str; + else if (*(str+1) != 0) + return re_cmp(str+1, regexp); } return NULL; } @@ -176,65 +176,65 @@ #endif if (*regexp == 0) { - /* When we reach the end of the regexp, the match is a success - */ - return True; + /* When we reach the end of the regexp, the match is a success + */ + return True; } /* This chunk of code makes sure that the regexp-tokenising happens * only once. We only tokenise as much as we need. */ if (slot > re_token_depth) { - re_token_depth = slot; - if (re_token[slot] == NULL) - re_token[slot] = (selection *) malloc(sizeof(selection)); - next_regexp = re_get_token(re_token[slot], regexp); - if (next_regexp == NULL) { - /* Syntax error, what else can we do? */ - return False; - } - re_substr[slot] = next_regexp; + re_token_depth = slot; + if (re_token[slot] == NULL) + re_token[slot] = (selection *) malloc(sizeof(selection)); + next_regexp = re_get_token(re_token[slot], regexp); + if (next_regexp == NULL) { + /* Syntax error, what else can we do? */ + return False; + } + re_substr[slot] = next_regexp; } else { - next_regexp = re_substr[slot]; + next_regexp = re_substr[slot]; } matched = re_match_token(*str, re_token[slot]); if (matched) - ++matches; + ++matches; if (*str == 0) - return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; + return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched; switch (re_token[slot]->repeat) { - case rep_once: - if (matches == 1) { /* (matches == 1) => (matched == True) */ - return re_cmp_step(str+1, next_regexp, slot+1, 0); - } - return False; - case rep_once_or_more: - if (matched) { /* (matched == True) => (matches >= 1) */ - /* First check if the current token repeats more */ - if (re_cmp_step(str+1, regexp, slot, matches)) - return True; - return re_cmp_step(str+1, next_regexp, slot+1, 0); - } - return False; - case rep_null_or_once: - /* We must go on to the next token, but should we advance str? */ - if (matches == 0) { - return re_cmp_step(str, next_regexp, slot+1, 0); - } else if (matches == 1) { - return re_cmp_step(str+1, next_regexp, slot+1, 0); - } - return False; /* Not reached */ - case rep_null_or_more: - if (matched) { - /* Look for further repeats, advance str */ - if (re_cmp_step(str+1, regexp, slot, matches)) - return True; - return re_cmp_step(str, next_regexp, slot+1, 0); - } - return re_cmp_step(str, next_regexp, slot+1, 0); + case rep_once: + if (matches == 1) { /* (matches == 1) => (matched == True) */ + return re_cmp_step(str+1, next_regexp, slot+1, 0); + } + return False; + case rep_once_or_more: + if (matched) { /* (matched == True) => (matches >= 1) */ + /* First check if the current token repeats more */ + if (re_cmp_step(str+1, regexp, slot, matches)) + return True; + return re_cmp_step(str+1, next_regexp, slot+1, 0); + } + return False; + case rep_null_or_once: + /* We must go on to the next token, but should we advance str? */ + if (matches == 0) { + return re_cmp_step(str, next_regexp, slot+1, 0); + } else if (matches == 1) { + return re_cmp_step(str+1, next_regexp, slot+1, 0); + } + return False; /* Not reached */ + case rep_null_or_more: + if (matched) { + /* Look for further repeats, advance str */ + if (re_cmp_step(str+1, regexp, slot, matches)) + return True; + return re_cmp_step(str, next_regexp, slot+1, 0); + } + return re_cmp_step(str, next_regexp, slot+1, 0); } return False; } @@ -245,7 +245,7 @@ re_token[0] = (selection *) malloc(sizeof(selection)); for (i = 1; i < RE_TOKEN_MAX; i++) - re_token[i] = NULL; + re_token[i] = NULL; re_init_done = True; } @@ -253,20 +253,20 @@ static Boolean re_match_token(uchar c, selection *sel) { switch (sel->type) { - case sel_any: - return True; - case sel_end: - return (c == 0); - case sel_single: - return (tolower(c) == tolower(sel->u.single)); - case sel_range: - return (c >= sel->u.range.low && c <= sel->u.range.high); - case sel_array: - return (sel->u.array[c]); - case sel_not_single: - return (tolower(c) != tolower(sel->u.single)); - case sel_not_range: - return (c < sel->u.range.low && c > sel->u.range.high); + case sel_any: + return True; + case sel_end: + return (c == 0); + case sel_single: + return (tolower(c) == tolower(sel->u.single)); + case sel_range: + return (c >= sel->u.range.low && c <= sel->u.range.high); + case sel_array: + return (sel->u.array[c]); + case sel_not_single: + return (tolower(c) != tolower(sel->u.single)); + case sel_not_range: + return (c < sel->u.range.low && c > sel->u.range.high); } return False; } @@ -290,179 +290,179 @@ #ifdef SAFE_CHECKS if (sel == NULL || regexp == NULL || *regexp == 0) - return NULL; + return NULL; #endif do { - looking_at = *regexp++; - switch (looking_at) { - case '$': - if (quoted) { - quoted = False; - sel->type = sel_single; - sel->u.single = looking_at; - } else { - sel->type = sel_end; - } - break; - case '.': - if (quoted) { - quoted = False; - sel->type = sel_single; - sel->u.single = looking_at; - } else { - sel->type = sel_any; - } - break; - case '[': - /* The fun stuff... perhaps a little obfuscated since I - * don't trust the compiler to analyse liveness. - */ - if (quoted) { - quoted = False; - sel->type = sel_single; - sel->u.single = looking_at; - } else { - Boolean neg = False; - uchar first, last = 0; - - exit_if_null; - looking_at = *regexp++; - - if (looking_at == '^') { - neg = True; - exit_if_null; - looking_at = *regexp++; - } - first = looking_at; - exit_if_null; - looking_at = *regexp++; - if (looking_at == ']') { - /* On the form [q] or [^q] */ - sel->type = neg ? sel_not_single : sel_single; - sel->u.single = first; - break; - } else if (looking_at == '-') { - exit_if_null; - last = *regexp++; - if (last == ']') { - /* On the form [A-] or [^A-]. Checking for - * [,-] and making it a range is probably not - * worth it :-) - */ - sel->type = sel_array; - memset(sel->u.array, neg, sizeof(sel->u.array)); - sel->u.array[first] = sel->u.array['-'] = !neg; - break; - } else { - exit_if_null; - looking_at = *regexp++; - if (looking_at == ']') { - /* On the form [A-G] or [^A-G]. Note that [G-A] - * is a syntax error. Fair enough, I think. - */ + looking_at = *regexp++; + switch (looking_at) { + case '$': + if (quoted) { + quoted = False; + sel->type = sel_single; + sel->u.single = looking_at; + } else { + sel->type = sel_end; + } + break; + case '.': + if (quoted) { + quoted = False; + sel->type = sel_single; + sel->u.single = looking_at; + } else { + sel->type = sel_any; + } + break; + case '[': + /* The fun stuff... perhaps a little obfuscated since I + * don't trust the compiler to analyse liveness. + */ + if (quoted) { + quoted = False; + sel->type = sel_single; + sel->u.single = looking_at; + } else { + Boolean neg = False; + uchar first, last = 0; + + exit_if_null; + looking_at = *regexp++; + + if (looking_at == '^') { + neg = True; + exit_if_null; + looking_at = *regexp++; + } + first = looking_at; + exit_if_null; + looking_at = *regexp++; + if (looking_at == ']') { + /* On the form [q] or [^q] */ + sel->type = neg ? sel_not_single : sel_single; + sel->u.single = first; + break; + } else if (looking_at == '-') { + exit_if_null; + last = *regexp++; + if (last == ']') { + /* On the form [A-] or [^A-]. Checking for + * [,-] and making it a range is probably not + * worth it :-) + */ + sel->type = sel_array; + memset(sel->u.array, neg, sizeof(sel->u.array)); + sel->u.array[first] = sel->u.array['-'] = !neg; + break; + } else { + exit_if_null; + looking_at = *regexp++; + if (looking_at == ']') { + /* On the form [A-G] or [^A-G]. Note that [G-A] + * is a syntax error. Fair enough, I think. + */ #ifdef SAFE_CHECK - if (first > last) - return NULL; + if (first > last) + return NULL; #endif - sel->type = neg ? sel_not_range : sel_range; - sel->u.range.low = first; - sel->u.range.high = last; - break; - } - } - } - { - /* The datastructure can only represent a RE this - * complex with an array. - */ - int i; - uchar previous; - - sel->type = sel_array; - memset(sel->u.array, neg, sizeof(sel->u.array)); - if (last) { - /* It starts with a range */ + sel->type = neg ? sel_not_range : sel_range; + sel->u.range.low = first; + sel->u.range.high = last; + break; + } + } + } + { + /* The datastructure can only represent a RE this + * complex with an array. + */ + int i; + uchar previous; + + sel->type = sel_array; + memset(sel->u.array, neg, sizeof(sel->u.array)); + if (last) { + /* It starts with a range */ #ifdef SAFE_CHECK - if (first > last) - return NULL; + if (first > last) + return NULL; #endif - for (i = first; i <= last; i++) { - sel->u.array[i] = !neg; - } - } else { - /* It begins with a "random" character */ - sel->u.array[first] = !neg; - } - sel->u.array[looking_at] = !neg; - - exit_if_null; - previous = looking_at; - looking_at = *regexp++; - - /* Add more characters to the array until we reach - * ]. Quoting doesn't and shouldn't work in here. - * ("]" should be put first, and "-" last if they - * are needed inside this construct.) - * Look for ranges as we go along. - */ - while (looking_at != ']') { - if (looking_at == '-') { - exit_if_null; - looking_at = *regexp++; - if (looking_at != ']') { + for (i = first; i <= last; i++) { + sel->u.array[i] = !neg; + } + } else { + /* It begins with a "random" character */ + sel->u.array[first] = !neg; + } + sel->u.array[looking_at] = !neg; + + exit_if_null; + previous = looking_at; + looking_at = *regexp++; + + /* Add more characters to the array until we reach + * ]. Quoting doesn't and shouldn't work in here. + * ("]" should be put first, and "-" last if they + * are needed inside this construct.) + * Look for ranges as we go along. + */ + while (looking_at != ']') { + if (looking_at == '-') { + exit_if_null; + looking_at = *regexp++; + if (looking_at != ']') { #ifdef SAFE_CHECK - if (previous > looking_at) - return NULL; + if (previous > looking_at) + return NULL; #endif - for (i = previous+1; i < looking_at; i++) { - /* previous has already been set and - * looking_at is set below. - */ - sel->u.array[i] = !neg; - } - exit_if_null; - } else { - sel->u.array['-'] = !neg; - break; - } - } - sel->u.array[looking_at] = !neg; - previous = looking_at; - exit_if_null; - looking_at = *regexp++; - } - } - } - break; - case '\\': - if (quoted) { - quoted = False; - sel->type = sel_single; - sel->u.single = looking_at; - } else { - quoted = True; - } - break; - default: - quoted = False; - sel->type = sel_single; - sel->u.single = looking_at; - break; - } + for (i = previous+1; i < looking_at; i++) { + /* previous has already been set and + * looking_at is set below. + */ + sel->u.array[i] = !neg; + } + exit_if_null; + } else { + sel->u.array['-'] = !neg; + break; + } + } + sel->u.array[looking_at] = !neg; + previous = looking_at; + exit_if_null; + looking_at = *regexp++; + } + } + } + break; + case '\\': + if (quoted) { + quoted = False; + sel->type = sel_single; + sel->u.single = looking_at; + } else { + quoted = True; + } + break; + default: + quoted = False; + sel->type = sel_single; + sel->u.single = looking_at; + break; + } } while (quoted); if (*regexp == '*') { - sel->repeat = rep_null_or_more; - ++regexp; + sel->repeat = rep_null_or_more; + ++regexp; } else if (*regexp == '?') { - sel->repeat = rep_null_or_once; - ++regexp; + sel->repeat = rep_null_or_once; + ++regexp; } else if (*regexp == '+') { - sel->repeat = rep_once_or_more; - ++regexp; + sel->repeat = rep_once_or_more; + ++regexp; } else { - sel->repeat = rep_once; + sel->repeat = rep_once; } return regexp; @@ -474,55 +474,55 @@ static void re_dump_sel(selection *sel) { switch(sel->type) { - case sel_any: - printf("."); - break; - case sel_end: - printf("$"); - break; - case sel_single: - printf("<%c>", sel->u.single); - break; - case sel_range: - printf("[%c-%c]", sel->u.range.low, sel->u.range.high); - break; - case sel_array: - { - int i; - printf("["); - for (i = 0; i < UCHAR_MAX; i++) { - if (sel->u.array[i]) { - printf("%c", i); - } - } - printf("]"); - } - break; - case sel_not_single: - printf("[^%c]", sel->u.single); - break; - case sel_not_range: - printf("[^%c-%c]", sel->u.range.low, sel->u.range.high); - break; - default: - printf(""); - break; + case sel_any: + printf("."); + break; + case sel_end: + printf("$"); + break; + case sel_single: + printf("<%c>", sel->u.single); + break; + case sel_range: + printf("[%c-%c]", sel->u.range.low, sel->u.range.high); + break; + case sel_array: + { + int i; + printf("["); + for (i = 0; i < UCHAR_MAX; i++) { + if (sel->u.array[i]) { + printf("%c", i); + } + } + printf("]"); + } + break; + case sel_not_single: + printf("[^%c]", sel->u.single); + break; + case sel_not_range: + printf("[^%c-%c]", sel->u.range.low, sel->u.range.high); + break; + default: + printf(""); + break; } switch(sel->repeat) { - case rep_once: - break; - case rep_null_or_once: - printf("?"); - break; - case rep_null_or_more: - printf("*"); - break; - case rep_once_or_more: - printf("+"); - break; - default: - printf(""); - break; + case rep_once: + break; + case rep_null_or_once: + printf("?"); + break; + case rep_null_or_more: + printf("*"); + break; + case rep_once_or_more: + printf("+"); + break; + default: + printf(""); + break; } } @@ -538,7 +538,7 @@ printf("\n"); m = re_cmp(argv[2], argv[1]); if (m) - printf("MATCH! -> '%s'\n", m); + printf("MATCH! -> '%s'\n", m); return 0; } #endif