1 |
root |
1.1 |
/* ANTLRParser.C |
2 |
|
|
* |
3 |
|
|
* SOFTWARE RIGHTS |
4 |
|
|
* |
5 |
|
|
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool |
6 |
|
|
* Set (PCCTS) -- PCCTS is in the public domain. An individual or |
7 |
|
|
* company may do whatever they wish with source code distributed with |
8 |
|
|
* PCCTS or the code generated by PCCTS, including the incorporation of |
9 |
|
|
* PCCTS, or its output, into commerical software. |
10 |
|
|
* |
11 |
|
|
* We encourage users to develop software with PCCTS. However, we do ask |
12 |
|
|
* that credit is given to us for developing PCCTS. By "credit", |
13 |
|
|
* we mean that if you incorporate our source code into one of your |
14 |
|
|
* programs (commercial product, research project, or otherwise) that you |
15 |
|
|
* acknowledge this fact somewhere in the documentation, research report, |
16 |
|
|
* etc... If you like PCCTS and have developed a nice tool with the |
17 |
|
|
* output, please mention that you developed it using PCCTS. In |
18 |
|
|
* addition, we ask that this header remain intact in our source code. |
19 |
|
|
* As long as these guidelines are kept, we expect to continue enhancing |
20 |
|
|
* this system and expect to make other tools available as they are |
21 |
|
|
* completed. |
22 |
|
|
* |
23 |
|
|
* ANTLR 1.33 |
24 |
|
|
* Terence Parr |
25 |
|
|
* Parr Research Corporation |
26 |
|
|
* with Purdue University and AHPCRC, University of Minnesota |
27 |
|
|
* 1989-2000 |
28 |
|
|
*/ |
29 |
|
|
|
30 |
|
|
#include "pcctscfg.h" |
31 |
|
|
|
32 |
|
|
#include "pccts_stdlib.h" |
33 |
|
|
#include "pccts_stdarg.h" |
34 |
|
|
#include "pccts_string.h" |
35 |
|
|
#include "pccts_stdio.h" |
36 |
|
|
|
37 |
|
|
PCCTS_NAMESPACE_STD |
38 |
|
|
|
39 |
|
|
/* I have to put this here due to C++ limitation |
40 |
|
|
* that you can't have a 'forward' decl for enums. |
41 |
|
|
* I hate C++!!!!!!!!!!!!!!! |
42 |
|
|
* Of course, if I could use real templates, this would go away. |
43 |
|
|
*/ |
44 |
|
|
// MR1 |
45 |
|
|
// MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the |
46 |
|
|
// MR1 ANTLRTokenType enum |
47 |
|
|
// MR1 |
48 |
|
|
|
49 |
|
|
// MR1 Placeholder definition of the token-type enum; the two enumerators
// MR1 span the range 0..9999.
enum ANTLRTokenType {
	TER_HATES_CPP       = 0,
	ITS_TOO_COMPLICATED = 9999
};		// MR1
50 |
|
|
|
51 |
|
|
#define ANTLR_SUPPORT_CODE |
52 |
|
|
|
53 |
|
|
#include ATOKEN_H |
54 |
|
|
#include ATOKENBUFFER_H |
55 |
|
|
#include APARSER_H |
56 |
|
|
|
57 |
|
|
/* MR14 File-local sizing constants. Neither is referenced in the part of
 * MR14 the file visible here.
 */
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;	/* MR14 */
static const int zzINF_DEF_TOKEN_BUFFER_SIZE   = 2000;	/* MR14 */
59 |
|
|
|
60 |
|
|
/* L o o k a h e a d M a c r o s */ |
61 |
|
|
|
62 |
|
|
/* maximum of 32 bits/unsigned int and must be 8 bits/byte; |
63 |
|
|
* we only use 8 bits of it. |
64 |
|
|
*/ |
65 |
|
|
/* One single-bit mask per bit position; entry n has only bit n set.
 * Eight entries are listed, i.e. one 8-bit word's worth.
 */
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
	1 << 0, 1 << 1, 1 << 2, 1 << 3,
	1 << 4, 1 << 5, 1 << 6, 1 << 7
};
69 |
|
|
|
70 |
|
|
/* Shared scratch buffer filled and returned by eMsgd()/eMsg()/eMsg2(); starts out empty. */
char ANTLRParser::eMsgBuffer[500] = { '\0' };
71 |
|
|
|
72 |
|
|
/* Free the two heap arrays this object allocates: the token_type[] lookahead
 * cache (allocated in the constructor) and the zzFAILtext buffer (allocated
 * lazily by FAIL(); still NULL if FAIL() never ran, which delete[] accepts).
 */
ANTLRParser::~ANTLRParser()
{
	delete [] zzFAILtext;		// MR16 Manfred Kogler
	delete [] token_type;
}
78 |
|
|
|
79 |
|
|
/* Construct a parser that reads from _inputTokens.
 *
 *   _inputTokens  token buffer to read from; it is told to keep at least k
 *                 tokens (setMinTokens) and is handed this parser (setParser)
 *   k             lookahead depth; stored in LLk and used as the length of
 *                 the token_type[] cache
 *   use_inf_look  stored in can_use_inf_look
 *   dlook         must be 0; any other value calls panic()
 *   ssize         stored in bsetsize
 *
 * This constructor does not call consume(); init() is what primes the
 * lookahead.
 */
ANTLRParser::ANTLRParser(ANTLRTokenBuffer *_inputTokens,
						 int k,
						 int use_inf_look,
						 int dlook,
						 int ssize)
{
	LLk = k;
	can_use_inf_look = use_inf_look;

/* MR14 */	if (dlook != 0) {
/* MR14 */		panic("ANTLRParser::ANTLRParser - Demand lookahead not supported in C++ mode");
/* MR14 */	}
	demand_look = 0;		/* demand_look = dlook; */

	bsetsize  = ssize;
	guessing  = 0;
	token_tbl = NULL;
	eofToken  = (ANTLRTokenType)1;

	// lookahead cache: one slot per token of lookahead
	token_type = new ANTLRTokenType[LLk];
	lap    = 0;
	labase = 0;
#ifdef ZZDEFER_FETCH
	stillToFetch = 0;						// MR19
#endif
	dirty      = 0;
	inf_labase = 0;							// MR7
	inf_last   = 0;							// MR7

	/* attach to the token source; the lookahead itself is primed later */
	inputTokens = _inputTokens;
	inputTokens->setMinTokens(k);
	inputTokens->setParser(this);			// MR1

	resynchConsumed = 1;					// MR8
	zzFAILtext = NULL;						// MR9
	traceOptionValueDefault = 0;			// MR10
	traceReset();							// MR10 (reads traceOptionValueDefault)
	zzGuessSeq = 0;							// MR10
	syntaxErrCount = 0;						// MR11
}
119 |
|
|
|
120 |
|
|
void ANTLRParser::init() |
121 |
|
|
{ |
122 |
|
|
prime_lookahead(); |
123 |
|
|
resynchConsumed=1; // MR8 |
124 |
|
|
traceReset(); // MR10 |
125 |
|
|
} |
126 |
|
|
|
127 |
|
|
void ANTLRParser::traceReset() |
128 |
|
|
{ |
129 |
|
|
traceOptionValue=traceOptionValueDefault; |
130 |
|
|
traceGuessOptionValue=1; |
131 |
|
|
traceCurrentRuleName=NULL; |
132 |
|
|
traceDepth=0; |
133 |
|
|
} |
134 |
|
|
|
135 |
|
|
|
136 |
|
|
#ifdef _MSC_VER // MR23 |
137 |
|
|
//Turn off warning: |
138 |
|
|
//interaction between '_setjmp' and C++ object destruction is non-portable |
139 |
|
|
#pragma warning(disable : 4611) |
140 |
|
|
#endif |
141 |
|
|
int ANTLRParser:: |
142 |
|
|
guess(ANTLRParserState *st) |
143 |
|
|
{ |
144 |
|
|
saveState(st); |
145 |
|
|
guessing = 1; |
146 |
|
|
return setjmp(guess_start.state); |
147 |
|
|
} |
148 |
|
|
#ifdef _MSC_VER // MR23 |
149 |
|
|
#pragma warning(default: 4611) |
150 |
|
|
#endif |
151 |
|
|
|
152 |
|
|
void ANTLRParser:: |
153 |
|
|
saveState(ANTLRParserState *buf) |
154 |
|
|
{ |
155 |
|
|
buf->guess_start = guess_start; |
156 |
|
|
buf->guessing = guessing; |
157 |
|
|
buf->inf_labase = inf_labase; |
158 |
|
|
buf->inf_last = inf_last; |
159 |
|
|
buf->dirty = dirty; |
160 |
|
|
buf->traceOptionValue=traceOptionValue; /* MR10 */ |
161 |
|
|
buf->traceGuessOptionValue=traceGuessOptionValue; /* MR10 */ |
162 |
|
|
buf->traceCurrentRuleName=traceCurrentRuleName; /* MR10 */ |
163 |
|
|
buf->traceDepth=traceDepth; /* MR10 */ |
164 |
|
|
} |
165 |
|
|
|
166 |
|
|
void ANTLRParser:: |
167 |
|
|
restoreState(ANTLRParserState *buf) |
168 |
|
|
{ |
169 |
|
|
int i; |
170 |
|
|
int prevTraceOptionValue; |
171 |
|
|
|
172 |
|
|
guess_start = buf->guess_start; |
173 |
|
|
guessing = buf->guessing; |
174 |
|
|
inf_labase = buf->inf_labase; |
175 |
|
|
inf_last = buf->inf_last; |
176 |
|
|
dirty = buf->dirty; |
177 |
|
|
|
178 |
|
|
// restore lookahead buffer from k tokens before restored TokenBuffer position |
179 |
|
|
// if demand_look, then I guess we don't look backwards for these tokens. |
180 |
|
|
for (i=1; i<=LLk; i++) token_type[i-1] = |
181 |
|
|
inputTokens->bufferedToken(i-LLk)->getType(); |
182 |
|
|
lap = 0; |
183 |
|
|
labase = 0; |
184 |
|
|
|
185 |
|
|
/* MR10 */ |
186 |
|
|
|
187 |
|
|
prevTraceOptionValue=traceOptionValue; |
188 |
|
|
traceOptionValue=buf->traceOptionValue; |
189 |
|
|
if ( (prevTraceOptionValue > 0) != |
190 |
|
|
(traceOptionValue > 0)) { |
191 |
|
|
if (traceCurrentRuleName != NULL) { /* MR21 */ |
192 |
|
|
if (traceOptionValue > 0) { |
193 |
|
|
/* MR23 */ printMessage(stderr, |
194 |
|
|
"trace enable restored in rule %s depth %d\n", |
195 |
|
|
traceCurrentRuleName, |
196 |
|
|
traceDepth); |
197 |
|
|
}; |
198 |
|
|
if (traceOptionValue <= 0) { |
199 |
|
|
/* MR23 */ printMessage(stderr, |
200 |
|
|
"trace disable restored in rule %s depth %d\n", |
201 |
|
|
traceCurrentRuleName, /* MR21 */ |
202 |
|
|
traceDepth); |
203 |
|
|
}; |
204 |
|
|
} |
205 |
|
|
}; |
206 |
|
|
traceGuessOptionValue=buf->traceGuessOptionValue; |
207 |
|
|
traceCurrentRuleName=buf->traceCurrentRuleName; |
208 |
|
|
traceDepth=buf->traceDepth; |
209 |
|
|
traceGuessDone(buf); |
210 |
|
|
} |
211 |
|
|
|
212 |
|
|
/* Get the next symbol from the input stream; put it into lookahead buffer; |
213 |
|
|
* fill token_type[] fast reference cache also. NLA is the next place where |
214 |
|
|
* a lookahead ANTLRAbstractToken should go. |
215 |
|
|
*/ |
216 |
|
|
void ANTLRParser:: |
217 |
|
|
consume() |
218 |
|
|
{ |
219 |
|
|
|
220 |
|
|
#ifdef ZZDEBUG_CONSUME_ACTION |
221 |
|
|
zzdebug_consume_action(); |
222 |
|
|
#endif |
223 |
|
|
|
224 |
|
|
// MR19 V.H. Simonis |
225 |
|
|
// Defer Fetch feature |
226 |
|
|
// Moves action of consume() into LA() function |
227 |
|
|
|
228 |
|
|
#ifdef ZZDEFER_FETCH |
229 |
|
|
stillToFetch++; |
230 |
|
|
#else |
231 |
|
|
NLA = inputTokens->getToken()->getType(); |
232 |
|
|
dirty--; |
233 |
|
|
lap = (lap+1)&(LLk-1); |
234 |
|
|
#endif |
235 |
|
|
|
236 |
|
|
} |
237 |
|
|
|
238 |
|
|
/* Return the i-th lookahead token, read from the token buffer at offset
 * (i - LLk).
 *
 * With ZZDEFER_FETCH, pending deferred fetches are carried out first (MR19).
 * With DEBUG_TOKENBUFFER, a request the buffer cannot satisfy ends in
 * panic().
 */
_ANTLRTokenPtr ANTLRParser::LT(int i)
{
#ifdef ZZDEFER_FETCH
	undeferFetch();
#endif

#ifdef DEBUG_TOKENBUFFER
	const int beyondBuffer = ( i >= inputTokens->bufferSize() );
	if ( beyondBuffer || inputTokens->minTokens() < LLk )	/* MR20 Was "<=" */
	{
		char message[2000];                 /* MR20 Was "static" */
		sprintf(message, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
		panic(message);
	}
#endif

	return inputTokens->bufferedToken(i-LLk);
}
260 |
|
|
|
261 |
|
|
void |
262 |
|
|
ANTLRParser:: |
263 |
|
|
look(int k) |
264 |
|
|
{ |
265 |
|
|
int i, c = k - (LLk-dirty); |
266 |
|
|
for (i=1; i<=c; i++) consume(); |
267 |
|
|
} |
268 |
|
|
|
269 |
|
|
/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK); |
270 |
|
|
*/ |
271 |
|
|
void |
272 |
|
|
ANTLRParser:: |
273 |
|
|
prime_lookahead() |
274 |
|
|
{ |
275 |
|
|
int i; |
276 |
|
|
for(i=1;i<=LLk; i++) consume(); |
277 |
|
|
dirty=0; |
278 |
|
|
// lap = 0; // MR14 Sinan Karasu (sinan.karasu@boeing.com) |
279 |
|
|
// labase = 0; // MR14 |
280 |
|
|
labase=lap; // MR14 |
281 |
|
|
} |
282 |
|
|
|
283 |
|
|
/* check to see if the current input symbol matches '_t'. |
284 |
|
|
* During NON demand lookahead mode, dirty will always be 0 and |
285 |
|
|
* hence the extra code for consuming tokens in _match is never |
286 |
|
|
* executed; the same routine can be used for both modes. |
287 |
|
|
*/ |
288 |
|
|
int ANTLRParser:: |
289 |
|
|
_match(ANTLRTokenType _t, ANTLRChar **MissText, |
290 |
|
|
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, |
291 |
|
|
SetWordType **MissSet) |
292 |
|
|
{ |
293 |
|
|
if ( dirty==LLk ) { |
294 |
|
|
consume(); |
295 |
|
|
} |
296 |
|
|
if ( LA(1)!=_t ) { |
297 |
|
|
*MissText=NULL; |
298 |
|
|
*MissTok= _t; |
299 |
|
|
*BadTok = LT(1); |
300 |
|
|
*MissSet=NULL; |
301 |
|
|
return 0; |
302 |
|
|
} |
303 |
|
|
dirty++; |
304 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
305 |
|
|
return 1; |
306 |
|
|
} |
307 |
|
|
|
308 |
|
|
/* check to see if the current input symbol matches '_t'. |
309 |
|
|
* Used during exception handling. |
310 |
|
|
*/ |
311 |
|
|
int ANTLRParser:: |
312 |
|
|
_match_wsig(ANTLRTokenType _t) |
313 |
|
|
{ |
314 |
|
|
if ( dirty==LLk ) { |
315 |
|
|
consume(); |
316 |
|
|
} |
317 |
|
|
if ( LA(1)!=_t ) return 0; |
318 |
|
|
dirty++; |
319 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
320 |
|
|
return 1; |
321 |
|
|
} |
322 |
|
|
|
323 |
|
|
/* check to see if the current input symbol matches any token in a set. |
324 |
|
|
* During NON demand lookahead mode, dirty will always be 0 and |
325 |
|
|
* hence the extra code for consuming tokens in _match is never |
326 |
|
|
* executed; the same routine can be used for both modes. |
327 |
|
|
*/ |
328 |
|
|
int ANTLRParser:: |
329 |
|
|
_setmatch(SetWordType *tset, ANTLRChar **MissText, |
330 |
|
|
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, |
331 |
|
|
SetWordType **MissSet, SetWordType *tokclassErrset) |
332 |
|
|
{ |
333 |
|
|
if ( dirty==LLk ) { |
334 |
|
|
consume(); |
335 |
|
|
} |
336 |
|
|
if ( !set_el(LA(1), tset) ) { |
337 |
|
|
*MissText=NULL; /* MR23 */ |
338 |
|
|
*MissTok=(ANTLRTokenType) 0; /* MR23 */ |
339 |
|
|
*BadTok=LT(1); /* MR23 */ |
340 |
|
|
*MissSet=tokclassErrset; /* MR23 */ |
341 |
|
|
return 0; |
342 |
|
|
} |
343 |
|
|
dirty++; |
344 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
345 |
|
|
return 1; |
346 |
|
|
} |
347 |
|
|
|
348 |
|
|
int ANTLRParser:: |
349 |
|
|
_setmatch_wsig(SetWordType *tset) |
350 |
|
|
{ |
351 |
|
|
if ( dirty==LLk ) { |
352 |
|
|
consume(); |
353 |
|
|
} |
354 |
|
|
if ( !set_el(LA(1), tset) ) return 0; |
355 |
|
|
dirty++; |
356 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
357 |
|
|
return 1; |
358 |
|
|
} |
359 |
|
|
|
360 |
|
|
/* Exception handling routines */ |
361 |
|
|
// |
362 |
|
|
// 7-Apr-97 133MR1 |
363 |
|
|
// Change suggested by Eli Sternheim (eli@interhdl.com) |
364 |
|
|
// |
365 |
|
|
void ANTLRParser:: |
366 |
|
|
consumeUntil(SetWordType *st) |
367 |
|
|
{ |
368 |
|
|
ANTLRTokenType tmp; // MR1 |
369 |
|
|
const int Eof=1; // MR1 |
370 |
|
|
while ( !set_el( (tmp=LA(1)), st) && tmp!=Eof) { consume(); } // MR1 |
371 |
|
|
} |
372 |
|
|
|
373 |
|
|
// |
374 |
|
|
// 7-Apr-97 133MR1 |
375 |
|
|
// Change suggested by Eli Sternheim (eli@interhdl.com) |
376 |
|
|
// |
377 |
|
|
void ANTLRParser:: |
378 |
|
|
consumeUntilToken(int t) |
379 |
|
|
{ |
380 |
|
|
int tmp; // MR1 |
381 |
|
|
const int Eof=1; // MR1 |
382 |
|
|
while ( (tmp=LA(1)) !=t && tmp!=Eof) { consume(); } // MR1 |
383 |
|
|
} |
384 |
|
|
|
385 |
|
|
|
386 |
|
|
/* Old error stuff */ |
387 |
|
|
|
388 |
|
|
void ANTLRParser:: |
389 |
|
|
resynch(SetWordType *wd,SetWordType mask) |
390 |
|
|
{ |
391 |
|
|
|
392 |
|
|
/* MR8 S.Bochnak@microtool.com.pl */ |
393 |
|
|
/* MR8 Change file scope static "consumed" to instance var */ |
394 |
|
|
|
395 |
|
|
/* if you enter here without having consumed a token from last resynch |
396 |
|
|
* force a token consumption. |
397 |
|
|
*/ |
398 |
|
|
/* MR8 */ if ( !resynchConsumed ) {consume(); resynchConsumed=1; return;} |
399 |
|
|
|
400 |
|
|
/* if current token is in resynch set, we've got what we wanted */ |
401 |
|
|
|
402 |
|
|
/* MR8 */ if ( wd[LA(1)]&mask || LA(1) == eofToken ) {resynchConsumed=0; return;} |
403 |
|
|
|
404 |
|
|
/* scan until we find something in the resynch set */ |
405 |
|
|
|
406 |
|
|
while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();} |
407 |
|
|
|
408 |
|
|
/* MR8 */ resynchConsumed=1; |
409 |
|
|
} |
410 |
|
|
|
411 |
|
|
/* standard error reporting function that assumes DLG-based scanners; |
412 |
|
|
* you should redefine in subclass to change it or if you use your |
413 |
|
|
* own scanner. |
414 |
|
|
*/ |
415 |
|
|
|
416 |
|
|
/* MR23 THM There appears to be a parameter "badText" passed to syn() |
417 |
|
|
which is not present in the parameter list. This may be |
418 |
|
|
because in C mode there is no attribute function which |
419 |
|
|
returns the text, so the text representation of the token |
420 |
|
|
must be passed explicitly. I think. |
421 |
|
|
*/ |
422 |
|
|
|
423 |
|
|
void ANTLRParser:: |
424 |
|
|
syn(_ANTLRTokenPtr /*tok MR23*/, ANTLRChar *egroup, SetWordType *eset, |
425 |
|
|
ANTLRTokenType etok, int k) |
426 |
|
|
{ |
427 |
|
|
int line; |
428 |
|
|
|
429 |
|
|
line = LT(1)->getLine(); |
430 |
|
|
|
431 |
|
|
syntaxErrCount++; /* MR11 */ |
432 |
|
|
|
433 |
|
|
/* MR23 If the token is not an EOF token, then use the ->getText() value. |
434 |
|
|
|
435 |
|
|
If the token is the EOF token the text returned by ->getText() |
436 |
|
|
may be garbage. If the text from the token table is "@" use |
437 |
|
|
"<eof>" instead, because end-users don't know what "@" means. |
438 |
|
|
If the text is not "@" then use that text, which must have been |
439 |
|
|
supplied by the grammar writer. |
440 |
|
|
*/ |
441 |
|
|
const char * errorAt = LT(1)->getText(); |
442 |
|
|
if (LA(1) == eofToken) { |
443 |
|
|
errorAt = parserTokenName(LA(1)); |
444 |
|
|
if (errorAt[0] == '@') errorAt = "<eof>"; |
445 |
|
|
} |
446 |
|
|
/* MR23 */ printMessage(stderr, "line %d: syntax error at \"%s\"", |
447 |
|
|
line, errorAt); |
448 |
|
|
if ( !etok && !eset ) {/* MR23 */ printMessage(stderr, "\n"); return;} |
449 |
|
|
if ( k==1 ) /* MR23 */ printMessage(stderr, " missing"); |
450 |
|
|
else |
451 |
|
|
{ |
452 |
|
|
/* MR23 */ printMessage(stderr, "; \"%s\" not", LT(k)->getText()); // MR23 use LT(k) since k>1 |
453 |
|
|
if ( set_deg(eset)>1 ) /* MR23 */ printMessage(stderr, " in"); |
454 |
|
|
} |
455 |
|
|
if ( set_deg(eset)>0 ) edecode(eset); |
456 |
|
|
else /* MR23 */ printMessage(stderr, " %s", token_tbl[etok]); |
457 |
|
|
if ( strlen(egroup) > 0 ) /* MR23 */ printMessage(stderr, " in %s", egroup); |
458 |
|
|
/* MR23 */ printMessage(stderr, "\n"); |
459 |
|
|
} |
460 |
|
|
|
461 |
|
|
/* is b an element of set p? */ |
462 |
|
|
int ANTLRParser:: |
463 |
|
|
set_el(ANTLRTokenType b, SetWordType *p) |
464 |
|
|
{ |
465 |
|
|
return( p[DIVWORD(b)] & bitmask[MODWORD(b)] ); |
466 |
|
|
} |
467 |
|
|
|
468 |
|
|
int ANTLRParser:: |
469 |
|
|
set_deg(SetWordType *a) |
470 |
|
|
{ |
471 |
|
|
/* Fast compute degree of a set... the number |
472 |
|
|
of elements present in the set. Assumes |
473 |
|
|
that all word bits are used in the set |
474 |
|
|
*/ |
475 |
|
|
register SetWordType *p = a; |
476 |
|
|
register SetWordType *endp = &(a[bsetsize]); |
477 |
|
|
register int degree = 0; |
478 |
|
|
|
479 |
|
|
if ( a == NULL ) return 0; |
480 |
|
|
while ( p < endp ) |
481 |
|
|
{ |
482 |
|
|
register SetWordType t = *p; |
483 |
|
|
register SetWordType *b = &(bitmask[0]); |
484 |
|
|
do { |
485 |
|
|
if (t & *b) ++degree; |
486 |
|
|
} while (++b < &(bitmask[sizeof(SetWordType)*8])); |
487 |
|
|
p++; |
488 |
|
|
} |
489 |
|
|
|
490 |
|
|
return(degree); |
491 |
|
|
} |
492 |
|
|
|
493 |
|
|
void ANTLRParser:: |
494 |
|
|
edecode(SetWordType *a) |
495 |
|
|
{ |
496 |
|
|
register SetWordType *p = a; |
497 |
|
|
register SetWordType *endp = &(p[bsetsize]); |
498 |
|
|
register unsigned e = 0; |
499 |
|
|
|
500 |
|
|
if ( set_deg(a)>1 ) /* MR23 */ printMessage(stderr, " {"); |
501 |
|
|
do { |
502 |
|
|
register SetWordType t = *p; |
503 |
|
|
register SetWordType *b = &(bitmask[0]); |
504 |
|
|
do { |
505 |
|
|
if ( t & *b ) /* MR23 */ printMessage(stderr, " %s", token_tbl[e]); |
506 |
|
|
e++; |
507 |
|
|
} while (++b < &(bitmask[sizeof(SetWordType)*8])); |
508 |
|
|
} while (++p < endp); |
509 |
|
|
if ( set_deg(a)>1 ) /* MR23 */ printMessage(stderr, " }"); |
510 |
|
|
} |
511 |
|
|
|
512 |
|
|
/* input looks like: |
513 |
|
|
* zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk) |
514 |
|
|
* where the zzMiss stuff is set here to the token that did not match |
515 |
|
|
* (and which set wasn't it a member of). |
516 |
|
|
*/ |
517 |
|
|
|
518 |
|
|
// MR9 29-Sep-97 Stan Bochnak (S.Bochnak@microTool.com.pl) |
519 |
|
|
// MR9 Original fix to static allocated text didn't |
520 |
|
|
// MR9 work because a pointer to it was passed back |
521 |
|
|
// MR9 to caller. Replace with instance variable. |
522 |
|
|
|
523 |
|
|
const int SETWORDCOUNT=20; |
524 |
|
|
|
525 |
|
|
void |
526 |
|
|
ANTLRParser::FAIL(int k, ...) |
527 |
|
|
{ |
528 |
|
|
// |
529 |
|
|
// MR1 10-Apr-97 |
530 |
|
|
// |
531 |
|
|
|
532 |
|
|
if (zzFAILtext == NULL) zzFAILtext=new char [1000]; // MR9 |
533 |
|
|
SetWordType **f=new SetWordType *[SETWORDCOUNT]; // MR1 // MR9 |
534 |
|
|
SetWordType **miss_set; |
535 |
|
|
ANTLRChar **miss_text; |
536 |
|
|
_ANTLRTokenPtr *bad_tok; |
537 |
|
|
ANTLRChar **bad_text; |
538 |
|
|
// |
539 |
|
|
// 7-Apr-97 133MR1 |
540 |
|
|
// err_k is passed as a "int *", not "unsigned *" |
541 |
|
|
// |
542 |
|
|
int *err_k; // MR1 |
543 |
|
|
int i; |
544 |
|
|
va_list ap; |
545 |
|
|
|
546 |
|
|
va_start(ap, k); |
547 |
|
|
|
548 |
|
|
zzFAILtext[0] = '\0'; |
549 |
|
|
if ( k > SETWORDCOUNT ) panic("FAIL: overflowed buffer"); |
550 |
|
|
for (i=1; i<=k; i++) /* collect all lookahead sets */ |
551 |
|
|
{ |
552 |
|
|
f[i-1] = va_arg(ap, SetWordType *); |
553 |
|
|
} |
554 |
|
|
for (i=1; i<=k; i++) /* look for offending token */ |
555 |
|
|
{ |
556 |
|
|
if ( i>1 ) strcat(zzFAILtext, " "); |
557 |
|
|
strcat(zzFAILtext, LT(i)->getText()); |
558 |
|
|
if ( !set_el(LA(i), f[i-1]) ) break; |
559 |
|
|
} |
560 |
|
|
miss_set = va_arg(ap, SetWordType **); |
561 |
|
|
miss_text = va_arg(ap, ANTLRChar **); |
562 |
|
|
bad_tok = va_arg(ap, _ANTLRTokenPtr *); |
563 |
|
|
bad_text = va_arg(ap, ANTLRChar **); |
564 |
|
|
err_k = va_arg(ap, int *); // MR1 |
565 |
|
|
if ( i>k ) |
566 |
|
|
{ |
567 |
|
|
/* bad; lookahead is permutation that cannot be matched, |
568 |
|
|
* but, the ith token of lookahead is valid at the ith position |
569 |
|
|
* (The old LL sub 1 (k) versus LL(k) parsing technique) |
570 |
|
|
*/ |
571 |
|
|
*miss_set = NULL; |
572 |
|
|
*miss_text = LT(1)->getText(); |
573 |
|
|
*bad_tok = LT(1); |
574 |
|
|
*bad_text = (*bad_tok)->getText(); |
575 |
|
|
*err_k = k; |
576 |
|
|
// |
577 |
|
|
// MR4 20-May-97 erroneously deleted contents of f[] |
578 |
|
|
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca) |
579 |
|
|
// MR1 10-Apr-97 release temporary storage |
580 |
|
|
// |
581 |
|
|
delete [] f; // MR1 |
582 |
|
|
return; // MR1 |
583 |
|
|
} |
584 |
|
|
/* MR23 printMessage(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ |
585 |
|
|
*miss_set = f[i-1]; |
586 |
|
|
*miss_text = zzFAILtext; |
587 |
|
|
*bad_tok = LT(i); |
588 |
|
|
*bad_text = (*bad_tok)->getText(); |
589 |
|
|
if ( i==1 ) *err_k = 1; |
590 |
|
|
else *err_k = k; |
591 |
|
|
// |
592 |
|
|
// MR4 20-May-97 erroneously deleted contents of f[] |
593 |
|
|
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca) |
594 |
|
|
// MR1 10-Apr-97 release temporary storage |
595 |
|
|
// |
596 |
|
|
delete [] f; // MR1 |
597 |
|
|
return; // MR1 |
598 |
|
|
} |
599 |
|
|
|
600 |
|
|
int ANTLRParser:: |
601 |
|
|
_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows) |
602 |
|
|
{ |
603 |
|
|
if ( dirty==LLk ) consume(); |
604 |
|
|
|
605 |
|
|
if ( LA(1)!=tokenWanted ) |
606 |
|
|
{ |
607 |
|
|
syntaxErrCount++; /* MR11 */ |
608 |
|
|
/* MR23 */ printMessage(stderr, |
609 |
|
|
"line %d: syntax error at \"%s\" missing %s\n", |
610 |
|
|
LT(1)->getLine(), |
611 |
|
|
(LA(1)==eofToken && LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */ |
612 |
|
|
token_tbl[tokenWanted]); |
613 |
|
|
consumeUntil( whatFollows ); |
614 |
|
|
return 0; |
615 |
|
|
} |
616 |
|
|
else { |
617 |
|
|
dirty++; |
618 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
619 |
|
|
/* if ( !demand_look ) consume(); */ |
620 |
|
|
return 1; |
621 |
|
|
} |
622 |
|
|
} |
623 |
|
|
|
624 |
|
|
|
625 |
|
|
int ANTLRParser:: |
626 |
|
|
_setmatch_wdfltsig(SetWordType *tokensWanted, |
627 |
|
|
ANTLRTokenType tokenTypeOfSet, |
628 |
|
|
SetWordType *whatFollows) |
629 |
|
|
{ |
630 |
|
|
if ( dirty==LLk ) consume(); |
631 |
|
|
if ( !set_el(LA(1), tokensWanted) ) |
632 |
|
|
{ |
633 |
|
|
syntaxErrCount++; /* MR11 */ |
634 |
|
|
/* MR23 */ printMessage(stderr, |
635 |
|
|
"line %d: syntax error at \"%s\" missing %s\n", |
636 |
|
|
LT(1)->getLine(), |
637 |
|
|
(LA(1)==eofToken && LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */ |
638 |
|
|
token_tbl[tokenTypeOfSet]); |
639 |
|
|
consumeUntil( whatFollows ); |
640 |
|
|
return 0; |
641 |
|
|
} |
642 |
|
|
else { |
643 |
|
|
dirty++; |
644 |
|
|
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
645 |
|
|
/* if ( !demand_look ) consume(); */ |
646 |
|
|
return 1; |
647 |
|
|
} |
648 |
|
|
} |
649 |
|
|
|
650 |
|
|
char *ANTLRParser:: |
651 |
|
|
eMsgd(char *err,int d) |
652 |
|
|
{ |
653 |
|
|
sprintf(eMsgBuffer, err, d); // dangerous, but I don't care |
654 |
|
|
return eMsgBuffer; |
655 |
|
|
} |
656 |
|
|
|
657 |
|
|
char *ANTLRParser:: |
658 |
|
|
eMsg(char *err, char *s) |
659 |
|
|
{ |
660 |
|
|
sprintf(eMsgBuffer, err, s); |
661 |
|
|
return eMsgBuffer; |
662 |
|
|
} |
663 |
|
|
|
664 |
|
|
char *ANTLRParser:: |
665 |
|
|
eMsg2(char *err,char *s, char *t) |
666 |
|
|
{ |
667 |
|
|
sprintf(eMsgBuffer, err, s, t); |
668 |
|
|
return eMsgBuffer; |
669 |
|
|
} |
670 |
|
|
|
671 |
|
|
void ANTLRParser:: |
672 |
|
|
panic(const char *msg) // MR20 const |
673 |
|
|
{ |
674 |
|
|
/* MR23 */ printMessage(stderr, "ANTLR panic: %s\n", msg); |
675 |
|
|
exit(PCCTS_EXIT_FAILURE); // MR1 |
676 |
|
|
} |
677 |
|
|
|
678 |
|
|
/* MR1 Return the token_tbl[] entry for token number 'tok'.
 * MR1 No range check is made on 'tok'.
 */
const ANTLRChar *ANTLRParser::parserTokenName(int tok)	// MR1
{
	const ANTLRChar *name = token_tbl[tok];				// MR1

	return name;										// MR1
}
682 |
|
|
|
683 |
|
|
void ANTLRParser::traceGuessDone(const ANTLRParserState *state) { |
684 |
|
|
|
685 |
|
|
int doIt=0; |
686 |
|
|
|
687 |
|
|
if (traceCurrentRuleName == NULL) return; |
688 |
|
|
|
689 |
|
|
if (traceOptionValue <= 0) { |
690 |
|
|
doIt=0; |
691 |
|
|
} else if (traceGuessOptionValue <= 0) { |
692 |
|
|
doIt=0; |
693 |
|
|
} else { |
694 |
|
|
doIt=1; |
695 |
|
|
}; |
696 |
|
|
|
697 |
|
|
if (doIt) { |
698 |
|
|
/* MR23 */ printMessage(stderr,"guess done - returning to rule %s {\"%s\"} at depth %d", |
699 |
|
|
state->traceCurrentRuleName, |
700 |
|
|
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(), |
701 |
|
|
state->traceDepth); |
702 |
|
|
if (state->guessing != 0) { |
703 |
|
|
/* MR23 */ printMessage(stderr," (guess mode continues - an enclosing guess is still active)"); |
704 |
|
|
} else { |
705 |
|
|
/* MR23 */ printMessage(stderr," (guess mode ends)"); |
706 |
|
|
}; |
707 |
|
|
/* MR23 */ printMessage(stderr,"\n"); |
708 |
|
|
}; |
709 |
|
|
} |
710 |
|
|
|
711 |
|
|
void ANTLRParser::traceGuessFail() { |
712 |
|
|
|
713 |
|
|
int doIt=0; |
714 |
|
|
|
715 |
|
|
if (traceCurrentRuleName == NULL) return; /* MR21 */ |
716 |
|
|
|
717 |
|
|
if (traceOptionValue <= 0) { |
718 |
|
|
doIt=0; |
719 |
|
|
} else if (guessing && traceGuessOptionValue <= 0) { |
720 |
|
|
doIt=0; |
721 |
|
|
} else { |
722 |
|
|
doIt=1; |
723 |
|
|
}; |
724 |
|
|
|
725 |
|
|
if (doIt) { |
726 |
|
|
/* MR23 */ printMessage(stderr,"guess failed in %s\n",traceCurrentRuleName); |
727 |
|
|
}; |
728 |
|
|
} |
729 |
|
|
|
730 |
|
|
/* traceOption: |
731 |
|
|
zero value turns off trace |
732 |
|
|
*/ |
733 |
|
|
|
734 |
|
|
void ANTLRParser::tracein(const ANTLRChar * rule) { |
735 |
|
|
|
736 |
|
|
int doIt=0; |
737 |
|
|
|
738 |
|
|
traceDepth++; |
739 |
|
|
traceCurrentRuleName=rule; |
740 |
|
|
|
741 |
|
|
if (traceOptionValue <= 0) { |
742 |
|
|
doIt=0; |
743 |
|
|
} else if (guessing && traceGuessOptionValue <= 0) { |
744 |
|
|
doIt=0; |
745 |
|
|
} else { |
746 |
|
|
doIt=1; |
747 |
|
|
}; |
748 |
|
|
|
749 |
|
|
if (doIt) { |
750 |
|
|
/* MR23 */ printMessage(stderr,"enter rule %s {\"%s\"} depth %d", |
751 |
|
|
rule, |
752 |
|
|
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(), |
753 |
|
|
traceDepth); |
754 |
|
|
if (guessing) /* MR23 */ printMessage(stderr," guessing"); |
755 |
|
|
/* MR23 */ printMessage(stderr,"\n"); |
756 |
|
|
}; |
757 |
|
|
return; |
758 |
|
|
} |
759 |
|
|
|
760 |
|
|
void ANTLRParser::traceout(const ANTLRChar * rule) { |
761 |
|
|
|
762 |
|
|
int doIt=0; |
763 |
|
|
|
764 |
|
|
traceDepth--; |
765 |
|
|
|
766 |
|
|
if (traceOptionValue <= 0) { |
767 |
|
|
doIt=0; |
768 |
|
|
} else if (guessing && traceGuessOptionValue <= 0) { |
769 |
|
|
doIt=0; |
770 |
|
|
} else { |
771 |
|
|
doIt=1; |
772 |
|
|
}; |
773 |
|
|
|
774 |
|
|
if (doIt) { |
775 |
|
|
/* MR23 */ printMessage(stderr,"exit rule %s {\"%s\"} depth %d", |
776 |
|
|
rule, |
777 |
|
|
LT(1)->getType() == eofToken ? "@" : LT(1)->getText(), |
778 |
|
|
traceDepth+1); |
779 |
|
|
if (guessing) /* MR23 */ printMessage(stderr," guessing"); |
780 |
|
|
/* MR23 */ printMessage(stderr,"\n"); |
781 |
|
|
}; |
782 |
|
|
} |
783 |
|
|
|
784 |
|
|
int ANTLRParser::traceOption(int delta) { |
785 |
|
|
|
786 |
|
|
int prevValue=traceOptionValue; |
787 |
|
|
|
788 |
|
|
traceOptionValue=traceOptionValue+delta; |
789 |
|
|
|
790 |
|
|
if (traceCurrentRuleName != NULL) { |
791 |
|
|
if (prevValue <= 0 && traceOptionValue > 0) { |
792 |
|
|
/* MR23 */ printMessage(stderr,"trace enabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth); |
793 |
|
|
}; |
794 |
|
|
if (prevValue > 0 && traceOptionValue <= 0) { |
795 |
|
|
/* MR23 */ printMessage(stderr,"trace disabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth); |
796 |
|
|
}; |
797 |
|
|
}; |
798 |
|
|
|
799 |
|
|
return prevValue; |
800 |
|
|
} |
801 |
|
|
|
802 |
|
|
int ANTLRParser::traceGuessOption(int delta) { |
803 |
|
|
|
804 |
|
|
int prevValue=traceGuessOptionValue; |
805 |
|
|
|
806 |
|
|
traceGuessOptionValue=traceGuessOptionValue+delta; |
807 |
|
|
|
808 |
|
|
if (traceCurrentRuleName != NULL) { |
809 |
|
|
if (prevValue <= 0 && traceGuessOptionValue > 0) { |
810 |
|
|
/* MR23 */ printMessage(stderr,"guess trace enabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth); |
811 |
|
|
}; |
812 |
|
|
if (prevValue > 0 && traceGuessOptionValue <= 0) { |
813 |
|
|
/* MR23 */ printMessage(stderr,"guess trace disabled in rule %s depth %d\n",traceCurrentRuleName,traceDepth); |
814 |
|
|
}; |
815 |
|
|
}; |
816 |
|
|
return prevValue; |
817 |
|
|
} |
818 |
|
|
|
819 |
|
|
// MR19 V.H. Simonis Defer Fetch feature |
820 |
|
|
|
821 |
|
|
/* MR19 V.H. Simonis -- Defer Fetch feature.
 * With ZZDEFER_FETCH: perform the token fetches that consume() postponed
 * (one per unit of stillToFetch, each doing what consume() does without the
 * feature) and clear the counter. Without ZZDEFER_FETCH: does nothing.
 */
void ANTLRParser::undeferFetch()
{
#ifdef ZZDEFER_FETCH
	for (int fetched = 0; fetched < stillToFetch; ++fetched) {
		NLA = inputTokens->getToken()->getType();
		dirty--;
		lap = (lap+1)&(LLk-1);
	}
	stillToFetch = 0;
#endif
}
838 |
|
|
|
839 |
|
|
/* Report whether this file was compiled with ZZDEFER_FETCH defined:
 * 1 if so, 0 otherwise.
 */
int ANTLRParser::isDeferFetchEnabled()
{
#ifdef ZZDEFER_FETCH
	const int enabled = 1;
#else
	const int enabled = 0;
#endif
	return enabled;
}
847 |
|
|
|
848 |
|
|
//MR23 |
849 |
|
|
int ANTLRParser::printMessage(FILE* pFile, const char* pFormat, ...) |
850 |
|
|
{ |
851 |
|
|
va_list marker; |
852 |
|
|
va_start( marker, pFormat ); |
853 |
|
|
int iRet = printMessageV(pFile, pFormat, marker); |
854 |
|
|
va_end( marker ); |
855 |
|
|
return iRet; |
856 |
|
|
} |
857 |
|
|
|
858 |
|
|
// MR23
// Write one formatted message to pFile with vfprintf() and return
// vfprintf()'s result.
int ANTLRParser::printMessageV(FILE* pFile, const char* pFormat, va_list arglist) // MR23
{
	const int written = vfprintf(pFile, pFormat, arglist);

	return written;
}
862 |
|
|
|
863 |
|
|
// MR23 Move semantic predicate error handling from macro to virtual function |
864 |
|
|
// |
865 |
|
|
// Called by the zzfailed_pred |
866 |
|
|
|
867 |
|
|
void ANTLRParser::failedSemanticPredicate(const char* predicate) |
868 |
|
|
{ |
869 |
|
|
printMessage(stdout,"line %d: semantic error; failed predicate: '%s'\n", |
870 |
|
|
LT(1)->getLine(), predicate); |
871 |
|
|
} |