1 |
/* ANTLRParser.h |
2 |
* |
3 |
* Define the generic ANTLRParser superclass, which is subclassed to |
4 |
* define an actual parser. |
5 |
* |
6 |
* Before entry into this file: ANTLRTokenType must be set. |
7 |
* |
8 |
* SOFTWARE RIGHTS |
9 |
* |
10 |
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool |
11 |
* Set (PCCTS) -- PCCTS is in the public domain. An individual or |
12 |
* company may do whatever they wish with source code distributed with |
13 |
* PCCTS or the code generated by PCCTS, including the incorporation of |
14 |
* PCCTS, or its output, into commerical software. |
15 |
* |
16 |
* We encourage users to develop software with PCCTS. However, we do ask |
17 |
* that credit is given to us for developing PCCTS. By "credit", |
18 |
* we mean that if you incorporate our source code into one of your |
19 |
* programs (commercial product, research project, or otherwise) that you |
20 |
* acknowledge this fact somewhere in the documentation, research report, |
21 |
* etc... If you like PCCTS and have developed a nice tool with the |
22 |
* output, please mention that you developed it using PCCTS. In |
23 |
* addition, we ask that this header remain intact in our source code. |
24 |
* As long as these guidelines are kept, we expect to continue enhancing |
25 |
* this system and expect to make other tools available as they are |
26 |
* completed. |
27 |
* |
28 |
* ANTLR 1.33 |
29 |
* Terence Parr |
30 |
* Parr Research Corporation |
31 |
* with Purdue University and AHPCRC, University of Minnesota |
32 |
* 1989-2000 |
33 |
*/ |
34 |
|
35 |
#ifndef APARSER_H_GATE |
36 |
#define APARSER_H_GATE |
37 |
|
38 |
#include "pcctscfg.h" |
39 |
|
40 |
#include "pccts_stdio.h" |
41 |
#include "pccts_setjmp.h" |
42 |
|
43 |
PCCTS_NAMESPACE_STD |
44 |
|
45 |
#include ATOKEN_H |
46 |
#include ATOKENBUFFER_H |
47 |
|
/*
 * Whenever ZZCAN_GUESS is defined, make sure ZZINF_LOOK is defined too
 * (unless the user already defined it).
 */
#ifdef ZZCAN_GUESS
#ifndef ZZINF_LOOK
#define ZZINF_LOOK
#endif
#endif


/*
 * NLA --> next LA.
 *
 * Expands to the token_type[] slot selected by lap masked with (LLk-1).
 * It relies on token_type, lap and LLk being in scope at the point of
 * use (they are members of ANTLRParser).  The mask behaves like
 * "lap % LLk" only when LLk is a power of two.
 */
#define NLA			(token_type[lap&(LLk-1)])/* --> next LA */
56 |
|
/* One word of an ANTLR bit set. */
typedef unsigned char SetWordType;

/* Define external bit set stuff (for SetWordType) */
#define EXT_WORDSIZE	(sizeof(char)*8)	/* bits per bit-set word */
#define EXT_LOGWORDSIZE	3			/* log2(EXT_WORDSIZE): shift used to divide by the word size */
62 |
|
63 |
/* s y n t a c t i c p r e d i c a t e s t u f f */ |
64 |
|
65 |
#ifndef zzUSER_GUESS_HOOK |
66 |
#define zzUSER_GUESS_HOOK(seqFrozen,zzrv) |
67 |
#endif |
68 |
|
69 |
#ifndef zzUSER_GUESS_DONE_HOOK |
70 |
#define zzUSER_GUESS_DONE_HOOK(seqFrozen) |
71 |
#endif |
72 |
|
73 |
/* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */ |
74 |
|
75 |
#define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen) |
76 |
#ifndef zzUSER_GUESS_FAIL_HOOK |
77 |
#define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq) |
78 |
#endif |
79 |
|
80 |
|
/*
 * Struct wrapper around a jmp_buf.  It holds the setjmp() context that
 * guess mode longjmp()s back to.
 * NOTE(review): presumably wrapped in a struct so the context can be
 * stored and copied like an ordinary member -- confirm.
 */
typedef struct _zzjmp_buf {
			jmp_buf state;
		} zzjmp_buf;
84 |
|
/* these need to be macros not member functions */

/*
 * zzGUESS_BLOCK    declares the locals the other guess macros use:
 *                  zzst (saved parser state), zzrv (setjmp result),
 *                  _marker (token-buffer mark) and zzGuessSeqFrozen.
 * zzNON_GUESS_MODE prefixes a statement that must run only when the
 *                  parser is not guessing.
 * zzGUESS_FAIL     calls guess_fail().
 */
#define zzGUESS_BLOCK		ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen;
#define zzNON_GUESS_MODE	if ( !guessing )
#define zzGUESS_FAIL		guess_fail();

/*	Note:  zzGUESS_DONE does not execute longjmp() */

/*
 * zzGUESS_DONE sets zzrv to 1, rewinds the token buffer to _marker, calls
 * guess_done(&zzst) and then expands the user "done" hook.
 *
 * zzGUESS saves the parser state, turns guessing on, takes the next guess
 * sequence number, marks the token buffer and calls setjmp().  When
 * setjmp() yields a non-zero value the zzGUESS_DONE clean-up runs.  The
 * expansion ends in an unbraced "if", so take care with what follows it.
 */
#define zzGUESS_DONE		{zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) }
#define zzGUESS				saveState(&zzst); \
							guessing = 1; \
							zzGuessSeqFrozen = ++zzGuessSeq; \
							_marker = inputTokens->mark(); \
							zzrv = setjmp(guess_start.state); \
							zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \
							if ( zzrv ) zzGUESS_DONE
100 |
|
/*
 * Rule-tracing helpers.
 *
 * zzTRACEdata declares the per-rule local that remembers the caller's
 * rule name.  zzTRACEIN(r) stores the current rule name in that local and
 * calls tracein(r); zzTRACEOUT(r) calls traceout(r) and then puts the
 * stored name back into traceCurrentRuleName.  zzTRACEIN and zzTRACEOUT
 * may be pre-defined by the user to override these defaults.
 */
#define zzTRACEdata     const ANTLRChar *zzTracePrevRuleName = NULL;

#ifndef zzTRACEIN
#define zzTRACEIN(r)	zzTracePrevRuleName=traceCurrentRuleName;tracein(r);
#endif
#ifndef zzTRACEOUT
#define zzTRACEOUT(r)	traceout(r);traceCurrentRuleName=zzTracePrevRuleName;
#endif
109 |
|
110 |
/* a n t l r p a r s e r d e f */ |
111 |
|
112 |
struct ANTLRParserState { |
113 |
/* class variables */ |
114 |
zzjmp_buf guess_start; |
115 |
int guessing; |
116 |
|
117 |
int inf_labase; |
118 |
int inf_last; |
119 |
|
120 |
int dirty; |
121 |
|
122 |
int traceOptionValue; // MR10 |
123 |
int traceGuessOptionValue; // MR10 |
124 |
const ANTLRChar *traceCurrentRuleName; // MR10 |
125 |
int traceDepth; // MR10 |
126 |
|
127 |
}; |
128 |
|
129 |
/* notes: |
130 |
* |
131 |
* multiple inheritance is a cool way to include what stuff is needed |
132 |
* in this structure (like guess stuff). however, i'm not convinced that |
133 |
* multiple inheritance works correctly on all platforms. not that |
134 |
* much space is used--just include all possibly useful members. |
135 |
* |
136 |
* the class should also be a template with arguments for the lookahead |
137 |
* depth and so on. that way, more than one parser can be defined (as |
138 |
* each will probably have different lookahead requirements). however, |
139 |
* am i sure that templates work? no, i'm not sure. |
140 |
* |
141 |
* no attributes are maintained and, hence, the 'asp' variable is not |
142 |
* needed. $i can still be referenced, but it refers to the token |
143 |
* associated with that rule element. question: where are the token's |
144 |
* stored if not on the software stack? in local variables created |
145 |
* and assigned to by antlr. |
146 |
*/ |
147 |
class ANTLRParser { |
148 |
protected: |
149 |
/* class variables */ |
150 |
static SetWordType bitmask[sizeof(SetWordType)*8]; |
151 |
static char eMsgBuffer[500]; |
152 |
|
153 |
protected: |
154 |
int LLk; // number of lookahead symbols (old LL_K) |
155 |
int demand_look; |
156 |
ANTLRTokenType eofToken; // when do I stop during resynch()s |
157 |
int bsetsize; // size of bitsets created by ANTLR in |
158 |
// units of SetWordType |
159 |
|
160 |
ANTLRTokenBuffer *inputTokens; //place to get input tokens |
161 |
|
162 |
zzjmp_buf guess_start; // where to jump back to upon failure |
163 |
int guessing; // if guessing (using (...)? predicate) |
164 |
|
165 |
// infinite lookahead stuff |
166 |
int can_use_inf_look; // set by subclass (generated by ANTLR) |
167 |
int inf_lap; |
168 |
int inf_labase; |
169 |
int inf_last; |
170 |
int *_inf_line; |
171 |
|
172 |
const ANTLRChar **token_tbl; // pointer to table of token type strings MR20 const |
173 |
|
174 |
int dirty; // used during demand lookahead |
175 |
|
176 |
ANTLRTokenType *token_type; // fast reference cache of token.getType() |
177 |
// ANTLRLightweightToken **token; // the token with all its attributes |
178 |
int lap; |
179 |
int labase; |
180 |
#ifdef ZZDEFER_FETCH |
181 |
int stillToFetch; // MR19 V.H. Simonis |
182 |
#endif |
183 |
|
184 |
private: |
185 |
void fill_inf_look(); |
186 |
|
187 |
protected: |
188 |
virtual void guess_fail() { // MR9 27-Sep-97 make virtual |
189 |
traceGuessFail(); // MR10 |
190 |
longjmp(guess_start.state, 1); } // MR9 |
191 |
virtual void guess_done(ANTLRParserState *st) { // MR9 27-Sep-97 make virtual |
192 |
restoreState(st); } // MR9 |
193 |
virtual int guess(ANTLRParserState *); // MR9 27-Sep-97 make virtual |
194 |
void look(int); |
195 |
int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *, |
196 |
_ANTLRTokenPtr *, SetWordType **); |
197 |
int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *, |
198 |
_ANTLRTokenPtr *, SetWordType **, |
199 |
SetWordType * tokclassErrset /* MR23 */); |
200 |
int _match_wsig(ANTLRTokenType); |
201 |
int _setmatch_wsig(SetWordType *); |
202 |
virtual void consume(); |
203 |
virtual void resynch(SetWordType *wd,SetWordType mask); // MR21 |
204 |
void prime_lookahead(); |
205 |
virtual void tracein(const ANTLRChar *r); // MR10 |
206 |
virtual void traceout(const ANTLRChar *r); // MR10 |
207 |
static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);} // x % EXT_WORDSIZE // MR9 |
208 |
static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;} // x / EXT_WORDSIZE // MR9 |
209 |
int set_deg(SetWordType *); |
210 |
int set_el(ANTLRTokenType, SetWordType *); |
211 |
virtual void edecode(SetWordType *); // MR1 |
212 |
virtual void FAIL(int k, ...); // MR1 |
213 |
int traceOptionValue; // MR10 |
214 |
int traceGuessOptionValue; // MR10 |
215 |
const ANTLRChar *traceCurrentRuleName; // MR10 |
216 |
int traceDepth; // MR10 |
217 |
void traceReset(); // MR10 |
218 |
virtual void traceGuessFail(); // MR10 |
219 |
virtual void traceGuessDone(const ANTLRParserState *); // MR10 |
220 |
int zzGuessSeq; // MR10 |
221 |
|
222 |
public: |
223 |
ANTLRParser(ANTLRTokenBuffer *, |
224 |
int k=1, |
225 |
int use_inf_look=0, |
226 |
int demand_look=0, |
227 |
int bsetsize=1); |
228 |
virtual ~ANTLRParser(); |
229 |
|
230 |
virtual void init(); |
231 |
|
232 |
ANTLRTokenType LA(int i) |
233 |
{ |
234 |
// |
235 |
// MR14 demand look will always be 0 for C++ mode |
236 |
// |
237 |
//// return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] : |
238 |
//// token_type[(lap+(i)-1)&(LLk-1)]; |
239 |
|
240 |
// MR19 V.H. Simonis Defer fetch feature |
241 |
|
242 |
#ifdef ZZDEFER_FETCH |
243 |
undeferFetch(); |
244 |
#endif |
245 |
return token_type[(lap+(i)-1)&(LLk-1)]; |
246 |
} |
247 |
_ANTLRTokenPtr LT(int i); |
248 |
|
249 |
void setEofToken(ANTLRTokenType t) { eofToken = t; } |
250 |
ANTLRTokenType getEofToken() const { return eofToken; } // MR14 |
251 |
|
252 |
void noGarbageCollectTokens() { inputTokens->noGarbageCollectTokens(); } |
253 |
void garbageCollectTokens() { inputTokens->garbageCollectTokens(); } |
254 |
|
255 |
virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, |
256 |
SetWordType *eset, ANTLRTokenType etok, int k); |
257 |
virtual void saveState(ANTLRParserState *); // MR9 27-Sep-97 make virtual |
258 |
virtual void restoreState(ANTLRParserState *); // MR9 27-Sep-97 make virtual |
259 |
|
260 |
virtual void panic(const char *msg); // MR20 const |
261 |
|
262 |
static char *eMsgd(char *,int); |
263 |
static char *eMsg(char *,char *); |
264 |
static char *eMsg2(char *,char *,char *); |
265 |
|
266 |
virtual int printMessage(FILE* pFile, const char* pFormat, ...); // MR23 |
267 |
virtual int printMessageV(FILE* pFile, const char* pFormat, va_list arglist); // MR23 |
268 |
|
269 |
void consumeUntil(SetWordType *st); |
270 |
void consumeUntilToken(int t); |
271 |
|
272 |
virtual int _setmatch_wdfltsig(SetWordType *tokensWanted, |
273 |
ANTLRTokenType tokenTypeOfSet, |
274 |
SetWordType *whatFollows); |
275 |
virtual int _match_wdfltsig(ANTLRTokenType tokenWanted, |
276 |
SetWordType *whatFollows); |
277 |
|
278 |
const ANTLRChar * parserTokenName(int tok); // MR1 |
279 |
|
280 |
int traceOptionValueDefault; // MR11 |
281 |
int traceOption(int delta); // MR11 |
282 |
int traceGuessOption(int delta); // MR11 |
283 |
|
284 |
// MR8 5-Aug-97 S.Bochnak@microtool.com.pl |
285 |
// MR8 Move resynch static local variable |
286 |
// MR8 to class instance |
287 |
|
288 |
int syntaxErrCount; // MR12 |
289 |
ANTLRTokenStream *getLexer() const { // MR12 |
290 |
return inputTokens ? inputTokens->getLexer() : 0; } // MR12 |
291 |
protected: // MR8 |
292 |
int resynchConsumed; // MR8 |
293 |
char *zzFAILtext; // workarea required by zzFAIL // MR9 |
294 |
void undeferFetch(); // MR19 V.H. Simonis |
295 |
int isDeferFetchEnabled(); // MR19 V.H. Simonis |
296 |
virtual void failedSemanticPredicate(const char* predicate); /* MR23 */ |
297 |
}; |
298 |
|
/*
 * Token-matching macros expanded inside generated rule functions.  They
 * rely on names in scope at the point of use: the zzRULE locals, a "fail"
 * label or the named signal handler label, _signal, and guessing.
 *
 * The _t argument is parenthesized inside the cast so that a compound
 * expression is converted to ANTLRTokenType as a whole.
 *
 * zzmatch / zzsetmatch jump to "fail" on a mismatch.  The _wsig variants
 * call zzGUESS_FAIL while guessing; otherwise they set _signal to
 * MismatchedToken and jump to the given handler.
 */
#define zzmatch(_t)							\
	if ( !_match((ANTLRTokenType)(_t), &zzMissText, &zzMissTok, \
				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;

#define zzmatch_wsig(_t,handler)						\
	if ( !_match_wsig((ANTLRTokenType)(_t)) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}

#define zzsetmatch(_ts,_tokclassErrset)							\
	if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet, _tokclassErrset) ) goto fail;

#define zzsetmatch_wsig(_ts, handler)				\
	if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}

/* For the dflt signal matchers, a FALSE indicates that an error occurred
 * just like the other matchers, but in this case, the routine has already
 * recovered--we do NOT want to consume another token.  However, when
 * the match was successful, we do want to consume hence _signal=0 so that
 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
 * preamble.
 */
#define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
	if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
		_signal = MismatchedToken;

#define zzmatch_wdfltsig(tokenWanted, whatFollows) \
	if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
326 |
|
327 |
|
//  MR1  10-Apr-97 	zzfailed_pred() macro does not backtrack in guess mode.
//  MR1	   			Identification and correction due to J. Lilley
//
//  MR23            Call virtual method to report error.
//  MR23            Provide more control over failed predicate action
//                  without any need for user to worry about guessing internals.

// zzfailed_pred: while guessing, fail the guess (zzGUESS_FAIL); otherwise
// hand over to zzfailed_pred_action.  Either macro may be pre-defined by
// the user to replace the default below.

#ifndef zzfailed_pred
#define zzfailed_pred(_p,_hasuseraction,_useraction) \
  if (guessing) { \
    zzGUESS_FAIL; \
  } else { \
    zzfailed_pred_action(_p,_hasuseraction,_useraction) \
  }
#endif

//  MR23            Provide more control over failed predicate action
//                  without any need for user to worry about guessing internals.
//                  _hasuseraction == 0 => no user specified error action
//                  _hasuseraction == 1 => user specified error action

// zzfailed_pred_action: run _useraction when _hasuseraction is non-zero,
// otherwise call failedSemanticPredicate(_p).

#ifndef zzfailed_pred_action
#define zzfailed_pred_action(_p,_hasuseraction,_useraction) \
    if (_hasuseraction) { _useraction } else { failedSemanticPredicate(_p); }
#endif
353 |
|
/*
 * zzRULE declares the per-rule error-reporting locals that the zzmatch /
 * zzsetmatch macros take the address of: zzMissSet, zzMissTok, zzBadTok,
 * zzBadText, zzErrk, zzpf and zzMissText.  It also expands zzTRACEdata.
 * Pointers start out NULL, the text pointers point at "", zzErrk starts
 * at 1 and zzpf at 0.
 */
#define zzRULE \
		SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0;	\
		_ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)"";	\
		int zzErrk=1,zzpf=0; \
		zzTRACEdata \
		ANTLRChar *zzMissText=(ANTLRChar *)"";
360 |
|
361 |
#endif |
362 |
|
363 |
/* S t a n d a r d E x c e p t i o n S i g n a l s */ |
364 |
|
365 |
#define NoSignal 0 |
366 |
#define MismatchedToken 1 |
367 |
#define NoViableAlt 2 |
368 |
#define NoSemViableAlt 3 |
369 |
|
370 |
/* MR7 Allow more control over signalling */ |
371 |
/* by adding "Unwind" and "SetSignal" */ |
372 |
|
373 |
#define Unwind 4 |
374 |
#define setSignal(newValue) *_retsignal=_signal=(newValue) |
375 |
#define suppressSignal *_retsignal=_signal=0 |
376 |
#define exportSignal *_retsignal=_signal |