1 |
/* ANTLRTokenBuffer.cpp |
2 |
* |
3 |
* SOFTWARE RIGHTS |
4 |
* |
5 |
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool |
6 |
* Set (PCCTS) -- PCCTS is in the public domain. An individual or |
7 |
* company may do whatever they wish with source code distributed with |
8 |
* PCCTS or the code generated by PCCTS, including the incorporation of |
9 |
* PCCTS, or its output, into commerical software. |
10 |
* |
11 |
* We encourage users to develop software with PCCTS. However, we do ask |
12 |
* that credit is given to us for developing PCCTS. By "credit", |
13 |
* we mean that if you incorporate our source code into one of your |
14 |
* programs (commercial product, research project, or otherwise) that you |
15 |
* acknowledge this fact somewhere in the documentation, research report, |
16 |
* etc... If you like PCCTS and have developed a nice tool with the |
17 |
* output, please mention that you developed it using PCCTS. In |
18 |
* addition, we ask that this header remain intact in our source code. |
19 |
* As long as these guidelines are kept, we expect to continue enhancing |
20 |
* this system and expect to make other tools available as they are |
21 |
* completed. |
22 |
* |
23 |
* ANTLR 1.33 |
24 |
* Terence Parr |
25 |
* Parr Research Corporation |
26 |
* with Purdue University and AHPCRC, University of Minnesota |
27 |
* 1989-2000 |
28 |
*/ |
29 |
|
30 |
typedef int ANTLRTokenType; // fool AToken.h into compiling |
31 |
|
32 |
class ANTLRParser; /* MR1 */ |
33 |
|
34 |
#define ANTLR_SUPPORT_CODE |
35 |
|
36 |
#include "pcctscfg.h" |
37 |
|
38 |
#include ATOKENBUFFER_H |
39 |
#include APARSER_H // MR23 |
40 |
|
41 |
// Element type of the token buffer: a plain pointer to an abstract token.
typedef ANTLRAbstractToken* _ANTLRTokenPtr;

#if defined(DBG_TBUF) || defined(DBG_TBUF_MARK_REW)
// Debug-only per-position counter: incremented in mark(), decremented in rewind().
static unsigned char test[1000];
#endif

#ifdef DBG_REFCOUNTTOKEN
// Definitions of the ANTLRRefCountToken debug counters; both start at zero.
int ANTLRRefCountToken::ctor = 0;	/* MR23 */
int ANTLRRefCountToken::dtor = 0;	/* MR23 */
#endif
51 |
|
52 |
/*
 * Build a token buffer on top of a token stream.
 *
 *   _input             - stored in 'input'
 *   _k                 - stored in 'k'; makeRoom() preserves the newest k-1
 *                        buffered token pointers
 *   _chunk_size_formal - initial capacity of the buffer (in token pointers)
 *                        and the increment used by extendBuffer()
 *
 * One extra slot is allocated in front of the usable area so that 'tp-1'
 * (the initial value of 'last') is always a valid pointer.
 */
ANTLRTokenBuffer::
ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _chunk_size_formal)	/* MR14 */
{
	this->input = _input;
	this->k = _k;

	// panic() decides what to do by testing 'parser', so it has to hold a
	// defined value *before* the allocation below can fail.
	parser = NULL;				// MR5 - uninitialized reference

	buffer_size = chunk_size = _chunk_size_formal;
	buffer = (_ANTLRTokenPtr *)
			 calloc(chunk_size+1, sizeof(_ANTLRTokenPtr));
	if ( buffer == NULL ) {
		panic("cannot alloc token buffer");
	}
	buffer++;				// leave the first elem empty so tp-1 is valid ptr

	tp = &buffer[0];
	last = tp-1;
	next = &buffer[0];
	num_markers = 0;
	end_of_buffer = &buffer[buffer_size-1];
	threshold = &buffer[(int)(buffer_size/2)];	// MR23 - Used to be 1.0/2.0 !
	_deleteTokens = 1;			// assume we delete tokens
}
74 |
|
75 |
// Intentionally empty; called at the top of the destructor.
// NOTE(review): presumably a convenient breakpoint target -- confirm before removing.
static void f()
{
}
76 |
/*
 * Destructor.  When _deleteTokens is set, releases this buffer's reference
 * on every token pointer held in buffer[0] .. *last, deleting each token
 * whose reference count has reached zero.  The pointer array itself is then
 * freed.
 */
ANTLRTokenBuffer::
~ANTLRTokenBuffer()
{
	f();

	if ( _deleteTokens )
	{
		for (_ANTLRTokenPtr *slot = buffer; slot <= last; slot++)
		{
			_ANTLRTokenPtr tok = *slot;
			tok->deref();
#ifdef DBG_REFCOUNTTOKEN
/* MR23 */	printMessage(stderr, "##########dtor: deleting token '%s' (ref %d)\n",
					((ANTLRCommonToken *)tok)->getText(), tok->nref());
#endif
			if ( tok->nref()==0 )
			{
				delete tok;
			}
		}
	}

	// The allocation starts one element before 'buffer' (see the constructor).
	if ( buffer!=NULL )
	{
		free((char *)(buffer-1));
	}
}
101 |
|
102 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
103 |
#include "pccts_stdio.h" |
104 |
PCCTS_NAMESPACE_STD |
105 |
#endif |
106 |
|
107 |
/*
 * Return the next token pointer and advance 'tp'.
 *
 * If buffered lookahead remains (tp <= last) it is served from the buffer.
 * Otherwise a new token is fetched with getANTLRToken(), ref()'d, appended
 * at 'next' and returned.  Before appending:
 *   - with no markers set, the buffer is compacted via makeRoom() once
 *     'next' has passed 'threshold';
 *   - with markers set, the buffer is grown via extendBuffer() once
 *     'next' has passed 'end_of_buffer'.
 */
_ANTLRTokenPtr ANTLRTokenBuffer::
getToken()
{
	if ( tp <= last )	// is there any buffered lookahead still to be read?
	{
		return *tp++;	// read buffered lookahead
	}

	// out of buffered lookahead, get some more "real"
	// input from getANTLRToken()
	if ( num_markers==0 )
	{
		if ( next > threshold )
		{
#ifdef DBG_TBUF
			// pointer differences are ptrdiff_t; %d needs an int
/* MR23 */	printMessage(stderr,"getToken: next > threshold (high water is %d)\n", (int)(threshold-buffer));
#endif
			makeRoom();
		}
	}
	else {
		if ( next > end_of_buffer )
		{
#ifdef DBG_TBUF
/* MR23 */	printMessage(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size);
#endif
			extendBuffer();
		}
	}

	*next = getANTLRToken();
	(*next)->ref();		// say we have a copy of this pointer in buffer
	last = next;
	next++;
	tp = last;
	return *tp++;
}
142 |
|
143 |
void ANTLRTokenBuffer:: |
144 |
rewind(int pos) |
145 |
{ |
146 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
147 |
/* MR23 */ printMessage(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]); |
148 |
test[pos]--; |
149 |
#endif |
150 |
tp = &buffer[pos]; |
151 |
num_markers--; |
152 |
} |
153 |
|
154 |
/* |
155 |
* This function is used to specify that the token pointers read |
156 |
* by the ANTLRTokenBuffer should be buffered up (to be reused later). |
157 |
*/ |
158 |
int ANTLRTokenBuffer:: |
159 |
mark() |
160 |
{ |
161 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
162 |
test[tp-buffer]++; |
163 |
/* MR23 */ printMessage(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]); |
164 |
#endif |
165 |
num_markers++; |
166 |
return tp - buffer; |
167 |
} |
168 |
|
169 |
/* |
170 |
* returns the token pointer n positions ahead. |
171 |
* This implies that bufferedToken(1) gets the NEXT symbol of lookahead. |
172 |
* This is used in conjunction with the ANTLRParser lookahead buffer. |
173 |
* |
174 |
* No markers are set or anything. A bunch of input is buffered--that's all. |
175 |
* The tp pointer is left alone as the lookahead has not been advanced |
176 |
* with getToken(). The next call to getToken() will find a token |
177 |
* in the buffer and won't have to call getANTLRToken(). |
178 |
* |
179 |
* If this is called before a consume() is done, how_many_more_i_need is |
180 |
* set to 'n'. |
181 |
*/ |
182 |
_ANTLRTokenPtr ANTLRTokenBuffer:: |
183 |
bufferedToken(int n) |
184 |
{ |
185 |
// int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1; |
186 |
int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1; |
187 |
// Make sure that at least n tokens are available in the buffer |
188 |
#ifdef DBG_TBUF |
189 |
/* MR23 */ printMessage(stderr, "bufferedToken(%d)\n", n); |
190 |
#endif |
191 |
for (int i=1; i<=how_many_more_i_need; i++) |
192 |
{ |
193 |
if ( next > end_of_buffer ) // buffer overflow? |
194 |
{ |
195 |
extendBuffer(); |
196 |
} |
197 |
*next = getANTLRToken(); |
198 |
(*next)->ref(); // say we have a copy of this pointer in buffer |
199 |
last = next; |
200 |
next++; |
201 |
} |
202 |
return tp[n - 1]; |
203 |
} |
204 |
|
205 |
/* If no markers are set, the none of the input needs to be saved (except |
206 |
* for the lookahead Token pointers). We save only k-1 token pointers as |
207 |
* we are guaranteed to do a getANTLRToken() right after this because otherwise |
208 |
* we wouldn't have needed to extend the buffer. |
209 |
* |
210 |
* If there are markers in the buffer, we need to save things and so |
211 |
* extendBuffer() is called. |
212 |
*/ |
213 |
void ANTLRTokenBuffer:: |
214 |
makeRoom() |
215 |
{ |
216 |
#ifdef DBG_TBUF |
217 |
/* MR23 */ printMessage(stderr, "in makeRoom.................\n"); |
218 |
/* MR23 */ printMessage(stderr, "num_markers==%d\n", num_markers); |
219 |
#endif |
220 |
/* |
221 |
if ( num_markers == 0 ) |
222 |
{ |
223 |
*/ |
224 |
#ifdef DBG_TBUF |
225 |
/* MR23 */ printMessage(stderr, "moving lookahead and resetting next\n"); |
226 |
|
227 |
_ANTLRTokenPtr *r; |
228 |
/* MR23 */ printMessage(stderr, "tbuf = ["); |
229 |
for (r=buffer; r<=last; r++) |
230 |
{ |
231 |
if ( *r==NULL ) /* MR23 */ printMessage(stderr, " xxx"); |
232 |
else /* MR23 */ printMessage(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText()); |
233 |
} |
234 |
/* MR23 */ printMessage(stderr, " ]\n"); |
235 |
|
236 |
/* MR23 */ printMessage(stderr, |
237 |
"before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer); |
238 |
#endif |
239 |
|
240 |
// Delete all tokens from 0..last-(k-1) inclusive |
241 |
if ( _deleteTokens ) |
242 |
{ |
243 |
_ANTLRTokenPtr *z; |
244 |
for (z=buffer; z<=last-(k-1); z++) |
245 |
{ |
246 |
(*z)->deref(); |
247 |
// z->deref(); |
248 |
#ifdef DBG_REFCOUNTTOKEN |
249 |
/* MR23 */ printMessage(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n", |
250 |
((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); |
251 |
#endif |
252 |
if ( (*z)->nref()==0 ) |
253 |
{ |
254 |
delete (*z); |
255 |
} |
256 |
} |
257 |
} |
258 |
|
259 |
// reset the buffer to initial conditions, but move k-1 symbols |
260 |
// to the beginning of buffer and put new input symbol at k |
261 |
_ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1; |
262 |
// ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1; |
263 |
#ifdef DBG_TBUF |
264 |
/* MR23 */ printMessage(stderr, "lookahead buffer = ["); |
265 |
#endif |
266 |
for (int i=1; i<=(k-1); i++) |
267 |
{ |
268 |
*p++ = *q++; |
269 |
#ifdef DBG_TBUF |
270 |
/* MR23 */ printMessage(stderr, |
271 |
" '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText()); |
272 |
#endif |
273 |
} |
274 |
#ifdef DBG_TBUF |
275 |
/* MR23 */ printMessage(stderr, " ]\n"); |
276 |
#endif |
277 |
next = &buffer[k-1]; |
278 |
tp = &buffer[k-1]; // tp points to what will be filled in next |
279 |
last = tp-1; |
280 |
#ifdef DBG_TBUF |
281 |
/* MR23 */ printMessage(stderr, |
282 |
"after: tp=%d, last=%d, next=%d\n", |
283 |
tp-buffer, last-buffer, next-buffer); |
284 |
#endif |
285 |
/* |
286 |
} |
287 |
else { |
288 |
extendBuffer(); |
289 |
} |
290 |
*/ |
291 |
} |
292 |
|
293 |
/* This function extends 'buffer' by chunk_size and returns with all |
294 |
* pointers at the same relative positions in the buffer (the buffer base |
295 |
* address could have changed in realloc()) except that 'next' comes |
296 |
* back set to where the next token should be stored. All other pointers |
297 |
* are untouched. |
298 |
*/ |
299 |
void |
300 |
ANTLRTokenBuffer:: |
301 |
extendBuffer() |
302 |
{ |
303 |
int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer; |
304 |
#ifdef DBG_TBUF |
305 |
/* MR23 */ printMessage(stderr, "extending physical buffer\n"); |
306 |
#endif |
307 |
buffer_size += chunk_size; |
308 |
buffer = (_ANTLRTokenPtr *) |
309 |
realloc((char *)(buffer-1), |
310 |
(buffer_size+1)*sizeof(_ANTLRTokenPtr )); |
311 |
if ( buffer == NULL ) { |
312 |
panic("cannot alloc token buffer"); |
313 |
} |
314 |
buffer++; // leave the first elem empty so tp-1 is valid ptr |
315 |
|
316 |
tp = buffer + save_tp; // put the pointers back to same relative position |
317 |
last = buffer + save_last; |
318 |
next = buffer + save_next; |
319 |
end_of_buffer = &buffer[buffer_size-1]; |
320 |
threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; |
321 |
|
322 |
/* |
323 |
// zero out new token ptrs so we'll know if something to delete in buffer |
324 |
ANTLRAbstractToken **p = end_of_buffer-chunk_size+1; |
325 |
for (; p<=end_of_buffer; p++) *p = NULL; |
326 |
*/ |
327 |
} |
328 |
|
329 |
/*
 * Attach parser 'p' to this buffer and forward it to the underlying
 * input stream.  Returns the parser that was attached before the call.
 */
ANTLRParser * ANTLRTokenBuffer::					// MR1
setParser(ANTLRParser *p)							// MR1
{
	ANTLRParser * const previous = parser;			// MR1
	parser = p;										// MR1
	input->setParser(p);							// MR1
	return previous;								// MR1
}													// MR1
336 |
// MR1 |
337 |
/* Return the parser currently attached to this buffer (NULL if none was set). */
ANTLRParser * ANTLRTokenBuffer::					// MR1
getParser()											// MR1
{
	return this->parser;							// MR1
}													// MR1
341 |
|
342 |
void ANTLRTokenBuffer::panic(const char *msg) // MR23 |
343 |
{ |
344 |
if (parser) //MR23 |
345 |
parser->panic(msg); //MR23 |
346 |
else //MR23 |
347 |
exit(PCCTS_EXIT_FAILURE); |
348 |
} |
349 |
|
350 |
//MR23 |
351 |
int ANTLRTokenBuffer::printMessage(FILE* pFile, const char* pFormat, ...) |
352 |
{ |
353 |
va_list marker; |
354 |
va_start( marker, pFormat ); |
355 |
|
356 |
int iRet = 0; |
357 |
if (parser) |
358 |
parser->printMessageV(pFile, pFormat, marker); |
359 |
else |
360 |
iRet = vfprintf(pFile, pFormat, marker); |
361 |
|
362 |
va_end( marker ); |
363 |
return iRet; |
364 |
} |
365 |
|
366 |
/* to avoid having to link in another file just for the smart token ptr |
367 |
* stuff, we include it here. Ugh. |
368 |
* |
369 |
* MR23 This causes nothing but problems for IDEs. |
370 |
* Change from .cpp to .h |
371 |
* |
372 |
*/ |
373 |
|
374 |
#include ATOKPTR_IMPL_H |