| 1 |
/* ANTLRTokenBuffer.cpp |
| 2 |
* |
| 3 |
* SOFTWARE RIGHTS |
| 4 |
* |
| 5 |
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool |
| 6 |
* Set (PCCTS) -- PCCTS is in the public domain. An individual or |
| 7 |
* company may do whatever they wish with source code distributed with |
| 8 |
* PCCTS or the code generated by PCCTS, including the incorporation of |
| 9 |
* PCCTS, or its output, into commerical software. |
| 10 |
* |
| 11 |
* We encourage users to develop software with PCCTS. However, we do ask |
| 12 |
* that credit is given to us for developing PCCTS. By "credit", |
| 13 |
* we mean that if you incorporate our source code into one of your |
| 14 |
* programs (commercial product, research project, or otherwise) that you |
| 15 |
* acknowledge this fact somewhere in the documentation, research report, |
| 16 |
* etc... If you like PCCTS and have developed a nice tool with the |
| 17 |
* output, please mention that you developed it using PCCTS. In |
| 18 |
* addition, we ask that this header remain intact in our source code. |
| 19 |
* As long as these guidelines are kept, we expect to continue enhancing |
| 20 |
* this system and expect to make other tools available as they are |
| 21 |
* completed. |
| 22 |
* |
| 23 |
* ANTLR 1.33 |
| 24 |
* Terence Parr |
| 25 |
* Parr Research Corporation |
| 26 |
* with Purdue University and AHPCRC, University of Minnesota |
| 27 |
* 1989-2000 |
| 28 |
*/ |
| 29 |
|
| 30 |
typedef int ANTLRTokenType; // fool AToken.h into compiling |
| 31 |
|
| 32 |
class ANTLRParser; /* MR1 */ |
| 33 |
|
| 34 |
#define ANTLR_SUPPORT_CODE |
| 35 |
|
| 36 |
#include "pcctscfg.h" |
| 37 |
|
| 38 |
#include ATOKENBUFFER_H |
| 39 |
#include APARSER_H // MR23 |
| 40 |
|
| 41 |
typedef ANTLRAbstractToken *_ANTLRTokenPtr; |
| 42 |
|
| 43 |
// Debug-only bookkeeping: one small counter per buffer position, bumped by
// mark() and decremented by rewind() when either debug macro is defined.
#if defined(DBG_TBUF) || defined(DBG_TBUF_MARK_REW)
static unsigned char test[1000];
#endif

// Definitions of the ANTLRRefCountToken static counters; compiled only when
// DBG_REFCOUNTTOKEN is defined.
#if defined(DBG_REFCOUNTTOKEN)
int ANTLRRefCountToken::ctor = 0;   /* MR23 */
int ANTLRRefCountToken::dtor = 0;   /* MR23 */
#endif
| 51 |
|
| 52 |
/*
 * Build a token buffer that reads from '_input'.
 *
 *   _input              token stream, stored in 'input'
 *   _k                  stored in 'k'; makeRoom() keeps k-1 token pointers
 *   _chunk_size_formal  initial number of usable slots, and the amount by
 *                       which extendBuffer() grows the buffer
 *
 * One extra slot is allocated in front of buffer[0] so that 'last'
 * (initially tp-1) is a valid pointer while the buffer is still empty.
 */
ANTLRTokenBuffer::
ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _chunk_size_formal) /* MR14 */
{
    // MR5 - panic() reads 'parser', so it has to be initialized before the
    // allocation check below can call panic(), not at the end of the ctor.
    parser = NULL;

    this->input = _input;
    this->k = _k;
    buffer_size = chunk_size = _chunk_size_formal;
    buffer = (_ANTLRTokenPtr *)
             calloc(chunk_size+1,sizeof(_ANTLRTokenPtr ));
    if ( buffer == NULL ) {
        panic("cannot alloc token buffer");
    }
    buffer++;               // leave the first elem empty so tp-1 is valid ptr

    tp = &buffer[0];
    last = tp-1;
    next = &buffer[0];
    num_markers = 0;
    end_of_buffer = &buffer[buffer_size-1];
    threshold = &buffer[(int)(buffer_size/2)]; // MR23 - Used to be 1.0/2.0 !
    _deleteTokens = 1;      // assume we delete tokens
}
| 74 |
|
| 75 |
// Empty helper called at the top of ~ANTLRTokenBuffer(); it does nothing.
static void f()
{
}
| 76 |
/*
 * Release the buffer.  When _deleteTokens is set, every token pointer in
 * buffer[0]..last is deref()'d and deleted once its nref() reaches 0.
 * The storage itself is freed from buffer-1 because the constructor and
 * extendBuffer() advance 'buffer' one slot past the allocated block.
 */
ANTLRTokenBuffer::
~ANTLRTokenBuffer()
{
    f();
    if ( _deleteTokens )
    {
        _ANTLRTokenPtr *slot = buffer;
        while ( slot <= last )
        {
            _ANTLRTokenPtr tok = *slot;
            tok->deref();
#ifdef DBG_REFCOUNTTOKEN
/* MR23 */  printMessage(stderr, "##########dtor: deleting token '%s' (ref %d)\n",
                         ((ANTLRCommonToken *)tok)->getText(), tok->nref());
#endif
            if ( tok->nref()==0 )
            {
                delete tok;
            }
            slot++;
        }
    }

    if ( buffer!=NULL ) free((char *)(buffer-1));
}
| 101 |
|
| 102 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
| 103 |
#include "pccts_stdio.h" |
| 104 |
PCCTS_NAMESPACE_STD |
| 105 |
#endif |
| 106 |
|
| 107 |
/*
 * Return the next token and advance tp.
 *
 * If tp still points at buffered tokens (tp <= last) the buffered pointer is
 * returned.  Otherwise a new token is fetched with getANTLRToken(), stored at
 * 'next', ref()'d and returned.  Before fetching:
 *   - with no markers set, makeRoom() is called once 'next' has passed
 *     'threshold';
 *   - with markers set, extendBuffer() is called once 'next' has passed
 *     'end_of_buffer'.
 */
_ANTLRTokenPtr ANTLRTokenBuffer::
getToken()
{
    if ( tp <= last )   // is there any buffered lookahead still to be read?
    {
        return *tp++;   // read buffered lookahead
    }
    // out of buffered lookahead, get some more "real"
    // input from getANTLRToken()
    if ( num_markers==0 )
    {
        if( next > threshold )
        {
#ifdef DBG_TBUF
            // pointer differences are ptrdiff_t; cast so they match %d
/* MR23 */  printMessage(stderr,"getToken: next > threshold (high water is %d)\n", (int)(threshold-buffer));
#endif
            makeRoom();
        }
    }
    else {
        if ( next > end_of_buffer )
        {
#ifdef DBG_TBUF
/* MR23 */  printMessage(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size);
#endif
            extendBuffer();
        }
    }
    *next = getANTLRToken();
    (*next)->ref();     // say we have a copy of this pointer in buffer
    last = next;
    next++;
    tp = last;
    return *tp++;
}
| 142 |
|
| 143 |
void ANTLRTokenBuffer:: |
| 144 |
rewind(int pos) |
| 145 |
{ |
| 146 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
| 147 |
/* MR23 */ printMessage(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]); |
| 148 |
test[pos]--; |
| 149 |
#endif |
| 150 |
tp = &buffer[pos]; |
| 151 |
num_markers--; |
| 152 |
} |
| 153 |
|
| 154 |
/* |
| 155 |
* This function is used to specify that the token pointers read |
| 156 |
* by the ANTLRTokenBuffer should be buffered up (to be reused later). |
| 157 |
*/ |
| 158 |
int ANTLRTokenBuffer:: |
| 159 |
mark() |
| 160 |
{ |
| 161 |
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) |
| 162 |
test[tp-buffer]++; |
| 163 |
/* MR23 */ printMessage(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]); |
| 164 |
#endif |
| 165 |
num_markers++; |
| 166 |
return tp - buffer; |
| 167 |
} |
| 168 |
|
| 169 |
/* |
| 170 |
* returns the token pointer n positions ahead. |
| 171 |
* This implies that bufferedToken(1) gets the NEXT symbol of lookahead. |
| 172 |
* This is used in conjunction with the ANTLRParser lookahead buffer. |
| 173 |
* |
| 174 |
* No markers are set or anything. A bunch of input is buffered--that's all. |
| 175 |
* The tp pointer is left alone as the lookahead has not been advanced |
| 176 |
* with getToken(). The next call to getToken() will find a token |
| 177 |
* in the buffer and won't have to call getANTLRToken(). |
| 178 |
* |
| 179 |
* If this is called before a consume() is done, how_many_more_i_need is |
| 180 |
* set to 'n'. |
| 181 |
*/ |
| 182 |
_ANTLRTokenPtr ANTLRTokenBuffer:: |
| 183 |
bufferedToken(int n) |
| 184 |
{ |
| 185 |
// int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1; |
| 186 |
int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1; |
| 187 |
// Make sure that at least n tokens are available in the buffer |
| 188 |
#ifdef DBG_TBUF |
| 189 |
/* MR23 */ printMessage(stderr, "bufferedToken(%d)\n", n); |
| 190 |
#endif |
| 191 |
for (int i=1; i<=how_many_more_i_need; i++) |
| 192 |
{ |
| 193 |
if ( next > end_of_buffer ) // buffer overflow? |
| 194 |
{ |
| 195 |
extendBuffer(); |
| 196 |
} |
| 197 |
*next = getANTLRToken(); |
| 198 |
(*next)->ref(); // say we have a copy of this pointer in buffer |
| 199 |
last = next; |
| 200 |
next++; |
| 201 |
} |
| 202 |
return tp[n - 1]; |
| 203 |
} |
| 204 |
|
| 205 |
/* If no markers are set, the none of the input needs to be saved (except |
| 206 |
* for the lookahead Token pointers). We save only k-1 token pointers as |
| 207 |
* we are guaranteed to do a getANTLRToken() right after this because otherwise |
| 208 |
* we wouldn't have needed to extend the buffer. |
| 209 |
* |
| 210 |
* If there are markers in the buffer, we need to save things and so |
| 211 |
* extendBuffer() is called. |
| 212 |
*/ |
| 213 |
void ANTLRTokenBuffer:: |
| 214 |
makeRoom() |
| 215 |
{ |
| 216 |
#ifdef DBG_TBUF |
| 217 |
/* MR23 */ printMessage(stderr, "in makeRoom.................\n"); |
| 218 |
/* MR23 */ printMessage(stderr, "num_markers==%d\n", num_markers); |
| 219 |
#endif |
| 220 |
/* |
| 221 |
if ( num_markers == 0 ) |
| 222 |
{ |
| 223 |
*/ |
| 224 |
#ifdef DBG_TBUF |
| 225 |
/* MR23 */ printMessage(stderr, "moving lookahead and resetting next\n"); |
| 226 |
|
| 227 |
_ANTLRTokenPtr *r; |
| 228 |
/* MR23 */ printMessage(stderr, "tbuf = ["); |
| 229 |
for (r=buffer; r<=last; r++) |
| 230 |
{ |
| 231 |
if ( *r==NULL ) /* MR23 */ printMessage(stderr, " xxx"); |
| 232 |
else /* MR23 */ printMessage(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText()); |
| 233 |
} |
| 234 |
/* MR23 */ printMessage(stderr, " ]\n"); |
| 235 |
|
| 236 |
/* MR23 */ printMessage(stderr, |
| 237 |
"before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer); |
| 238 |
#endif |
| 239 |
|
| 240 |
// Delete all tokens from 0..last-(k-1) inclusive |
| 241 |
if ( _deleteTokens ) |
| 242 |
{ |
| 243 |
_ANTLRTokenPtr *z; |
| 244 |
for (z=buffer; z<=last-(k-1); z++) |
| 245 |
{ |
| 246 |
(*z)->deref(); |
| 247 |
// z->deref(); |
| 248 |
#ifdef DBG_REFCOUNTTOKEN |
| 249 |
/* MR23 */ printMessage(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n", |
| 250 |
((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); |
| 251 |
#endif |
| 252 |
if ( (*z)->nref()==0 ) |
| 253 |
{ |
| 254 |
delete (*z); |
| 255 |
} |
| 256 |
} |
| 257 |
} |
| 258 |
|
| 259 |
// reset the buffer to initial conditions, but move k-1 symbols |
| 260 |
// to the beginning of buffer and put new input symbol at k |
| 261 |
_ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1; |
| 262 |
// ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1; |
| 263 |
#ifdef DBG_TBUF |
| 264 |
/* MR23 */ printMessage(stderr, "lookahead buffer = ["); |
| 265 |
#endif |
| 266 |
for (int i=1; i<=(k-1); i++) |
| 267 |
{ |
| 268 |
*p++ = *q++; |
| 269 |
#ifdef DBG_TBUF |
| 270 |
/* MR23 */ printMessage(stderr, |
| 271 |
" '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText()); |
| 272 |
#endif |
| 273 |
} |
| 274 |
#ifdef DBG_TBUF |
| 275 |
/* MR23 */ printMessage(stderr, " ]\n"); |
| 276 |
#endif |
| 277 |
next = &buffer[k-1]; |
| 278 |
tp = &buffer[k-1]; // tp points to what will be filled in next |
| 279 |
last = tp-1; |
| 280 |
#ifdef DBG_TBUF |
| 281 |
/* MR23 */ printMessage(stderr, |
| 282 |
"after: tp=%d, last=%d, next=%d\n", |
| 283 |
tp-buffer, last-buffer, next-buffer); |
| 284 |
#endif |
| 285 |
/* |
| 286 |
} |
| 287 |
else { |
| 288 |
extendBuffer(); |
| 289 |
} |
| 290 |
*/ |
| 291 |
} |
| 292 |
|
| 293 |
/* This function extends 'buffer' by chunk_size and returns with all |
| 294 |
* pointers at the same relative positions in the buffer (the buffer base |
| 295 |
* address could have changed in realloc()) except that 'next' comes |
| 296 |
* back set to where the next token should be stored. All other pointers |
| 297 |
* are untouched. |
| 298 |
*/ |
| 299 |
void |
| 300 |
ANTLRTokenBuffer:: |
| 301 |
extendBuffer() |
| 302 |
{ |
| 303 |
int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer; |
| 304 |
#ifdef DBG_TBUF |
| 305 |
/* MR23 */ printMessage(stderr, "extending physical buffer\n"); |
| 306 |
#endif |
| 307 |
buffer_size += chunk_size; |
| 308 |
buffer = (_ANTLRTokenPtr *) |
| 309 |
realloc((char *)(buffer-1), |
| 310 |
(buffer_size+1)*sizeof(_ANTLRTokenPtr )); |
| 311 |
if ( buffer == NULL ) { |
| 312 |
panic("cannot alloc token buffer"); |
| 313 |
} |
| 314 |
buffer++; // leave the first elem empty so tp-1 is valid ptr |
| 315 |
|
| 316 |
tp = buffer + save_tp; // put the pointers back to same relative position |
| 317 |
last = buffer + save_last; |
| 318 |
next = buffer + save_next; |
| 319 |
end_of_buffer = &buffer[buffer_size-1]; |
| 320 |
threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; |
| 321 |
|
| 322 |
/* |
| 323 |
// zero out new token ptrs so we'll know if something to delete in buffer |
| 324 |
ANTLRAbstractToken **p = end_of_buffer-chunk_size+1; |
| 325 |
for (; p<=end_of_buffer; p++) *p = NULL; |
| 326 |
*/ |
| 327 |
} |
| 328 |
|
| 329 |
// MR1 - Attach parser 'p' to this buffer and pass it on to the input
// stream; returns the parser that was attached before the call.
ANTLRParser * ANTLRTokenBuffer::
setParser(ANTLRParser *p)
{
    ANTLRParser *previous = this->parser;
    this->parser = p;
    this->input->setParser(p);
    return previous;
}
| 336 |
// MR1 |
| 337 |
// MR1 - Returns the parser currently attached to this buffer (NULL until
// setParser() has been given one).
ANTLRParser * ANTLRTokenBuffer::
getParser()
{
    return this->parser;
}
| 341 |
|
| 342 |
void ANTLRTokenBuffer::panic(const char *msg) // MR23 |
| 343 |
{ |
| 344 |
if (parser) //MR23 |
| 345 |
parser->panic(msg); //MR23 |
| 346 |
else //MR23 |
| 347 |
exit(PCCTS_EXIT_FAILURE); |
| 348 |
} |
| 349 |
|
| 350 |
//MR23 |
| 351 |
int ANTLRTokenBuffer::printMessage(FILE* pFile, const char* pFormat, ...) |
| 352 |
{ |
| 353 |
va_list marker; |
| 354 |
va_start( marker, pFormat ); |
| 355 |
|
| 356 |
int iRet = 0; |
| 357 |
if (parser) |
| 358 |
parser->printMessageV(pFile, pFormat, marker); |
| 359 |
else |
| 360 |
iRet = vfprintf(pFile, pFormat, marker); |
| 361 |
|
| 362 |
va_end( marker ); |
| 363 |
return iRet; |
| 364 |
} |
| 365 |
|
| 366 |
/* to avoid having to link in another file just for the smart token ptr |
| 367 |
* stuff, we include it here. Ugh. |
| 368 |
* |
| 369 |
* MR23 This causes nothing but problems for IDEs. |
| 370 |
* Change from .cpp to .h |
| 371 |
* |
| 372 |
*/ |
| 373 |
|
| 374 |
#include ATOKPTR_IMPL_H |