Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <vlCore/VLTTokenizer.hpp>
00033 #include <vlCore/Log.hpp>
00034 #include <vlCore/Say.hpp>
00035 #include <ctime>
00036
00037 using namespace vl;
00038
00039 bool VLTTokenizer::getToken(VLTToken& token)
00040 {
00041 token.mType = VLTToken::TOKEN_ERROR;
00042
00043
00044
00045 if (mRawtextBlock)
00046 return getRawtextBlock(token);
00047
00048 struct StringTurbo
00049 {
00050 inline StringTurbo(std::string* str): mString(str)
00051 {
00052 mPosition = 0;
00053 }
00054
00055 inline ~StringTurbo()
00056 {
00057 mBuffer[mPosition] = '\0';
00058 *mString = mBuffer;
00059 }
00060
00061 inline void push_back(const char& ch)
00062 {
00063 mBuffer[mPosition++] = ch;
00064 }
00065
00066 inline void operator=(const char& ch)
00067 {
00068 mBuffer[0] = ch;
00069 mPosition = 1;
00070 }
00071
00072 inline void operator=(const char* s)
00073 {
00074 int len = strlen(s);
00075 memcpy( mBuffer, s, len);
00076 mPosition = len;
00077 }
00078
00079 inline bool operator==(const char* str)
00080 {
00081 size_t len = strlen(str);
00082 if (len != mPosition)
00083 return false;
00084 else
00085 return memcmp(mBuffer, str, len) == 0;
00086 }
00087
00088 char mBuffer[1024*4];
00089 size_t mPosition;
00090 std::string* mString;
00091 } string_turbo(&token.mString);
00092
00093
00094 char ch1=0, ch2=0;
00095 do
00096 {
00097 if (!readTextChar(ch1))
00098 {
00099 token.mType = VLTToken::TOKEN_EOF;
00100 return true;
00101 }
00102
00103 if (ch1 == '\n')
00104 ++mLineNumber;
00105 else
00106
00107 if (ch1 == '/')
00108 {
00109 if(readTextChar(ch2))
00110 {
00111 if (ch2 == '/')
00112 {
00113
00114 for(ch1 = 0; readTextChar(ch1) && ch1 != '\n'; )
00115 {
00116
00117 }
00118 if (ch1 == '\n')
00119 ++mLineNumber;
00120 }
00121 else
00122 if (ch2 == '*')
00123 {
00124
00125 while(readTextChar(ch1))
00126 {
00127 if (ch1 == '*' && readTextChar(ch2) && ch2 == '/')
00128 {
00129 ch1 = '\n';
00130 break;
00131 }
00132
00133 if (ch1 == '\n')
00134 ++mLineNumber;
00135 }
00136 }
00137 else
00138 {
00139 Log::error( Say("Line %n : unexpected character '%c' after '/'.\n") << mLineNumber << ch2 );
00140 return false;
00141 }
00142 }
00143 else
00144 {
00145 Log::error( Say("Line %n : unexpected end of file in comment.\n") << mLineNumber);
00146 return false;
00147 }
00148 continue;
00149 }
00150
00151 } while(ch1 == ' ' || ch1 == '\t' || ch1 == '\n');
00152
00153 switch(ch1)
00154 {
00155 case '(':
00156 token.mType = VLTToken::LeftRoundBracket;
00157 string_turbo = "(";
00158 return true;
00159
00160 case ')':
00161 token.mType = VLTToken::RightRoundBracket;
00162 string_turbo = ")";
00163 return true;
00164
00165 case '[':
00166 token.mType = VLTToken::LeftSquareBracket;
00167 string_turbo = "[";
00168 return true;
00169
00170 case ']':
00171 token.mType = VLTToken::RightSquareBracket;
00172 string_turbo = "]";
00173 return true;
00174
00175 case '{':
00176 if(readTextChar(ch2) && ch2 == '<')
00177 {
00178
00179
00180 while(ch2 != '\n' && readTextChar(ch2))
00181 {
00182 switch(ch2)
00183 {
00184 case '\f':
00185 case '\b':
00186 case '\v':
00187 case '\t':
00188 case ' ':
00189 continue;
00190
00191 case '\n':
00192 ++mLineNumber;
00193 break;
00194
00195 default:
00196 string_turbo = ch2;
00197 return false;
00198 }
00199 }
00200
00201 if (ch2 == '\n')
00202 {
00203 token.mType = VLTToken::LeftFancyBracket;
00204 string_turbo = "{<";
00205 mRawtextBlock = true;
00206 return true;
00207 }
00208 else
00209 {
00210 string_turbo = ch2;
00211 return false;
00212 }
00213 }
00214 else
00215 {
00216 token.mType = VLTToken::LeftCurlyBracket;
00217 string_turbo = "{";
00218 if(!isEndOfFile())
00219 ungetToken(ch2);
00220 }
00221 return true;
00222
00223 case '}':
00224 token.mType = VLTToken::RightCurlyBracket;
00225 string_turbo = "}";
00226 return true;
00227
00228 case '>':
00229 if(readTextChar(ch2))
00230 {
00231 if(ch2 == '}')
00232 {
00233 token.mType = VLTToken::RightFancyBracket;
00234 string_turbo = ">}";
00235 return true;
00236 }
00237 else
00238 {
00239 Log::error( Say("Line %n : expected '}' instead of '%c' after '>'.\n") << mLineNumber << ch2 );
00240 return false;
00241 }
00242 }
00243 else
00244 {
00245 Log::error( Say("Line %n : unexpected end of file.\n") << mLineNumber );
00246 return false;
00247 }
00248
00249 case '=':
00250 token.mType = VLTToken::Equals;
00251 string_turbo = "=";
00252 return true;
00253
00254 case '<':
00255 string_turbo = "<";
00256 while(readTextChar(ch1) && ch1 != '>')
00257 {
00258 if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' || ch1 == ':' )
00259 string_turbo.push_back(ch1);
00260 else
00261 {
00262 Log::error( Say("Line %n : unexpected character '%c'.\n") << mLineNumber << ch1 );
00263 return false;
00264 }
00265 }
00266 string_turbo.push_back('>');
00267 if (isEndOfFile())
00268 {
00269 Log::error( Say("Line %n : unexpected end of file while reading object header.\n") << mLineNumber );
00270 return false;
00271 }
00272 token.mType = VLTToken::TagHeader;
00273 return true;
00274
00275 case '#':
00276 string_turbo = "#";
00277 while(readTextChar(ch1))
00278 {
00279 if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' )
00280 string_turbo.push_back(ch1);
00281 else
00282 {
00283 ungetToken(ch1);
00284 break;
00285 }
00286 }
00287 if (string_turbo == "#_")
00288 {
00289 Log::error( Say("Line %n : illegal id '#_' found.\n") << mLineNumber );
00290 return false;
00291 }
00292 token.mType = VLTToken::ID;
00293 return true;
00294
00295 case '"':
00296 while(readTextChar(ch1))
00297 {
00298
00299 if (ch1 == '"')
00300 break;
00301 else
00302
00303 if (ch1 == '\n')
00304 {
00305 Log::error( Say("Line %n : end of line found before end of string, did you forget a \"?.\n") << mLineNumber );
00306 return false;
00307 }
00308 else
00309
00310 if (ch1 == '\\' && readTextChar(ch2))
00311 {
00312 if (ch2 == '"')
00313 ch1 = '"';
00314 else
00315 if (ch2 == '\\')
00316 ch1 = '\\';
00317 else
00318 if (ch2 == 'b')
00319 ch1 = '\b';
00320 else
00321 if (ch2 == 'f')
00322 ch1 = '\f';
00323 else
00324 if (ch2 == 'r')
00325 ch1 = '\r';
00326 else
00327 if (ch2 == 'n')
00328 ch1 = '\n';
00329 else
00330 if (ch2 == 't')
00331 ch1 = '\t';
00332 else
00333 ungetToken(ch2);
00334 string_turbo.push_back(ch1);
00335 }
00336 else
00337
00338 string_turbo.push_back(ch1);
00339 }
00340 if (isEndOfFile())
00341 {
00342 Log::error( Say("Line %n : end of file found before end of string, did you forget a \"?.\n") << mLineNumber );
00343 return false;
00344 }
00345 else
00346 {
00347 token.mType = VLTToken::String;
00348 return true;
00349 }
00350
00351 default:
00352
00353 if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || ch1 == '_' )
00354 {
00355 string_turbo.push_back(ch1);
00356 while(readTextChar(ch1))
00357 {
00358 if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' )
00359 string_turbo.push_back(ch1);
00360 else
00361 {
00362 ungetToken(ch1);
00363 break;
00364 }
00365 }
00366 if (string_turbo == "_")
00367 {
00368 Log::error( Say("Line %n : unexpected character '_'.\n") << mLineNumber );
00369 return false;
00370 }
00371 else
00372 {
00373
00374 if (string_turbo == "true" || string_turbo == "false")
00375 token.mType = VLTToken::Boolean;
00376 else
00377 token.mType = VLTToken::Identifier;
00378 return true;
00379 }
00380 }
00381 else
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398 if ( (ch1 >= '0' && ch1 <= '9') || ch1 == '.' || ch1 == '+' || ch1 == '-' )
00399 {
00400 token.mType = VLTToken::TOKEN_ERROR;
00401 string_turbo.push_back(ch1);
00402
00403 enum { sZERO, sPLUS_MINUS, sINT, sFRAC, sPOINT, sE, sPLUS_MINUS_EXP, sEXP } state = sINT;
00404
00405 if ( ch1 >= '1' && ch1 <= '9' )
00406 state = sINT;
00407 else
00408 if (ch1 == '0')
00409 state = sZERO;
00410 else
00411 if (ch1 == '.')
00412 state = sPOINT;
00413 else
00414 if (ch1 == '+' || ch1 == '-')
00415 state = sPLUS_MINUS;
00416
00417 while(readTextChar(ch1))
00418 {
00419 switch(state)
00420 {
00421
00422 case sZERO:
00423 if (ch1 == '.')
00424 {
00425 string_turbo.push_back(ch1);
00426 state = sPOINT;
00427 }
00428 else
00429 {
00430 token.mType = VLTToken::Integer;
00431 ungetToken(ch1);
00432 return true;
00433 }
00434 break;
00435
00436 case sPLUS_MINUS:
00437 if (ch1 == '0')
00438 {
00439 string_turbo.push_back(ch1);
00440 state = sZERO;
00441 }
00442 else
00443 if (ch1 >= '1' && ch1 <= '9')
00444 {
00445 string_turbo.push_back(ch1);
00446 state = sINT;
00447 }
00448 else
00449 if (ch1 == '.')
00450 {
00451 string_turbo.push_back(ch1);
00452 state = sPOINT;
00453 }
00454 else
00455 {
00456 Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00457 return false;
00458 }
00459 break;
00460
00461 case sINT:
00462 if (ch1 >= '0' && ch1 <= '9')
00463 string_turbo.push_back(ch1);
00464 else
00465 if (ch1 == '.')
00466 {
00467 string_turbo.push_back(ch1);
00468 state = sPOINT;
00469 }
00470 else
00471 {
00472 token.mType = VLTToken::Integer;
00473 ungetToken(ch1);
00474 return true;
00475 }
00476 break;
00477
00478 case sPOINT:
00479 if (ch1 >= '0' && ch1 <= '9')
00480 {
00481 string_turbo.push_back(ch1);
00482 state = sFRAC;
00483 }
00484 else
00485 {
00486 Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00487 return false;
00488 }
00489 break;
00490
00491 case sFRAC:
00492 if (ch1 >= '0' && ch1 <= '9')
00493 string_turbo.push_back(ch1);
00494 else
00495 if (ch1 == 'E' || ch1 == 'e')
00496 {
00497 string_turbo.push_back(ch1);
00498 state = sE;
00499 }
00500 else
00501 {
00502 token.mType = VLTToken::real;
00503 ungetToken(ch1);
00504 return true;
00505 }
00506 break;
00507
00508 case sE:
00509 if (ch1 == '+' || ch1 == '-')
00510 {
00511 string_turbo.push_back(ch1);
00512 state = sPLUS_MINUS_EXP;
00513 }
00514 else
00515 {
00516 Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00517 return false;
00518 }
00519 break;
00520
00521 case sPLUS_MINUS_EXP:
00522 if (ch1 >= '0' && ch1 <= '9')
00523 {
00524 string_turbo.push_back(ch1);
00525 state = sEXP;
00526 }
00527 else
00528 {
00529 Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00530 return false;
00531 }
00532 break;
00533
00534 case sEXP:
00535 if (ch1 >= '0' && ch1 <= '9')
00536 string_turbo.push_back(ch1);
00537 else
00538 {
00539 token.mType = VLTToken::real;
00540 ungetToken(ch1);
00541 return true;
00542 }
00543 break;
00544 }
00545 }
00546
00547 if (state == sINT)
00548 {
00549 token.mType = VLTToken::Integer;
00550 return true;
00551 }
00552 else
00553 if (state == sFRAC || state == sEXP)
00554 {
00555 token.mType = VLTToken::real;
00556 return true;
00557 }
00558 else
00559 return false;
00560 }
00561 else
00562 {
00563 Log::error( Say("Line %n : unexpected character '%c'.\n") << mLineNumber << ch1 );
00564 return false;
00565 }
00566 }
00567 }
00568
00569 bool VLTTokenizer::getRawtextBlock(VLTToken& token)
00570 {
00571 mRawtextBlock = false;
00572
00573 token.mType = VLTToken::TOKEN_ERROR;
00574 token.mString.resize(0);
00575
00576 char ch =0;
00577 while(readTextChar(ch))
00578 {
00579 if (ch == '\n')
00580 ++mLineNumber;
00581
00582 if (ch == '>')
00583 {
00584
00585 char ch2 = 0;
00586 if (readTextChar(ch2))
00587 {
00588 if(ch2 == '}')
00589 {
00590
00591 if (!token.mString.empty() && token.mString[ token.mString.size() - 1 ] == '\\')
00592 {
00593 token.mString.resize( token.mString.size() - 1 );
00594 token.mString += ">}";
00595 continue;
00596 }
00597 else
00598 {
00599 token.mType = VLTToken::RawtextBlock;
00600 ungetToken('}');
00601 ungetToken('>');
00602 return true;
00603 }
00604 }
00605 else
00606 ungetToken(ch2);
00607 }
00608 }
00609
00610 token.mString.push_back(ch);
00611 }
00612
00613 return false;
00614 }
00615