Visualization Library: X:/dropbox/visualizationlibrary/src/vlCore/VLTTokenizer.cpp Source File

Go to the documentation of this file.
00001 /**************************************************************************************/
00002 /*                                                                                    */
00003 /*  Visualization Library                                                             */
00004 /*  http://www.visualizationlibrary.org                                               */
00005 /*                                                                                    */
00006 /*  Copyright (c) 2005-2010, Michele Bosi                                             */
00007 /*  All rights reserved.                                                              */
00008 /*                                                                                    */
00009 /*  Redistribution and use in source and binary forms, with or without modification,  */
00010 /*  are permitted provided that the following conditions are met:                     */
00011 /*                                                                                    */
00012 /*  - Redistributions of source code must retain the above copyright notice, this     */
00013 /*  list of conditions and the following disclaimer.                                  */
00014 /*                                                                                    */
00015 /*  - Redistributions in binary form must reproduce the above copyright notice, this  */
00016 /*  list of conditions and the following disclaimer in the documentation and/or       */
00017 /*  other materials provided with the distribution.                                   */
00018 /*                                                                                    */
00019 /*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND   */
00020 /*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED     */
00021 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE            */
00022 /*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR  */
00023 /*  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    */
00024 /*  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;      */
00025 /*  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON    */
00026 /*  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT           */
00027 /*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS     */
00028 /*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                      */
00029 /*                                                                                    */
00030 /**************************************************************************************/
00031 
00032 #include <vlCore/VLTTokenizer.hpp>
00033 #include <vlCore/Log.hpp>
00034 #include <vlCore/Say.hpp>
00035 #include <ctime>
00036 
00037 using namespace vl;
00038 
00039 bool VLTTokenizer::getToken(VLTToken& token)
00040 {
00041   token.mType = VLTToken::TOKEN_ERROR;
00042   /*token.mString.resize(0);*/
00043 
00044   // Must be done before StringTurbo is declared
00045   if (mRawtextBlock)
00046     return getRawtextBlock(token);
00047 
00048   struct StringTurbo
00049   {
00050     inline StringTurbo(std::string* str): mString(str)
00051     {
00052       mPosition = 0;
00053     }
00054     
00055     inline ~StringTurbo()
00056     {
00057       mBuffer[mPosition] = '\0';
00058       *mString = mBuffer;
00059     }
00060 
00061     inline void push_back(const char& ch)
00062     {
00063       mBuffer[mPosition++] = ch;
00064     }
00065 
00066     inline void operator=(const char& ch)
00067     {
00068       mBuffer[0] = ch;
00069       mPosition = 1;
00070     }
00071 
00072     inline void operator=(const char* s)
00073     {
00074       int len = strlen(s);
00075       memcpy( mBuffer, s, len);
00076       mPosition = len;
00077     }
00078 
00079     inline bool operator==(const char* str)
00080     {
00081       size_t len = strlen(str);
00082       if (len != mPosition)
00083         return false;
00084       else
00085         return memcmp(mBuffer, str, len) == 0;
00086     }
00087 
00088     char mBuffer[1024*4];
00089     size_t mPosition;
00090     std::string* mString;
00091   } string_turbo(&token.mString);
00092 
00093   // read chars skipping spaces
00094   char ch1=0, ch2=0;
00095   do 
00096   {
00097     if (!readTextChar(ch1))
00098     {
00099       token.mType = VLTToken::TOKEN_EOF;
00100       return true;
00101     }
00102 
00103     if (ch1 == '\n')
00104       ++mLineNumber;
00105     else
00106     // eat comments
00107     if (ch1 == '/')
00108     {
00109       if(readTextChar(ch2))
00110       {
00111         if (ch2 == '/') // single line comment
00112         {
00113           // eat everything till the end of the line
00114           for(ch1 = 0; readTextChar(ch1) && ch1 != '\n'; )
00115           {
00116             // eat everything
00117           }
00118           if (ch1 == '\n')
00119             ++mLineNumber;
00120         }
00121         else
00122         if (ch2 == '*') // multi line comment
00123         {
00124           // eat everything till the end of the line
00125           while(readTextChar(ch1))
00126           {
00127             if (ch1 == '*' && readTextChar(ch2) && ch2 == '/')
00128             {
00129               ch1 = '\n'; // pretend it's a space to stay in the loop
00130               break;
00131             }
00132             // eat everything
00133             if (ch1 == '\n')
00134               ++mLineNumber;
00135           }
00136         }
00137         else
00138         {
00139           Log::error( Say("Line %n : unexpected character '%c' after '/'.\n") << mLineNumber << ch2 );
00140           return false;
00141         }
00142       }
00143       else
00144       {
00145         Log::error( Say("Line %n : unexpected end of file in comment.\n") << mLineNumber);
00146         return false;
00147       }
00148       continue;
00149     }
00150 
00151   } while(ch1 == ' ' || ch1 == '\t' || ch1 == '\n');
00152 
00153   switch(ch1)
00154   {
00155   case '(':
00156     token.mType = VLTToken::LeftRoundBracket;
00157     string_turbo = "(";
00158     return true;
00159     
00160   case ')':
00161     token.mType = VLTToken::RightRoundBracket;
00162     string_turbo = ")";
00163     return true;
00164 
00165   case '[':
00166     token.mType = VLTToken::LeftSquareBracket;
00167     string_turbo = "[";
00168     return true;
00169     
00170   case ']':
00171     token.mType = VLTToken::RightSquareBracket;
00172     string_turbo = "]";
00173     return true;
00174 
00175   case '{':
00176     if(readTextChar(ch2) && ch2 == '<')
00177     {
00178       // actual data starts at the next new line
00179       // eat all the spaces until the end of the current line
00180       while(ch2 != '\n' && readTextChar(ch2)) 
00181       { 
00182         switch(ch2)
00183         {
00184         case '\f':
00185         case '\b':
00186         case '\v':
00187         case '\t':
00188         case ' ':
00189           continue;
00190 
00191         case '\n':
00192           ++mLineNumber;
00193           break;
00194 
00195         default:
00196           string_turbo = ch2;
00197           return false;
00198         }
00199       }
00200 
00201       if (ch2 == '\n')
00202       {
00203         token.mType = VLTToken::LeftFancyBracket;
00204         string_turbo = "{<";
00205         mRawtextBlock = true;
00206         return true;
00207       }
00208       else
00209       {
00210         string_turbo = ch2;
00211         return false;
00212       }
00213     }
00214     else
00215     {
00216       token.mType = VLTToken::LeftCurlyBracket;
00217       string_turbo = "{";
00218       if(!isEndOfFile())
00219         ungetToken(ch2);
00220     }
00221     return true;
00222 
00223   case '}':
00224     token.mType = VLTToken::RightCurlyBracket;
00225     string_turbo = "}";
00226     return true;
00227 
00228   case '>':
00229     if(readTextChar(ch2))
00230     {
00231       if(ch2 == '}')
00232       {
00233         token.mType = VLTToken::RightFancyBracket;
00234         string_turbo = ">}";
00235         return true;
00236       }
00237       else
00238       {
00239         Log::error( Say("Line %n : expected '}' instead of '%c' after '>'.\n") << mLineNumber << ch2 );
00240         return false;
00241       }
00242     }
00243     else
00244     {
00245         Log::error( Say("Line %n : unexpected end of file.\n") << mLineNumber );
00246         return false;
00247     }
00248 
00249   case '=':
00250     token.mType = VLTToken::Equals; 
00251     string_turbo = "=";
00252     return true;
00253 
00254   case '<':
00255     string_turbo = "<";
00256     while(readTextChar(ch1) && ch1 != '>')
00257     {
00258       if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' || ch1 == ':' )
00259         string_turbo.push_back(ch1);
00260       else
00261       {
00262         Log::error( Say("Line %n : unexpected character '%c'.\n") << mLineNumber << ch1 );
00263         return false;
00264       }
00265     }
00266     string_turbo.push_back('>');
00267     if (isEndOfFile())
00268     {
00269       Log::error( Say("Line %n : unexpected end of file while reading object header.\n") << mLineNumber );
00270       return false;
00271     }
00272     token.mType = VLTToken::TagHeader;
00273     return true;
00274 
00275   case '#':
00276     string_turbo = "#";
00277     while(readTextChar(ch1))
00278     {
00279       if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' )
00280         string_turbo.push_back(ch1);
00281       else
00282       {
00283         ungetToken(ch1);
00284         break;
00285       }
00286     }
00287     if (string_turbo == "#_")
00288     {
00289       Log::error( Say("Line %n : illegal id '#_' found.\n") << mLineNumber );
00290       return false;
00291     }
00292     token.mType = VLTToken::ID;
00293     return true;
00294 
00295   case '"':
00296     while(readTextChar(ch1))
00297     {
00298       // end string
00299       if (ch1 == '"')
00300         break;
00301       else
00302       // return found before end of string
00303       if (ch1 == '\n')
00304       {
00305         Log::error( Say("Line %n : end of line found before end of string, did you forget a \"?.\n") << mLineNumber );
00306         return false;
00307       }
00308       else
00309       // escape sequences
00310       if (ch1 == '\\' && readTextChar(ch2))
00311       {
00312         if (ch2 == '"')
00313           ch1 = '"';
00314         else
00315         if (ch2 == '\\')
00316           ch1 = '\\';
00317         else
00318         if (ch2 == 'b')
00319           ch1 = '\b';
00320         else
00321         if (ch2 == 'f')
00322           ch1 = '\f';
00323         else
00324         if (ch2 == 'r')
00325           ch1 = '\r';
00326         else
00327         if (ch2 == 'n')
00328           ch1 = '\n';
00329         else
00330         if (ch2 == 't')
00331           ch1 = '\t';
00332         else
00333           ungetToken(ch2);
00334         string_turbo.push_back(ch1);
00335       }
00336       else
00337       // accept everyhing else
00338         string_turbo.push_back(ch1);
00339     }
00340     if (isEndOfFile())
00341     {
00342       Log::error( Say("Line %n : end of file found before end of string, did you forget a \"?.\n") << mLineNumber );
00343       return false;
00344     }
00345     else
00346     {
00347       token.mType = VLTToken::String;
00348       return true;
00349     }
00350 
00351   default:
00352     // identifier
00353     if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || ch1 == '_' )
00354     {
00355       string_turbo.push_back(ch1);
00356       while(readTextChar(ch1))
00357       {
00358         if ( (ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '_' )
00359           string_turbo.push_back(ch1);
00360         else
00361         {
00362           ungetToken(ch1);
00363           break;
00364         }
00365       }
00366       if (string_turbo == "_")
00367       {
00368         Log::error( Say("Line %n : unexpected character '_'.\n") << mLineNumber );
00369         return false;
00370       }
00371       else
00372       {
00373         // check if it's a boolean
00374         if (string_turbo == "true" || string_turbo == "false")
00375           token.mType = VLTToken::Boolean;
00376         else
00377           token.mType = VLTToken::Identifier;
00378         return true;
00379       }
00380     }
00381     else
00382     // Integer / real
00383     //
00384     // ACCEPTED:
00385     // 123
00386     // +123.123E+10 -123.123e-10
00387     // +123
00388     // +.123
00389     // 0.123
00390     // 123.123
00391     //
00392     // REJECTED:
00393     // 01234
00394     // 01.234
00395     // 123.
00396     // 123.123E
00397     // 123.123e+
00398     if ( (ch1 >= '0' && ch1 <= '9') || ch1 == '.' || ch1 == '+' || ch1 == '-' )
00399     {
00400       token.mType = VLTToken::TOKEN_ERROR;
00401       string_turbo.push_back(ch1);
00402 
00403       enum { sZERO, sPLUS_MINUS, sINT, sFRAC, sPOINT, sE, sPLUS_MINUS_EXP, sEXP } state = sINT;
00404 
00405       if ( ch1 >= '1' && ch1 <= '9' )
00406         state = sINT;
00407       else
00408       if (ch1 == '0')
00409         state = sZERO;
00410       else
00411       if (ch1 == '.')
00412         state = sPOINT;
00413       else
00414       if (ch1 == '+' || ch1 == '-')
00415         state = sPLUS_MINUS;
00416 
00417       while(readTextChar(ch1))
00418       {
00419         switch(state)
00420         {
00421         // if starting with 0 must be 0.0-9
00422         case sZERO:
00423           if (ch1 == '.')
00424           {
00425             string_turbo.push_back(ch1);
00426             state = sPOINT;
00427           }
00428           else
00429           {
00430             token.mType = VLTToken::Integer;
00431             ungetToken(ch1);
00432             return true;
00433           }
00434           break;
00435 
00436         case sPLUS_MINUS:
00437           if (ch1 == '0')
00438           {
00439             string_turbo.push_back(ch1);
00440             state = sZERO;
00441           }
00442           else
00443           if (ch1 >= '1' && ch1 <= '9')
00444           {
00445             string_turbo.push_back(ch1);
00446             state = sINT;
00447           }
00448           else
00449           if (ch1 == '.')
00450           {
00451             string_turbo.push_back(ch1);
00452             state = sPOINT;
00453           }
00454           else
00455           {
00456             Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00457             return false;
00458           }
00459           break;
00460 
00461         case sINT:
00462           if (ch1 >= '0' && ch1 <= '9')
00463             string_turbo.push_back(ch1);
00464           else
00465           if (ch1 == '.')
00466           {
00467             string_turbo.push_back(ch1);
00468             state = sPOINT;
00469           }
00470           else
00471           {
00472             token.mType = VLTToken::Integer;
00473             ungetToken(ch1);
00474             return true;
00475           }
00476           break;
00477 
00478         case sPOINT:
00479           if (ch1 >= '0' && ch1 <= '9')
00480           {
00481             string_turbo.push_back(ch1);
00482             state = sFRAC;
00483           }
00484           else
00485           {
00486             Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00487             return false;
00488           }
00489           break;
00490 
00491         case sFRAC:
00492           if (ch1 >= '0' && ch1 <= '9')
00493             string_turbo.push_back(ch1);
00494           else
00495           if (ch1 == 'E' || ch1 == 'e')
00496           {
00497             string_turbo.push_back(ch1);
00498             state = sE;
00499           }
00500           else
00501           {
00502             token.mType = VLTToken::real;
00503             ungetToken(ch1);
00504             return true;
00505           }
00506           break;
00507 
00508         case sE:
00509           if (ch1 == '+' || ch1 == '-')
00510           {
00511             string_turbo.push_back(ch1);
00512             state = sPLUS_MINUS_EXP;
00513           }
00514           else
00515           {
00516             Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00517             return false;
00518           }
00519           break;
00520 
00521         case sPLUS_MINUS_EXP:
00522           if (ch1 >= '0' && ch1 <= '9')
00523           {
00524             string_turbo.push_back(ch1);
00525             state = sEXP;
00526           }
00527           else
00528           {
00529             Log::error( Say("Line %n :unexpected character '%c'.\n") << mLineNumber << ch1 );
00530             return false;
00531           }
00532           break;
00533 
00534         case sEXP:
00535           if (ch1 >= '0' && ch1 <= '9')
00536             string_turbo.push_back(ch1);
00537           else
00538           {
00539             token.mType = VLTToken::real;
00540             ungetToken(ch1);
00541             return true;
00542           }
00543           break;
00544         }
00545       }
00546       // reached TOKEN_EOF in the middle of the parsing so we check where we were, note that it cannot be a Integer or a real
00547       if (state == sINT)
00548       {
00549         token.mType = VLTToken::Integer;
00550         return true;
00551       }
00552       else
00553       if (state == sFRAC || state == sEXP)
00554       {
00555         token.mType = VLTToken::real;
00556         return true;
00557       }
00558       else
00559         return false;
00560     }
00561     else
00562     {
00563       Log::error( Say("Line %n : unexpected character '%c'.\n") << mLineNumber << ch1 );
00564       return false;
00565     }
00566   }
00567 }
00568 //-----------------------------------------------------------------------------
00569 bool VLTTokenizer::getRawtextBlock(VLTToken& token)
00570 {
00571   mRawtextBlock = false;
00572 
00573   token.mType = VLTToken::TOKEN_ERROR;
00574   token.mString.resize(0);
00575 
00576   char ch =0;
00577   while(readTextChar(ch))
00578   {
00579     if (ch == '\n')
00580       ++mLineNumber;
00581 
00582     if (ch == '>')
00583     {
00584       // check for rawtext block end >}
00585       char ch2 = 0;
00586       if (readTextChar(ch2))
00587       {
00588         if(ch2 == '}')
00589         {
00590           // check if it was escaped
00591           if (!token.mString.empty() && token.mString[ token.mString.size() - 1 ] == '\\')
00592           {
00593             token.mString.resize( token.mString.size() - 1 );
00594             token.mString += ">}";
00595             continue;
00596           }
00597           else
00598           {
00599             token.mType = VLTToken::RawtextBlock;
00600             ungetToken('}');
00601             ungetToken('>');
00602             return true;
00603           }
00604         }
00605         else
00606           ungetToken(ch2);
00607       }
00608     }
00609     
00610     token.mString.push_back(ch);
00611   }
00612 
00613   return false;
00614 }
00615 //-----------------------------------------------------------------------------