00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <vlCore/String.hpp>
00033 #include <vlCore/String_Tables.hpp>
00034 #include <vlCore/Log.hpp>
00035 #include <vlCore/Say.hpp>
00036 #include <vlCore/FileSystem.hpp>
00037 #include <vlCore/VirtualFile.hpp>
00038 #include <vlCore/VisualizationLibrary.hpp>
00039 #include <stdio.h>
00040 #include <stdarg.h>
00041
00042 using namespace vl;
00043
00044
00045
00046
00047 String::String()
00048 {
00049 #if VL_STRING_COPY_ON_WRITE == 0
00050 acquireData();
00051 #endif
00052 }
00053
00054 String::String(const String& other)
00055 {
00056 operator=(other);
00057 }
00058
00059 String::String(const wchar_t* wstr)
00060 {
00061 #if VL_STRING_COPY_ON_WRITE == 0
00062 acquireData();
00063 #endif
00064 if (wstr)
00065 *this = wstr;
00066 }
00067
00068 String::String(const char* str)
00069 {
00070 #if VL_STRING_COPY_ON_WRITE == 0
00071 acquireData();
00072 #endif
00073 if (str)
00074 *this = str;
00075 }
00076
00077 String::String(wchar_t ch, int count)
00078 {
00079 #if VL_STRING_COPY_ON_WRITE == 0
00080 acquireData();
00081 #endif
00082 for(int i=0; i<count; ++i)
00083 *this += ch;
00084 }
00085
00086 String String::loadText(const String& path, EStringEncoding default_encoding)
00087 {
00088 ref<VirtualFile> file = defFileSystem()->locateFile(path);
00089 if (file)
00090 return loadText( file.get(), default_encoding );
00091 else
00092 {
00093 Log::error( Say("Could not locate '%s'.\n") << path );
00094 return String();
00095 }
00096 }
00097
00098 String String::loadText(VirtualFile* file, EStringEncoding default_encoding)
00099 {
00100 std::vector<char> buffer;
00101 file->load( buffer );
00102 file->close();
00103
00104 if ( buffer.size() )
00105 {
00106 return loadText( &buffer[0], (int)buffer.size(), default_encoding );
00107 }
00108 else
00109 {
00110 return String();
00111 }
00112 }
00113
00114 String String::loadText(void* data, int bytes, EStringEncoding default_encoding )
00115 {
00116 EStringEncoding enc = detectEncoding( data, bytes, default_encoding );
00117 String text;
00118 switch(enc)
00119 {
00120 case SE_ASCII:
00121 return fromAscii((char*)data, bytes);
00122 break;
00123 case SE_LATIN1:
00124 return fromLatin1((char*)data, bytes);
00125 break;
00126 case SE_UTF8:
00127 return fromUTF8((char*)data, bytes);
00128 break;
00129 case SE_UTF16_BE:
00130 return fromUTF16BE((unsigned short*)data, bytes );
00131 break;
00132 case SE_UTF16_LE:
00133 return fromUTF16LE((unsigned short*)data, bytes );
00134 break;
00135 case SE_Unknown:
00136 Log::error("String::loadText() unknown encoding.\n");
00137 break;
00138 case SE_UTF32_BE:
00139 case SE_UTF32_LE:
00140 Log::error("String::loadText() SE_UTF32_BE/SE_UTF32_LE encoding not supported.\n");
00141 break;
00142 }
00143 return String();
00144 }
00145
00146 String& String::resize(int character_count)
00147 {
00148 acquireData();
00149 mString->resize(character_count);
00150 return *this;
00151 }
00152
00153 String String::substring(int start, int count) const
00154 {
00155
00156 if ( empty() )
00157 return String();
00158
00159 if (start<0)
00160 start = 0;
00161 if (count<0)
00162 count = length();
00163 int end_idx = start+count-1;
00164 if (end_idx > length()-1 )
00165 end_idx = length()-1;
00166
00167 String str;
00168 str.acquireData();
00169 int sz = end_idx - start + 1;
00170 sz = sz < 0 ? 0 : sz;
00171 str.mString->resize( sz );
00172 for(int i=0; i<(int)str.mString->length(); ++i)
00173 (*str.mString)[i] = (*mString)[start+i];
00174 return str;
00175 }
00176
00177 int String::findBackwards(wchar_t ch) const
00178 {
00179
00180 if (empty())
00181 return -1;
00182
00183 for(int i=length(); i--; )
00184 if ((*mString)[i] == ch)
00185 return i;
00186 return -1;
00187 }
00188
00189 int String::findBackwards(const String& str) const
00190 {
00191
00192 if (empty())
00193 return -1;
00194
00195 if (str.length() < length())
00196 {
00197 for(int i = length() - str.length()+1; i--; )
00198 {
00199 int j=0;
00200 for(; j<str.length(); ++j)
00201 {
00202 if ( str[j] != (*mString)[i+j] )
00203 break;
00204 }
00205 if ( j == str.length() )
00206 return i;
00207 }
00208 }
00209 return -1;
00210 }
00211
00212 bool String::contains(wchar_t ch) const
00213 {
00214 return find(ch) != -1;
00215 }
00216
00217 int String::find(wchar_t ch, int start) const
00218 {
00219
00220 if (empty())
00221 return -1;
00222
00223 for(int i=start; i<length(); ++i)
00224 if ((*mString)[i] == ch)
00225 return i;
00226 return -1;
00227 }
00228
00229 bool String::contains(const String& substr) const
00230 {
00231 return find(substr) != -1;
00232 }
00233
namespace
{
  // Quick Search substring search using a bad-character shift table.
  //
  //   x, m : pattern and its length in characters
  //   y, n : text and its length in characters
  //
  // Returns the index in 'y' of the first occurrence of 'x', or -1.
  // An empty pattern (m == 0) matches at index 0, as before.
  //
  // Fixes over the previous version:
  //  - the shift table is indexed with the character masked to 16 bits, so a
  //    wchar_t above 0xFFFF (or a negative one where wchar_t is signed) no
  //    longer indexes outside the 0x10000-entry table. Characters sharing a
  //    slot keep the smallest shift, which can only make the scan more
  //    conservative, never skip a match;
  //  - y[j + m] is no longer read when the window already touches the end of
  //    the text (that was a read one element past the buffer);
  //  - the table lives on the heap instead of taking 256 KB of stack.
  int String_Quick_Search(const wchar_t* x, int m, const wchar_t* y, int n)
  {
    if (m < 0 || n < m)
      return -1;

    const unsigned int TABLE_MASK = 0xFFFF;
    std::vector<int> shift(TABLE_MASK + 1, m + 1);
    // ascending i => later (smaller) shifts overwrite earlier ones
    for (int i = 0; i < m; ++i)
      shift[static_cast<unsigned int>(x[i]) & TABLE_MASK] = m - i;

    int j = 0;
    while (j <= n - m)
    {
      if (memcmp(x, y + j, m*sizeof(wchar_t)) == 0)
        return j;
      if (j + m >= n) // no character follows the window: nothing left to try
        break;
      j += shift[static_cast<unsigned int>(y[j + m]) & TABLE_MASK];
    }
    return -1;
  }
}
00250
00251 int String::findInLargeText(const String& substr, int start) const
00252 {
00253
00254 if (empty())
00255 return -1;
00256
00257 if ( substr.length() > length() || start >= length() || substr.empty() || empty() )
00258 return -1;
00259 {
00260 int pos = String_Quick_Search( &(*substr.mString)[0], substr.length(), &(*mString)[0]+start, length()-start );
00261 return pos >= 0 ? pos + start : pos;
00262 }
00263 }
00264
00265 int String::find(const String& substr, int start) const
00266 {
00267
00268 if (empty())
00269 return -1;
00270
00271 if ( substr.length() > length() || start >= length() || substr.empty() || empty() )
00272 return -1;
00273 {
00274 int max = length() - substr.length();
00275 for(int i=start; i<=max; ++i)
00276 {
00277 int j=0;
00278 for(; j<substr.length(); ++j)
00279 {
00280 if (substr[j] != (*mString)[i+j])
00281 break;
00282 }
00283 if (j == substr.length())
00284 return i;
00285 }
00286 return -1;
00287 }
00288 }
00289
00290 void String::squeeze()
00291 {
00292 if(empty())
00293 return;
00294
00295 mString->squeeze();
00296 }
00297
00298 String& String::fill(wchar_t ch)
00299 {
00300 acquireData();
00301
00302 for(int i=0; i<length(); ++i)
00303 (*mString)[i] = ch;
00304 return *this;
00305 }
00306
00307 String& String::trim(const String& chars)
00308 {
00309 acquireData();
00310
00311 while( chars.length() )
00312 {
00313 int len = length();
00314 for( int i=0; i<chars.length(); ++i)
00315 trim(chars[i]);
00316 if ( len == length())
00317 break;
00318 }
00319 return *this;
00320 }
00321
00322 String& String::trim(wchar_t ch)
00323 {
00324 acquireData();
00325
00326 if (length())
00327 {
00328 int pos = 0;
00329 while( (*mString)[pos] == ch )
00330 pos++;
00331 if (pos)
00332 *this = substring(pos);
00333 pos = length()-1;
00334 while( pos >=0 && (*mString)[pos] == ch )
00335 pos--;
00336 pos++;
00337 if (pos != length())
00338 *this = substring(0,pos);
00339 }
00340 return *this;
00341 }
00342
00343 String& String::trim()
00344 {
00345 acquireData();
00346
00347 trim("\n\r\t\v ");
00348
00349 return *this;
00350 }
00351
00352 void String::split(const String& separator_list, std::vector<String>& fields, bool remove_empty) const
00353 {
00354 fields.clear();
00355
00356
00357 if (empty())
00358 return;
00359
00360 if ( length() )
00361 {
00362 fields.push_back( String() );
00363 fields.back().acquireData();
00364 fields.back().mString->clear();
00365 for(int i=0; i<length(); ++i)
00366 {
00367 if ( separator_list.contains((*mString)[i]) )
00368 {
00369 fields.push_back( String() );
00370 fields.back().acquireData();
00371 fields.back().mString->clear();
00372 continue;
00373 }
00374 fields.back().mString->push_back( (*mString)[i] );
00375 }
00376 }
00377
00378 if (remove_empty)
00379 {
00380 for ( size_t i=fields.size(); i--; )
00381 if (fields[i].empty())
00382 fields.erase(fields.begin() + i);
00383 }
00384 }
00385
00386 void String::split(wchar_t separator, std::vector<String>& fields, bool remove_empty) const
00387 {
00388 fields.clear();
00389
00390
00391 if (empty())
00392 return;
00393
00394 if ( length() )
00395 {
00396 fields.push_back( String() );
00397 fields.back().acquireData();
00398 fields.back().mString->clear();
00399 for(int i=0; i<length(); ++i)
00400 {
00401 if ((*mString)[i] == separator)
00402 {
00403 fields.push_back( String() );
00404 fields.back().acquireData();
00405 fields.back().mString->clear();
00406 continue;
00407 }
00408 fields.back().mString->push_back( (*mString)[i] );
00409 }
00410 }
00411
00412 if (remove_empty)
00413 {
00414 for ( size_t i=fields.size(); i--; )
00415 if (fields[i].empty())
00416 fields.erase(fields.begin() + i);
00417 }
00418 }
00419
00420 void String::splitLines(std::vector<String>& lines) const
00421 {
00422 lines.clear();
00423
00424
00425 if (empty())
00426 return;
00427
00428 if ( length() )
00429 {
00430 lines.push_back( String() );
00431 lines.back().acquireData();
00432 lines.back().mString->clear();
00433 for(int i=0; i<length(); ++i)
00434 {
00435 if ((*mString)[i] == '\n' || (*mString)[i] == '\r')
00436 {
00437 lines.push_back( String() );
00438 lines.back().acquireData();
00439 lines.back().mString->clear();
00440
00441 if ((*mString)[i] == '\n' && (*mString)[i+1] == '\r')
00442 ++i;
00443 else
00444 if ((*mString)[i] == '\r' && (*mString)[i+1] == '\n')
00445 ++i;
00446 continue;
00447 }
00448 lines.back().mString->push_back( (*mString)[i] );
00449 }
00450 }
00451 }
00452
00453 String String::field(wchar_t separator, int field_index) const
00454 {
00455 String field;
00456 int field_count = 0;
00457 int i=0;
00458 for(; i<length() && field_count<field_index; ++i)
00459 {
00460 if ( (*this)[i] == separator )
00461 ++field_count;
00462 }
00463
00464 for(; i<length() && (*this)[i] != separator; ++i)
00465 field+=(*this)[i];
00466 return field;
00467 }
00468
00469 String& String::remove(const String& str, int start, int count)
00470 {
00471 acquireData();
00472
00473 if (count == 0)
00474 return *this;
00475 if (count<0)
00476 count = length();
00477 int removed = 0;
00478 for( int pos = find(str, start); pos != -1 && removed<count; start=pos, pos=find(str, start), ++removed)
00479 remove( pos, str.length() );
00480 return *this;
00481 }
00482
00483 String& String::remove( int start, int count )
00484 {
00485 if (count == 0)
00486 return *this;
00487
00488 acquireData();
00489
00490 String tmp;
00491 tmp.acquireData();
00492 tmp.mString->clear();
00493 int end = start + count-1;
00494 for( int i=0; i<length(); i++ )
00495 if (i<start || i>end)
00496 tmp.mString->push_back((*mString)[i]);
00497
00498 mString = tmp.mString;
00499 return *this;
00500 }
00501
00502 String& String::remove(wchar_t ch, int start, int count)
00503 {
00504 acquireData();
00505
00506 if (count<0)
00507 count = length();
00508 String tmp = *this;
00509 tmp.acquireData();
00510 mString->clear();
00511 int removed = 0;
00512 for(int i=0; i<tmp.length(); ++i)
00513 if ( tmp[i]!=ch || removed==count || i<start)
00514 mString->push_back( tmp[i] );
00515 else
00516 ++removed;
00517
00518 return *this;
00519 }
00520
00521 String& String::reverse()
00522 {
00523 acquireData();
00524 int count = length() / 2;
00525 for(int i=0; i<count; ++i)
00526 {
00527 wchar_t tmp = (*this)[i];
00528 (*this)[i] = (*this)[length() - 1 - i];
00529 (*this)[length() - 1 - i] = tmp;
00530 }
00531 return *this;
00532 }
00533
00534 String& String::normalizeSlashes()
00535 {
00536
00537 replace('\\', '/');
00538
00539 int len=0;
00540 do
00541 {
00542 len=length();
00543 replace("//", "/");
00544 }
00545 while(len!=length());
00546
00547 bool beg_slash = startsWith('/');
00548
00549 bool end_slash = endsWith('/');
00550
00551
00552 std::vector<String> parts;
00553 split('/', parts, true);
00554 std::vector<String> new_parts;
00555 for(size_t i=0; i<parts.size(); ++i)
00556 {
00557 if (parts[i] == ".")
00558 continue;
00559 else
00560 if (parts[i] == ".." && !new_parts.empty())
00561 {
00562 new_parts.pop_back();
00563 continue;
00564 }
00565 else
00566 new_parts.push_back(parts[i]);
00567 }
00568
00569
00570
00571 clear();
00572 if (beg_slash)
00573 *this += '/';
00574
00575 for(size_t i=0; i<new_parts.size(); ++i)
00576 {
00577 *this += new_parts[i];
00578 if(i != new_parts.size()-1)
00579 *this += '/';
00580 }
00581
00582 if (end_slash)
00583 *this += '/';
00584
00585 return *this;
00586 }
00587
00588 String& String::append(wchar_t ch, int count)
00589 {
00590 acquireData();
00591
00592 for(int i=0; i<count; ++i)
00593 mString->push_back(ch);
00594 return *this;
00595 }
00596
00597 String& String::append(const String& other)
00598 {
00599 acquireData();
00600
00601 for(int i=0; i<other.length(); ++i)
00602 mString->push_back(other[i]);
00603 return *this;
00604 }
00605
00606 String& String::prepend(const String& str)
00607 {
00608 return insert(0, str);
00609 }
00610
00611 String& String::prepend(wchar_t ch, int count)
00612 {
00613 return insert(0, ch, count);
00614 }
00615
00616 String& String::replace( int start, int count, const String& str )
00617 {
00618 remove(start, count);
00619 insert(start, str);
00620 return *this;
00621 }
00622
00623 String& String::replace( const String& oldstr, const String& newstr, bool case_sensitive )
00624 {
00625 acquireData();
00626 String supstr = case_sensitive ? *this : toLowerCase();
00627 String substr = case_sensitive ? oldstr : oldstr.toLowerCase();
00628
00629 std::vector<int> positions;
00630 for( int pos = 0; (pos=supstr.find(substr,pos)) != -1; pos += substr.length() )
00631 positions.push_back(pos);
00632
00633
00634 for(unsigned i=positions.size(); i--; )
00635 replace(positions[i], oldstr.length(), newstr);
00636
00637 return *this;
00638 }
00639
00640 String& String::replace( int start, int count, wchar_t ch )
00641 {
00642 acquireData();
00643
00644 if (start < 0 )
00645 start = 0;
00646 if (count < 0)
00647 count = length();
00648 int end = start + count;
00649 if (end > length())
00650 end = length();
00651 for(int i=start; i<end; ++i)
00652 (*mString)[i] = ch;
00653 return *this;
00654 }
00655
00656 String& String::replace( wchar_t old_ch, wchar_t new_ch )
00657 {
00658 acquireData();
00659
00660 for(int i=0; i<length(); ++i)
00661 if ((*mString)[i] == old_ch)
00662 (*mString)[i] = new_ch;
00663 return *this;
00664 }
00665
00666 int String::count(wchar_t ch, int start) const
00667 {
00668
00669 if (empty())
00670 return 0;
00671
00672 int num = 0;
00673 for(int i=start; i<length(); ++i)
00674 if ((*mString)[i] == ch)
00675 ++num;
00676 return num;
00677 }
00678
00679 int String::count(const String& str, int start) const
00680 {
00681
00682 if (empty())
00683 return 0;
00684
00685 int found = 0;
00686 for( int pos = find(str, start); pos != -1; start=pos+str.length(), pos=find(str, start))
00687 ++found;
00688 return found;
00689 }
00690
00691 int String::compare(const String& other) const
00692 {
00693 createData();
00694
00695 int min = length() < other.length() ? length() : other.length();
00696 for(int i=0; i<min; ++i)
00697 {
00698 if ( (*mString)[i] != (*other.mString)[i] )
00699 return (int)(*mString)[i] - (int)(*other.mString)[i];
00700 }
00701
00702 return length() - other.length();
00703 }
00704
00705 bool String::endsWith(const String& str) const
00706 {
00707
00708 if (empty())
00709 return false;
00710
00711 if (length() < str.length() || empty() || str.empty() )
00712 return false;
00713 else
00714 {
00715 int offset = length() - str.length();
00716 return memcmp( &(*mString)[0] + offset, &(*str.mString)[0], sizeof((*mString)[0])*str.length() ) == 0;
00717 }
00718 }
00719
00720 bool String::startsWith(const String& str) const
00721 {
00722
00723 if (str.empty())
00724 return true;
00725
00726 if (empty())
00727 return false;
00728
00729 if (length() < str.length() || empty() || str.empty() )
00730 return false;
00731 else
00732 {
00733 return memcmp( &(*mString)[0], &(*str.mString)[0], sizeof((*mString)[0])*str.length() ) == 0;
00734 }
00735 }
00736
00737 bool String::endsWith(wchar_t ch) const
00738 {
00739
00740 if (empty())
00741 return false;
00742
00743 return length() > 0 && (*mString)[length()-1] == ch;
00744 }
00745
00746 bool String::startsWith(wchar_t ch) const
00747 {
00748
00749 if (empty())
00750 return false;
00751
00752 return length() > 0 && (*mString)[0] == ch;
00753 }
00754
00755 String String::toLowerCase() const
00756 {
00757
00758 if (empty())
00759 return String();
00760
00761 String lower = *this;
00762 lower.acquireData();
00763 for(int i=0; i<length(); ++i)
00764 (*lower.mString)[i] = getLowerCase( (*lower.mString)[i] );
00765 return lower;
00766 }
00767
00768 String String::toUpperCase() const
00769 {
00770
00771 if (empty())
00772 return String();
00773
00774 String lower = *this;
00775 lower.acquireData();
00776 for(int i=0; i<length(); ++i)
00777 (*lower.mString)[i] = getUpperCase( (*lower.mString)[i] );
00778 return lower;
00779 }
00780
00781 String& String::insert(int pos, const String& str)
00782 {
00783 if (str.empty())
00784 return *this;
00785
00786 acquireData();
00787
00788 if (pos > length())
00789 return append(str);
00790 int remaining = length() - pos;
00791 mString->resize( mString->length() + str.length() );
00792 memmove( &(*mString)[0]+pos+str.length(), &(*mString)[0]+pos, sizeof(str[0])*remaining );
00793 memcpy( &(*mString)[0]+pos, &(*str.mString)[0], sizeof(str[0])*str.length() );
00794 return *this;
00795 }
00796
00797 String& String::insert(int pos, wchar_t ch, int count)
00798 {
00799 if (count == 0)
00800 return *this;
00801
00802 acquireData();
00803
00804 if (pos >= length())
00805 return append(ch, count);
00806 int remaining = length() - pos;
00807 mString->resize( mString->length() + count );
00808 memmove( &(*mString)[0]+pos+count, &(*mString)[0]+pos, sizeof((*mString)[0])*remaining );
00809 for(int i=0; i<count && i+pos<length(); ++i)
00810 (*mString)[i+pos] = ch;
00811 return *this;
00812 }
00813
00814 String String::left(int count) const
00815 {
00816 if (count<0)
00817 return substring(0, length()+count);
00818 else
00819 return substring(0, count);
00820 }
00821
00822 String String::right(int count) const
00823 {
00824 if (count<0)
00825 return substring(-count, length()+count);
00826 else
00827 return substring(length()-count, count);
00828 }
00829
00830 String String::extractPath() const
00831 {
00832
00833 if (empty())
00834 return String();
00835
00836 String path = *this;
00837 path.normalizeSlashes();
00838 int slash_pos = path.findBackwards('/');
00839 if (slash_pos<0)
00840 return String();
00841 else
00842 return path.substring(0,slash_pos+1);
00843 }
00844
00845 String String::extractFileName() const
00846 {
00847
00848 if (empty())
00849 return String();
00850
00851 int a = findBackwards('/');
00852 int b = findBackwards('\\');
00853 int slash_pos = a > b ? a : b;
00854 return substring(slash_pos+1);
00855 }
00856
00857 String String::extractFileExtension(bool require_dot) const
00858 {
00859
00860 if (empty())
00861 return String();
00862
00863 int dot_pos = findBackwards('.');
00864 if (require_dot && dot_pos == -1)
00865 return String();
00866 else
00867 return substring(dot_pos+1);
00868 }
00869
00870 String String::fromStdWString(const std::wstring& str)
00871 {
00872 String s;
00873 s.acquireData();
00874
00875 s.mString->clear();
00876 for(int i=0; i<(int)str.length(); ++i)
00877 s.mString->push_back( str[i] );
00878 return s;
00879 }
00880
00881 String String::fromStdString(const std::string& str, bool utf8)
00882 {
00883 if (utf8)
00884 return fromUTF8( str.c_str(), str.length());
00885 else
00886 return fromAscii( str.c_str() );
00887 }
00888
00889 String String::fromAscii(const char* str, int size)
00890 {
00891 String s;
00892 s.acquireData();
00893
00894 if (size<0)
00895 size = (int)strlen(str);
00896 const unsigned char* ascii = (const unsigned char*)str;
00897 s.mString->clear();
00898 for(int i=0; i<size; ++i)
00899 {
00900 if( ascii[i] < 128 )
00901 s.mString->push_back( ascii[i] );
00902 else
00903
00904 s.mString->push_back( L'?' );
00905 }
00906
00907 return s;
00908 }
00909
00910 String String::fromUTF16BE(const unsigned short* str, int byte_count)
00911 {
00912 String s;
00913 s.acquireData();
00914
00915 VL_COMPILE_TIME_CHECK( sizeof(unsigned short) == 2 )
00916 int character_count = byte_count < 0 ? -1 : byte_count / 2;
00917
00918
00919 if (character_count<0)
00920 for(character_count=0; str[character_count]; ) ++character_count;
00921
00922
00923 if (str[0] == 65534)
00924 {
00925 str++;
00926 --character_count;
00927 }
00928
00929 s.mString->clear();
00930 for(int i=0; i<character_count; ++i)
00931 {
00932 const unsigned char* bytes = (const unsigned char*)(str+i);
00933 unsigned int code = bytes[1] + (bytes[0]<<8);
00934
00935 if (code>=0xD800 && code <=0xDC00)
00936 {
00937 s.mString->push_back( '?' );
00938 ++i;
00939 }
00940 else
00941 s.mString->push_back( (wchar_t)code );
00942 }
00943 return s;
00944 }
00945
00946 String String::fromUTF16LE(const unsigned short* str, int byte_count)
00947 {
00948 String s;
00949 s.acquireData();
00950
00951 VL_COMPILE_TIME_CHECK( sizeof(unsigned short) == 2 )
00952 int character_count = byte_count < 0 ? -1 : byte_count / 2;
00953
00954
00955 if (character_count<0)
00956 for(character_count=0; str[character_count]; ) ++character_count;
00957
00958
00959 if (str[0] == 65279)
00960 {
00961 str++;
00962 --character_count;
00963 }
00964
00965 s.mString->clear();
00966 for(int i=0; i<character_count; ++i)
00967 {
00968 unsigned char* bytes = (unsigned char*)(str+i);
00969 unsigned int code = bytes[0] + (bytes[1]<<8);
00970
00971 if (code>=0xD800 && code <=0xDC00)
00972 {
00973 s.mString->push_back( '?' );
00974 ++i;
00975 }
00976 else
00977 s.mString->push_back( (wchar_t)code );
00978 }
00979 return s;
00980 }
00981
00982 String String::fromUTF16(const unsigned short* str, int byte_count)
00983 {
00984 String s;
00985 s.acquireData();
00986
00987 if (str[0] == 65279)
00988 s = fromUTF16LE(str, byte_count);
00989 else
00990 if (str[0] == 65534)
00991 s = fromUTF16BE(str, byte_count);
00992 else
00993 {
00994 Log::error("String::fromUTF16(): not UTF16 BE nor LE found.\n");
00995 s.clear();
00996 }
00997 return s;
00998 }
00999
01000 String String::fromUTF8(const char* str, int byte_count)
01001 {
01002 String s;
01003 s.acquireData();
01004
01005 unsigned char* utf8 = (unsigned char*)str;
01006 int start=0;
01007
01008 if ( utf8[0] == 0xEF && utf8[1] == 0xBB && utf8[2] == 0xBF )
01009 start=3;
01010
01011 if (byte_count<0)
01012 for(byte_count=0; utf8[byte_count]; ) ++byte_count;
01013
01014 s.mString->clear();
01015 const int UTF8_1BYTE = 128;
01016 const int UTF8_2BYTE = 128+64;
01017 const int UTF8_3BYTE = 128+64+32;
01018 const int UTF8_4BYTE = 128+64+32+16;
01019
01020 for( int i=start; i<byte_count; ++i )
01021 {
01022
01023
01024
01025
01026
01027
01028 unsigned int unicode_code_point = 0;
01029 if (utf8[i] < UTF8_1BYTE)
01030 unicode_code_point = utf8[i];
01031 else
01032 if ( (utf8[i] & UTF8_3BYTE) == UTF8_2BYTE )
01033 {
01034 unicode_code_point = ((utf8[i]-UTF8_2BYTE)<<6) + (utf8[i+1]&0x3f);
01035 i+=1;
01036 }
01037 else
01038 if ( (utf8[i] & UTF8_4BYTE) == UTF8_3BYTE )
01039 {
01040 unicode_code_point = ((utf8[i]-UTF8_3BYTE)<<12) + ((utf8[i+1]&0x3f)<<6) + (utf8[i+2]&0x3f);
01041 i+=2;
01042 }
01043 else
01044 {
01045 unicode_code_point = ((utf8[i]-UTF8_4BYTE)<<18) + ((utf8[i+1]&0x3f)<<12) + ((utf8[i+2]&0x3f)<<6) + (utf8[i+3]&0x3f);
01046 i+=3;
01047 }
01048
01049 if (unicode_code_point <= 0xFFFF)
01050 s.mString->push_back((wchar_t)unicode_code_point);
01051 else
01052 s.mString->push_back(L'?');
01053 }
01054 return s;
01055 }
01056
01057 String String::fromLatin1(const char* str, int character_count)
01058 {
01059 String s;
01060 s.acquireData();
01061
01062 unsigned char* latin1 = (unsigned char*)str;
01063 if (character_count<0)
01064 for(character_count=0; latin1[character_count]; ) ++character_count;
01065
01066 s.mString->clear();
01067 for(int i=0; i<character_count; ++i)
01068 s.mString->push_back( latin1_to_unicode[ latin1[i] ] );
01069 return s;
01070 }
01071
01072 String String::fromPointer(const void* value)
01073 {
01074 char buffer[32];
01075 memset(buffer, 0, sizeof(buffer));
01076 sprintf(buffer, "%p", value);
01077 return fromAscii(buffer);
01078 }
01079
01080 String String::fromInt(int value)
01081 {
01082 char buffer[256];
01083 memset(buffer, 0, sizeof(buffer));
01084 sprintf(buffer, "%d", value);
01085 return fromAscii(buffer);
01086 }
01087
01088 String String::fromUInt(unsigned int value)
01089 {
01090 char buffer[256];
01091 memset(buffer, 0, sizeof(buffer));
01092 sprintf(buffer, "%u", value);
01093 return fromAscii(buffer);
01094 }
01095
01096 String String::fromLongLong(long long value)
01097 {
01098 char buffer[256];
01099 memset(buffer, 0, sizeof(buffer));
01100 sprintf(buffer, "%lld", value);
01101 return fromAscii(buffer);
01102 }
01103
01104 String String::fromULongLong(unsigned long long value)
01105 {
01106 char buffer[256];
01107 memset(buffer, 0, sizeof(buffer));
01108 sprintf(buffer, "%llu", value);
01109 return fromAscii(buffer);
01110 }
01111
01112 String String::fromDouble(double value, int decimals)
01113 {
01114 char buffer[256];
01115 memset(buffer, 0, sizeof(buffer));
01116 switch(decimals)
01117 {
01118 case 0: sprintf(buffer, "%.0lf", value); break;
01119 case 1: sprintf(buffer, "%.1lf", value); break;
01120 case 2: sprintf(buffer, "%.2lf", value); break;
01121 case 3: sprintf(buffer, "%.3lf", value); break;
01122 case 4: sprintf(buffer, "%.4lf", value); break;
01123 case 5: sprintf(buffer, "%.5lf", value); break;
01124 case 6: sprintf(buffer, "%.6lf", value); break;
01125 case 7: sprintf(buffer, "%.7lf", value); break;
01126 case 8: sprintf(buffer, "%.8lf", value); break;
01127 case 9: sprintf(buffer, "%.9lf", value); break;
01128 case 10: sprintf(buffer, "%.10lf", value); break;
01129 case 11: sprintf(buffer, "%.11lf", value); break;
01130 case 12: sprintf(buffer, "%.12lf", value); break;
01131 case 13: sprintf(buffer, "%.13lf", value); break;
01132 case 14: sprintf(buffer, "%.14lf", value); break;
01133 case 15: sprintf(buffer, "%.15lf", value); break;
01134 case 16: sprintf(buffer, "%.16lf", value); break;
01135 case 17: sprintf(buffer, "%.17lf", value); break;
01136 case 18: sprintf(buffer, "%.18lf", value); break;
01137 case 19: sprintf(buffer, "%.19lf", value); break;
01138 case 20: sprintf(buffer, "%.20lf", value); break;
01139 default: sprintf(buffer, "%.6lf", value); break;
01140 }
01141 return fromAscii(buffer);
01142 }
01143
01144 std::wstring String::toStdWString() const
01145 {
01146
01147 if (empty())
01148 return std::wstring();
01149
01150 std::wstring ws;
01151 for(int i=0; i<length(); ++i)
01152 ws += (*mString)[i];
01153 return ws;
01154 }
01155
01156 std::string String::toStdString() const
01157 {
01158
01159 if (empty())
01160 return std::string();
01161 std::string std_string;
01162
01163 std::vector<unsigned char> utf8;
01164 toUTF8(utf8, false);
01165 if (utf8.size()>1)
01166 {
01167 std_string.resize(utf8.size()-1);
01168 memcpy(&std_string[0], &utf8[0], utf8.size()-1);
01169 }
01170
01171 return std_string;
01172 }
01173
01174 void String::toAscii(std::string& ascii, bool translate_non_ascii_chars) const
01175 {
01176
01177 if (empty())
01178 {
01179 ascii.clear();
01180 return;
01181 }
01182
01183 ascii.clear();
01184 if (mString->length())
01185 {
01186 for(int i=0; i<(int)mString->length() && (*mString)[i]; ++i)
01187 {
01188 if ( (*mString)[i] < 128 || !translate_non_ascii_chars )
01189 ascii += (char)((*mString)[i] & 0xFF);
01190 else
01191 {
01192 const char* translation = unicode_to_ascii( (*mString)[i] );
01193 if (translation)
01194 {
01195 for(int j=0; translation[j]; ++j)
01196 ascii += translation[j];
01197 }
01198 else
01199 ascii += '?';
01200 }
01201 }
01202 }
01203
01204 }
01205
01206 void String::toUTF8(std::string& str, bool include_utf8_signature) const
01207 {
01208 std::vector<unsigned char> utf8;
01209 toUTF8(utf8, include_utf8_signature);
01210 str.clear();
01211 if (utf8.size())
01212 {
01213 for(int i=0; utf8[i]; ++i)
01214 str.push_back(utf8[i]);
01215 }
01216 }
01217
01218 void String::toUTF8(std::vector<unsigned char>& utf8, bool include_utf8_signature) const
01219 {
01220 utf8.clear();
01221 if(include_utf8_signature)
01222 {
01223 utf8.push_back(0xEF);
01224 utf8.push_back(0xBB);
01225 utf8.push_back(0xBF);
01226 }
01227
01228
01229 if (empty())
01230 {
01231 utf8.push_back(0);
01232 return;
01233 }
01234
01235
01236
01237
01238
01239
01240 for(int i=0; i<length(); ++i)
01241 {
01242 if ( (*mString)[i] < 0x80)
01243 utf8.push_back( (unsigned char)(*mString)[i] );
01244 else
01245 if ( (*mString)[i] < 0x800)
01246 {
01247 int a = 0xC0 | ((*mString)[i]>>6);
01248 int b = 0x80 | ((*mString)[i]&0x3F);
01249 utf8.push_back( (unsigned char)a );
01250 utf8.push_back( (unsigned char)b );
01251 }
01252 else
01253 {
01254 int a = 0xE0 | ((*mString)[i]>>12);
01255 int b = 0x80 | (((*mString)[i]>>6)&0x3F);
01256 int c = 0x80 | ((*mString)[i]&0x3F);
01257 utf8.push_back( (unsigned char)a );
01258 utf8.push_back( (unsigned char)b );
01259 utf8.push_back( (unsigned char)c );
01260 }
01261 }
01262
01263 utf8.push_back(0);
01264 }
01265
01266 void String::toUTF16BE(std::vector<unsigned char>& utf16, bool include_utf16be_signature) const
01267 {
01268 utf16.clear();
01269 if (include_utf16be_signature)
01270 {
01271 utf16.push_back(0xFE);
01272 utf16.push_back(0xFF);
01273 }
01274
01275
01276 if (empty())
01277 {
01278 utf16.push_back(0);
01279 return;
01280 }
01281
01282 for(int i=0; i<length(); ++i)
01283 {
01284 int x = ((*mString)[i]>>8) & 0xFF;
01285 int y = (*mString)[i] & 0xFF;
01286 utf16.push_back( (unsigned char)x );
01287 utf16.push_back( (unsigned char)y );
01288 }
01289 utf16.push_back(0);
01290 }
01291
01292 void String::toUTF16LE(std::vector<unsigned char>& utf16, bool include_utf16le_signature) const
01293 {
01294 utf16.clear();
01295 if (include_utf16le_signature)
01296 {
01297 utf16.push_back(0xFF);
01298 utf16.push_back(0xFE);
01299 }
01300
01301
01302 if (empty())
01303 {
01304 utf16.push_back(0);
01305 return;
01306 }
01307
01308 for(int i=0; i<length(); ++i)
01309 {
01310 int x = (*mString)[i] & 0xFF;
01311 int y = ((*mString)[i]>>8) & 0xFF;
01312 utf16.push_back( (unsigned char)x );
01313 utf16.push_back( (unsigned char)y );
01314 }
01315 utf16.push_back(0);
01316 }
01317
01318 void String::toLatin1(std::vector<unsigned char>& latin1) const
01319 {
01320 latin1.clear();
01321
01322
01323 if (empty())
01324 {
01325 latin1.push_back(0);
01326 return;
01327 }
01328
01329 for(int i=0; i<length(); ++i)
01330 {
01331 if ((*mString)[i] < 128)
01332 latin1.push_back((unsigned char)(*mString)[i]);
01333 else
01334 {
01335
01336 int j=128;
01337 for(; latin1_to_unicode[j]; ++j)
01338 {
01339 if ( latin1_to_unicode[j] == (*mString)[i] )
01340 {
01341 latin1.push_back((unsigned char)j);
01342 break;
01343 }
01344 }
01345 if (j==256)
01346 latin1.push_back('?');
01347 }
01348 }
01349 latin1.push_back(0);
01350 }
01351
01352 int String::toInt(bool hex) const
01353 {
01354
01355 if (empty())
01356 return 0;
01357
01358 if (hex)
01359 {
01360 int i=0;
01361 sscanf(toStdString().c_str(), "%x", &i);
01362 return i;
01363 }
01364 else
01365 return atoi( toStdString().c_str() );
01366 }
01367
01368 double String::toDouble() const
01369 {
01370
01371 if (empty())
01372 return 0.0;
01373
01374 return atof( toStdString().c_str() );
01375 }
01376
01377 void String::filterStrings(std::vector<String>& strings, const String& filter)
01378 {
01379 String match = filter;
01380 int filter_type = 0;
01381 bool filter_ok = filter.empty();
01382
01383 if ( filter.startsWith('*') )
01384 {
01385 filter_type--;
01386 match.remove(0, 1);
01387 filter_ok = true;
01388 }
01389
01390 if ( filter.endsWith('*') )
01391 {
01392 filter_type++;
01393 match.remove(match.length()-1, 1);
01394 filter_ok = true;
01395 }
01396
01397 if ( !filter_ok )
01398 {
01399 Log::error( Say("unacceptable filter '%s'.\n") << filter );
01400 return;
01401 }
01402
01403 if ( filter_type && filter.length() > 1 )
01404 {
01405 for( int i=(int)strings.size(); i--; )
01406 switch(filter_type)
01407 {
01408 case 0: if( strings[i].find(match) == -1 ) strings.erase( strings.begin() + i ); break;
01409 case -1: if( !strings[i].endsWith(match) ) strings.erase( strings.begin() + i ); break;
01410 case +1: if( !strings[i].startsWith(match) ) strings.erase( strings.begin() + i ); break;
01411 }
01412 }
01413 }
01414
01415 EStringEncoding String::detectEncoding(const void* str, int byte_count, EStringEncoding default_encoding)
01416 {
01417 const unsigned char* h = (unsigned char*)str;
01418
01419
01420
01421
01422
01423
01424 if (byte_count>4 && h[0] == 0xFF && h[1] == 0xFE && h[2] == 0 && h[3] == 0 ) return SE_UTF32_LE;
01425 if (byte_count>4 && h[0] == 0 && h[1] == 0 && h[2] == 0xFE && h[3] == 0xFF) return SE_UTF32_BE;
01426 if (byte_count>3 && h[0] == 0xEF && h[1] == 0xBB && h[2] == 0xBF ) return SE_UTF8;
01427 if (byte_count>2 && h[0] == 0xFE && h[1] == 0xFF ) return SE_UTF16_BE;
01428 if (byte_count>2 && h[0] == 0xFF && h[1] == 0xFE ) return SE_UTF16_LE;
01429 return default_encoding;
01430 }
01431
01432 unsigned short String::getUpperCase(unsigned short ch)
01433 {
01434 for(int i=0; i<107; ++i)
01435 {
01436 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01437 {
01438 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01439 return case_table_upper_lower_title[index][0];
01440 }
01441 }
01442 return ch;
01443 }
01444
01445 unsigned short String::getLowerCase(unsigned short ch)
01446 {
01447 for(int i=0; i<107; ++i)
01448 {
01449 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01450 {
01451 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01452 return case_table_upper_lower_title[index][1];
01453 }
01454 }
01455 return ch;
01456 }
01457
01458 unsigned short String::getTitleCase(unsigned short ch)
01459 {
01460 for(int i=0; i<107; ++i)
01461 {
01462 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01463 {
01464 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01465 return case_table_upper_lower_title[index][2];
01466 }
01467 }
01468 return ch;
01469 }
01470
01471 std::string String::trimStdString(const std::string& text)
01472 {
01473 std::string trimmed;
01474 for(unsigned i=0; i<text.length(); ++i)
01475 {
01476 if(text[i] == ' ' ||
01477 text[i] == '\n' ||
01478 text[i] == '\t' ||
01479 text[i] == '\v' ||
01480 text[i] == '\b' ||
01481 text[i] == '\a' ||
01482 text[i] == '\f' ||
01483 text[i] == '\r' )
01484 continue;
01485 else
01486 {
01487 trimmed = text.c_str() + i;
01488 break;
01489 }
01490 }
01491 int i = (int)trimmed.length();
01492 while( i-- )
01493 {
01494 if(trimmed[i] == ' ' ||
01495 trimmed[i] == '\n' ||
01496 trimmed[i] == '\t' ||
01497 trimmed[i] == '\v' ||
01498 trimmed[i] == '\b' ||
01499 trimmed[i] == '\a' ||
01500 trimmed[i] == '\f' ||
01501 trimmed[i] == '\r' )
01502 continue;
01503 else
01504 break;
01505 }
01506 trimmed.resize( i+1 );
01507 return trimmed;
01508 }
01509
01510 String String::printf(const char* fmt, ...)
01511 {
01512 std::vector<char> buffer;
01513 buffer.resize(1024 + strlen(fmt));
01514 buffer[0] = 0;
01515
01516 va_list ap;
01517 va_start(ap, fmt);
01518 vsnprintf(&buffer[0], buffer.size(), fmt, ap);
01519 va_end(ap);
01520 return &buffer[0];
01521 }
01522