Files
commandergenius/src/Unicode.h

172 lines
5.2 KiB
C++

/*
OpenLieroX
UTF8/Unicode conversions
code under LGPL
created 01-05-2007
by Albert Zeyer and Dark Charlie
*/
#ifndef __UNICODE_H__
#define __UNICODE_H__
#include <SDL.h> // for Uint32
#include <string>
typedef Uint32 UnicodeChar;
typedef std::basic_string<UnicodeChar> UnicodeString;
typedef Uint16 Utf16Char;
typedef std::basic_string<Utf16Char> Utf16String;
struct ConversionItem {
UnicodeChar Unicode;
unsigned char Utf8[4];
char Ascii;
};
#define UNKNOWN_CHARACTER ' ' // Characters not in conversion table
extern ConversionItem tConversionTable[];
///////////////////////
// Moves the iterator to next unicode character in the string, returns number of bytes skipped
template<typename _Iterator1, typename _Iterator2>
inline size_t IncUtf8StringIterator(_Iterator1& it, const _Iterator2& last) {
if(it == last) return 0;
unsigned char c;
size_t res = 1;
for(++it; last != it; ++it, ++res) {
c = *it;
if(!(c&0x80) || ((c&0xC0) == 0xC0)) break;
}
return res;
}
template<typename _Iterator>
inline size_t MultIncUtf8StringIterator(_Iterator& it, const _Iterator& last, size_t count) {
size_t res = 0;
for(size_t i = 0; i < count; i++) {
if(it == last) break;
res += IncUtf8StringIterator(it, last);
}
return res;
}
///////////////////
// The iterator points at first byte of the UTF8 encoded character, returns number of bytes skipped
template<typename _Iterator1, typename _Iterator2>
inline size_t DecUtf8StringIterator(_Iterator1& it, const _Iterator2& first) {
if(it == first) return 0;
size_t res = 1;
unsigned char c;
--it;
for(; first != it; --it, ++res) {
c = *it;
if(!(c&0x80) || ((c&0xC0) == 0xC0)) break;
}
return res;
}
template<typename _Iterator>
inline _Iterator GetMultIncUtf8StringIterator(_Iterator it, const _Iterator& last, size_t count) {
MultIncUtf8StringIterator(it, last, count);
return it;
}
inline std::string::const_iterator Utf8PositionToIterator(const std::string& str, size_t pos) {
std::string::const_iterator res = str.begin();
MultIncUtf8StringIterator(res, str.end(), pos);
return res;
}
inline std::string::iterator Utf8PositionToIterator(std::string& str, size_t pos) {
std::string::iterator res = str.begin();
MultIncUtf8StringIterator(res, str.end(), pos);
return res;
}
////////////////////////
// Reads next unicode character from a UTF8 encoded string
// the iterator shows at the next character after this operation
UnicodeChar GetNextUnicodeFromUtf8(std::string::const_iterator &it, const std::string::const_iterator& last, size_t& num_skipped);
inline UnicodeChar GetNextUnicodeFromUtf8(std::string::const_iterator& it, const std::string::const_iterator& last) {
size_t tmp; return GetNextUnicodeFromUtf8(it, last, tmp); }
inline UnicodeChar GetUnicodeFromUtf8(const std::string& str, size_t pos) {
std::string::const_iterator it = Utf8PositionToIterator(str, pos);
return GetNextUnicodeFromUtf8(it, str.end());
}
////////////////////
// Gets the UTF8 representation of the unicode character (can be more bytes)
std::string GetUtf8FromUnicode(UnicodeChar ch);
inline size_t Utf8StringSize(const std::string& str) {
size_t res = 0;
std::string::const_iterator it = str.begin();
for(; it != str.end(); IncUtf8StringIterator(it, str.end()))
res++;
return res;
}
inline std::string Utf8SubStr(const std::string& str, size_t start, size_t n = (size_t)-1) {
if (n == (size_t)-1)
return std::string(Utf8PositionToIterator(str, start), str.end());
else
return std::string(
Utf8PositionToIterator(str, start),
Utf8PositionToIterator(str, start + n));
}
inline void Utf8Erase(std::string& str, size_t start, size_t n = (size_t)-1) {
std::string::iterator it = Utf8PositionToIterator(str, start);
str.erase(it, GetMultIncUtf8StringIterator(it, str.end(), n));
}
inline void Utf8Insert(std::string& str, size_t start, const std::string& s) {
str.insert(Utf8PositionToIterator(str, start), s.begin(), s.end());
}
inline void InsertUnicodeChar(std::string& str, size_t pos, UnicodeChar ch) {
std::string tmp = GetUtf8FromUnicode(ch);
Utf8Insert(str, pos, tmp);
}
// Uppercase/lowercase handling
UnicodeChar UnicodeToLower(UnicodeChar c);
UnicodeChar UnicodeToUpper(UnicodeChar c);
// Conversion functions
int FindTableIndex(UnicodeChar c);
char UnicodeCharToAsciiChar(UnicodeChar c);
std::string RemoveSpecialChars(const std::string &Utf8String);
std::string Utf16ToUtf8(const Utf16String& str);
Utf16String Utf8ToUtf16(const std::string& str);
std::string UnicodeToUtf8(const UnicodeString& str);
UnicodeString Utf8ToUnicode(const std::string& str);
#ifdef WIN32
std::string Utf8ToSystemNative(const std::string& utf8str);
std::string SystemNativeToUtf8(const std::string& natstr);
#else // Other platforms use natively utf8 (at least we suppose so)
inline std::string Utf8ToSystemNative(const std::string& utf8str) { return utf8str; }
inline std::string SystemNativeToUtf8(const std::string& natstr) { return natstr; }
#endif
size_t TransformRawToUtf8Pos(const std::string& text, size_t pos);
size_t TransformUtf8PosToRaw(const std::string& text, size_t pos);
inline size_t TransformRawToUtf8ToRaw(const std::string& src, size_t srcpos, const std::string& dest) {
return TransformUtf8PosToRaw(dest, TransformRawToUtf8Pos(src, srcpos));
}
#endif