// git-svn-id: https://clonekeenplus.svn.sourceforge.net/svnroot/clonekeenplus/cgenius/trunk@87 4df4b0f3-56ce-47cb-b001-ed939b7d65a6 (172 lines, 5.2 KiB, C++)
/*
	OpenLieroX

	UTF8/Unicode conversions

	code under LGPL
	created 01-05-2007
	by Albert Zeyer and Dark Charlie
*/
|
|
|
|
#ifndef __UNICODE_H__
|
|
#define __UNICODE_H__
|
|
|
|
#include <SDL.h> // for Uint32
|
|
#include <string>
|
|
|
|
typedef Uint32 UnicodeChar;
|
|
typedef std::basic_string<UnicodeChar> UnicodeString;
|
|
typedef Uint16 Utf16Char;
|
|
typedef std::basic_string<Utf16Char> Utf16String;
|
|
|
|
struct ConversionItem {
|
|
UnicodeChar Unicode;
|
|
unsigned char Utf8[4];
|
|
char Ascii;
|
|
};
|
|
|
|
#define UNKNOWN_CHARACTER ' ' // Characters not in conversion table
|
|
extern ConversionItem tConversionTable[];
|
|
|
|
|
|
///////////////////////
// Steps the iterator forward to the start of the next UTF8 character.
// `it`   - iterator to advance (modified in place)
// `last` - end of the range; the iterator never moves beyond it
// Returns the number of bytes skipped, or 0 when `it` already equals `last`.
template<typename IterT, typename EndT>
inline size_t IncUtf8StringIterator(IterT& it, const EndT& last) {
	if (it == last)
		return 0;

	// The byte under the iterator is always consumed
	size_t skipped = 1;
	++it;

	// Continuation bytes (10xxxxxx) still belong to the character being skipped
	while (last != it) {
		const unsigned char byte = *it;
		const bool isSingleByte = (byte & 0x80) == 0;	// 0xxxxxxx
		const bool isLeadByte = (byte & 0xC0) == 0xC0;	// 11xxxxxx
		if (isSingleByte || isLeadByte)
			break;

		++it;
		++skipped;
	}

	return skipped;
}
|
|
|
|
// Advances the iterator by up to `count` UTF8 characters, stopping early
// once `last` is reached. Returns the total number of bytes skipped.
template<typename IterT>
inline size_t MultIncUtf8StringIterator(IterT& it, const IterT& last, size_t count) {
	size_t totalBytes = 0;

	for (size_t remaining = count; remaining > 0 && !(it == last); --remaining)
		totalBytes += IncUtf8StringIterator(it, last);

	return totalBytes;
}
|
|
|
|
///////////////////
// Steps the iterator backwards to the first byte of the previous UTF8 character.
// `it`    - iterator to move (modified in place)
// `first` - beginning of the range; the iterator never moves before it
// Returns the number of bytes skipped, or 0 when `it` already equals `first`.
template<typename IterT, typename BeginT>
inline size_t DecUtf8StringIterator(IterT& it, const BeginT& first) {
	if (it == first)
		return 0;

	// At least one byte is always stepped over
	size_t skipped = 1;
	--it;

	// Walk back over continuation bytes (10xxxxxx) until a single-byte
	// character or a lead byte is found, or the range start is hit
	while (first != it) {
		const unsigned char byte = *it;
		if ((byte & 0x80) == 0 || (byte & 0xC0) == 0xC0)
			break;

		--it;
		++skipped;
	}

	return skipped;
}
|
|
|
|
// Returns a copy of `it` advanced by up to `count` UTF8 characters
// (never beyond `last`). The caller's iterator is left untouched.
template<typename IterT>
inline IterT GetMultIncUtf8StringIterator(IterT it, const IterT& last, size_t count) {
	IterT advanced = it;
	MultIncUtf8StringIterator(advanced, last, count);	// byte count is not needed here
	return advanced;
}
|
|
|
|
inline std::string::const_iterator Utf8PositionToIterator(const std::string& str, size_t pos) {
|
|
std::string::const_iterator res = str.begin();
|
|
MultIncUtf8StringIterator(res, str.end(), pos);
|
|
return res;
|
|
}
|
|
|
|
inline std::string::iterator Utf8PositionToIterator(std::string& str, size_t pos) {
|
|
std::string::iterator res = str.begin();
|
|
MultIncUtf8StringIterator(res, str.end(), pos);
|
|
return res;
|
|
}
|
|
|
|
|
|
|
|
////////////////////////
|
|
// Reads next unicode character from a UTF8 encoded string
|
|
// the iterator shows at the next character after this operation
|
|
UnicodeChar GetNextUnicodeFromUtf8(std::string::const_iterator &it, const std::string::const_iterator& last, size_t& num_skipped);
|
|
// Convenience overload for callers that do not need the skipped-bytes count;
// forwards to the three-argument version and discards that output.
inline UnicodeChar GetNextUnicodeFromUtf8(std::string::const_iterator& it, const std::string::const_iterator& last) {
	size_t ignoredSkipCount = 0;
	return GetNextUnicodeFromUtf8(it, last, ignoredSkipCount);
}
|
|
|
|
// Reads the character found at character position `pos` of a UTF8 string:
// locates the position first, then decodes from there via GetNextUnicodeFromUtf8.
inline UnicodeChar GetUnicodeFromUtf8(const std::string& str, size_t pos) {
	std::string::const_iterator cursor = Utf8PositionToIterator(str, pos);
	const std::string::const_iterator stop = str.end();
	return GetNextUnicodeFromUtf8(cursor, stop);
}
|
|
|
|
////////////////////
|
|
// Gets the UTF8 representation of the unicode character (can be more bytes)
|
|
std::string GetUtf8FromUnicode(UnicodeChar ch);
|
|
|
|
|
|
|
|
// Returns the length of a UTF8 string in characters (not bytes), obtained by
// stepping through the string one character at a time.
inline size_t Utf8StringSize(const std::string& str) {
	size_t charCount = 0;
	std::string::const_iterator cursor = str.begin();

	while (cursor != str.end()) {
		++charCount;
		IncUtf8StringIterator(cursor, str.end());
	}

	return charCount;
}
|
|
|
|
inline std::string Utf8SubStr(const std::string& str, size_t start, size_t n = (size_t)-1) {
|
|
if (n == (size_t)-1)
|
|
return std::string(Utf8PositionToIterator(str, start), str.end());
|
|
else
|
|
return std::string(
|
|
Utf8PositionToIterator(str, start),
|
|
Utf8PositionToIterator(str, start + n));
|
|
}
|
|
|
|
inline void Utf8Erase(std::string& str, size_t start, size_t n = (size_t)-1) {
|
|
std::string::iterator it = Utf8PositionToIterator(str, start);
|
|
str.erase(it, GetMultIncUtf8StringIterator(it, str.end(), n));
|
|
}
|
|
|
|
// Inserts the bytes of `s` into a UTF8 string in front of the character at
// character position `start`.
inline void Utf8Insert(std::string& str, size_t start, const std::string& s) {
	const std::string::iterator where = Utf8PositionToIterator(str, start);
	str.insert(where, s.begin(), s.end());
}
|
|
|
|
inline void InsertUnicodeChar(std::string& str, size_t pos, UnicodeChar ch) {
|
|
std::string tmp = GetUtf8FromUnicode(ch);
|
|
Utf8Insert(str, pos, tmp);
|
|
}
|
|
|
|
// Uppercase/lowercase handling
|
|
UnicodeChar UnicodeToLower(UnicodeChar c);
|
|
UnicodeChar UnicodeToUpper(UnicodeChar c);
|
|
|
|
// Conversion functions
|
|
|
|
int FindTableIndex(UnicodeChar c);
|
|
char UnicodeCharToAsciiChar(UnicodeChar c);
|
|
std::string RemoveSpecialChars(const std::string &Utf8String);
|
|
std::string Utf16ToUtf8(const Utf16String& str);
|
|
Utf16String Utf8ToUtf16(const std::string& str);
|
|
std::string UnicodeToUtf8(const UnicodeString& str);
|
|
UnicodeString Utf8ToUnicode(const std::string& str);
|
|
#ifdef WIN32
|
|
std::string Utf8ToSystemNative(const std::string& utf8str);
|
|
std::string SystemNativeToUtf8(const std::string& natstr);
|
|
#else // Other platforms use natively utf8 (at least we suppose so)
|
|
// No conversion in this variant: the input is returned unchanged
inline std::string Utf8ToSystemNative(const std::string& utf8str) {
	return std::string(utf8str);
}
|
|
// No conversion in this variant: the input is returned unchanged
inline std::string SystemNativeToUtf8(const std::string& natstr) {
	return std::string(natstr);
}
|
|
#endif
|
|
|
|
size_t TransformRawToUtf8Pos(const std::string& text, size_t pos);
|
|
size_t TransformUtf8PosToRaw(const std::string& text, size_t pos);
|
|
inline size_t TransformRawToUtf8ToRaw(const std::string& src, size_t srcpos, const std::string& dest) {
|
|
return TransformUtf8PosToRaw(dest, TransformRawToUtf8Pos(src, srcpos));
|
|
}
|
|
|
|
#endif
|