mirror of
https://gitlab.com/OpenMW/openmw.git
synced 2025-04-30 05:47:57 +03:00
Imported Upstream version 0.26.0
This commit is contained in:
commit
9a2b6c69b6
1398 changed files with 212217 additions and 0 deletions
8
components/to_utf8/Makefile
Normal file
8
components/to_utf8/Makefile
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Regenerate the conversion tables by running the generator and capturing
# its standard output.
tables_gen.hpp: gen_iconv
	./gen_iconv > tables_gen.hpp

# Build the table generator. $(CXX) defaults to g++ in GNU make, so the
# default behavior is unchanged while still allowing another compiler to be
# selected from the command line or the environment.
gen_iconv: gen_iconv.cpp
	$(CXX) -Wall $^ -o $@

# 'clean' names an action, not a file. Declaring it phony keeps a stray file
# called "clean" from making the target look up to date.
.PHONY: clean
clean:
	rm -f ./gen_iconv
|
118
components/to_utf8/gen_iconv.cpp
Normal file
118
components/to_utf8/gen_iconv.cpp
Normal file
|
@ -0,0 +1,118 @@
|
|||
// This program generates the file tables_gen.hpp
|
||||
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
#include <iconv.h>
|
||||
#include <cassert>
|
||||
|
||||
// Emit the leading whitespace that starts every row of a generated table.
void tab()
{
    std::cout << " ";
}
|
||||
|
||||
// Write one number with a space in front of it and, unless it is the last
// number of the table, a comma after it.
//
// i    - the raw byte to print
// last - true for the final number of a table; suppresses the comma
void num(char i, bool last)
{
    // Print the numeric value instead of the character itself, since raw
    // bytes would put non-printable characters into the generated source.
    // Going through signed char pins the printed range to -128..127 even on
    // platforms where plain char is unsigned, so every value fits the
    // "static signed char" tables this generator emits.
    std::cout << " " << static_cast<int>(static_cast<signed char>(i));
    if (!last)
        std::cout << ",";
}
|
||||
|
||||
// Write one table entry (UTF8 value), 1-5 bytes
|
||||
void writeChar(char *value, int length, bool last, const std::string &comment="")
|
||||
{
|
||||
assert(length >= 1 && length <= 5);
|
||||
tab();
|
||||
num(length, false);
|
||||
for(int i=0;i<5;i++)
|
||||
num(value[i], last && i==4);
|
||||
|
||||
if(comment != "")
|
||||
cout << " // " << comment;
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
// What to write on missing characters
|
||||
void writeMissing(bool last)
|
||||
{
|
||||
// Just write a space character
|
||||
char value[5];
|
||||
value[0] = ' ';
|
||||
for(int i=1; i<5; i++)
|
||||
value[i] = 0;
|
||||
writeChar(value, 1, last, "not part of this charset");
|
||||
}
|
||||
|
||||
int write_table(const std::string &charset, const std::string &tableName)
|
||||
{
|
||||
// Write table header
|
||||
cout << "static signed char " << tableName << "[] =\n{\n";
|
||||
|
||||
// Open conversion system
|
||||
iconv_t cd = iconv_open ("UTF-8", charset.c_str());
|
||||
|
||||
// Convert each character from 0 to 255
|
||||
for(int i=0; i<256; i++)
|
||||
{
|
||||
bool last = (i==255);
|
||||
|
||||
char input = i;
|
||||
char *iptr = &input;
|
||||
size_t ileft = 1;
|
||||
|
||||
char output[5];
|
||||
for(int k=0; k<5; k++) output[k] = 0;
|
||||
char *optr = output;
|
||||
size_t oleft = 5;
|
||||
|
||||
size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);
|
||||
|
||||
if(res) writeMissing(last);
|
||||
else writeChar(output, 5-oleft, last);
|
||||
}
|
||||
|
||||
iconv_close (cd);
|
||||
|
||||
// Finish table
|
||||
cout << "};\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Write header guard
|
||||
cout << "#ifndef COMPONENTS_TOUTF8_TABLE_GEN_H\n#define COMPONENTS_TOUTF8_TABLE_GEN_H\n\n";
|
||||
|
||||
// Write namespace
|
||||
cout << "namespace ToUTF8\n{\n\n";
|
||||
|
||||
// Central European and Eastern European languages that use Latin script, such as
|
||||
// Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian (Latin script), Romanian and Albanian.
|
||||
cout << "\n/// Central European and Eastern European languages that use Latin script,"
|
||||
"\n/// such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian,"
|
||||
"\n/// Serbian (Latin script), Romanian and Albanian."
|
||||
"\n";
|
||||
write_table("WINDOWS-1250", "windows_1250");
|
||||
|
||||
// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic and other languages
|
||||
cout << "\n/// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic"
|
||||
"\n/// and other languages"
|
||||
"\n";
|
||||
write_table("WINDOWS-1251", "windows_1251");
|
||||
|
||||
// English
|
||||
cout << "\n/// Latin alphabet used by English and some other Western languages"
|
||||
"\n";
|
||||
write_table("WINDOWS-1252", "windows_1252");
|
||||
|
||||
write_table("CP437", "cp437");
|
||||
|
||||
// Close namespace
|
||||
cout << "\n}\n\n";
|
||||
|
||||
// Close header guard
|
||||
cout << "#endif\n\n";
|
||||
|
||||
return 0;
|
||||
}
|
1056
components/to_utf8/tables_gen.hpp
Normal file
1056
components/to_utf8/tables_gen.hpp
Normal file
File diff suppressed because it is too large
Load diff
4
components/to_utf8/tests/output/to_utf8_test.out
Normal file
4
components/to_utf8/tests/output/to_utf8_test.out
Normal file
|
@ -0,0 +1,4 @@
|
|||
original: Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам?
|
||||
converted: Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам?
|
||||
original: Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger.
|
||||
converted: Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger.
|
18
components/to_utf8/tests/test.sh
Executable file
18
components/to_utf8/tests/test.sh
Executable file
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash

# Build the test programs, then run every *_test executable in the current
# directory. A program with a recorded reference output is diffed against it;
# a program without one has its output recorded and added to git.

make || exit

mkdir -p output

# Without nullglob an unmatched pattern stays literal ("*_test"), and the
# loop would try to execute a file of that name.
shopt -s nullglob

for a in *_test; do
    if [ -f "output/$a.out" ]; then
        echo "Running $a:"
        "./$a" | diff "output/$a.out" -
    else
        echo "Creating $a.out"
        "./$a" > "output/$a.out"
        git add "output/$a.out"
    fi
done
|
1
components/to_utf8/tests/test_data/french-utf8.txt
Normal file
1
components/to_utf8/tests/test_data/french-utf8.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger.
|
1
components/to_utf8/tests/test_data/french-win1252.txt
Normal file
1
components/to_utf8/tests/test_data/french-win1252.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger.
|
1
components/to_utf8/tests/test_data/russian-utf8.txt
Normal file
1
components/to_utf8/tests/test_data/russian-utf8.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам?
|
1
components/to_utf8/tests/test_data/russian-win1251.txt
Normal file
1
components/to_utf8/tests/test_data/russian-win1251.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам?
|
59
components/to_utf8/tests/to_utf8_test.cpp
Normal file
59
components/to_utf8/tests/to_utf8_test.cpp
Normal file
|
@ -0,0 +1,59 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "../to_utf8.hpp"
|
||||
|
||||
std::string getFirstLine(const std::string &filename);
|
||||
void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile,
|
||||
const std::string &utf8File);
|
||||
|
||||
/// Test character encoding conversion to and from UTF-8
|
||||
void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile,
|
||||
const std::string &utf8File)
|
||||
{
|
||||
// get some test data
|
||||
std::string legacyEncLine = getFirstLine(legacyEncFile);
|
||||
std::string utf8Line = getFirstLine(utf8File);
|
||||
|
||||
// create an encoder for specified character encoding
|
||||
ToUTF8::Utf8Encoder encoder (encoding);
|
||||
|
||||
// convert text to UTF-8
|
||||
std::string convertedUtf8Line = encoder.getUtf8(legacyEncLine);
|
||||
|
||||
std::cout << "original: " << utf8Line << std::endl;
|
||||
std::cout << "converted: " << convertedUtf8Line << std::endl;
|
||||
|
||||
// check correctness
|
||||
assert(convertedUtf8Line == utf8Line);
|
||||
|
||||
// convert UTF-8 text to legacy encoding
|
||||
std::string convertedLegacyEncLine = encoder.getLegacyEnc(utf8Line);
|
||||
// check correctness
|
||||
assert(convertedLegacyEncLine == legacyEncLine);
|
||||
}
|
||||
|
||||
/// Return the first line of the file 'filename', without the line terminator
/// consumed by std::getline.
///
/// Throws std::runtime_error if the file cannot be opened.
std::string getFirstLine(const std::string &filename)
{
    std::ifstream text(filename.c_str());
    if (!text.is_open())
        throw std::runtime_error("Unable to open file " + filename);

    std::string firstLine;
    std::getline(text, firstLine);

    // The stream is closed by its destructor when the function returns.
    return firstLine;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
testEncoder(ToUTF8::WINDOWS_1251, "test_data/russian-win1251.txt", "test_data/russian-utf8.txt");
|
||||
testEncoder(ToUTF8::WINDOWS_1252, "test_data/french-win1252.txt", "test_data/french-utf8.txt");
|
||||
return 0;
|
||||
}
|
344
components/to_utf8/to_utf8.cpp
Normal file
344
components/to_utf8/to_utf8.cpp
Normal file
|
@ -0,0 +1,344 @@
|
|||
#include "to_utf8.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
/* This file contains the code to translate from WINDOWS-1252 (native
|
||||
charset used in English version of Morrowind) to UTF-8. The library
|
||||
is designed to be extended to support more source encodings later,
|
||||
which means that we may add support for Russian, Polish and Chinese
|
||||
files and so on.
|
||||
|
||||
The code does not depend on any external library at
|
||||
runtime. Instead, it uses a pregenerated table made with iconv (see
|
||||
gen_iconv.cpp and the Makefile) which is located in tables_gen.hpp.
|
||||
|
||||
This is both faster and uses fewer dependencies. The tables would
|
||||
only need to be regenerated if we are adding support for more input
|
||||
encodings. As such, there is no need to make the generator code
|
||||
platform independent.
|
||||
|
||||
The library is optimized for the case of pure ASCII input strings,
|
||||
which is the vast majority of cases at least for the English
|
||||
version. A test of my version of Morrowind.esm got 130 non-ASCII vs
|
||||
236195 ASCII strings, or less than 0.06% of strings containing
|
||||
non-ASCII characters.
|
||||
|
||||
To optimize for this, if the first pass of the string does not find
|
||||
any non-ASCII characters, the entire string is passed along without
|
||||
any modification.
|
||||
|
||||
Most of the non-ASCII strings are books, and are quite large. (The
|
||||
non-ASCII characters are typically starting and ending quotation
|
||||
marks.) Within these, almost all the characters are ASCII. For this
|
||||
purpose, the library is also optimized for mostly-ASCII contents
|
||||
even in the cases where some conversion is necessary.
|
||||
*/
|
||||
|
||||
|
||||
// Generated tables
|
||||
#include "tables_gen.hpp"
|
||||
|
||||
using namespace ToUTF8;
|
||||
|
||||
// Create an encoder for the given source code page. The output buffer starts
// at 50 KiB and the translation table is selected from the pregenerated
// tables in tables_gen.hpp.
Utf8Encoder::Utf8Encoder(const FromType sourceEncoding):
    mOutput(50*1024)
{
    switch (sourceEncoding)
    {
        case ToUTF8::WINDOWS_1252:
        {
            translationArray = ToUTF8::windows_1252;
            break;
        }
        case ToUTF8::WINDOWS_1250:
        {
            translationArray = ToUTF8::windows_1250;
            break;
        }
        case ToUTF8::WINDOWS_1251:
        {
            translationArray = ToUTF8::windows_1251;
            break;
        }
        case ToUTF8::CP437:
        {
            translationArray = ToUTF8::cp437;
            break;
        }

        default:
        {
            // An unknown value is a programming error. When asserts are
            // compiled out, still leave the pointer in a defined state
            // rather than uninitialized; WINDOWS-1252 is the same fallback
            // calculateEncoding() uses for unrecognized names.
            assert(0);
            translationArray = ToUTF8::windows_1252;
            break;
        }
    }
}
|
||||
|
||||
std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
||||
{
|
||||
// Double check that the input string stops at some point (it might
|
||||
// contain zero terminators before this, inside its own data, which
|
||||
// is also ok.)
|
||||
assert(input[size] == 0);
|
||||
|
||||
// TODO: The rest of this function is designed for single-character
|
||||
// input encodings only. It also assumes that the input the input
|
||||
// encoding shares its first 128 values (0-127) with ASCII. These
|
||||
// conditions must be checked again if you add more input encodings
|
||||
// later.
|
||||
|
||||
// Compute output length, and check for pure ascii input at the same
|
||||
// time.
|
||||
bool ascii;
|
||||
size_t outlen = getLength(input, ascii);
|
||||
|
||||
// If we're pure ascii, then don't bother converting anything.
|
||||
if(ascii)
|
||||
return std::string(input, outlen);
|
||||
|
||||
// Make sure the output is large enough
|
||||
resize(outlen);
|
||||
char *out = &mOutput[0];
|
||||
|
||||
// Translate
|
||||
while (*input)
|
||||
copyFromArray(*(input++), out);
|
||||
|
||||
// Make sure that we wrote the correct number of bytes
|
||||
assert((out-&mOutput[0]) == (int)outlen);
|
||||
|
||||
// And make extra sure the output is null terminated
|
||||
assert(mOutput.size() > outlen);
|
||||
assert(mOutput[outlen] == 0);
|
||||
|
||||
// Return a string
|
||||
return std::string(&mOutput[0], outlen);
|
||||
}
|
||||
|
||||
std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
||||
{
|
||||
// Double check that the input string stops at some point (it might
|
||||
// contain zero terminators before this, inside its own data, which
|
||||
// is also ok.)
|
||||
assert(input[size] == 0);
|
||||
|
||||
// TODO: The rest of this function is designed for single-character
|
||||
// input encodings only. It also assumes that the input the input
|
||||
// encoding shares its first 128 values (0-127) with ASCII. These
|
||||
// conditions must be checked again if you add more input encodings
|
||||
// later.
|
||||
|
||||
// Compute output length, and check for pure ascii input at the same
|
||||
// time.
|
||||
bool ascii;
|
||||
size_t outlen = getLength2(input, ascii);
|
||||
|
||||
// If we're pure ascii, then don't bother converting anything.
|
||||
if(ascii)
|
||||
return std::string(input, outlen);
|
||||
|
||||
// Make sure the output is large enough
|
||||
resize(outlen);
|
||||
char *out = &mOutput[0];
|
||||
|
||||
// Translate
|
||||
while(*input)
|
||||
copyFromArray2(input, out);
|
||||
|
||||
// Make sure that we wrote the correct number of bytes
|
||||
assert((out-&mOutput[0]) == (int)outlen);
|
||||
|
||||
// And make extra sure the output is null terminated
|
||||
assert(mOutput.size() > outlen);
|
||||
assert(mOutput[outlen] == 0);
|
||||
|
||||
// Return a string
|
||||
return std::string(&mOutput[0], outlen);
|
||||
}
|
||||
|
||||
// Make sure the output vector is large enough for 'size' bytes,
|
||||
// including a terminating zero after it.
|
||||
void Utf8Encoder::resize(size_t size)
|
||||
{
|
||||
if (mOutput.size() <= size)
|
||||
// Add some extra padding to reduce the chance of having to resize
|
||||
// again later.
|
||||
mOutput.resize(3*size);
|
||||
|
||||
// And make sure the string is zero terminated
|
||||
mOutput[size] = 0;
|
||||
}
|
||||
|
||||
/** Get the total length length needed to decode the given string with
|
||||
the given translation array. The arrays are encoded with 6 bytes
|
||||
per character, with the first giving the length and the next 5 the
|
||||
actual data.
|
||||
|
||||
The function serves a dual purpose for optimization reasons: it
|
||||
checks if the input is pure ascii (all values are <= 127). If this
|
||||
is the case, then the ascii parameter is set to true, and the
|
||||
caller can optimize for this case.
|
||||
*/
|
||||
size_t Utf8Encoder::getLength(const char* input, bool &ascii)
|
||||
{
|
||||
ascii = true;
|
||||
size_t len = 0;
|
||||
const char* ptr = input;
|
||||
unsigned char inp = *ptr;
|
||||
|
||||
// Do away with the ascii part of the string first (this is almost
|
||||
// always the entire string.)
|
||||
while (inp && inp < 128)
|
||||
inp = *(++ptr);
|
||||
len += (ptr-input);
|
||||
|
||||
// If we're not at the null terminator at this point, then there
|
||||
// were some non-ascii characters to deal with. Go to slow-mode for
|
||||
// the rest of the string.
|
||||
if (inp)
|
||||
{
|
||||
ascii = false;
|
||||
while (inp)
|
||||
{
|
||||
// Find the translated length of this character in the
|
||||
// lookup table.
|
||||
len += translationArray[inp*6];
|
||||
inp = *(++ptr);
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
// Translate one character 'ch' using the translation array 'arr', and
|
||||
// advance the output pointer accordingly.
|
||||
void Utf8Encoder::copyFromArray(unsigned char ch, char* &out)
|
||||
{
|
||||
// Optimize for ASCII values
|
||||
if (ch < 128)
|
||||
{
|
||||
*(out++) = ch;
|
||||
return;
|
||||
}
|
||||
|
||||
const signed char *in = translationArray + ch*6;
|
||||
int len = *(in++);
|
||||
for (int i=0; i<len; i++)
|
||||
*(out++) = *(in++);
|
||||
}
|
||||
|
||||
size_t Utf8Encoder::getLength2(const char* input, bool &ascii)
|
||||
{
|
||||
ascii = true;
|
||||
size_t len = 0;
|
||||
const char* ptr = input;
|
||||
unsigned char inp = *ptr;
|
||||
|
||||
// Do away with the ascii part of the string first (this is almost
|
||||
// always the entire string.)
|
||||
while (inp && inp < 128)
|
||||
inp = *(++ptr);
|
||||
len += (ptr-input);
|
||||
|
||||
// If we're not at the null terminator at this point, then there
|
||||
// were some non-ascii characters to deal with. Go to slow-mode for
|
||||
// the rest of the string.
|
||||
if (inp)
|
||||
{
|
||||
ascii = false;
|
||||
while(inp)
|
||||
{
|
||||
len += 1;
|
||||
// Find the translated length of this character in the
|
||||
// lookup table.
|
||||
switch(inp)
|
||||
{
|
||||
case 0xe2: len -= 2; break;
|
||||
case 0xc2:
|
||||
case 0xcb:
|
||||
case 0xc4:
|
||||
case 0xc6:
|
||||
case 0xc3:
|
||||
case 0xd0:
|
||||
case 0xd1:
|
||||
case 0xd2:
|
||||
case 0xc5: len -= 1; break;
|
||||
}
|
||||
|
||||
inp = *(++ptr);
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
void Utf8Encoder::copyFromArray2(const char*& chp, char* &out)
|
||||
{
|
||||
unsigned char ch = *(chp++);
|
||||
// Optimize for ASCII values
|
||||
if (ch < 128)
|
||||
{
|
||||
*(out++) = ch;
|
||||
return;
|
||||
}
|
||||
|
||||
int len = 1;
|
||||
switch (ch)
|
||||
{
|
||||
case 0xe2: len = 3; break;
|
||||
case 0xc2:
|
||||
case 0xcb:
|
||||
case 0xc4:
|
||||
case 0xc6:
|
||||
case 0xc3:
|
||||
case 0xd0:
|
||||
case 0xd1:
|
||||
case 0xd2:
|
||||
case 0xc5: len = 2; break;
|
||||
}
|
||||
|
||||
if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
|
||||
{
|
||||
*(out++) = ch;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned char ch2 = *(chp++);
|
||||
unsigned char ch3 = '\0';
|
||||
if (len == 3)
|
||||
ch3 = *(chp++);
|
||||
|
||||
for (int i = 128; i < 256; i++)
|
||||
{
|
||||
unsigned char b1 = translationArray[i*6 + 1], b2 = translationArray[i*6 + 2], b3 = translationArray[i*6 + 3];
|
||||
if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3))
|
||||
{
|
||||
*(out++) = (char)i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl;
|
||||
|
||||
*(out++) = ch; // Could not find glyph, just put whatever
|
||||
}
|
||||
|
||||
// Map an encoding name to its code page: "win1250" and "win1251" select the
// matching Windows code page, every other name selects WINDOWS-1252.
ToUTF8::FromType ToUTF8::calculateEncoding(const std::string& encodingName)
{
    ToUTF8::FromType encoding = ToUTF8::WINDOWS_1252;

    if (encodingName == "win1250")
        encoding = ToUTF8::WINDOWS_1250;
    else if (encodingName == "win1251")
        encoding = ToUTF8::WINDOWS_1251;

    return encoding;
}
|
||||
|
||||
// Return the human-readable message describing which font encoding the
// given encoding name selects. Names other than "win1250" and "win1251"
// yield the default (English) message.
std::string ToUTF8::encodingUsingMessage(const std::string& encodingName)
{
    const char* message = "Using default (English) font encoding.";

    if (encodingName == "win1250")
        message = "Using Central and Eastern European font encoding.";
    else if (encodingName == "win1251")
        message = "Using Cyrillic font encoding.";

    return message;
}
|
55
components/to_utf8/to_utf8.hpp
Normal file
55
components/to_utf8/to_utf8.hpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
#ifndef COMPONENTS_TOUTF8_H
|
||||
#define COMPONENTS_TOUTF8_H
|
||||
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
namespace ToUTF8
{
    // These are all the currently supported code pages
    enum FromType
    {
        WINDOWS_1250, // Central and Eastern European languages
        WINDOWS_1251, // Cyrillic languages
        WINDOWS_1252, // Used by English version of Morrowind (and
                      // probably others)
        CP437 // Used for fonts (*.fnt) if data files encoding is 1252. Otherwise, uses the same encoding as the data files.
    };

    // Map an encoding name ("win1250", "win1251", ...) to its code page.
    FromType calculateEncoding(const std::string& encodingName);

    // Return a human-readable message naming the font encoding selected by
    // the given encoding name.
    std::string encodingUsingMessage(const std::string& encodingName);

    // Converts text between one legacy single-byte code page and UTF-8 using
    // pregenerated translation tables. Each instance owns a reusable output
    // buffer that the conversion functions write into.
    class Utf8Encoder
    {
        public:
            Utf8Encoder(FromType sourceEncoding);

            // Convert to UTF8 from the previously given code page.
            // input[size] must be a zero byte.
            std::string getUtf8(const char *input, size_t size);
            inline std::string getUtf8(const std::string &str)
            {
                return getUtf8(str.c_str(), str.size());
            }

            // Convert from UTF8 back to the previously given code page.
            // input[size] must be a zero byte.
            std::string getLegacyEnc(const char *input, size_t size);
            inline std::string getLegacyEnc(const std::string &str)
            {
                return getLegacyEnc(str.c_str(), str.size());
            }

        private:
            // Make sure mOutput can hold 'size' bytes plus a terminator.
            void resize(size_t size);

            // Legacy -> UTF8: output length computation and per-character copy.
            size_t getLength(const char* input, bool &ascii);
            void copyFromArray(unsigned char chp, char* &out);

            // UTF8 -> legacy: output length computation and per-sequence copy.
            size_t getLength2(const char* input, bool &ascii);
            void copyFromArray2(const char*& chp, char* &out);

            // Reusable output buffer.
            std::vector<char> mOutput;

            // Translation table for the selected code page: 6 bytes per
            // character, a length followed by up to 5 data bytes. The table
            // is only ever read through this pointer, hence pointer-to-const.
            const signed char* translationArray;
    };
}
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue