# UE4字符编码

# 参考文档

# 常见字符串处理

# string和wstring的转换

下面是c++中string和wstring的转换：

#include <string>
#include <locale>
#include <codecvt>

/// Decodes a UTF-8 encoded byte string into a wide string.
/// The conversion is performed by std::wstring_convert using the
/// std::codecvt_utf8<wchar_t> facet.
inline std::wstring to_wide_string(const std::string& input)
{
    using Utf8Facet = std::codecvt_utf8<wchar_t>;

    std::wstring_convert<Utf8Facet> utf8_decoder;
    const char* const first = input.data();
    const char* const last = first + input.size();
    std::wstring wide = utf8_decoder.from_bytes(first, last);
    return wide;
}
/// Encodes a wide string as a UTF-8 byte string.
/// The conversion is performed by std::wstring_convert using the
/// std::codecvt_utf8<wchar_t> facet.
/// (The original snippet also listed std::codecvt_utf8_utf16<wchar_t> as a
/// commented-out alternative facet.)
inline std::string to_byte_string(const std::wstring& input)
{
    using Utf8Facet = std::codecvt_utf8<wchar_t>;

    std::wstring_convert<Utf8Facet> utf8_encoder;
    const wchar_t* const first = input.data();
    const wchar_t* const last = first + input.size();
    std::string bytes = utf8_encoder.to_bytes(first, last);
    return bytes;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# TCHAR和CHAR转换

下面的示例使用的是ANSI转换宏；如果字符串是UTF-8编码（例如包含中文），则需要改用UTF8_TO_TCHAR和TCHAR_TO_UTF8。

//TCHAR转换为CHAR
const char* msg = TCHAR_TO_ANSI(TEXT("dddd"));

//CHAR转换为TCHAR
const TCHAR* msg = ANSI_TO_TCHAR("dddd");

1
2
3
4
5

# FString和TCHAR转换

// FString -> TCHAR*: either through the underlying character array...
FString str("aaaa");
const TCHAR* msgFromArray = str.GetCharArray().GetData();
// ...or through FString's dereference operator.
const TCHAR* msgFromDeref = *str;

// TCHAR* -> FString

// Wide string literals must start with the L prefix.
std::wstring cwstr = L"中文技术哪家强?";
// A wchar_t pointer can also refer to a string containing Chinese text.
// String literals are const, so the pointer has to be const wchar_t*.
const wchar_t* wchar = L"中文技术哪家强?";
// Conversion from std::wstring to a wchar_t pointer.
const wchar_t* tmpwchar = cwstr.c_str();
// As stated earlier, TCHAR is essentially wchar_t (this holds for Windows builds).
const TCHAR* tcharFromLiteral = wchar;
// std::wstring::c_str() returns const wchar_t*, so const_cast (which requires an
// explicit target type) is used to turn it into wchar_t*, i.e. TCHAR*.
// The buffer still belongs to cwstr and must not be written through this pointer.
TCHAR* tcharFromWString = const_cast<TCHAR*>(cwstr.c_str());
// In UE4 an FString can be initialized directly from TCHAR* or const TCHAR*.
FString fstr = tcharFromWString;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

# `TArray<uint8>`转换为FString

FString::Printf(TEXT("%s"), UTF8_TO_TCHAR(reinterpret_cast<const char*>(Request.Body.GetData())));

# CPP处理中文

参照资料：

c++ 中文字符串处理方法

c++ 中 char*/string 形式的字符串无法正确地对中文字符串进行处理：中文字符的字节长度不固定，按字节处理会出现乱码或者错误。例如在GBK等多字节编码中每个中文占2个字节（在UTF-8中通常占3个字节），而中文中间夹杂的英文和半角标点则仍然是1个字节。

处理思路：

输入层接收 char* 输入，并将其转换为 wchar_t*。
逻辑处理层在 wchar_t* 或 wstring 的基础上进行字符串操作，此时操作的最小单位为一个宽字符（一个中文字符），不会再有乱码。
输出层将 wchar_t* 的结果再次转换为 char*，返回给外部。

# 宽字符串转换

#define WIN32_LEAN_AND_MEAN

#include <Windows.h>

#include <string>

// Converts a wide string to a byte string in the system ANSI code page (CP_ACP).
// Returns an empty string when the input is empty or the conversion fails.
std::string ConvertWideToANSI(const std::wstring& wstr)
{
    if (wstr.empty())
    {
        return std::string();
    }

    const int wideLen = static_cast<int>(wstr.length());

    // First pass: ask how many bytes the converted text needs (no terminator,
    // because an explicit length is passed).
    const int count = WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wideLen, nullptr, 0, nullptr, nullptr);
    if (count <= 0)
    {
        return std::string();
    }

    std::string str(count, 0);
    // Second pass: must use the same explicit length as the sizing call.
    // Passing -1 here would also convert the terminating null, which needs
    // count + 1 bytes and makes the call fail on a buffer of size count.
    WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wideLen, &str[0], count, nullptr, nullptr);
    return str;
}

// Converts a byte string in the system ANSI code page (CP_ACP) to a wide string.
std::wstring ConvertAnsiToWide(const std::string& str)
{
    const char* const source = str.c_str();
    const int sourceLen = static_cast<int>(str.length());

    // Sizing pass: a null output buffer makes the API return the required length.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, source, sourceLen, NULL, 0);

    // Conversion pass into a buffer of exactly that length.
    std::wstring converted(wideLen, 0);
    MultiByteToWideChar(CP_ACP, 0, source, sourceLen, &converted[0], wideLen);
    return converted;
}

// Converts a wide string to a UTF-8 encoded byte string.
// Returns an empty string when the input is empty or the conversion fails.
std::string ConvertWideToUtf8(const std::wstring& wstr)
{
    if (wstr.empty())
    {
        return std::string();
    }

    const int wideLen = static_cast<int>(wstr.length());

    // First pass: ask how many bytes the converted text needs (no terminator,
    // because an explicit length is passed).
    const int count = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wideLen, nullptr, 0, nullptr, nullptr);
    if (count <= 0)
    {
        return std::string();
    }

    std::string str(count, 0);
    // Second pass: must use the same explicit length as the sizing call.
    // Passing -1 here would also convert the terminating null, which needs
    // count + 1 bytes and makes the call fail on a buffer of size count.
    WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wideLen, &str[0], count, nullptr, nullptr);
    return str;
}

// Converts a UTF-8 encoded byte string to a wide string.
std::wstring ConvertUtf8ToWide(const std::string& str)
{
    const char* const source = str.c_str();
    const int sourceLen = static_cast<int>(str.length());

    // Sizing pass: a null output buffer makes the API return the required length.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, NULL, 0);

    // Conversion pass into a buffer of exactly that length.
    std::wstring converted(wideLen, 0);
    MultiByteToWideChar(CP_UTF8, 0, source, sourceLen, &converted[0], wideLen);
    return converted;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

# UE4中获取宽字符串

#include <windows.h>

/**
 * Decodes a raw byte buffer into Result.
 *
 * Buffers that start with a UTF-16 byte-order mark (FF FE or FE FF, with an
 * even byte count) or a UTF-8 byte-order mark (EF BB BF) are handed to
 * FFileHelper::BufferToString. On Windows, any other buffer is first decoded
 * with the system ANSI code page (CP_ACP) through MultiByteToWideChar; when
 * that conversion does not succeed, FFileHelper::BufferToString is used as
 * the fallback.
 *
 * @param Result  Receives the decoded string.
 * @param Buffer  Bytes to decode.
 * @param Size    Number of bytes in Buffer.
 */
void CallPythonToDownloadFTP::GetFStringFromTArray(FString& Result, const uint8* Buffer, int32 Size)
{
    bool bNeedUEConvert = true;

    // Unreal always defines PLATFORM_WINDOWS (as 0 or 1), so it has to be tested
    // with #if. #ifdef would be true on every platform and pull the Win32 call
    // into non-Windows builds.
#if PLATFORM_WINDOWS
    // A UTF-16 BOM (either byte order) only counts when the byte count is even.
    const bool bHasUtf16Bom = Size >= 2 && !(Size & 1)
        && ((Buffer[0] == 0xff && Buffer[1] == 0xfe) || (Buffer[0] == 0xfe && Buffer[1] == 0xff));
    const bool bHasUtf8Bom = Size >= 3 && Buffer[0] == 0xef && Buffer[1] == 0xbb && Buffer[2] == 0xbf;

    if (!bHasUtf16Bom && !bHasUtf8Bom)
    {
        const ANSICHAR* AnsiBytes = reinterpret_cast<const ANSICHAR*>(Buffer);

        // Sizing pass: a null output buffer makes the API return the required length.
        const int32 UnicodeLen = ::MultiByteToWideChar(CP_ACP, 0, AnsiBytes, Size, nullptr, 0);
        if (UnicodeLen > 0)
        {
            TArray<TCHAR>& ResultArray = Result.GetCharArray();
            ResultArray.Empty();
            ResultArray.AddUninitialized(UnicodeLen + 1); // +1 for the null terminator

            const int32 WrittenLen = ::MultiByteToWideChar(CP_ACP, 0, AnsiBytes, Size, ResultArray.GetData(), UnicodeLen);
            if (WrittenLen > 0)
            {
                ResultArray[UnicodeLen] = TEXT('\0');
                bNeedUEConvert = false;
            }
            else
            {
                // Conversion failed: drop the uninitialized storage and fall back below.
                ResultArray.Empty();
            }
        }
    }
#endif

    if (bNeedUEConvert)
    {
        FFileHelper::BufferToString(Result, Buffer, Size);
    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

← C++专项研究-常用操作 C++专项研究-智能指针 →