14 return ((iByte & 0x80) == 0) || ((iByte & 0xE0) == 0xC0) || ((iByte & 0xF0) == 0xE0) || ((iByte & 0xF8) == 0xF0) || ((iByte & 0xFC) == 0xF8);
20 return (iByte & 0xC0) == 0x80;
25 return (uiChar <= 127);
30 const plUInt32 uiBit7 = iFirstByte & PL_BIT(7);
31 const plUInt32 uiBit6 = iFirstByte & PL_BIT(6);
32 const plUInt32 uiBit5 = iFirstByte & PL_BIT(5);
33 const plUInt32 uiBit4 = iFirstByte & PL_BIT(4);
38 PL_IGNORE_UNUSED(uiBit6);
39 PL_ASSERT_DEV(uiBit6 != 0,
"Invalid Leading UTF-8 Byte.");
50template <
typename ByteIterator>
53 return utf8::unchecked::next(ref_szUtf8Iterator);
56template <
typename UInt16Iterator>
59 uint32_t cp = utf8::internal::mask16(*ref_szUtf16Iterator);
60 return utf8::internal::is_lead_surrogate(cp);
63template <
typename UInt16Iterator>
66 uint32_t cp = utf8::internal::mask16(*ref_szUtf16Iterator++);
67 if (utf8::internal::is_lead_surrogate(cp))
69 uint32_t trail_surrogate = utf8::internal::mask16(*ref_szUtf16Iterator++);
70 cp = (cp << 10) + trail_surrogate + utf8::internal::SURROGATE_OFFSET;
76template <
typename WCharIterator>
79 if constexpr (
sizeof(wchar_t) == 2)
85 const plUInt32 uiResult = *ref_szWCharIterator;
86 ++ref_szWCharIterator;
91template <
typename ByteIterator>
94 ref_szUtf8Output = utf8::unchecked::utf32to8(&uiUtf32, &uiUtf32 + 1, ref_szUtf8Output);
97template <
typename UInt16Iterator>
100 if (uiUtf32 > 0xffff)
103 *ref_szUtf16Output++ =
static_cast<uint16_t
>((uiUtf32 >> 10) + utf8::internal::LEAD_OFFSET);
104 *ref_szUtf16Output++ =
static_cast<uint16_t
>((uiUtf32 & 0x3ff) + utf8::internal::TRAIL_SURROGATE_MIN);
107 *ref_szUtf16Output++ =
static_cast<uint16_t
>(uiUtf32);
110template <
typename WCharIterator>
113 if constexpr (
sizeof(wchar_t) == 2)
119 *ref_szWCharOutput =
static_cast<wchar_t>(uiUtf32);
126 return utf8::unchecked::peek_next(pFirstChar);
133 if (uiCharacter <= 0x0000007f)
136 if (uiCharacter <= 0x000007ff)
139 if (uiCharacter <= 0x0000ffff)
146 PL_ASSERT_DEV(uiCharacter <= 0x0010ffff,
"Invalid Unicode Codepoint");
152#if PL_ENABLED(PL_USE_STRING_VALIDATION)
154 szStringEnd = szString + strlen(szString);
156 return utf8::is_valid(szString, szStringEnd);
164 PL_ASSERT_DEBUG(ref_szUtf8 !=
nullptr,
"This function expects non nullptr pointers");
166 if (utf8::starts_with_bom(ref_szUtf8, ref_szUtf8 + 4))
177 PL_ASSERT_DEBUG(ref_pUtf16 !=
nullptr,
"This function expects non nullptr pointers");
190 PL_ASSERT_DEBUG(ref_pUtf16 !=
nullptr,
"This function expects non nullptr pointers");
203 PL_ASSERT_DEBUG(ref_szUtf8 !=
nullptr,
"Invalid string pointer to advance!");
205 while (uiNumCharacters > 0)
207 if (*ref_szUtf8 ==
'\0')
223 PL_ASSERT_DEBUG(ref_szUtf8 !=
nullptr,
"Invalid string pointer to advance!");
225 while (uiNumCharacters > 0)
227 if (ref_szUtf8 >= szUtf8End || *ref_szUtf8 ==
'\0')
243 PL_ASSERT_DEBUG(ref_szUtf8 !=
nullptr,
"Invalid string pointer to advance!");
245 while (uiNumCharacters > 0)
247 if (ref_szUtf8 <= szUtf8Start)
263 return reinterpret_cast<T*
>(-1);
static bool IsUtf16Surrogate(UInt16Iterator &ref_szUtf16Iterator)
Characters that cannot be represented in a single UTF-16 code unit need to be split up into two surro...
Definition UnicodeUtils_inl.h:57
static plUInt32 DecodeUtf8ToUtf32(ByteIterator &ref_szUtf8Iterator)
Decodes the next character from the given Utf8 sequence to Utf32 and increments the iterator as far a...
Definition UnicodeUtils_inl.h:51
static plUInt32 DecodeWCharToUtf32(WCharIterator &ref_szWCharIterator)
Decodes the next character from the given wchar_t sequence to Utf32 and increments the iterator as fa...
Definition UnicodeUtils_inl.h:77
static bool SkipUtf8Bom(const char *&ref_szUtf8)
If the given string starts with a Utf8 Bom, the pointer is incremented behind the Bom,...
Definition UnicodeUtils_inl.h:162
static plUInt32 DecodeUtf16ToUtf32(UInt16Iterator &ref_szUtf16Iterator)
Decodes the next character from the given Utf16 sequence to Utf32 and increments the iterator as far ...
Definition UnicodeUtils_inl.h:64
static constexpr plUInt16 Utf16BomBE
Byte Order Mark for Big Endian Utf16 strings.
Definition UnicodeUtils.h:22
static constexpr T * GetMaxStringEnd()
[internal] Returns the max string end pointer for the given type
Definition UnicodeUtils_inl.h:261
static plUInt32 ConvertUtf8ToUtf32(const char *pFirstChar)
Converts the UTF-8 character that starts at pFirstChar into a UTF-32 character.
Definition UnicodeUtils_inl.h:124
static bool IsASCII(plUInt32 uiChar)
Returns whether a character is a pure ASCII character (only the lowest 7 bits are used, i.e. its value is at most 127).
Definition UnicodeUtils_inl.h:23
static void EncodeUtf32ToUtf16(plUInt32 uiUtf32, UInt16Iterator &ref_szUtf16Output)
Encodes the given Utf32 character to Utf16 and writes as many bytes to the output iterator,...
Definition UnicodeUtils_inl.h:98
static bool SkipUtf16BomLE(const plUInt16 *&ref_pUtf16)
If the given string starts with a Utf16 little endian Bom, the pointer is incremented behind the Bom,...
Definition UnicodeUtils_inl.h:175
static constexpr plUInt16 Utf16BomLE
Byte Order Mark for Little Endian Utf16 strings.
Definition UnicodeUtils.h:19
static void EncodeUtf32ToWChar(plUInt32 uiUtf32, WCharIterator &ref_szWCharOutput)
Encodes the given Utf32 character to wchar_t and writes as many bytes to the output iterator,...
Definition UnicodeUtils_inl.h:111
static bool SkipUtf16BomBE(const plUInt16 *&ref_pUtf16)
If the given string starts with a Utf16 big endian Bom, the pointer is incremented behind the Bom,...
Definition UnicodeUtils_inl.h:188
static plResult MoveToNextUtf8(const char *&ref_szUtf8, plUInt32 uiNumCharacters=1)
Moves the given string pointer ahead to the next Utf8 character sequence.
Definition UnicodeUtils_inl.h:201
static void EncodeUtf32ToUtf8(plUInt32 uiUtf32, ByteIterator &ref_szUtf8Output)
Encodes the given Utf32 character to Utf8 and writes as many bytes to the output iterator,...
Definition UnicodeUtils_inl.h:92
static plUInt32 GetSizeForCharacterInUtf8(plUInt32 uiCharacter)
Computes how many bytes the character would require, if encoded in UTF-8.
Definition UnicodeUtils_inl.h:129
static plUInt32 GetUtf8SequenceLength(char iFirstByte)
Returns the number of bytes that a UTF-8 sequence is in length, which is encoded in the first byte of...
Definition UnicodeUtils_inl.h:28
static bool IsUtf8ContinuationByte(char iByte)
Checks whether the given byte is a continuation byte (bit pattern 10xxxxxx) of a UTF-8 multi-byte sequence, i.e. any byte of the sequence other than the first.
Definition UnicodeUtils_inl.h:17
static bool IsValidUtf8(const char *szString, const char *szStringEnd=GetMaxStringEnd< char >())
Returns false if the given string does not contain a completely valid Utf8 string.
Definition UnicodeUtils_inl.h:150
static plResult MoveToPriorUtf8(const char *&ref_szUtf8, const char *szUtf8Start, plUInt32 uiNumCharacters=1)
Moves the given string pointer backwards to the previous Utf8 character sequence.
Definition UnicodeUtils_inl.h:241
static bool IsUtf8StartByte(char iByte)
Checks whether the given byte starts a UTF-8 sequence: either a single-byte (ASCII) character or the leading byte of a multi-byte sequence.
Definition UnicodeUtils_inl.h:11
Default enum for returning failure or success, instead of using a bool.
Definition Types.h:54