Plasma Engine  2.0
Loading...
Searching...
No Matches
Tokenizer.h
1#pragma once
2
3#include <Foundation/Basics.h>
4#include <Foundation/Containers/Deque.h>
5#include <Foundation/IO/Stream.h>
6#include <Foundation/Logging/Log.h>
7#include <Foundation/Strings/HashedString.h>
8
/// Describes which kind of token a plToken is.
///
/// NOTE(review): this listing elides the nested enum (listing lines 12-30), so the
/// enumerators and ENUM_COUNT are declared here but not visible in this view.
10struct PL_FOUNDATION_DLL plTokenType
11{
31
   // NOTE(review): presumably one display name per enumerator, indexed by enum value;
   // the definition is not part of this header view - confirm in the implementation file.
32 static const char* EnumNames[ENUM_COUNT];
33};
34
/// Represents one piece of tokenized text in a document.
///
/// NOTE(review): the members m_DataView (plStringView), m_uiCustomFlags (plUInt32) and
/// m_File (plHashedString) are declared at listing lines 57, 60 and 63 but are elided
/// from this view.
36struct PL_FOUNDATION_DLL plToken
37{
   /// Creates a token of type plTokenType::Unknown with line, column and custom flags set to 0.
38 plToken()
39 {
40 m_iType = plTokenType::Unknown;
41 m_uiLine = 0;
42 m_uiColumn = 0;
43 m_uiCustomFlags = 0;
44 }
45
   /// Typically of type plTokenType, but users can put anything in there that they like.
47 plInt32 m_iType;
48
   /// The line in which the token appeared.
50 plUInt32 m_uiLine;
51
   /// The column in the line, at which the token string started.
53 plUInt32 m_uiColumn;
54
58
61
64};
65
/// Takes text and splits it up into plToken objects. The result can be used for easier parsing.
///
/// NOTE(review): this listing elides several numbered lines (e.g. 85-91, 143, 159); the
/// declaration of m_Data, which GetTokenizedData() returns, is among the lines not shown.
78class PL_FOUNDATION_DLL plTokenizer
79{
80public:
   /// Constructs the tokenizer.
   /// NOTE(review): pAllocator is presumably used for internal allocations, with nullptr
   /// selecting a default allocator - confirm in the implementation.
84 plTokenizer(plAllocator* pAllocator = nullptr);
85
87
   /// Tokenizes the given data.
   /// NOTE(review): pLog presumably receives diagnostics; bCopyData presumably controls
   /// whether the tokenizer keeps its own copy of 'data' (see GetTokenizedData) - confirm.
92 void Tokenize(plArrayPtr<const plUInt8> data, plLogInterface* pLog, bool bCopyData = true);
93
   /// Gives read access to the token stream.
95 const plDeque<plToken>& GetTokens() const { return m_Tokens; }
96
   /// Gives read and write access to the token stream.
98 plDeque<plToken>& GetTokens() { return m_Tokens; }
99
   // NOTE(review): presumably appends pointers to all tokens to ref_tokens - confirm.
101 void GetAllTokens(plDynamicArray<const plToken*>& ref_tokens) const;
102
   // NOTE(review): exact contents written to ref_tokens are not visible here - confirm.
104 void GetAllLines(plDynamicArray<const plToken*>& ref_tokens) const;
105
   // NOTE(review): presumably collects the tokens of the next line starting at
   // ref_uiFirstToken and advances that index; failure presumably signals that no further
   // line exists - confirm against the implementation.
116 plResult GetNextLine(plUInt32& ref_uiFirstToken, plHybridArray<const plToken*, 32>& ref_tokens) const;
117
   // Non-const overload of the above, yielding mutable token pointers.
118 plResult GetNextLine(plUInt32& ref_uiFirstToken, plHybridArray<plToken*, 32>& ref_tokens);
119
   /// Returns the internal copy of the tokenized data.
   /// NOTE(review): presumably empty when Tokenize was called with bCopyData == false - confirm.
121 const plArrayPtr<const plUInt8> GetTokenizedData() const { return m_Data; }
122
   /// Enables treating lines that start with # character as line comments.
126 void SetTreatHashSignAsLineComment(bool bHashSignIsLineComment) { m_bHashSignIsLineComment = bHashSignIsLineComment; }
127
128private:
   // Low-level scanning helpers; their bodies live in the implementation file.
129 void NextChar();
130 void AddToken();
131
   // One handler per token category (names mirror the plTokenType kinds).
132 void HandleUnknown();
133 void HandleString(char terminator);
134 void HandleRawString();
135 void HandleNumber();
136 void HandleLineComment();
137 void HandleBlockComment();
138 void HandleWhitespace();
139 void HandleIdentifier();
140 void HandleNonIdentifier();
141
142 plLogInterface* m_pLog = nullptr;
144 plStringView m_sIterator;
145 plStringView m_sRawStringMarker;
146 plUInt32 m_uiCurLine = 1;
   // NOTE(review): -1 assigned to a plUInt32 wraps to the maximum value if plUInt32 is an
   // unsigned 32-bit type; presumably intentional so the first increment yields column 0 - confirm.
147 plUInt32 m_uiCurColumn = -1;
148 plUInt32 m_uiCurChar = '\0';
149 plUInt32 m_uiNextChar = '\0';
150
151 plUInt32 m_uiLastLine = 1;
152 plUInt32 m_uiLastColumn = 1;
153
154 const char* m_szCurCharStart = nullptr;
155 const char* m_szNextCharStart = nullptr;
156 const char* m_szTokenStart = nullptr;
157
   // The token stream exposed through GetTokens().
158 plDeque<plToken> m_Tokens;
160
   // Set via SetTreatHashSignAsLineComment(); defaults to off.
161 bool m_bHashSignIsLineComment = false;
162};
Base class for all memory allocators.
Definition Allocator.h:23
This class encapsulates an array and its size. It is recommended to use this class instead of plain ...
Definition ArrayPtr.h:37
Definition Deque.h:270
Definition DynamicArray.h:81
This class is optimized to take nearly no memory (sizeof(void*)) and to allow very fast checks whethe...
Definition HashedString.h:25
A hybrid array uses in-place storage to handle the first few elements without any allocation....
Definition HybridArray.h:12
Base class for all logging classes.
Definition Log.h:77
plStringView represents a read-only sub-string of a larger string, as it can store a dedicated string ...
Definition StringView.h:34
Takes text and splits it up into plToken objects. The result can be used for easier parsing.
Definition Tokenizer.h:79
const plDeque< plToken > & GetTokens() const
Gives read access to the token stream.
Definition Tokenizer.h:95
const plArrayPtr< const plUInt8 > GetTokenizedData() const
Returns the internal copy of the tokenized data. Will be empty if Tokenize was called with 'bCopyData...
Definition Tokenizer.h:121
void SetTreatHashSignAsLineComment(bool bHashSignIsLineComment)
Enables treating lines that start with # character as line comments.
Definition Tokenizer.h:126
plDeque< plToken > & GetTokens()
Gives read and write access to the token stream.
Definition Tokenizer.h:98
Default enum for returning failure or success, instead of using a bool.
Definition Types.h:54
Represents one piece of tokenized text in a document.
Definition Tokenizer.h:37
plStringView m_DataView
Definition Tokenizer.h:57
plUInt32 m_uiCustomFlags
For users to be able to store additional info for a token.
Definition Tokenizer.h:60
plUInt32 m_uiLine
The line in which the token appeared.
Definition Tokenizer.h:50
plInt32 m_iType
Typically of type plTokenType, but users can put anything in there that they like.
Definition Tokenizer.h:47
plUInt32 m_uiColumn
The column in the line, at which the token string started.
Definition Tokenizer.h:53
plHashedString m_File
The file in which the token appeared.
Definition Tokenizer.h:63
Describes which kind of token a plToken is.
Definition Tokenizer.h:11
Enum
Definition Tokenizer.h:13
@ RawString1Postfix
The postfix part of a C++11 string. E.g: )foo".
Definition Tokenizer.h:27
@ RawString1Prefix
The prefix part of a C++11 string. E.g: R"foo(.
Definition Tokenizer.h:26
@ LineComment
A comment that starts with two slashes and ends at the next newline (or end of file)
Definition Tokenizer.h:19
@ Identifier
a series of alphanumerics or underscores
Definition Tokenizer.h:16
@ String1
A string enclosed in ".
Definition Tokenizer.h:21
@ Float
A floating point number.
Definition Tokenizer.h:24
@ BlockComment
A comment that starts with a slash and a star, and ends at the next star/slash combination (or end of...
Definition Tokenizer.h:20
@ String2
A string enclosed in '.
Definition Tokenizer.h:22
@ RawString1
A raw C++11 string enclosed in ". Contents do not contain the enclosing " or the start / end marker.
Definition Tokenizer.h:25
@ Whitespace
The token is a space or tab.
Definition Tokenizer.h:15
@ Integer
An integer number.
Definition Tokenizer.h:23
@ EndOfFile
End-of-file marker.
Definition Tokenizer.h:28
@ Unknown
for internal use
Definition Tokenizer.h:14
@ Newline
Either '\n' or '\r\n'.
Definition Tokenizer.h:18
@ NonIdentifier
Everything else.
Definition Tokenizer.h:17