You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

187 lines
6.5 KiB
C++

#include <math.h>
// Reports whether Token carries a recognised type.
// Returns non-zero for every token whose Type is anything other than
// Token_Unknown, and zero for Token_Unknown.
b32 IsTokenValid(token Token) {
    return (Token.Type != Token_Unknown);
}
// Compares the token's String field against the C string Match.
// The comparison itself is delegated to StringsMatch; its result is
// returned unchanged.
b32 TokenEquals(token Token, char* Match) {
    return StringsMatch(Token.String, Match);
}
// Reloads the two-character lookahead window (Tokenizer->At) from the
// front of the remaining input.
//
// At[0] mirrors the next unread character and At[1] the one after it.
// Slots for which no input remains are set to 0, so the rest of the
// tokenizer can read At[0]/At[1] without checking Input.Count itself.
void Refill(tokenizer *Tokenizer){
    char *Window = Tokenizer->At;

    // Nothing left: both lookahead slots read as end-of-stream.
    if(Tokenizer->Input.Count == 0) {
        Window[0] = 0;
        Window[1] = 0;
        return;
    }

    // Exactly one character left: only the first slot is real input.
    if(Tokenizer->Input.Count == 1) {
        Window[0] = Tokenizer->Input.Data[0];
        Window[1] = 0;
        return;
    }

    // Two or more characters left: copy the leading pair.
    char First = Tokenizer->Input.Data[0];
    char Second = Tokenizer->Input.Data[1];
    Window[0] = First;
    Window[1] = Second;
}
// Consumes Count characters of input.
//
// Adds Count to the running TotalCount, moves the Input buffer forward
// via Advance, and then reloads the lookahead window so At[0]/At[1]
// describe the new front of the input.
void AdvanceInput(tokenizer* Tokenizer, u32 Count) {
    Tokenizer->TotalCount = Tokenizer->TotalCount + Count;

    Advance(&Tokenizer->Input, Count);

    // The window is stale after the buffer moved; refresh it.
    Refill(Tokenizer);
}
// Builds a tokenizer positioned at the start of Data.
//
// Data     - the text to tokenize; stored as the tokenizer's Input.
// Filename - stored as-is (pointer copy) and later stamped onto tokens.
//
// LinesCount, TokensCount and TotalCount all start at 1. The lookahead
// window At is a freshly malloc'd 2-byte buffer that is primed from
// Data before returning.
//
// NOTE(review): the malloc result is not checked and nothing in this
// function releases At; presumably the caller owns it - confirm.
tokenizer Tokenize(string Data, char* Filename) {
    tokenizer Fresh = {};

    Fresh.Input = Data;
    Fresh.Filename = Filename;

    Fresh.LinesCount = 1;
    Fresh.TokensCount = 1;
    Fresh.TotalCount = 1;

    // Two slots: the current character and one character of lookahead.
    Fresh.At = (char*)malloc(2);
    Refill(&Fresh);

    return Fresh;
}
// Forward declarations of a single-argument Tokenize overload; no
// definition is visible in this section.
// NOTE(review): both lines declare the same signature (tokenizer(char*)),
// so they name one function, not two. The differing parameter names
// suggest a "from text" and a "from file" variant were intended - confirm.
tokenizer Tokenize(char* Data);
tokenizer Tokenize(char* Filename);
// Scans a single token from the front of Tokenizer's input and returns it.
//
// The first character is consumed unconditionally and decides the token
// type; multi-character tokens (strings, whitespace runs, comments,
// identifiers, numbers) then keep consuming through the two-character
// lookahead window Tokenizer->At.
//
// On return:
//   - Token.Type is set; characters that match no rule yield Token_Unknown
//     and a 0 character yields Token_EndOfStream.
//   - Token.Text covers the consumed characters. For Token_String the
//     opening quote, and the closing quote when one was found, are excluded.
//   - Token.Float / Token.Integer hold the value of a Token_Literal.
//   - Tokenizer->TokensCount is incremented, and Tokenizer->LinesCount is
//     incremented for every line ending consumed (including those inside
//     block comments).
//
// NOTE(review): Token.TextLength is set to 1 here and never updated in
// this function, whatever the token's real length - confirm intent.
token GetToken(tokenizer* Tokenizer) {
    token Token = {};
    Token.Filename = Tokenizer->Filename;
    Token.TextLength = 1;
    Token.Text = Tokenizer->Input;

    // Set when a string literal's closing quote was actually consumed, so
    // that an unterminated string does not lose its last character below.
    b32 StringWasClosed = 0;

    char C = Tokenizer->At[0];
    AdvanceInput(Tokenizer, 1);
    switch (C) {
        case '\0': { Token.Type = Token_EndOfStream; } break;
        case '(': { Token.Type = Token_OpenParen; } break;
        case ')': { Token.Type = Token_CloseParen; } break;
        case ':': { Token.Type = Token_Colon; } break;
        case ';': { Token.Type = Token_Semicolen; } break;
        case '*': { Token.Type = Token_Asterik; } break;
        case '[': { Token.Type = Token_OpenBracket; } break;
        case ']': { Token.Type = Token_CloseBracket; } break;
        case '{': { Token.Type = Token_OpenBrace; } break;
        case '}': { Token.Type = Token_CloseBrace; } break;
        case '=': { Token.Type = Token_Equals; } break;
        case ',': { Token.Type = Token_Comma; } break;
        case '|': { Token.Type = Token_Or; } break;
        case '#': { Token.Type = Token_Pound; } break;

        case '"': { // note(jax): We've got a string
            Token.Type = Token_String;

            while (Tokenizer->At[0] && Tokenizer->At[0] != '"') {
                // A backslash escapes the next character, so skip the pair
                // as a unit; this keeps \" from ending the string.
                if ((Tokenizer->At[0] == '\\') && (Tokenizer->At[1])) {
                    AdvanceInput(Tokenizer, 1);
                }
                AdvanceInput(Tokenizer, 1);
            }

            if (Tokenizer->At[0] == '"') {
                AdvanceInput(Tokenizer, 1);
                StringWasClosed = 1;
            }
            // The quotes are trimmed after the switch, once the token's
            // real length is known.
        } break;

        default: {
            if (IsSpacing(C)) {
                // Collapse a run of spacing characters into one token.
                Token.Type = Token_Space;
                while (IsSpacing(Tokenizer->At[0])) {
                    AdvanceInput(Tokenizer, 1);
                }
            } else if (IsEndOfLine(C)) {
                Token.Type = Token_EndOfLine;
                // Treat "\r\n" and "\n\r" as a single line ending.
                if (((C == '\r') && (Tokenizer->At[0] == '\n')) ||
                    ((C == '\n') && (Tokenizer->At[0] == '\r'))) {
                    AdvanceInput(Tokenizer, 1);
                }
                ++Tokenizer->LinesCount;
            } else if ((C == '/') && (Tokenizer->At[0] == '/')) {
                Token.Type = Token_Comment;
                // The first '/' was consumed above; only the second one is
                // left to skip. Advancing further would swallow the first
                // character of the comment body, which for an empty "//"
                // comment is the line ending itself.
                AdvanceInput(Tokenizer, 1);
                // Stop before the line ending so it becomes its own token.
                while (Tokenizer->At[0] && !IsEndOfLine(Tokenizer->At[0])) {
                    AdvanceInput(Tokenizer, 1);
                }
            } else if ((C == '/') && (Tokenizer->At[0] == '*')) {
                Token.Type = Token_Comment;
                // Only the '*' of the opener remains to be consumed (see the
                // line-comment case). Skipping two characters here made
                // "/**/" miss its own terminator.
                AdvanceInput(Tokenizer, 1);
                while (Tokenizer->At[0] &&
                       !((Tokenizer->At[0] == '*') && (Tokenizer->At[1] == '/'))) {
                    // Step over the first half of a two-character line
                    // ending so the pair is counted as one line.
                    if (((Tokenizer->At[0] == '\r') && (Tokenizer->At[1] == '\n')) ||
                        ((Tokenizer->At[0] == '\n') && (Tokenizer->At[1] == '\r'))) {
                        AdvanceInput(Tokenizer, 1);
                    }
                    if (IsEndOfLine(Tokenizer->At[0])) {
                        ++Tokenizer->LinesCount;
                    }
                    AdvanceInput(Tokenizer, 1);
                }
                // The loop ends on the closing "*/" or at end of input; only
                // in the former case is there a terminator to consume.
                if (Tokenizer->At[0] == '*') {
                    AdvanceInput(Tokenizer, 2);
                }
            } else if (IsAlphabetical(C)) {
                // Identifier: alphabetical start, then letters, digits or '_'.
                Token.Type = Token_Identifier;
                while (IsAlphabetical(Tokenizer->At[0]) ||
                       IsNumeric(Tokenizer->At[0]) ||
                       (Tokenizer->At[0] == '_')) {
                    AdvanceInput(Tokenizer, 1);
                }
            } else if (IsNumeric(C)) {
                // Integer part: accumulate decimal digits left to right.
                f32 Number = (f32)(C - '0');
                while (IsNumeric(Tokenizer->At[0])) {
                    f32 Digit = (f32)(Tokenizer->At[0] - '0');
                    Number = 10.0f*Number + Digit;
                    AdvanceInput(Tokenizer, 1);
                }
                // Fractional part: each digit after the '.' is weighted by
                // the next smaller power of ten (0.1, 0.01, ...).
                if (Tokenizer->At[0] == '.') {
                    AdvanceInput(Tokenizer, 1);
                    f32 Coefficient = 0.1f;
                    while (IsNumeric(Tokenizer->At[0])) {
                        f32 Digit = (f32)(Tokenizer->At[0] - '0');
                        Number += Coefficient * Digit;
                        Coefficient *= 0.1f;
                        AdvanceInput(Tokenizer, 1);
                    }
                }
                Token.Type = Token_Literal;
                Token.Float = Number;
                // Truncated integer view of the same value.
                Token.Integer = (s32)Number;
            } else {
                Token.Type = Token_Unknown;
            }
        } break;
    }

    ++Tokenizer->TokensCount;

    // The token spans everything consumed since Token.Text was captured.
    Token.Text.Count = (Tokenizer->Input.Data - Token.Text.Data);

    // Strip the delimiting quotes from string tokens. This has to happen
    // after the length is computed: before that point Text.Count still
    // holds the length of the whole remaining input.
    if (Token.Type == Token_String) {
        if (Token.Text.Count && (Token.Text.Data[0] == '"')) {
            ++Token.Text.Data;
            --Token.Text.Count;
        }
        if (StringWasClosed && Token.Text.Count) {
            --Token.Text.Count;
        }
    }

    return Token;
}
// Forward declaration only; the definition is not visible in this section.
// NOTE(review): by its name this presumably returns the next token without
// consuming it - confirm against the definition.
token PeekToken(tokenizer* Tokenizer);