X-Git-Url: https://git.piment-noir.org/?a=blobdiff_plain;f=lexer%2Flexical_analyzer.c;fp=lexer%2Flexical_analyzer.c;h=36acd5e214098b47d2078cac42a2bb4d4779c15a;hb=9ed84d89d23a6198fd4157bfe93424b7b7582332;hp=0000000000000000000000000000000000000000;hpb=7f9d92f6d5a67e5afd2351893abc51d73f1d47d8;p=TP_AL_C.git diff --git a/lexer/lexical_analyzer.c b/lexer/lexical_analyzer.c new file mode 100644 index 0000000..36acd5e --- /dev/null +++ b/lexer/lexical_analyzer.c @@ -0,0 +1,212 @@ +/* Lexical analyzer */ + +#include +#include + +#include "global_vars.h" +#include "print_helper.h" + +wint_t c; + +/* It looks silly to check for each characters but for debugging, it's just the way to go */ +static bool isAlphaNum() { + if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ + c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ + c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ + c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ + c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ + c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ + c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ + c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ + c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ + c == L'\''|| c == L'#' || \ + c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ + c == L'7' || c == L'8' || c == L'9' || \ + // FIXME: Accentued characters (aka multibytes characters) support is still buggy + c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ + c == L'ù' || c == L'û' || \ + c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ + c == L'Ù' || c == L'Û') { + return true; + } + return false; +} + +static bool isSeparator() { + if (c == L'\t' || c == L' ' || c == L'\n') { + return true; + } + return false; +} + +static bool isEOF() { + if (c == WEOF) { + return true; + } + return false; +} + +int scanner() { + unsigned int i = 0; + wchar_t m[6]; + +init: + if (c == L' ' || c == L'\t') { + c = fgetwc(source); + goto init; + } + if (c == L'\n') { + c = fgetwc(source); + goto initLV1; + } + if (c == L'>') { + c = fgetwc(source); + goto MC1; + } + if (c == L'=') { + c = fgetwc(source); + goto S1SS1; + } + if (isAlphaNum()) { + token[tokenFound].value[i] = c; + i++; + c = fgetwc(source); + goto M1; + } + if (isEOF()) { + goto FIN; + } + goto error; + +MC1: + if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { + wcscpy((wchar_t*)token[tokenFound].value, L">Auteur"); + c = fgetwc(source); + goto MC2; + } + if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { + wcscpy((wchar_t*)token[tokenFound].value, L">Titre"); + c = fgetwc(source); + goto MC2; + } + goto error; + +S1SS1: + if (c == L'=') { + c = fgetwc(source); + goto SS2; + } + if (isSeparator() || isEOF()) { + goto SECTION; + } + goto error; + +SS2: + if (isSeparator() || isEOF()) { + goto SSECTION; + } + goto error; + +SECTION: + tokenType = SECTION; + return EXIT_SUCCESS; + +SSECTION: + tokenType = SSECTION; + return EXIT_SUCCESS; + +M1: + if (isAlphaNum()) { + token[tokenFound].value[i] = c; + i++; + c = fgetwc(source); + goto M1; + } + if (isSeparator() || isEOF()) { + goto MOT; + } + goto error; + +initLV1: + if (c == L' ' || c == L'\t') { + c = fgetwc(source); + goto initLV1; + } + if (c == L'\n') { + c = fgetwc(source); + goto initLV1LV2; + } + if (isAlphaNum()) { + token[tokenFound].value[i] = c; + i++; + c = fgetwc(source); + goto M1; + } + if (c == L'=') { + c = fgetwc(source); + goto S1SS1; + } + if (c == L'>') { + c = fgetwc(source); + goto MC1; + } + if (isEOF()) { + goto FIN; + } + goto error; + +initLV1LV2: + if (isSeparator()) { + c = fgetwc(source); + goto initLV1LV2; + } + if (isAlphaNum()) { + goto NPARA; + } + if (c == L'>') { + c = fgetwc(source); + goto MC1; + } + if (c == L'=') { + c = fgetwc(source); + goto S1SS1; + } + if (isEOF()) { + goto FIN; + } + goto error; + +NPARA: + tokenType = NPARA; + return EXIT_SUCCESS; + +MOT: + tokenType = MOT; + return EXIT_SUCCESS; + +MC2: + if (isSeparator() || isEOF()) { + goto MOTCLE; + } + goto error; + +MOTCLE: + tokenType = MOTCLE; + return EXIT_SUCCESS; + +FIN: + tokenType = FIN; + return EXIT_SUCCESS; + +error: + if (tokenType == MOT || tokenType == MOTCLE) { + wpr_error(L"Scanner error with token type: %s and value: %ls\n", + tokenTypestr[tokenType], + token[tokenFound].value); + } else { + wpr_error(L"Scanner error with token type: %s\n", + tokenTypestr[tokenType]); + } + tokenType = FIN; + exit(EXIT_FAILURE); +}