/*
 * lexer/main.c - hand-written lexical analyser for a small text markup.
 *
 * Token kinds (see enum TokenType):
 *   MOTCLE    '>' immediately followed by the keyword "Titre" or "Auteur"
 *   SECTION   a lone '='
 *   SSECTION  "=="
 *   NPARA     a blank line (two newlines separated only by spaces/tabs)
 *             that is followed by a word
 *   MOT       a run of word characters (see istAlpha())
 *   FIN       end of input
 *
 * Every token must be terminated by a separator (space, tab, newline) or by
 * the end of input.
 *
 * Define LEXER_NO_MAIN to compile the scanner without the command-line
 * driver, e.g. to link it into a test harness.
 */
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

#define TOKEN_MAX 500       /* maximum number of tokens kept in token[] */
#define TOKEN_VALUE_MAX 50  /* token text capacity, terminating L'\0' included */

struct token_s {
    const char* type;               /* points into tokenTypestr[] */
    wchar_t value[TOKEN_VALUE_MAX]; /* NUL-terminated text of the token */
};

struct token_s token[TOKEN_MAX] = {{NULL, {0}}};

FILE *source = NULL, *target = NULL;
wint_t c;                    /* current character, not yet consumed */
unsigned int tokenFound = 0; /* number of completed tokens == index of the one being built */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
unsigned int i = 0;          /* number of characters stored in the current token value */

/*
 * Tell whether the current character `c` may appear inside a word (MOT).
 *
 * Despite the name, the accepted set is wider than letters: ASCII letters and
 * digits, a fixed list of punctuation marks, and the accented letters listed
 * below. The set is spelled out explicitly so that it does not depend on the
 * active locale's idea of "alphabetic".
 */
bool istAlpha(void) {
    static const wchar_t accepted[] =
        L"abcdefghijklmnopqrstuvwxyz"
        L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        L".?!,;:-'#"
        L"0123456789"
        L"àâçèéîôùû"
        L"ÀÂÇÈÉÎÔÙÛ";

    /* wcschr() would match the terminating L'\0' of `accepted`, and WEOF is
     * not a character at all: reject both before searching. */
    if (c == WEOF || c == L'\0') {
        return false;
    }
    return wcschr(accepted, (wchar_t)c) != NULL;
}

/* Tell whether the current character `c` is a token separator. */
bool isSeparator(void) {
    return c == L'\t' || c == L' ' || c == L'\n';
}

/*
 * Append the current character to the value of the token being built, then
 * read the next character into `c`.
 * Returns false, without reading anything, when the value buffer is full.
 */
static bool consume(void) {
    if (i + 1 >= TOKEN_VALUE_MAX) {
        return false;
    }
    token[tokenFound].value[i++] = (wchar_t)c;
    token[tokenFound].value[i] = L'\0';
    c = fgetwc(source);
    return true;
}

/* Drop the current character (a separator) and read the next one into `c`. */
static void skip(void) {
    c = fgetwc(source);
}

/*
 * Finish a token of kind `type`.
 *
 * The character that ended the token is pushed back onto `source` so that the
 * caller's next fgetwc() sees it again: it was only looked at, it is not part
 * of the token. This matters for newlines (paragraph detection) and for the
 * first letter of the word that follows an NPARA.
 * Returns 1 on success, -1 if the push-back failed.
 */
static int emit(enum TokenType type) {
    if (c != WEOF && ungetwc(c, source) == WEOF) {
        tokenType = FIN;
        return -1;
    }
    tokenType = type;
    return 1;
}

/*
 * Recognise one token.
 *
 * On entry `c` must hold the first unread character of `source` (the caller
 * reads it with fgetwc()). On success the kind is stored in `tokenType`, the
 * text in token[tokenFound].value (separators are never stored; NPARA and FIN
 * have an empty value) and its length in `i`.
 *
 * Returns 1 when a token was recognised (FIN included), -1 on a lexical
 * error: unexpected character, unknown keyword after '>', token text longer
 * than TOKEN_VALUE_MAX - 1 characters, or token table full. `tokenType` is
 * set to FIN in the error case.
 */
int scanner(void) {
    const wchar_t* keyword;

    if (tokenFound >= TOKEN_MAX) {
        goto error;
    }
    i = 0;
    token[tokenFound].value[0] = L'\0';

init:           /* no newline seen yet */
    if (c == L' ' || c == L'\t') {
        skip();
        goto init;
    }
    if (c == L'\n') {
        skip();
        goto initLV1;
    }
    goto start;

initLV1:        /* exactly one newline seen */
    if (c == L' ' || c == L'\t') {
        skip();
        goto initLV1;
    }
    if (c == L'\n') {
        skip();
        goto initLV1LV2;
    }
    goto start;

initLV1LV2:     /* a blank line has been seen */
    if (isSeparator()) {
        skip();
        goto initLV1LV2;
    }
    if (istAlpha()) {
        /* The word itself is left for the next call. */
        return emit(NPARA);
    }
    goto start;

start:          /* first significant character of the token */
    if (c == L'>') {
        if (!consume()) {
            goto error;
        }
        goto MC1;
    }
    if (c == L'=') {
        if (!consume()) {
            goto error;
        }
        goto S1SS1;
    }
    if (istAlpha()) {
        goto M1;
    }
    if (c == WEOF) {
        tokenType = FIN;
        return 1;
    }
    goto error;

MC1:            /* after '>': collect the whole word, then check it is a keyword */
    while (istAlpha()) {
        if (!consume()) {
            goto error;
        }
    }
    if (!isSeparator() && c != WEOF) {
        goto error;
    }
    keyword = token[tokenFound].value + 1; /* skip the leading '>' */
    if (wcscmp(keyword, L"Titre") != 0 && wcscmp(keyword, L"Auteur") != 0) {
        goto error;
    }
    return emit(MOTCLE);

S1SS1:          /* after one '=' */
    if (c == L'=') {
        if (!consume()) {
            goto error;
        }
        goto SS2;
    }
    if (isSeparator() || c == WEOF) {
        return emit(SECTION);
    }
    goto error;

SS2:            /* after "==" */
    if (isSeparator() || c == WEOF) {
        return emit(SSECTION);
    }
    goto error;

M1:             /* inside a word */
    while (istAlpha()) {
        if (!consume()) {
            goto error;
        }
    }
    if (isSeparator() || c == WEOF) {
        return emit(MOT);
    }
    goto error;

error:
    tokenType = FIN;
    return -1;
}

#ifndef LEXER_NO_MAIN
/*
 * Tokenise test.txt and print every token found on standard output.
 * target.html is created (and emptied) but nothing is written to it here.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened, the input
 * cannot be read or is lexically invalid, or it holds more than TOKEN_MAX tokens.
 */
int main(void) {
    int status = EXIT_SUCCESS;

    /* Without this the "C" locale is used and fgetwc() cannot decode the
     * accented letters accepted by istAlpha(). */
    setlocale(LC_ALL, "");

    /* Open test.txt read-only (the file must exist). */
    source = fopen("test.txt", "r");
    if (source == NULL) {
        printf("Impossible d'ouvrir le fichier source\n");
        return EXIT_FAILURE;
    }

    /* Create/truncate target.html for reading and writing. This is done only
     * once the source is known to be readable, so a missing source no longer
     * wipes a previous target. */
    target = fopen("target.html", "w+");
    if (target == NULL) {
        printf("Impossible d'ouvrir le fichier target\n");
        fclose(source);
        return EXIT_FAILURE;
    }

    do {
        if (tokenFound >= TOKEN_MAX) {
            wprintf(L"Too many tokens (limit: %d)\n", TOKEN_MAX);
            status = EXIT_FAILURE;
            break;
        }
        c = fgetwc(source); /* first character of the next token */
        if (scanner() == -1) {
            wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
            status = EXIT_FAILURE;
            break;
        }
        if (i > 0) {
            wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
        } else {
            wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]);
        }
        token[tokenFound].type = tokenTypestr[tokenType];
        tokenFound++;
    } while (tokenType != FIN); /* FIN is always the last token reported */

    /* fgetwc() returns WEOF both at end of file and on a read/decoding error;
     * only the former is a normal end of input. */
    if (status == EXIT_SUCCESS && ferror(source)) {
        wprintf(L"Read error on source file\n");
        status = EXIT_FAILURE;
    }

    fclose(source);
    if (fclose(target) != 0) {
        status = EXIT_FAILURE;
    }

    return status;
}
#endif /* LEXER_NO_MAIN */