X-Git-Url: https://git.piment-noir.org/?a=blobdiff_plain;f=lexer%2Fmain.c;h=40ff04d540f95c4b7e6fa16b984d8e188997af6d;hb=fa60d3b49e93c94e140541edac812946eb27b39b;hp=c8b897978c6c27e9945b1e5de0a2565132718bce;hpb=6a19b8fe2e017d899437b43646b27fdcc26d5ed0;p=TP_AL_C.git diff --git a/lexer/main.c b/lexer/main.c index c8b8979..40ff04d 100644 --- a/lexer/main.c +++ b/lexer/main.c @@ -25,7 +25,6 @@ enum TokenType { FIN } tokenType; const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" }; -unsigned int i = 0; /* It looks silly to check for each characters but for debugging, it's just the way to go */ bool istAlpha() { @@ -41,6 +40,7 @@ bool istAlpha() { c == L'\''|| c == L'#' || \ c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ c == L'7' || c == L'8' || c == L'9' || \ + // FIXME: Accentued characters (aka multibytes characters) support is still buggy c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ c == L'ù' || c == L'û' || \ c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ @@ -58,145 +58,117 @@ bool isSeparator() { } int scanner() { - const wchar_t* Titre = L"Titre"; - const wchar_t* Auteur = L"Auteur"; - unsigned int j = 0; + unsigned int i = 0; + wchar_t m[6]; -// The main loop get the next character init: if (c == L' ' || c == L'\t') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto init; } if (c == L'\n') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto initLV1; } if (c == L'>') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto MC1; } if (c == L'=') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto S1SS1; } if (istAlpha()) { - c = fgetwc(source); token[tokenFound].value[i] = c; i++; + c = fgetwc(source); goto M1; } if (c == WEOF) { goto FIN; - } else { - goto error; } + goto error; MC1: - // FIXME: Partial match need a rewind in the characters extraction from the file - if (c == (wint_t)Titre[j] && j < wcslen(Titre) - 1) { + if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { + wcscpy((wchar_t*)token[tokenFound].value, L">Auteur"); c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; - j++; - goto MC1; + goto MC2; } - if (c == (wint_t)Auteur[j] && j < wcslen(Auteur) - 1) { + if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { + wcscpy((wchar_t*)token[tokenFound].value, L">Titre"); c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; - j++; - goto MC1; - } else { - c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto MC2; } + goto error; S1SS1: if (c == L'=') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto SS2; } if (isSeparator() || c == WEOF) { goto SECTION; } + goto error; SS2: if (isSeparator() || c == WEOF) { goto SSECTION; } + goto error; SECTION: tokenType = SECTION; - return 1; + return EXIT_SUCCESS; SSECTION: tokenType = SSECTION; - return 1; + return EXIT_SUCCESS; M1: if (istAlpha()) { - c = fgetwc(source); token[tokenFound].value[i] = c; i++; + c = fgetwc(source); goto M1; } if (isSeparator() || c == WEOF) { goto MOT; } + goto error; initLV1: if (c == L' ' || c == L'\t') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto initLV1; } if (c == L'\n') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto initLV1LV2; } if (istAlpha()) { - c = fgetwc(source); token[tokenFound].value[i] = c; i++; + c = fgetwc(source); goto M1; } if (c == L'=') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto S1SS1; } if (c == L'>') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto MC1; } if (c == WEOF) { goto FIN; } + goto error; initLV1LV2: if (isSeparator()) { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto initLV1LV2; } if (istAlpha()) { @@ -204,50 +176,47 @@ initLV1LV2: } if (c == L'>') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto MC1; } if (c == L'=') { c = fgetwc(source); - token[tokenFound].value[i] = c; - i++; goto S1SS1; } if (c == WEOF) { goto FIN; } + goto error; NPARA: tokenType = NPARA; - return 1; + return EXIT_SUCCESS; MOT: tokenType = MOT; - return 1; + return EXIT_SUCCESS; MC2: if (isSeparator() || c == WEOF) { goto MOTCLE; } + goto error; MOTCLE: tokenType = MOTCLE; - return 1; + return EXIT_SUCCESS; FIN: tokenType = FIN; - return 1; + return EXIT_SUCCESS; error: tokenType = FIN; - return -1; + return EXIT_FAILURE; } int main() { - // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : - source = fopen("test.txt", "r"); + source = fopen("test.txt", "r+"); // Cree et ouvre un fichier target.html en lecture/ecriture // avec suppression du contenu au prealable : target = fopen("target.html", "w+"); @@ -262,29 +231,24 @@ int main() { return -1; } + c = fgetwc(source); // lecture du premier caractere do { - c = fgetwc(source); // lecture du caractere suivant du fichier source - token[tokenFound].value[i] = c; - i++; int scanrt = scanner(); - if (scanrt == -1) { + if (scanrt == EXIT_FAILURE) { wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value); exit(EXIT_FAILURE); } - if (c != WEOF) { - wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value); + if (tokenType == MOT || tokenType == MOTCLE) { + wprintf(L"%20s: %ls\n", tokenTypestr[tokenType], token[tokenFound].value); } else { - wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]); + wprintf(L"%20s\n", tokenTypestr[tokenType]); } token[tokenFound].type = tokenTypestr[tokenType]; tokenFound++; - // reinit token.value array counter - i = 0; - //} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte - } while (!feof(source)); // tant que la fin du fichier n'est pas atteinte + } while (tokenType != FIN); // tant que la fin du fichier n'est pas atteinte if (source != NULL) fclose(source); // fermeture du fichier source if (target != NULL) fclose(target); // fermeture du fichier target - return 0; + return EXIT_SUCCESS; }