X-Git-Url: https://git.piment-noir.org/?a=blobdiff_plain;f=lexer%2Fmain.c;h=40ff04d540f95c4b7e6fa16b984d8e188997af6d;hb=fa60d3b49e93c94e140541edac812946eb27b39b;hp=3247e34c93d17bf8f2879b120b62a94941b7501e;hpb=d3eb30efdbdd46508f1034c1c1aed4bd5e23bbc5;p=TP_AL_C.git

diff --git a/lexer/main.c b/lexer/main.c
index 3247e34..40ff04d 100644
--- a/lexer/main.c
+++ b/lexer/main.c
@@ -2,11 +2,20 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdbool.h>
+#include <wchar.h>
 
-FILE *source, *target = NULL;
-char c;
-unsigned int i = 0;
-char tokenValue[50];
+#define TOKEN_MAX 500 /* number of entries in the token[] table below */
+
+struct token_s {
+    const char* type;  /* token type name; main() stores an entry of tokenTypestr here */
+    wint_t value[50];  /* token text as wide characters, written by scanner() */
+};
+
+struct token_s token[TOKEN_MAX] = {NULL, 0}; /* tokens found so far; the one being scanned is token[tokenFound] */
+
+FILE *source = NULL, *target = NULL; /* input and output streams, opened in main() */
+wint_t c; /* current character read from source; compared against WEOF to detect the end of input */
+unsigned int tokenFound = 0; /* index of the token being scanned; incremented by main() after each scanner() call */
 enum TokenType {
     MOTCLE,
     SECTION,
@@ -17,223 +26,197 @@ enum TokenType {
 } tokenType;
 const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
 
-/* This looks silly to check for each characters but for debugging, it's just the way to go */
+/* Returns true when the current character c may be part of a word (letter, digit, listed punctuation or accented letter). Checking each character explicitly looks silly, but for debugging it's just the way to go. */
 bool istAlpha() {
-    if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
-        c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
-        c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
-        c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
-        c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
-        c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
-        c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
-        c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
-        c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
-        c == '\''|| c == '#' || \
-        c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
-        c == '7' || c == '8' || c == '9') {
+    if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
+        c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
+        c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
+        c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
+        c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
+        c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
+        c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
+        c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
+        c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
+        c == L'\''|| c == L'#' || \
+        c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
+        c == L'7' || c == L'8' || c == L'9' || \
+        // FIXME: accented (multibyte) character support is still buggy; NOTE(review): presumably fgetwc() needs a UTF-8 locale set via setlocale() - confirm in main
+        c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
+        c == L'ù' || c == L'û' || \
+        c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
+        c == L'Ù' || c == L'Û') {
             return true;
         }
         return false;
 }
 
 bool isSeparator() {
-    if (c == '\t' || c == ' ' || c == '\n') {
+    if (c == L'\t' || c == L' ' || c == L'\n') { /* true when the current character c is a tab, a space or a newline */
         return true;
     }
     return false;
 }
 
 int scanner() {
-const char* Titre = "Titre";
-const char* Auteur = "Auteur";
-unsigned int j = 0;
+    unsigned int i = 0; /* next write position in token[tokenFound].value */
+    wchar_t m[6];       /* receives the rest of a keyword read after '>' ("uteur" or "itre") */
 
-// The main loop get the next character
 init:
-    if (c == ' ' || c == '\t') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L' ' || c == L'\t') { /* skip blanks in front of a token */
+        c = fgetwc(source);
         goto init;
     }
-    if (c == '>') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'\n') { /* first newline: continue in initLV1 */
+        c = fgetwc(source);
+        goto initLV1;
+    }
+    if (c == L'>') {
+        c = fgetwc(source);
         goto MC1;
     }
-    if (c == '=') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'=') {
+        c = fgetwc(source);
         goto S1SS1;
     }
     if (istAlpha()) {
-        c = fgetc(source);
-        tokenValue[i] = c;
+        if (i + 1 < sizeof token[0].value / sizeof token[0].value[0]) { token[tokenFound].value[i] = c; } /* truncate over-long words, keep the final 0 */
         i++;
+        c = fgetwc(source);
         goto M1;
     }
-    if (c == '\n') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        goto initLV1;
-    }
-    if (c == EOF) {
+    if (c == WEOF) {
         goto FIN;
-    } else {
-        goto error;
     }
+    goto error; /* any other character is not accepted in this state */
 
 MC1:
-    if (c == Titre[j] && j < strlen(Titre) - 1) {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        j++;
-        goto MC1;
+    if (c == L'A' && fgetws(m, 6, source) != NULL && wcscmp(m, L"uteur") == 0) { /* fgetws() returns NULL at end of file or on error */
+        wcscpy((wchar_t*)token[tokenFound].value, L">Auteur");
+        c = fgetwc(source);
+        goto MC2;
     }
-    if (c == Auteur[j] && j < strlen(Auteur) - 1) {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        j++;
-        goto MC1;
-    } else {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'T' && fgetws(m, 5, source) != NULL && wcscmp(m, L"itre") == 0) { /* same NULL guard as for ">Auteur" */
+        wcscpy((wchar_t*)token[tokenFound].value, L">Titre");
+        c = fgetwc(source);
         goto MC2;
     }
+    goto error;
 
 S1SS1:
-    if (c == '=') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'=') {
+        c = fgetwc(source);
         goto SS2;
     }
-    if (isSeparator() || c == EOF) {
+    if (isSeparator() || c == WEOF) {
         goto SECTION;
     }
-
+    goto error;
 
 SS2:
-    if (isSeparator() || c == EOF) {
+    if (isSeparator() || c == WEOF) {
         goto SSECTION;
     }
+    goto error;
 
 SECTION:
     tokenType = SECTION;
-    return 1;
+    return EXIT_SUCCESS; /* accepting states return EXIT_SUCCESS with tokenType set */
 
 SSECTION:
     tokenType = SSECTION;
-    return 1;
+    return EXIT_SUCCESS;
 
 M1:
     if (istAlpha()) {
-        c = fgetc(source);
-        tokenValue[i] = c;
+        if (i + 1 < sizeof token[0].value / sizeof token[0].value[0]) { token[tokenFound].value[i] = c; } /* truncate over-long words, keep the final 0 */
         i++;
+        c = fgetwc(source);
         goto M1;
     }
-    if (isSeparator() || c == EOF) {
+    if (isSeparator() || c == WEOF) {
         goto MOT;
     }
+    goto error;
 
 initLV1:
-    if (c == '\n' || c == '\t') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L' ' || c == L'\t') {
+        c = fgetwc(source);
         goto initLV1;
     }
+    if (c == L'\n') { /* second newline in a row: continue in initLV1LV2 */
+        c = fgetwc(source);
+        goto initLV1LV2;
+    }
     if (istAlpha()) {
-        c = fgetc(source);
-        tokenValue[i] = c;
+        if (i + 1 < sizeof token[0].value / sizeof token[0].value[0]) { token[tokenFound].value[i] = c; } /* truncate over-long words, keep the final 0 */
         i++;
+        c = fgetwc(source);
         goto M1;
     }
-    if (c == '=') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'=') {
+        c = fgetwc(source);
         goto S1SS1;
     }
-    if (c == '>') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'>') {
+        c = fgetwc(source);
         goto MC1;
     }
-    if (c == '\n') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        goto initLV1LV2;
-    }
-    if (c == EOF) {
+    if (c == WEOF) {
         goto FIN;
     }
+    goto error;
 
 initLV1LV2:
     if (isSeparator()) {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+        c = fgetwc(source);
         goto initLV1LV2;
     }
-    if (c == '>') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (istAlpha()) { /* word character after a blank line: report NPARA, c is left unread for the next call */
+        goto NPARA;
+    }
+    if (c == L'>') {
+        c = fgetwc(source);
         goto MC1;
     }
-    if (c == '=') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
+    if (c == L'=') {
+        c = fgetwc(source);
         goto S1SS1;
     }
-    if (istAlpha()) {
-        goto NPARA;
-    }
-    if (c == EOF) {
+    if (c == WEOF) {
         goto FIN;
     }
+    goto error;
 
 NPARA:
     tokenType = NPARA;
-    return 1;
+    return EXIT_SUCCESS;
 
 MOT:
     tokenType = MOT;
-    return 1;
+    return EXIT_SUCCESS;
 
 MC2:
-    if (isSeparator() || c == EOF) {
+    if (isSeparator() || c == WEOF) {
         goto MOTCLE;
     }
+    goto error;
 
 MOTCLE:
     tokenType = MOTCLE;
-    return 1;
+    return EXIT_SUCCESS;
 
 FIN:
     tokenType = FIN;
-    return 1;
+    return EXIT_SUCCESS;
 
 error:
     tokenType = FIN;
-    return -1;
+    return EXIT_FAILURE; /* lexical error: tokenType was set to FIN just above */
 }
 
-int main (int argc, char const *argv[]) {
-
+int main() {
     // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
-    source = fopen("test.txt", "r");
+    source = fopen("test.txt", "r+");
     // Cree et ouvre un fichier target.html en lecture/ecriture
     // avec suppression du contenu au prealable :
     target = fopen("target.html", "w+");
@@ -248,28 +231,24 @@ int main (int argc, char const *argv[]) {
         return -1;
     }
 
+    c = fgetwc(source); // lecture du premier caractere
     do {
-        c = fgetc(source); // lecture du caractere suivant du fichier source
-        //fputc(c, target);  // ecrire c dans le fichier target
-        tokenValue[i] = c;
-        i++;
         int scanrt = scanner();
-        if (scanrt == -1) {
-            printf ("Scanner error with token value: %s\n", tokenValue);
+        if (scanrt == EXIT_FAILURE) {
+            wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
             exit(EXIT_FAILURE);
         }
-        if (c != EOF) {
-            printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
+        if (tokenType == MOT || tokenType == MOTCLE) {
+            wprintf(L"%20s: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
         } else {
-            printf ("Token type found: %s\n", tokenTypestr[tokenType]);
+            wprintf(L"%20s\n", tokenTypestr[tokenType]);
         }
-        // reinit
-        i = 0;
-        memset(tokenValue, 0, sizeof(tokenValue));
-    } while (c != EOF);    // tant que la fin du fichier n'est pas atteinte
+        token[tokenFound].type = tokenTypestr[tokenType];
+        tokenFound++;
+    } while (tokenType != FIN); // tant que la fin du fichier n'est pas atteinte
 
     if (source != NULL) fclose(source); // fermeture du fichier source
     if (target != NULL) fclose(target); // fermeture du fichier target
 
-    return 0;
+    return EXIT_SUCCESS;
 }