Store all tokens found during the lexical analysis in an array.

[TP_AL_C.git] / lexer / main.c
diff --git a/lexer/main.c b/lexer/main.c

index 671d88f94286139bee7cd87159b149312c0e92c2..db41495086b75c03aa4a24683d07a188dabbc429 100644 (file)
--- a/lexer/main.c
+++ b/lexer/main.c
@@ -3,10 +3,13 @@
  #include <string.h>
  #include <stdbool.h>
  
  #include <string.h>
  #include <stdbool.h>
  
-FILE *source, *target = NULL;
+#define TOKEN_MAX_LENGTH 50
+#define TOKEN_LIST_MAX 500
+
+FILE *source = NULL, *target = NULL;
  char c;
  unsigned int i = 0;
  char c;
  unsigned int i = 0;
-char tokenValue[50];
+char tokenValue[TOKEN_MAX_LENGTH];
  enum TokenType {
      MOTCLE,
      SECTION,
  enum TokenType {
      MOTCLE,
      SECTION,
@@ -16,7 +19,9 @@ enum TokenType {
      FIN
  } tokenType;
  const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
      FIN
  } tokenType;
  const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
+const char* tokenList[TOKEN_LIST_MAX];
  
  
+/* It looks silly to check each character individually, but for debugging it's just the way to go */
  bool istAlpha() {
      if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
          c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
  bool istAlpha() {
      if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
          c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
@@ -43,9 +48,9 @@ bool isSeparator() {
  }
  
  int scanner() {
  }
  
  int scanner() {
-const char* Titre = "Titre";
-const char* Auteur = "Auteur";
-unsigned int j = 0;
+    const char* Titre = "Titre";
+    const char* Auteur = "Auteur";
+    unsigned int j = 0;
  
  // The main loop gets the next character
  init:
  
  // The main loop gets the next character
  init:
@@ -55,6 +60,12 @@ init:
          i++;
          goto init;
      }
          i++;
          goto init;
      }
+    if (c == '\n') {
+        c = fgetc(source);
+        tokenValue[i] = c;
+        i++;
+        goto initLV1;
+    }
      if (c == '>') {
          c = fgetc(source);
          tokenValue[i] = c;
      if (c == '>') {
          c = fgetc(source);
          tokenValue[i] = c;
@@ -73,12 +84,6 @@ init:
          i++;
          goto M1;
      }
          i++;
          goto M1;
      }
-    if (c == '\n') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        goto initLV1;
-    }
      if (c == EOF) {
          goto FIN;
      } else {
      if (c == EOF) {
          goto FIN;
      } else {
@@ -86,6 +91,7 @@ init:
      }
  
  MC1:
      }
  
  MC1:
+    // FIXME: A partial match needs a rewind of the characters already extracted from the file
      if (c == Titre[j] && j < strlen(Titre) - 1) {
          c = fgetc(source);
          tokenValue[i] = c;
      if (c == Titre[j] && j < strlen(Titre) - 1) {
          c = fgetc(source);
          tokenValue[i] = c;
@@ -107,25 +113,29 @@ MC1:
      }
  
  S1SS1:
      }
  
  S1SS1:
-    if (isSeparator() || c == EOF) {
-        goto SECTION;
-    }
      if (c == '=') {
          c = fgetc(source);
          tokenValue[i] = c;
          i++;
          goto SS2;
      }
      if (c == '=') {
          c = fgetc(source);
          tokenValue[i] = c;
          i++;
          goto SS2;
      }
+    if (isSeparator() || c == EOF) {
+        goto SECTION;
+    }
  
  SS2:
      if (isSeparator() || c == EOF) {
  
  SS2:
      if (isSeparator() || c == EOF) {
-        goto SECTION;
+        goto SSECTION;
      }
  
  SECTION:
      tokenType = SECTION;
      return 1;
  
      }
  
  SECTION:
      tokenType = SECTION;
      return 1;
  
+SSECTION:
+    tokenType = SSECTION;
+    return 1;
+
  M1:
      if (istAlpha()) {
          c = fgetc(source);
  M1:
      if (istAlpha()) {
          c = fgetc(source);
@@ -138,12 +148,18 @@ M1:
      }
  
  initLV1:
      }
  
  initLV1:
-    if (c == '\n' || c == '\t') {
+    if (c == ' ' || c == '\t') {
          c = fgetc(source);
          tokenValue[i] = c;
          i++;
          goto initLV1;
      }
          c = fgetc(source);
          tokenValue[i] = c;
          i++;
          goto initLV1;
      }
+    if (c == '\n') {
+        c = fgetc(source);
+        tokenValue[i] = c;
+        i++;
+        goto initLV1LV2;
+    }
      if (istAlpha()) {
          c = fgetc(source);
          tokenValue[i] = c;
      if (istAlpha()) {
          c = fgetc(source);
          tokenValue[i] = c;
@@ -162,12 +178,6 @@ initLV1:
          i++;
          goto MC1;
      }
          i++;
          goto MC1;
      }
-    if (c == '\n') {
-        c = fgetc(source);
-        tokenValue[i] = c;
-        i++;
-        goto initLV1LV2;
-    }
      if (c == EOF) {
          goto FIN;
      }
      if (c == EOF) {
          goto FIN;
      }
@@ -179,6 +189,9 @@ initLV1LV2:
          i++;
          goto initLV1LV2;
      }
          i++;
          goto initLV1LV2;
      }
+    if (istAlpha()) {
+        goto NPARA;
+    }
      if (c == '>') {
          c = fgetc(source);
          tokenValue[i] = c;
      if (c == '>') {
          c = fgetc(source);
          tokenValue[i] = c;
@@ -194,9 +207,6 @@ initLV1LV2:
      if (c == EOF) {
          goto FIN;
      }
      if (c == EOF) {
          goto FIN;
      }
-    if (istAlpha()) {
-        goto NPARA;
-    }
  
  NPARA:
      tokenType = NPARA;
  
  NPARA:
      tokenType = NPARA;
@@ -242,9 +252,10 @@ int main (int argc, char const *argv[]) {
          return -1;
      }
  
          return -1;
      }
  
+    int tokenFound = 0;
+
      do {
          c = fgetc(source); // lecture du caractere suivant du fichier source
      do {
          c = fgetc(source); // lecture du caractere suivant du fichier source
-        //fputc(c, target);  // ecrire c dans le fichier target
          tokenValue[i] = c;
          i++;
          int scanrt = scanner();
          tokenValue[i] = c;
          i++;
          int scanrt = scanner();
@@ -257,10 +268,12 @@ int main (int argc, char const *argv[]) {
          } else {
              printf ("Token type found: %s\n", tokenTypestr[tokenType]);
          }
          } else {
              printf ("Token type found: %s\n", tokenTypestr[tokenType]);
          }
-        // reinit
+        tokenFound++;
+        tokenList[tokenFound] = tokenTypestr[tokenType];
+        // reinit tokenValue
          i = 0;
          memset(tokenValue, 0, sizeof(tokenValue));
          i = 0;
          memset(tokenValue, 0, sizeof(tokenValue));
-    } while (c != EOF);    // tant que la fin du fichier n'est pas atteinte
+    } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
  
      if (source != NULL) fclose(source); // fermeture du fichier source
      if (target != NULL) fclose(target); // fermeture du fichier target
  
      if (source != NULL) fclose(source); // fermeture du fichier source
      if (target != NULL) fclose(target); // fermeture du fichier target