lexer/main.c

   1 #include <stdlib.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdbool.h>
   5
   6 FILE *source, *target = NULL;
   7 char c;
   8 unsigned int i = 0;
   9 char tokenValue[50];
  10 enum TokenType {
  11     MOTCLE,
  12     SECTION,
  13     SSECTION,
  14     NPARA,
  15     MOT,
  16     FIN
  17 } tokenType;
  18 const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
  19
  20 /* This looks silly to check for each characters but for debugging, it's just the way to go */
  21 bool istAlpha() {
  22     if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
  23         c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
  24         c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
  25         c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
  26         c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
  27         c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
  28         c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
  29         c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
  30         c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
  31         c == '\''|| c == '#' || \
  32         c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
  33         c == '7' || c == '8' || c == '9') {
  34             return true;
  35         }
  36         return false;
  37 }
  38
  39 bool isSeparator() {
  40     if (c == '\t' || c == ' ' || c == '\n') {
  41         return true;
  42     }
  43     return false;
  44 }
  45
  46 int scanner() {
  47 const char* Titre = "Titre";
  48 const char* Auteur = "Auteur";
  49 unsigned int j = 0;
  50
  51 // The main loop get the next character
  52 init:
  53     if (c == ' ' || c == '\t') {
  54         c = fgetc(source);
  55         tokenValue[i] = c;
  56         i++;
  57         goto init;
  58     }
  59     if (c == '\n') {
  60         c = fgetc(source);
  61         tokenValue[i] = c;
  62         i++;
  63         goto initLV1;
  64     }
  65     if (c == '>') {
  66         c = fgetc(source);
  67         tokenValue[i] = c;
  68         i++;
  69         goto MC1;
  70     }
  71     if (c == '=') {
  72         c = fgetc(source);
  73         tokenValue[i] = c;
  74         i++;
  75         goto S1SS1;
  76     }
  77     if (istAlpha()) {
  78         c = fgetc(source);
  79         tokenValue[i] = c;
  80         i++;
  81         goto M1;
  82     }
  83     if (c == EOF) {
  84         goto FIN;
  85     } else {
  86         goto error;
  87     }
  88
  89 MC1:
  90     if (c == Titre[j] && j < strlen(Titre) - 1) {
  91         c = fgetc(source);
  92         tokenValue[i] = c;
  93         i++;
  94         j++;
  95         goto MC1;
  96     }
  97     if (c == Auteur[j] && j < strlen(Auteur) - 1) {
  98         c = fgetc(source);
  99         tokenValue[i] = c;
 100         i++;
 101         j++;
 102         goto MC1;
 103     } else {
 104         c = fgetc(source);
 105         tokenValue[i] = c;
 106         i++;
 107         goto MC2;
 108     }
 109
 110 S1SS1:
 111     if (c == '=') {
 112         c = fgetc(source);
 113         tokenValue[i] = c;
 114         i++;
 115         goto SS2;
 116     }
 117     if (isSeparator() || c == EOF) {
 118         goto SECTION;
 119     }
 120
 121 SS2:
 122     if (isSeparator() || c == EOF) {
 123         goto SSECTION;
 124     }
 125
 126 SECTION:
 127     tokenType = SECTION;
 128     return 1;
 129
 130 SSECTION:
 131     tokenType = SSECTION;
 132     return 1;
 133
 134 M1:
 135     if (istAlpha()) {
 136         c = fgetc(source);
 137         tokenValue[i] = c;
 138         i++;
 139         goto M1;
 140     }
 141     if (isSeparator() || c == EOF) {
 142         goto MOT;
 143     }
 144
 145 initLV1:
 146     if (c == ' ' || c == '\t') {
 147         c = fgetc(source);
 148         tokenValue[i] = c;
 149         i++;
 150         goto initLV1;
 151     }
 152     if (c == '\n') {
 153         c = fgetc(source);
 154         tokenValue[i] = c;
 155         i++;
 156         goto initLV1LV2;
 157     }
 158     if (istAlpha()) {
 159         c = fgetc(source);
 160         tokenValue[i] = c;
 161         i++;
 162         goto M1;
 163     }
 164     if (c == '=') {
 165         c = fgetc(source);
 166         tokenValue[i] = c;
 167         i++;
 168         goto S1SS1;
 169     }
 170     if (c == '>') {
 171         c = fgetc(source);
 172         tokenValue[i] = c;
 173         i++;
 174         goto MC1;
 175     }
 176     if (c == EOF) {
 177         goto FIN;
 178     }
 179
 180 initLV1LV2:
 181     if (isSeparator()) {
 182         c = fgetc(source);
 183         tokenValue[i] = c;
 184         i++;
 185         goto initLV1LV2;
 186     }
 187     if (istAlpha()) {
 188         goto NPARA;
 189     }
 190     if (c == '>') {
 191         c = fgetc(source);
 192         tokenValue[i] = c;
 193         i++;
 194         goto MC1;
 195     }
 196     if (c == '=') {
 197         c = fgetc(source);
 198         tokenValue[i] = c;
 199         i++;
 200         goto S1SS1;
 201     }
 202     if (c == EOF) {
 203         goto FIN;
 204     }
 205
 206 NPARA:
 207     tokenType = NPARA;
 208     return 1;
 209
 210 MOT:
 211     tokenType = MOT;
 212     return 1;
 213
 214 MC2:
 215     if (isSeparator() || c == EOF) {
 216         goto MOTCLE;
 217     }
 218
 219 MOTCLE:
 220     tokenType = MOTCLE;
 221     return 1;
 222
 223 FIN:
 224     tokenType = FIN;
 225     return 1;
 226
 227 error:
 228     tokenType = FIN;
 229     return -1;
 230 }
 231
 232 int main (int argc, char const *argv[]) {
 233
 234     // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
 235     source = fopen("test.txt", "r");
 236     // Cree et ouvre un fichier target.html en lecture/ecriture
 237     // avec suppression du contenu au prealable :
 238     target = fopen("target.html", "w+");
 239
 240     if (source == NULL) {
 241         printf("Impossible d'ouvrir le fichier source\n");
 242         return -1;
 243     }
 244
 245     if (target == NULL) {
 246         printf("Impossible d'ouvrir le fichier target\n");
 247         return -1;
 248     }
 249
 250     do {
 251         c = fgetc(source); // lecture du caractere suivant du fichier source
 252         tokenValue[i] = c;
 253         i++;
 254         int scanrt = scanner();
 255         if (scanrt == -1) {
 256             printf ("Scanner error with token value: %s\n", tokenValue);
 257             exit(EXIT_FAILURE);
 258         }
 259         if (c != EOF) {
 260             printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
 261         } else {
 262             printf ("Token type found: %s\n", tokenTypestr[tokenType]);
 263         }
 264         // reinit tokenValue
 265         i = 0;
 266         memset(tokenValue, 0, sizeof(tokenValue));
 267     } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
 268
 269     if (source != NULL) fclose(source); // fermeture du fichier source
 270     if (target != NULL) fclose(target); // fermeture du fichier target
 271
 272     return 0;
 273 }