lexer/main.c

   1 #include <stdlib.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdbool.h>
   5
   6 FILE *source, *target = NULL;
   7 char c;
   8 unsigned int i = 0;
   9 char tokenValue[50];
  10 enum TokenType {
  11     MOTCLE,
  12     SECTION,
  13     SSECTION,
  14     NPARA,
  15     MOT,
  16     FIN
  17 } tokenType;
  18 const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
  19
  20 bool istAlpha() {
  21     if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
  22         c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
  23         c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
  24         c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
  25         c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
  26         c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
  27         c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
  28         c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
  29         c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
  30         c == '\''|| c == '#' || \
  31         c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
  32         c == '7' || c == '8' || c == '9') {
  33             return true;
  34         }
  35         return false;
  36 }
  37
  38 bool isSeparator() {
  39     if (c == '\t' || c == ' ' || c == '\n') {
  40         return true;
  41     }
  42     return false;
  43 }
  44
  45 int scanner() {
  46 const char* Titre = "Titre";
  47 const char* Auteur = "Auteur";
  48 unsigned int j = 0;
  49
  50 // The main loop get the next character
  51 init:
  52     if (c == ' ' || c == '\t') {
  53         c = fgetc(source);
  54         tokenValue[i] = c;
  55         i++;
  56         goto init;
  57     }
  58     if (c == '>') {
  59         c = fgetc(source);
  60         tokenValue[i] = c;
  61         i++;
  62         goto MC1;
  63     }
  64     if (c == '=') {
  65         c = fgetc(source);
  66         tokenValue[i] = c;
  67         i++;
  68         goto S1SS1;
  69     }
  70     if (istAlpha()) {
  71         c = fgetc(source);
  72         tokenValue[i] = c;
  73         i++;
  74         goto M1;
  75     }
  76     if (c == '\n') {
  77         c = fgetc(source);
  78         tokenValue[i] = c;
  79         i++;
  80         goto initLV1;
  81     }
  82     if (c == EOF) {
  83         goto FIN;
  84     } else {
  85         goto error;
  86     }
  87
  88 MC1:
  89     if (c == Titre[j] && j < strlen(Titre) - 1) {
  90         c = fgetc(source);
  91         tokenValue[i] = c;
  92         i++;
  93         j++;
  94         goto MC1;
  95     }
  96     if (c == Auteur[j] && j < strlen(Auteur) - 1) {
  97         c = fgetc(source);
  98         tokenValue[i] = c;
  99         i++;
 100         j++;
 101         goto MC1;
 102     } else {
 103         c = fgetc(source);
 104         tokenValue[i] = c;
 105         i++;
 106         goto MC2;
 107     }
 108
 109 S1SS1:
 110     if (isSeparator() || c == EOF) {
 111         goto SECTION;
 112     }
 113     if (c == '=') {
 114         c = fgetc(source);
 115         tokenValue[i] = c;
 116         i++;
 117         goto SS2;
 118     }
 119
 120 SS2:
 121     if (isSeparator() || c == EOF) {
 122         goto SECTION;
 123     }
 124
 125 SECTION:
 126     tokenType = SECTION;
 127     return 1;
 128
 129 M1:
 130     if (istAlpha()) {
 131         c = fgetc(source);
 132         tokenValue[i] = c;
 133         i++;
 134         goto M1;
 135     }
 136     if (isSeparator() || c == EOF) {
 137         goto MOT;
 138     }
 139
 140 initLV1:
 141     if (c == '\n' || c == '\t') {
 142         c = fgetc(source);
 143         tokenValue[i] = c;
 144         i++;
 145         goto initLV1;
 146     }
 147     if (istAlpha()) {
 148         c = fgetc(source);
 149         tokenValue[i] = c;
 150         i++;
 151         goto M1;
 152     }
 153     if (c == '=') {
 154         c = fgetc(source);
 155         tokenValue[i] = c;
 156         i++;
 157         goto S1SS1;
 158     }
 159     if (c == '>') {
 160         c = fgetc(source);
 161         tokenValue[i] = c;
 162         i++;
 163         goto MC1;
 164     }
 165     if (c == '\n') {
 166         c = fgetc(source);
 167         tokenValue[i] = c;
 168         i++;
 169         goto initLV1LV2;
 170     }
 171     if (c == EOF) {
 172         goto FIN;
 173     }
 174
 175 initLV1LV2:
 176     if (isSeparator()) {
 177         c = fgetc(source);
 178         tokenValue[i] = c;
 179         i++;
 180         goto initLV1LV2;
 181     }
 182     if (c == '>') {
 183         c = fgetc(source);
 184         tokenValue[i] = c;
 185         i++;
 186         goto MC1;
 187     }
 188     if (c == '=') {
 189         c = fgetc(source);
 190         tokenValue[i] = c;
 191         i++;
 192         goto S1SS1;
 193     }
 194     if (c == EOF) {
 195         goto FIN;
 196     }
 197     if (istAlpha()) {
 198         goto NPARA;
 199     }
 200
 201 NPARA:
 202     tokenType = NPARA;
 203     return 1;
 204
 205 MOT:
 206     tokenType = MOT;
 207     return 1;
 208
 209 MC2:
 210     if (isSeparator() || c == EOF) {
 211         goto MOTCLE;
 212     }
 213
 214 MOTCLE:
 215     tokenType = MOTCLE;
 216     return 1;
 217
 218 FIN:
 219     tokenType = FIN;
 220     return 1;
 221
 222 error:
 223     tokenType = FIN;
 224     return -1;
 225 }
 226
 227 int main (int argc, char const *argv[]) {
 228
 229     // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
 230     source = fopen("test.txt", "r");
 231     // Cree et ouvre un fichier target.html en lecture/ecriture
 232     // avec suppression du contenu au prealable :
 233     target = fopen("target.html", "w+");
 234
 235     if (source == NULL) {
 236         printf("Impossible d'ouvrir le fichier source\n");
 237         return -1;
 238     }
 239
 240     if (target == NULL) {
 241         printf("Impossible d'ouvrir le fichier target\n");
 242         return -1;
 243     }
 244
 245     do {
 246         c = fgetc(source); // lecture du caractere suivant du fichier source
 247         //fputc(c, target);  // ecrire c dans le fichier target
 248         tokenValue[i] = c;
 249         i++;
 250         int scanrt = scanner();
 251         if (scanrt == -1) {
 252             printf ("Scanner error with token value: %s\n", tokenValue);
 253             exit(EXIT_FAILURE);
 254         }
 255         if (c != EOF) {
 256             printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
 257         } else {
 258             printf ("Token type found: %s\n", tokenTypestr[tokenType]);
 259         }
 260         // reinit
 261         i = 0;
 262         memset(tokenValue, 0, sizeof(tokenValue));
 263     } while (c != EOF);    // tant que la fin du fichier n'est pas atteinte
 264
 265     if (source != NULL) fclose(source); // fermeture du fichier source
 266     if (target != NULL) fclose(target); // fermeture du fichier target
 267
 268     return 0;
 269 }