| 1 | #include <stdlib.h> |
| 2 | #include <stdio.h> |
| 3 | #include <string.h> |
| 4 | #include <stdbool.h> |
| 5 | |
/* Input stream read by the scanner and output stream opened by main(). */
FILE *source = NULL;
FILE *target = NULL;

/*
 * Current character, as returned by fgetc().
 * It is an int (not a char) so that EOF stays distinguishable from every
 * valid byte: with a char, byte 0xFF compares equal to EOF where char is
 * signed, and EOF can never be matched at all where char is unsigned.
 */
int c;

/* Index of the next free slot in tokenValue. */
unsigned int i = 0;

/* Characters collected for the token currently being scanned. */
char tokenValue[50];

/* Kinds of token the scanner can report; the last one found is in tokenType. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable names, indexed by enum TokenType (keep both lists in the same order). */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
| 19 | |
| 20 | /* This looks silly to check for each characters but for debugging, it's just the way to go */ |
| 21 | bool istAlpha() { |
| 22 | if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \ |
| 23 | c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \ |
| 24 | c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \ |
| 25 | c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \ |
| 26 | c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \ |
| 27 | c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \ |
| 28 | c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \ |
| 29 | c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \ |
| 30 | c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \ |
| 31 | c == '\''|| c == '#' || \ |
| 32 | c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \ |
| 33 | c == '7' || c == '8' || c == '9') { |
| 34 | return true; |
| 35 | } |
| 36 | return false; |
| 37 | } |
| 38 | |
| 39 | bool isSeparator() { |
| 40 | if (c == '\t' || c == ' ' || c == '\n') { |
| 41 | return true; |
| 42 | } |
| 43 | return false; |
| 44 | } |
| 45 | |
| 46 | int scanner() { |
| 47 | const char* Titre = "Titre"; |
| 48 | const char* Auteur = "Auteur"; |
| 49 | unsigned int j = 0; |
| 50 | |
| 51 | // The main loop get the next character |
| 52 | init: |
| 53 | if (c == ' ' || c == '\t') { |
| 54 | c = fgetc(source); |
| 55 | tokenValue[i] = c; |
| 56 | i++; |
| 57 | goto init; |
| 58 | } |
| 59 | if (c == '>') { |
| 60 | c = fgetc(source); |
| 61 | tokenValue[i] = c; |
| 62 | i++; |
| 63 | goto MC1; |
| 64 | } |
| 65 | if (c == '=') { |
| 66 | c = fgetc(source); |
| 67 | tokenValue[i] = c; |
| 68 | i++; |
| 69 | goto S1SS1; |
| 70 | } |
| 71 | if (istAlpha()) { |
| 72 | c = fgetc(source); |
| 73 | tokenValue[i] = c; |
| 74 | i++; |
| 75 | goto M1; |
| 76 | } |
| 77 | if (c == '\n') { |
| 78 | c = fgetc(source); |
| 79 | tokenValue[i] = c; |
| 80 | i++; |
| 81 | goto initLV1; |
| 82 | } |
| 83 | if (c == EOF) { |
| 84 | goto FIN; |
| 85 | } else { |
| 86 | goto error; |
| 87 | } |
| 88 | |
| 89 | MC1: |
| 90 | if (c == Titre[j] && j < strlen(Titre) - 1) { |
| 91 | c = fgetc(source); |
| 92 | tokenValue[i] = c; |
| 93 | i++; |
| 94 | j++; |
| 95 | goto MC1; |
| 96 | } |
| 97 | if (c == Auteur[j] && j < strlen(Auteur) - 1) { |
| 98 | c = fgetc(source); |
| 99 | tokenValue[i] = c; |
| 100 | i++; |
| 101 | j++; |
| 102 | goto MC1; |
| 103 | } else { |
| 104 | c = fgetc(source); |
| 105 | tokenValue[i] = c; |
| 106 | i++; |
| 107 | goto MC2; |
| 108 | } |
| 109 | |
| 110 | S1SS1: |
| 111 | if (c == '=') { |
| 112 | c = fgetc(source); |
| 113 | tokenValue[i] = c; |
| 114 | i++; |
| 115 | goto SS2; |
| 116 | } |
| 117 | if (isSeparator() || c == EOF) { |
| 118 | goto SECTION; |
| 119 | } |
| 120 | |
| 121 | |
| 122 | SS2: |
| 123 | if (isSeparator() || c == EOF) { |
| 124 | goto SSECTION; |
| 125 | } |
| 126 | |
| 127 | SECTION: |
| 128 | tokenType = SECTION; |
| 129 | return 1; |
| 130 | |
| 131 | SSECTION: |
| 132 | tokenType = SSECTION; |
| 133 | return 1; |
| 134 | |
| 135 | M1: |
| 136 | if (istAlpha()) { |
| 137 | c = fgetc(source); |
| 138 | tokenValue[i] = c; |
| 139 | i++; |
| 140 | goto M1; |
| 141 | } |
| 142 | if (isSeparator() || c == EOF) { |
| 143 | goto MOT; |
| 144 | } |
| 145 | |
| 146 | initLV1: |
| 147 | if (c == '\n' || c == '\t') { |
| 148 | c = fgetc(source); |
| 149 | tokenValue[i] = c; |
| 150 | i++; |
| 151 | goto initLV1; |
| 152 | } |
| 153 | if (istAlpha()) { |
| 154 | c = fgetc(source); |
| 155 | tokenValue[i] = c; |
| 156 | i++; |
| 157 | goto M1; |
| 158 | } |
| 159 | if (c == '=') { |
| 160 | c = fgetc(source); |
| 161 | tokenValue[i] = c; |
| 162 | i++; |
| 163 | goto S1SS1; |
| 164 | } |
| 165 | if (c == '>') { |
| 166 | c = fgetc(source); |
| 167 | tokenValue[i] = c; |
| 168 | i++; |
| 169 | goto MC1; |
| 170 | } |
| 171 | if (c == '\n') { |
| 172 | c = fgetc(source); |
| 173 | tokenValue[i] = c; |
| 174 | i++; |
| 175 | goto initLV1LV2; |
| 176 | } |
| 177 | if (c == EOF) { |
| 178 | goto FIN; |
| 179 | } |
| 180 | |
| 181 | initLV1LV2: |
| 182 | if (isSeparator()) { |
| 183 | c = fgetc(source); |
| 184 | tokenValue[i] = c; |
| 185 | i++; |
| 186 | goto initLV1LV2; |
| 187 | } |
| 188 | if (c == '>') { |
| 189 | c = fgetc(source); |
| 190 | tokenValue[i] = c; |
| 191 | i++; |
| 192 | goto MC1; |
| 193 | } |
| 194 | if (c == '=') { |
| 195 | c = fgetc(source); |
| 196 | tokenValue[i] = c; |
| 197 | i++; |
| 198 | goto S1SS1; |
| 199 | } |
| 200 | if (istAlpha()) { |
| 201 | goto NPARA; |
| 202 | } |
| 203 | if (c == EOF) { |
| 204 | goto FIN; |
| 205 | } |
| 206 | |
| 207 | NPARA: |
| 208 | tokenType = NPARA; |
| 209 | return 1; |
| 210 | |
| 211 | MOT: |
| 212 | tokenType = MOT; |
| 213 | return 1; |
| 214 | |
| 215 | MC2: |
| 216 | if (isSeparator() || c == EOF) { |
| 217 | goto MOTCLE; |
| 218 | } |
| 219 | |
| 220 | MOTCLE: |
| 221 | tokenType = MOTCLE; |
| 222 | return 1; |
| 223 | |
| 224 | FIN: |
| 225 | tokenType = FIN; |
| 226 | return 1; |
| 227 | |
| 228 | error: |
| 229 | tokenType = FIN; |
| 230 | return -1; |
| 231 | } |
| 232 | |
| 233 | int main (int argc, char const *argv[]) { |
| 234 | |
| 235 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : |
| 236 | source = fopen("test.txt", "r"); |
| 237 | // Cree et ouvre un fichier target.html en lecture/ecriture |
| 238 | // avec suppression du contenu au prealable : |
| 239 | target = fopen("target.html", "w+"); |
| 240 | |
| 241 | if (source == NULL) { |
| 242 | printf("Impossible d'ouvrir le fichier source\n"); |
| 243 | return -1; |
| 244 | } |
| 245 | |
| 246 | if (target == NULL) { |
| 247 | printf("Impossible d'ouvrir le fichier target\n"); |
| 248 | return -1; |
| 249 | } |
| 250 | |
| 251 | do { |
| 252 | c = fgetc(source); // lecture du caractere suivant du fichier source |
| 253 | //fputc(c, target); // ecrire c dans le fichier target |
| 254 | tokenValue[i] = c; |
| 255 | i++; |
| 256 | int scanrt = scanner(); |
| 257 | if (scanrt == -1) { |
| 258 | printf ("Scanner error with token value: %s\n", tokenValue); |
| 259 | exit(EXIT_FAILURE); |
| 260 | } |
| 261 | if (c != EOF) { |
| 262 | printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue); |
| 263 | } else { |
| 264 | printf ("Token type found: %s\n", tokenTypestr[tokenType]); |
| 265 | } |
| 266 | // reinit |
| 267 | i = 0; |
| 268 | memset(tokenValue, 0, sizeof(tokenValue)); |
| 269 | } while (c != EOF); // tant que la fin du fichier n'est pas atteinte |
| 270 | |
| 271 | if (source != NULL) fclose(source); // fermeture du fichier source |
| 272 | if (target != NULL) fclose(target); // fermeture du fichier target |
| 273 | |
| 274 | return 0; |
| 275 | } |