Increment the number of tokens found after the array storage.
[TP_AL_C.git] / lexer / main.c
CommitLineData
96964f3e
JB
1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4b580abd 4#include <stdbool.h>
e9a51b68 5
dcfcd9ab
JB
/* Size, in bytes, of the buffer that holds the text of one token. */
#define TOKEN_MAX_LENGTH 50
/* Number of entries available in tokenList. */
#define TOKEN_LIST_MAX 500

/* Input stream consumed by the scanner, and the output stream opened by main(). */
FILE *source = NULL, *target = NULL;
/*
 * Most recent value returned by fgetc(source).
 * Declared int rather than char because fgetc() returns an int: stored in a
 * char, EOF either becomes indistinguishable from the byte 0xFF (signed char)
 * or can never compare equal to EOF again (unsigned char).
 */
int c;
/* Index of the next free slot in tokenValue. */
unsigned int i = 0;
/* Characters collected for the token currently being scanned. */
char tokenValue[TOKEN_MAX_LENGTH];
/* Kinds of token; tokenType holds the kind of the last token scanned. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;
/* Printable name of each token kind, indexed by enum TokenType. */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
/* Names of the tokens found so far, in the order they were scanned. */
const char* tokenList[TOKEN_LIST_MAX];
4b580abd 23
91e46777 24/* It looks silly to check for each characters but for debugging, it's just the way to go */
4b580abd
JB
25bool istAlpha() {
26 if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
27 c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
28 c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
29 c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
30 c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
31 c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
32 c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
33 c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
34 c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
35 c == '\''|| c == '#' || \
36 c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
37 c == '7' || c == '8' || c == '9') {
38 return true;
39 }
40 return false;
41}
42
43bool isSeparator() {
44 if (c == '\t' || c == ' ' || c == '\n') {
45 return true;
46 }
47 return false;
48}
49
50int scanner() {
da74c1e0
JB
51 const char* Titre = "Titre";
52 const char* Auteur = "Auteur";
53 unsigned int j = 0;
4b580abd
JB
54
55// The main loop get the next character
56init:
57 if (c == ' ' || c == '\t') {
58 c = fgetc(source);
59 tokenValue[i] = c;
60 i++;
61 goto init;
62 }
fde9417f
JB
63 if (c == '\n') {
64 c = fgetc(source);
65 tokenValue[i] = c;
66 i++;
67 goto initLV1;
68 }
4b580abd
JB
69 if (c == '>') {
70 c = fgetc(source);
71 tokenValue[i] = c;
72 i++;
73 goto MC1;
74 }
75 if (c == '=') {
76 c = fgetc(source);
77 tokenValue[i] = c;
78 i++;
79 goto S1SS1;
80 }
81 if (istAlpha()) {
82 c = fgetc(source);
83 tokenValue[i] = c;
84 i++;
85 goto M1;
86 }
4b580abd
JB
87 if (c == EOF) {
88 goto FIN;
89 } else {
90 goto error;
91 }
92
93MC1:
dcfcd9ab 94 // FIXME: Partial match need a rewind in the characters extraction from the file
4b580abd
JB
95 if (c == Titre[j] && j < strlen(Titre) - 1) {
96 c = fgetc(source);
97 tokenValue[i] = c;
98 i++;
99 j++;
100 goto MC1;
101 }
102 if (c == Auteur[j] && j < strlen(Auteur) - 1) {
103 c = fgetc(source);
104 tokenValue[i] = c;
105 i++;
106 j++;
107 goto MC1;
108 } else {
109 c = fgetc(source);
110 tokenValue[i] = c;
111 i++;
112 goto MC2;
113 }
114
115S1SS1:
4b580abd
JB
116 if (c == '=') {
117 c = fgetc(source);
118 tokenValue[i] = c;
119 i++;
120 goto SS2;
121 }
d3eb30ef
JB
122 if (isSeparator() || c == EOF) {
123 goto SECTION;
124 }
125
4b580abd
JB
126SS2:
127 if (isSeparator() || c == EOF) {
d3eb30ef 128 goto SSECTION;
4b580abd
JB
129 }
130
131SECTION:
132 tokenType = SECTION;
133 return 1;
134
d3eb30ef
JB
135SSECTION:
136 tokenType = SSECTION;
137 return 1;
138
4b580abd
JB
139M1:
140 if (istAlpha()) {
141 c = fgetc(source);
142 tokenValue[i] = c;
143 i++;
144 goto M1;
145 }
146 if (isSeparator() || c == EOF) {
147 goto MOT;
148 }
149
150initLV1:
fde9417f 151 if (c == ' ' || c == '\t') {
4b580abd
JB
152 c = fgetc(source);
153 tokenValue[i] = c;
154 i++;
155 goto initLV1;
156 }
fde9417f
JB
157 if (c == '\n') {
158 c = fgetc(source);
159 tokenValue[i] = c;
160 i++;
161 goto initLV1LV2;
162 }
4b580abd
JB
163 if (istAlpha()) {
164 c = fgetc(source);
165 tokenValue[i] = c;
166 i++;
167 goto M1;
168 }
169 if (c == '=') {
170 c = fgetc(source);
171 tokenValue[i] = c;
172 i++;
173 goto S1SS1;
174 }
175 if (c == '>') {
176 c = fgetc(source);
177 tokenValue[i] = c;
178 i++;
179 goto MC1;
180 }
4b580abd
JB
181 if (c == EOF) {
182 goto FIN;
183 }
184
185initLV1LV2:
186 if (isSeparator()) {
187 c = fgetc(source);
188 tokenValue[i] = c;
189 i++;
190 goto initLV1LV2;
191 }
fde9417f
JB
192 if (istAlpha()) {
193 goto NPARA;
194 }
4b580abd
JB
195 if (c == '>') {
196 c = fgetc(source);
197 tokenValue[i] = c;
198 i++;
199 goto MC1;
200 }
201 if (c == '=') {
202 c = fgetc(source);
203 tokenValue[i] = c;
204 i++;
205 goto S1SS1;
206 }
d3eb30ef
JB
207 if (c == EOF) {
208 goto FIN;
209 }
4b580abd
JB
210
211NPARA:
212 tokenType = NPARA;
213 return 1;
214
215MOT:
216 tokenType = MOT;
217 return 1;
218
219MC2:
220 if (isSeparator() || c == EOF) {
221 goto MOTCLE;
222 }
223
224MOTCLE:
225 tokenType = MOTCLE;
226 return 1;
227
228FIN:
229 tokenType = FIN;
230 return 1;
231
232error:
233 tokenType = FIN;
234 return -1;
235}
96964f3e
JB
236
237int main (int argc, char const *argv[]) {
238
239 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
240 source = fopen("test.txt", "r");
241 // Cree et ouvre un fichier target.html en lecture/ecriture
242 // avec suppression du contenu au prealable :
243 target = fopen("target.html", "w+");
244
245 if (source == NULL) {
246 printf("Impossible d'ouvrir le fichier source\n");
247 return -1;
248 }
249
250 if (target == NULL) {
251 printf("Impossible d'ouvrir le fichier target\n");
252 return -1;
253 }
254
dcfcd9ab
JB
255 int tokenFound = 0;
256
4b580abd 257 do {
96964f3e 258 c = fgetc(source); // lecture du caractere suivant du fichier source
4b580abd
JB
259 tokenValue[i] = c;
260 i++;
261 int scanrt = scanner();
262 if (scanrt == -1) {
263 printf ("Scanner error with token value: %s\n", tokenValue);
264 exit(EXIT_FAILURE);
265 }
266 if (c != EOF) {
267 printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
268 } else {
269 printf ("Token type found: %s\n", tokenTypestr[tokenType]);
270 }
dcfcd9ab 271 tokenList[tokenFound] = tokenTypestr[tokenType];
28280a4c 272 tokenFound++;
fde9417f 273 // reinit tokenValue
4b580abd
JB
274 i = 0;
275 memset(tokenValue, 0, sizeof(tokenValue));
532c982b 276 } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
96964f3e
JB
277
278 if (source != NULL) fclose(source); // fermeture du fichier source
279 if (target != NULL) fclose(target); // fermeture du fichier target
280
281 return 0;
282}