Fix the sub section detector.
[TP_AL_C.git] / lexer / main.c
CommitLineData
96964f3e
JB
1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4b580abd 4#include <stdbool.h>
e9a51b68 5
96964f3e
JB
/* Input stream read by the lexer, and output stream opened by main(). */
FILE *source, *target = NULL;

/*
 * Current look-ahead character.
 *
 * Declared as int rather than char because it receives the result of
 * fgetc(): EOF is a negative int that lies outside the range of valid
 * character values. Stored in a plain char, EOF either never compares equal
 * (unsigned char platforms) or is confused with the byte 0xFF (signed char
 * platforms).
 */
int c;

/* Index of the next free slot in tokenValue. */
unsigned int i = 0;

/* Characters collected while scanning the current token. */
char tokenValue[50];

/* Token kinds reported by scanner() through the global tokenType. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable name of each TokenType, indexed by the enum value. */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
19
d3eb30ef 20/* This looks silly to check for each characters but for debugging, it's just the way to go */
4b580abd
JB
21bool istAlpha() {
22 if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
23 c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
24 c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
25 c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
26 c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
27 c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
28 c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
29 c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
30 c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
31 c == '\''|| c == '#' || \
32 c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
33 c == '7' || c == '8' || c == '9') {
34 return true;
35 }
36 return false;
37}
38
39bool isSeparator() {
40 if (c == '\t' || c == ' ' || c == '\n') {
41 return true;
42 }
43 return false;
44}
45
46int scanner() {
47const char* Titre = "Titre";
48const char* Auteur = "Auteur";
49unsigned int j = 0;
50
51// The main loop get the next character
52init:
53 if (c == ' ' || c == '\t') {
54 c = fgetc(source);
55 tokenValue[i] = c;
56 i++;
57 goto init;
58 }
59 if (c == '>') {
60 c = fgetc(source);
61 tokenValue[i] = c;
62 i++;
63 goto MC1;
64 }
65 if (c == '=') {
66 c = fgetc(source);
67 tokenValue[i] = c;
68 i++;
69 goto S1SS1;
70 }
71 if (istAlpha()) {
72 c = fgetc(source);
73 tokenValue[i] = c;
74 i++;
75 goto M1;
76 }
77 if (c == '\n') {
78 c = fgetc(source);
79 tokenValue[i] = c;
80 i++;
81 goto initLV1;
82 }
83 if (c == EOF) {
84 goto FIN;
85 } else {
86 goto error;
87 }
88
89MC1:
90 if (c == Titre[j] && j < strlen(Titre) - 1) {
91 c = fgetc(source);
92 tokenValue[i] = c;
93 i++;
94 j++;
95 goto MC1;
96 }
97 if (c == Auteur[j] && j < strlen(Auteur) - 1) {
98 c = fgetc(source);
99 tokenValue[i] = c;
100 i++;
101 j++;
102 goto MC1;
103 } else {
104 c = fgetc(source);
105 tokenValue[i] = c;
106 i++;
107 goto MC2;
108 }
109
110S1SS1:
4b580abd
JB
111 if (c == '=') {
112 c = fgetc(source);
113 tokenValue[i] = c;
114 i++;
115 goto SS2;
116 }
d3eb30ef
JB
117 if (isSeparator() || c == EOF) {
118 goto SECTION;
119 }
120
4b580abd
JB
121
122SS2:
123 if (isSeparator() || c == EOF) {
d3eb30ef 124 goto SSECTION;
4b580abd
JB
125 }
126
127SECTION:
128 tokenType = SECTION;
129 return 1;
130
d3eb30ef
JB
131SSECTION:
132 tokenType = SSECTION;
133 return 1;
134
4b580abd
JB
135M1:
136 if (istAlpha()) {
137 c = fgetc(source);
138 tokenValue[i] = c;
139 i++;
140 goto M1;
141 }
142 if (isSeparator() || c == EOF) {
143 goto MOT;
144 }
145
146initLV1:
147 if (c == '\n' || c == '\t') {
148 c = fgetc(source);
149 tokenValue[i] = c;
150 i++;
151 goto initLV1;
152 }
153 if (istAlpha()) {
154 c = fgetc(source);
155 tokenValue[i] = c;
156 i++;
157 goto M1;
158 }
159 if (c == '=') {
160 c = fgetc(source);
161 tokenValue[i] = c;
162 i++;
163 goto S1SS1;
164 }
165 if (c == '>') {
166 c = fgetc(source);
167 tokenValue[i] = c;
168 i++;
169 goto MC1;
170 }
171 if (c == '\n') {
172 c = fgetc(source);
173 tokenValue[i] = c;
174 i++;
175 goto initLV1LV2;
176 }
177 if (c == EOF) {
178 goto FIN;
179 }
180
181initLV1LV2:
182 if (isSeparator()) {
183 c = fgetc(source);
184 tokenValue[i] = c;
185 i++;
186 goto initLV1LV2;
187 }
188 if (c == '>') {
189 c = fgetc(source);
190 tokenValue[i] = c;
191 i++;
192 goto MC1;
193 }
194 if (c == '=') {
195 c = fgetc(source);
196 tokenValue[i] = c;
197 i++;
198 goto S1SS1;
199 }
4b580abd
JB
200 if (istAlpha()) {
201 goto NPARA;
202 }
d3eb30ef
JB
203 if (c == EOF) {
204 goto FIN;
205 }
4b580abd
JB
206
207NPARA:
208 tokenType = NPARA;
209 return 1;
210
211MOT:
212 tokenType = MOT;
213 return 1;
214
215MC2:
216 if (isSeparator() || c == EOF) {
217 goto MOTCLE;
218 }
219
220MOTCLE:
221 tokenType = MOTCLE;
222 return 1;
223
224FIN:
225 tokenType = FIN;
226 return 1;
227
228error:
229 tokenType = FIN;
230 return -1;
231}
96964f3e
JB
232
233int main (int argc, char const *argv[]) {
234
235 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
236 source = fopen("test.txt", "r");
237 // Cree et ouvre un fichier target.html en lecture/ecriture
238 // avec suppression du contenu au prealable :
239 target = fopen("target.html", "w+");
240
241 if (source == NULL) {
242 printf("Impossible d'ouvrir le fichier source\n");
243 return -1;
244 }
245
246 if (target == NULL) {
247 printf("Impossible d'ouvrir le fichier target\n");
248 return -1;
249 }
250
4b580abd 251 do {
96964f3e 252 c = fgetc(source); // lecture du caractere suivant du fichier source
4b580abd
JB
253 //fputc(c, target); // ecrire c dans le fichier target
254 tokenValue[i] = c;
255 i++;
256 int scanrt = scanner();
257 if (scanrt == -1) {
258 printf ("Scanner error with token value: %s\n", tokenValue);
259 exit(EXIT_FAILURE);
260 }
261 if (c != EOF) {
262 printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
263 } else {
264 printf ("Token type found: %s\n", tokenTypestr[tokenType]);
265 }
266 // reinit
267 i = 0;
268 memset(tokenValue, 0, sizeof(tokenValue));
269 } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
96964f3e
JB
270
271 if (source != NULL) fclose(source); // fermeture du fichier source
272 if (target != NULL) fclose(target); // fermeture du fichier target
273
274 return 0;
275}