Commit | Line | Data |
---|---|---|
96964f3e JB |
1 | #include <stdlib.h> |
2 | #include <stdio.h> | |
3 | #include <string.h> | |
4b580abd | 4 | #include <stdbool.h> |
e9a51b68 | 5 | |
96964f3e JB |
/* Input stream being scanned and output stream for the generated document. */
FILE *source = NULL, *target = NULL;
/*
 * Current look-ahead character.
 * Declared as int, not char: fgetc() returns an int so that EOF stays
 * distinguishable from every valid byte. Stored in a char, byte 0xFF compares
 * equal to EOF when char is signed, and EOF is never detected when char is
 * unsigned.
 */
int c;
/* Index of the next free slot in tokenValue. */
unsigned int i = 0;
/* Text accumulated for the token currently being scanned. */
char tokenValue[50];
/*
 * Kinds of token reported by scanner().
 * The enumerators keep their implicit values 0..5 because they are used
 * directly as indices into tokenTypestr.
 */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
};

/* Kind of the token most recently produced by scanner(). */
enum TokenType tokenType;

/* Printable name of each TokenType, indexed by the enumerator itself. */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};
19 | ||
d3eb30ef | 20 | /* This looks silly to check for each characters but for debugging, it's just the way to go */ |
4b580abd JB |
21 | bool istAlpha() { |
22 | if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \ | |
23 | c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \ | |
24 | c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \ | |
25 | c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \ | |
26 | c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \ | |
27 | c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \ | |
28 | c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \ | |
29 | c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \ | |
30 | c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \ | |
31 | c == '\''|| c == '#' || \ | |
32 | c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \ | |
33 | c == '7' || c == '8' || c == '9') { | |
34 | return true; | |
35 | } | |
36 | return false; | |
37 | } | |
38 | ||
39 | bool isSeparator() { | |
40 | if (c == '\t' || c == ' ' || c == '\n') { | |
41 | return true; | |
42 | } | |
43 | return false; | |
44 | } | |
45 | ||
46 | int scanner() { | |
47 | const char* Titre = "Titre"; | |
48 | const char* Auteur = "Auteur"; | |
49 | unsigned int j = 0; | |
50 | ||
51 | // The main loop get the next character | |
52 | init: | |
53 | if (c == ' ' || c == '\t') { | |
54 | c = fgetc(source); | |
55 | tokenValue[i] = c; | |
56 | i++; | |
57 | goto init; | |
58 | } | |
59 | if (c == '>') { | |
60 | c = fgetc(source); | |
61 | tokenValue[i] = c; | |
62 | i++; | |
63 | goto MC1; | |
64 | } | |
65 | if (c == '=') { | |
66 | c = fgetc(source); | |
67 | tokenValue[i] = c; | |
68 | i++; | |
69 | goto S1SS1; | |
70 | } | |
71 | if (istAlpha()) { | |
72 | c = fgetc(source); | |
73 | tokenValue[i] = c; | |
74 | i++; | |
75 | goto M1; | |
76 | } | |
77 | if (c == '\n') { | |
78 | c = fgetc(source); | |
79 | tokenValue[i] = c; | |
80 | i++; | |
81 | goto initLV1; | |
82 | } | |
83 | if (c == EOF) { | |
84 | goto FIN; | |
85 | } else { | |
86 | goto error; | |
87 | } | |
88 | ||
89 | MC1: | |
90 | if (c == Titre[j] && j < strlen(Titre) - 1) { | |
91 | c = fgetc(source); | |
92 | tokenValue[i] = c; | |
93 | i++; | |
94 | j++; | |
95 | goto MC1; | |
96 | } | |
97 | if (c == Auteur[j] && j < strlen(Auteur) - 1) { | |
98 | c = fgetc(source); | |
99 | tokenValue[i] = c; | |
100 | i++; | |
101 | j++; | |
102 | goto MC1; | |
103 | } else { | |
104 | c = fgetc(source); | |
105 | tokenValue[i] = c; | |
106 | i++; | |
107 | goto MC2; | |
108 | } | |
109 | ||
110 | S1SS1: | |
4b580abd JB |
111 | if (c == '=') { |
112 | c = fgetc(source); | |
113 | tokenValue[i] = c; | |
114 | i++; | |
115 | goto SS2; | |
116 | } | |
d3eb30ef JB |
117 | if (isSeparator() || c == EOF) { |
118 | goto SECTION; | |
119 | } | |
120 | ||
4b580abd JB |
121 | |
122 | SS2: | |
123 | if (isSeparator() || c == EOF) { | |
d3eb30ef | 124 | goto SSECTION; |
4b580abd JB |
125 | } |
126 | ||
127 | SECTION: | |
128 | tokenType = SECTION; | |
129 | return 1; | |
130 | ||
d3eb30ef JB |
131 | SSECTION: |
132 | tokenType = SSECTION; | |
133 | return 1; | |
134 | ||
4b580abd JB |
135 | M1: |
136 | if (istAlpha()) { | |
137 | c = fgetc(source); | |
138 | tokenValue[i] = c; | |
139 | i++; | |
140 | goto M1; | |
141 | } | |
142 | if (isSeparator() || c == EOF) { | |
143 | goto MOT; | |
144 | } | |
145 | ||
146 | initLV1: | |
147 | if (c == '\n' || c == '\t') { | |
148 | c = fgetc(source); | |
149 | tokenValue[i] = c; | |
150 | i++; | |
151 | goto initLV1; | |
152 | } | |
153 | if (istAlpha()) { | |
154 | c = fgetc(source); | |
155 | tokenValue[i] = c; | |
156 | i++; | |
157 | goto M1; | |
158 | } | |
159 | if (c == '=') { | |
160 | c = fgetc(source); | |
161 | tokenValue[i] = c; | |
162 | i++; | |
163 | goto S1SS1; | |
164 | } | |
165 | if (c == '>') { | |
166 | c = fgetc(source); | |
167 | tokenValue[i] = c; | |
168 | i++; | |
169 | goto MC1; | |
170 | } | |
171 | if (c == '\n') { | |
172 | c = fgetc(source); | |
173 | tokenValue[i] = c; | |
174 | i++; | |
175 | goto initLV1LV2; | |
176 | } | |
177 | if (c == EOF) { | |
178 | goto FIN; | |
179 | } | |
180 | ||
181 | initLV1LV2: | |
182 | if (isSeparator()) { | |
183 | c = fgetc(source); | |
184 | tokenValue[i] = c; | |
185 | i++; | |
186 | goto initLV1LV2; | |
187 | } | |
188 | if (c == '>') { | |
189 | c = fgetc(source); | |
190 | tokenValue[i] = c; | |
191 | i++; | |
192 | goto MC1; | |
193 | } | |
194 | if (c == '=') { | |
195 | c = fgetc(source); | |
196 | tokenValue[i] = c; | |
197 | i++; | |
198 | goto S1SS1; | |
199 | } | |
4b580abd JB |
200 | if (istAlpha()) { |
201 | goto NPARA; | |
202 | } | |
d3eb30ef JB |
203 | if (c == EOF) { |
204 | goto FIN; | |
205 | } | |
4b580abd JB |
206 | |
207 | NPARA: | |
208 | tokenType = NPARA; | |
209 | return 1; | |
210 | ||
211 | MOT: | |
212 | tokenType = MOT; | |
213 | return 1; | |
214 | ||
215 | MC2: | |
216 | if (isSeparator() || c == EOF) { | |
217 | goto MOTCLE; | |
218 | } | |
219 | ||
220 | MOTCLE: | |
221 | tokenType = MOTCLE; | |
222 | return 1; | |
223 | ||
224 | FIN: | |
225 | tokenType = FIN; | |
226 | return 1; | |
227 | ||
228 | error: | |
229 | tokenType = FIN; | |
230 | return -1; | |
231 | } | |
96964f3e JB |
232 | |
233 | int main (int argc, char const *argv[]) { | |
234 | ||
235 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : | |
236 | source = fopen("test.txt", "r"); | |
237 | // Cree et ouvre un fichier target.html en lecture/ecriture | |
238 | // avec suppression du contenu au prealable : | |
239 | target = fopen("target.html", "w+"); | |
240 | ||
241 | if (source == NULL) { | |
242 | printf("Impossible d'ouvrir le fichier source\n"); | |
243 | return -1; | |
244 | } | |
245 | ||
246 | if (target == NULL) { | |
247 | printf("Impossible d'ouvrir le fichier target\n"); | |
248 | return -1; | |
249 | } | |
250 | ||
4b580abd | 251 | do { |
96964f3e | 252 | c = fgetc(source); // lecture du caractere suivant du fichier source |
4b580abd JB |
253 | //fputc(c, target); // ecrire c dans le fichier target |
254 | tokenValue[i] = c; | |
255 | i++; | |
256 | int scanrt = scanner(); | |
257 | if (scanrt == -1) { | |
258 | printf ("Scanner error with token value: %s\n", tokenValue); | |
259 | exit(EXIT_FAILURE); | |
260 | } | |
261 | if (c != EOF) { | |
262 | printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue); | |
263 | } else { | |
264 | printf ("Token type found: %s\n", tokenTypestr[tokenType]); | |
265 | } | |
266 | // reinit | |
267 | i = 0; | |
268 | memset(tokenValue, 0, sizeof(tokenValue)); | |
269 | } while (c != EOF); // tant que la fin du fichier n'est pas atteinte | |
96964f3e JB |
270 | |
271 | if (source != NULL) fclose(source); // fermeture du fichier source | |
272 | if (target != NULL) fclose(target); // fermeture du fichier target | |
273 | ||
274 | return 0; | |
275 | } |