Commit | Line | Data |
---|---|---|
96964f3e JB |
1 | #include <stdlib.h> |
2 | #include <stdio.h> | |
3 | #include <string.h> | |
4b580abd | 4 | #include <stdbool.h> |
e9a51b68 | 5 | |
#define TOKEN_MAX_LENGTH 50   /* capacity of tokenValue, in bytes */
#define TOKEN_LIST_MAX 500    /* capacity of tokenList, in entries */

/* Input and output streams; both are opened by main. */
FILE *source = NULL, *target = NULL;

/*
 * Most recently read character of `source`.
 * This must be an int, not a char: fgetc returns an int so that EOF can be
 * told apart from every valid byte. Stored in a char, byte 0xFF compares equal
 * to EOF when char is signed, and EOF is never detected when char is unsigned.
 */
int c;

/* Index of the next free slot in tokenValue. */
unsigned int i = 0;

/* Characters read for the token currently being scanned. */
char tokenValue[TOKEN_MAX_LENGTH];

/* Kinds of token the scanner can report; the last recognized kind is kept in tokenType. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable name of each TokenType, indexed by the enum value itself. */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};

/* Type names of the tokens found so far, in the order main recorded them. */
const char* tokenList[TOKEN_LIST_MAX];
4b580abd | 23 | |
91e46777 | 24 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ |
4b580abd JB |
25 | bool istAlpha() { |
26 | if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \ | |
27 | c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \ | |
28 | c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \ | |
29 | c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \ | |
30 | c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \ | |
31 | c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \ | |
32 | c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \ | |
33 | c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \ | |
34 | c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \ | |
35 | c == '\''|| c == '#' || \ | |
36 | c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \ | |
37 | c == '7' || c == '8' || c == '9') { | |
38 | return true; | |
39 | } | |
40 | return false; | |
41 | } | |
42 | ||
43 | bool isSeparator() { | |
44 | if (c == '\t' || c == ' ' || c == '\n') { | |
45 | return true; | |
46 | } | |
47 | return false; | |
48 | } | |
49 | ||
50 | int scanner() { | |
da74c1e0 JB |
51 | const char* Titre = "Titre"; |
52 | const char* Auteur = "Auteur"; | |
53 | unsigned int j = 0; | |
4b580abd JB |
54 | |
55 | // The main loop get the next character | |
56 | init: | |
57 | if (c == ' ' || c == '\t') { | |
58 | c = fgetc(source); | |
59 | tokenValue[i] = c; | |
60 | i++; | |
61 | goto init; | |
62 | } | |
fde9417f JB |
63 | if (c == '\n') { |
64 | c = fgetc(source); | |
65 | tokenValue[i] = c; | |
66 | i++; | |
67 | goto initLV1; | |
68 | } | |
4b580abd JB |
69 | if (c == '>') { |
70 | c = fgetc(source); | |
71 | tokenValue[i] = c; | |
72 | i++; | |
73 | goto MC1; | |
74 | } | |
75 | if (c == '=') { | |
76 | c = fgetc(source); | |
77 | tokenValue[i] = c; | |
78 | i++; | |
79 | goto S1SS1; | |
80 | } | |
81 | if (istAlpha()) { | |
82 | c = fgetc(source); | |
83 | tokenValue[i] = c; | |
84 | i++; | |
85 | goto M1; | |
86 | } | |
4b580abd JB |
87 | if (c == EOF) { |
88 | goto FIN; | |
89 | } else { | |
90 | goto error; | |
91 | } | |
92 | ||
93 | MC1: | |
dcfcd9ab | 94 | // FIXME: Partial match need a rewind in the characters extraction from the file |
4b580abd JB |
95 | if (c == Titre[j] && j < strlen(Titre) - 1) { |
96 | c = fgetc(source); | |
97 | tokenValue[i] = c; | |
98 | i++; | |
99 | j++; | |
100 | goto MC1; | |
101 | } | |
102 | if (c == Auteur[j] && j < strlen(Auteur) - 1) { | |
103 | c = fgetc(source); | |
104 | tokenValue[i] = c; | |
105 | i++; | |
106 | j++; | |
107 | goto MC1; | |
108 | } else { | |
109 | c = fgetc(source); | |
110 | tokenValue[i] = c; | |
111 | i++; | |
112 | goto MC2; | |
113 | } | |
114 | ||
115 | S1SS1: | |
4b580abd JB |
116 | if (c == '=') { |
117 | c = fgetc(source); | |
118 | tokenValue[i] = c; | |
119 | i++; | |
120 | goto SS2; | |
121 | } | |
d3eb30ef JB |
122 | if (isSeparator() || c == EOF) { |
123 | goto SECTION; | |
124 | } | |
125 | ||
4b580abd JB |
126 | SS2: |
127 | if (isSeparator() || c == EOF) { | |
d3eb30ef | 128 | goto SSECTION; |
4b580abd JB |
129 | } |
130 | ||
131 | SECTION: | |
132 | tokenType = SECTION; | |
133 | return 1; | |
134 | ||
d3eb30ef JB |
135 | SSECTION: |
136 | tokenType = SSECTION; | |
137 | return 1; | |
138 | ||
4b580abd JB |
139 | M1: |
140 | if (istAlpha()) { | |
141 | c = fgetc(source); | |
142 | tokenValue[i] = c; | |
143 | i++; | |
144 | goto M1; | |
145 | } | |
146 | if (isSeparator() || c == EOF) { | |
147 | goto MOT; | |
148 | } | |
149 | ||
150 | initLV1: | |
fde9417f | 151 | if (c == ' ' || c == '\t') { |
4b580abd JB |
152 | c = fgetc(source); |
153 | tokenValue[i] = c; | |
154 | i++; | |
155 | goto initLV1; | |
156 | } | |
fde9417f JB |
157 | if (c == '\n') { |
158 | c = fgetc(source); | |
159 | tokenValue[i] = c; | |
160 | i++; | |
161 | goto initLV1LV2; | |
162 | } | |
4b580abd JB |
163 | if (istAlpha()) { |
164 | c = fgetc(source); | |
165 | tokenValue[i] = c; | |
166 | i++; | |
167 | goto M1; | |
168 | } | |
169 | if (c == '=') { | |
170 | c = fgetc(source); | |
171 | tokenValue[i] = c; | |
172 | i++; | |
173 | goto S1SS1; | |
174 | } | |
175 | if (c == '>') { | |
176 | c = fgetc(source); | |
177 | tokenValue[i] = c; | |
178 | i++; | |
179 | goto MC1; | |
180 | } | |
4b580abd JB |
181 | if (c == EOF) { |
182 | goto FIN; | |
183 | } | |
184 | ||
185 | initLV1LV2: | |
186 | if (isSeparator()) { | |
187 | c = fgetc(source); | |
188 | tokenValue[i] = c; | |
189 | i++; | |
190 | goto initLV1LV2; | |
191 | } | |
fde9417f JB |
192 | if (istAlpha()) { |
193 | goto NPARA; | |
194 | } | |
4b580abd JB |
195 | if (c == '>') { |
196 | c = fgetc(source); | |
197 | tokenValue[i] = c; | |
198 | i++; | |
199 | goto MC1; | |
200 | } | |
201 | if (c == '=') { | |
202 | c = fgetc(source); | |
203 | tokenValue[i] = c; | |
204 | i++; | |
205 | goto S1SS1; | |
206 | } | |
d3eb30ef JB |
207 | if (c == EOF) { |
208 | goto FIN; | |
209 | } | |
4b580abd JB |
210 | |
211 | NPARA: | |
212 | tokenType = NPARA; | |
213 | return 1; | |
214 | ||
215 | MOT: | |
216 | tokenType = MOT; | |
217 | return 1; | |
218 | ||
219 | MC2: | |
220 | if (isSeparator() || c == EOF) { | |
221 | goto MOTCLE; | |
222 | } | |
223 | ||
224 | MOTCLE: | |
225 | tokenType = MOTCLE; | |
226 | return 1; | |
227 | ||
228 | FIN: | |
229 | tokenType = FIN; | |
230 | return 1; | |
231 | ||
232 | error: | |
233 | tokenType = FIN; | |
234 | return -1; | |
235 | } | |
96964f3e JB |
236 | |
237 | int main (int argc, char const *argv[]) { | |
238 | ||
239 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : | |
240 | source = fopen("test.txt", "r"); | |
241 | // Cree et ouvre un fichier target.html en lecture/ecriture | |
242 | // avec suppression du contenu au prealable : | |
243 | target = fopen("target.html", "w+"); | |
244 | ||
245 | if (source == NULL) { | |
246 | printf("Impossible d'ouvrir le fichier source\n"); | |
247 | return -1; | |
248 | } | |
249 | ||
250 | if (target == NULL) { | |
251 | printf("Impossible d'ouvrir le fichier target\n"); | |
252 | return -1; | |
253 | } | |
254 | ||
dcfcd9ab JB |
255 | int tokenFound = 0; |
256 | ||
4b580abd | 257 | do { |
96964f3e | 258 | c = fgetc(source); // lecture du caractere suivant du fichier source |
4b580abd JB |
259 | tokenValue[i] = c; |
260 | i++; | |
261 | int scanrt = scanner(); | |
262 | if (scanrt == -1) { | |
263 | printf ("Scanner error with token value: %s\n", tokenValue); | |
264 | exit(EXIT_FAILURE); | |
265 | } | |
266 | if (c != EOF) { | |
267 | printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue); | |
268 | } else { | |
269 | printf ("Token type found: %s\n", tokenTypestr[tokenType]); | |
270 | } | |
dcfcd9ab | 271 | tokenList[tokenFound] = tokenTypestr[tokenType]; |
28280a4c | 272 | tokenFound++; |
fde9417f | 273 | // reinit tokenValue |
4b580abd JB |
274 | i = 0; |
275 | memset(tokenValue, 0, sizeof(tokenValue)); | |
532c982b | 276 | } while (c != EOF); // tant que la fin du fichier n'est pas atteinte |
96964f3e JB |
277 | |
278 | if (source != NULL) fclose(source); // fermeture du fichier source | |
279 | if (target != NULL) fclose(target); // fermeture du fichier target | |
280 | ||
281 | return 0; | |
282 | } |