Commit | Line | Data |
---|---|---|
96964f3e JB |
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
e9a51b68 | 6 | |
6a19b8fe JB |
/* Maximum number of tokens the scanner can record. */
#define TOKEN_MAX 500
/* Capacity, in wide characters, of one token's text (terminator included). */
#define TOKEN_VALUE_MAX 50

/* One scanned token: the printable name of its type and its raw text. */
struct token_s {
    const char *type;              /* points into tokenTypestr once the token is classified */
    wint_t value[TOKEN_VALUE_MAX]; /* characters read while scanning this token */
};

/* Table of every token found so far; zero-initialized, so each value starts as an empty string. */
struct token_s token[TOKEN_MAX] = {0};
dcfcd9ab JB |
15 | |
/* Input stream read by the scanner. */
FILE *source = NULL;
/* Output stream opened by main(). */
FILE *target = NULL;

/* Most recently read wide character; shared by main() and every scanner state. */
wint_t c;

/* Index of the token currently being built in the token table. */
unsigned int tokenFound = 0;

/* Write position inside the current token's value array. */
unsigned int i = 0;

/* Kinds of token the scanner can report. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
};

/* Type of the token most recently recognized by scanner(). */
enum TokenType tokenType;

/* Printable name of each TokenType, indexed by the enum value. */
const char* tokenTypestr[] = {
    "MOTCLE",
    "SECTION",
    "SSECTION",
    "NPARA",
    "MOT",
    "FIN"
};
4b580abd | 29 | |
91e46777 | 30 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ |
4b580abd | 31 | bool istAlpha() { |
6a19b8fe JB |
32 | if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ |
33 | c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ | |
34 | c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ | |
35 | c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ | |
36 | c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ | |
37 | c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ | |
38 | c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ | |
39 | c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ | |
40 | c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ | |
41 | c == L'\''|| c == L'#' || \ | |
42 | c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ | |
43 | c == L'7' || c == L'8' || c == L'9' || \ | |
44 | c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ | |
45 | c == L'ù' || c == L'û' || \ | |
46 | c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ | |
47 | c == L'Ù' || c == L'Û') { | |
4b580abd JB |
48 | return true; |
49 | } | |
50 | return false; | |
51 | } | |
52 | ||
53 | bool isSeparator() { | |
6a19b8fe | 54 | if (c == L'\t' || c == L' ' || c == L'\n') { |
4b580abd JB |
55 | return true; |
56 | } | |
57 | return false; | |
58 | } | |
59 | ||
60 | int scanner() { | |
6a19b8fe JB |
61 | const wchar_t* Titre = L"Titre"; |
62 | const wchar_t* Auteur = L"Auteur"; | |
da74c1e0 | 63 | unsigned int j = 0; |
4b580abd JB |
64 | |
65 | // The main loop get the next character | |
66 | init: | |
6a19b8fe JB |
67 | if (c == L' ' || c == L'\t') { |
68 | c = fgetwc(source); | |
69 | token[tokenFound].value[i] = c; | |
4b580abd JB |
70 | i++; |
71 | goto init; | |
72 | } | |
6a19b8fe JB |
73 | if (c == L'\n') { |
74 | c = fgetwc(source); | |
75 | token[tokenFound].value[i] = c; | |
fde9417f JB |
76 | i++; |
77 | goto initLV1; | |
78 | } | |
6a19b8fe JB |
79 | if (c == L'>') { |
80 | c = fgetwc(source); | |
81 | token[tokenFound].value[i] = c; | |
4b580abd JB |
82 | i++; |
83 | goto MC1; | |
84 | } | |
6a19b8fe JB |
85 | if (c == L'=') { |
86 | c = fgetwc(source); | |
87 | token[tokenFound].value[i] = c; | |
4b580abd JB |
88 | i++; |
89 | goto S1SS1; | |
90 | } | |
91 | if (istAlpha()) { | |
6a19b8fe JB |
92 | c = fgetwc(source); |
93 | token[tokenFound].value[i] = c; | |
4b580abd JB |
94 | i++; |
95 | goto M1; | |
96 | } | |
6a19b8fe | 97 | if (c == WEOF) { |
4b580abd JB |
98 | goto FIN; |
99 | } else { | |
100 | goto error; | |
101 | } | |
102 | ||
103 | MC1: | |
dcfcd9ab | 104 | // FIXME: Partial match need a rewind in the characters extraction from the file |
6a19b8fe JB |
105 | if (c == (wint_t)Titre[j] && j < wcslen(Titre) - 1) { |
106 | c = fgetwc(source); | |
107 | token[tokenFound].value[i] = c; | |
4b580abd JB |
108 | i++; |
109 | j++; | |
110 | goto MC1; | |
111 | } | |
6a19b8fe JB |
112 | if (c == (wint_t)Auteur[j] && j < wcslen(Auteur) - 1) { |
113 | c = fgetwc(source); | |
114 | token[tokenFound].value[i] = c; | |
4b580abd JB |
115 | i++; |
116 | j++; | |
117 | goto MC1; | |
118 | } else { | |
6a19b8fe JB |
119 | c = fgetwc(source); |
120 | token[tokenFound].value[i] = c; | |
4b580abd JB |
121 | i++; |
122 | goto MC2; | |
123 | } | |
124 | ||
125 | S1SS1: | |
6a19b8fe JB |
126 | if (c == L'=') { |
127 | c = fgetwc(source); | |
128 | token[tokenFound].value[i] = c; | |
4b580abd JB |
129 | i++; |
130 | goto SS2; | |
131 | } | |
6a19b8fe | 132 | if (isSeparator() || c == WEOF) { |
d3eb30ef JB |
133 | goto SECTION; |
134 | } | |
135 | ||
4b580abd | 136 | SS2: |
6a19b8fe | 137 | if (isSeparator() || c == WEOF) { |
d3eb30ef | 138 | goto SSECTION; |
4b580abd JB |
139 | } |
140 | ||
141 | SECTION: | |
142 | tokenType = SECTION; | |
143 | return 1; | |
144 | ||
d3eb30ef JB |
145 | SSECTION: |
146 | tokenType = SSECTION; | |
147 | return 1; | |
148 | ||
4b580abd JB |
149 | M1: |
150 | if (istAlpha()) { | |
6a19b8fe JB |
151 | c = fgetwc(source); |
152 | token[tokenFound].value[i] = c; | |
4b580abd JB |
153 | i++; |
154 | goto M1; | |
155 | } | |
6a19b8fe | 156 | if (isSeparator() || c == WEOF) { |
4b580abd JB |
157 | goto MOT; |
158 | } | |
159 | ||
160 | initLV1: | |
6a19b8fe JB |
161 | if (c == L' ' || c == L'\t') { |
162 | c = fgetwc(source); | |
163 | token[tokenFound].value[i] = c; | |
4b580abd JB |
164 | i++; |
165 | goto initLV1; | |
166 | } | |
6a19b8fe JB |
167 | if (c == L'\n') { |
168 | c = fgetwc(source); | |
169 | token[tokenFound].value[i] = c; | |
fde9417f JB |
170 | i++; |
171 | goto initLV1LV2; | |
172 | } | |
4b580abd | 173 | if (istAlpha()) { |
6a19b8fe JB |
174 | c = fgetwc(source); |
175 | token[tokenFound].value[i] = c; | |
4b580abd JB |
176 | i++; |
177 | goto M1; | |
178 | } | |
6a19b8fe JB |
179 | if (c == L'=') { |
180 | c = fgetwc(source); | |
181 | token[tokenFound].value[i] = c; | |
4b580abd JB |
182 | i++; |
183 | goto S1SS1; | |
184 | } | |
6a19b8fe JB |
185 | if (c == L'>') { |
186 | c = fgetwc(source); | |
187 | token[tokenFound].value[i] = c; | |
4b580abd JB |
188 | i++; |
189 | goto MC1; | |
190 | } | |
6a19b8fe | 191 | if (c == WEOF) { |
4b580abd JB |
192 | goto FIN; |
193 | } | |
194 | ||
195 | initLV1LV2: | |
196 | if (isSeparator()) { | |
6a19b8fe JB |
197 | c = fgetwc(source); |
198 | token[tokenFound].value[i] = c; | |
4b580abd JB |
199 | i++; |
200 | goto initLV1LV2; | |
201 | } | |
fde9417f JB |
202 | if (istAlpha()) { |
203 | goto NPARA; | |
204 | } | |
6a19b8fe JB |
205 | if (c == L'>') { |
206 | c = fgetwc(source); | |
207 | token[tokenFound].value[i] = c; | |
4b580abd JB |
208 | i++; |
209 | goto MC1; | |
210 | } | |
6a19b8fe JB |
211 | if (c == L'=') { |
212 | c = fgetwc(source); | |
213 | token[tokenFound].value[i] = c; | |
4b580abd JB |
214 | i++; |
215 | goto S1SS1; | |
216 | } | |
6a19b8fe | 217 | if (c == WEOF) { |
d3eb30ef JB |
218 | goto FIN; |
219 | } | |
4b580abd JB |
220 | |
221 | NPARA: | |
222 | tokenType = NPARA; | |
223 | return 1; | |
224 | ||
225 | MOT: | |
226 | tokenType = MOT; | |
227 | return 1; | |
228 | ||
229 | MC2: | |
6a19b8fe | 230 | if (isSeparator() || c == WEOF) { |
4b580abd JB |
231 | goto MOTCLE; |
232 | } | |
233 | ||
234 | MOTCLE: | |
235 | tokenType = MOTCLE; | |
236 | return 1; | |
237 | ||
238 | FIN: | |
239 | tokenType = FIN; | |
240 | return 1; | |
241 | ||
242 | error: | |
243 | tokenType = FIN; | |
244 | return -1; | |
245 | } | |
96964f3e | 246 | |
6a19b8fe | 247 | int main() { |
96964f3e JB |
248 | |
249 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : | |
250 | source = fopen("test.txt", "r"); | |
251 | // Cree et ouvre un fichier target.html en lecture/ecriture | |
252 | // avec suppression du contenu au prealable : | |
253 | target = fopen("target.html", "w+"); | |
254 | ||
255 | if (source == NULL) { | |
256 | printf("Impossible d'ouvrir le fichier source\n"); | |
257 | return -1; | |
258 | } | |
259 | ||
260 | if (target == NULL) { | |
261 | printf("Impossible d'ouvrir le fichier target\n"); | |
262 | return -1; | |
263 | } | |
264 | ||
4b580abd | 265 | do { |
6a19b8fe JB |
266 | c = fgetwc(source); // lecture du caractere suivant du fichier source |
267 | token[tokenFound].value[i] = c; | |
4b580abd JB |
268 | i++; |
269 | int scanrt = scanner(); | |
270 | if (scanrt == -1) { | |
6a19b8fe | 271 | wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value); |
4b580abd JB |
272 | exit(EXIT_FAILURE); |
273 | } | |
6a19b8fe JB |
274 | if (c != WEOF) { |
275 | wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value); | |
4b580abd | 276 | } else { |
6a19b8fe | 277 | wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]); |
4b580abd | 278 | } |
6a19b8fe | 279 | token[tokenFound].type = tokenTypestr[tokenType]; |
28280a4c | 280 | tokenFound++; |
6a19b8fe | 281 | // reinit token.value array counter |
4b580abd | 282 | i = 0; |
6a19b8fe JB |
283 | //} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte |
284 | } while (!feof(source)); // tant que la fin du fichier n'est pas atteinte | |
96964f3e JB |
285 | |
286 | if (source != NULL) fclose(source); // fermeture du fichier source | |
287 | if (target != NULL) fclose(target); // fermeture du fichier target | |
288 | ||
289 | return 0; | |
290 | } |