Commit | Line | Data |
---|---|---|
96964f3e JB |
1 | #include <stdlib.h> |
2 | #include <stdio.h> | |
3 | #include <string.h> | |
4b580abd | 4 | #include <stdbool.h> |
e9a51b68 | 5 | |
96964f3e JB |
/* Input stream being scanned and output stream for the generated document. */
FILE *source = NULL, *target = NULL;

/*
 * Current lookahead character, as returned by fgetc().
 * It is an int (not a char) so that EOF stays distinguishable from every
 * valid byte value: with a plain char, byte 0xFF can compare equal to EOF
 * (signed char) or EOF can never be detected at all (unsigned char).
 */
int c;

/* Number of characters currently stored in tokenValue. */
unsigned int i = 0;

/* Text of the token being scanned; it is printed with %s, so it must stay NUL-terminated. */
char tokenValue[50];

/* Kinds of token the scanner can report; the order must match tokenTypestr. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable name of each TokenType value, indexed by the enum value. */
const char *tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
19 | ||
d3eb30ef | 20 | /* This looks silly to check for each characters but for debugging, it's just the way to go */ |
4b580abd JB |
21 | bool istAlpha() { |
22 | if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \ | |
23 | c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \ | |
24 | c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \ | |
25 | c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \ | |
26 | c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \ | |
27 | c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \ | |
28 | c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \ | |
29 | c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \ | |
30 | c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \ | |
31 | c == '\''|| c == '#' || \ | |
32 | c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \ | |
33 | c == '7' || c == '8' || c == '9') { | |
34 | return true; | |
35 | } | |
36 | return false; | |
37 | } | |
38 | ||
39 | bool isSeparator() { | |
40 | if (c == '\t' || c == ' ' || c == '\n') { | |
41 | return true; | |
42 | } | |
43 | return false; | |
44 | } | |
45 | ||
46 | int scanner() { | |
47 | const char* Titre = "Titre"; | |
48 | const char* Auteur = "Auteur"; | |
49 | unsigned int j = 0; | |
50 | ||
51 | // The main loop get the next character | |
52 | init: | |
53 | if (c == ' ' || c == '\t') { | |
54 | c = fgetc(source); | |
55 | tokenValue[i] = c; | |
56 | i++; | |
57 | goto init; | |
58 | } | |
fde9417f JB |
59 | if (c == '\n') { |
60 | c = fgetc(source); | |
61 | tokenValue[i] = c; | |
62 | i++; | |
63 | goto initLV1; | |
64 | } | |
4b580abd JB |
65 | if (c == '>') { |
66 | c = fgetc(source); | |
67 | tokenValue[i] = c; | |
68 | i++; | |
69 | goto MC1; | |
70 | } | |
71 | if (c == '=') { | |
72 | c = fgetc(source); | |
73 | tokenValue[i] = c; | |
74 | i++; | |
75 | goto S1SS1; | |
76 | } | |
77 | if (istAlpha()) { | |
78 | c = fgetc(source); | |
79 | tokenValue[i] = c; | |
80 | i++; | |
81 | goto M1; | |
82 | } | |
4b580abd JB |
83 | if (c == EOF) { |
84 | goto FIN; | |
85 | } else { | |
86 | goto error; | |
87 | } | |
88 | ||
89 | MC1: | |
90 | if (c == Titre[j] && j < strlen(Titre) - 1) { | |
91 | c = fgetc(source); | |
92 | tokenValue[i] = c; | |
93 | i++; | |
94 | j++; | |
95 | goto MC1; | |
96 | } | |
97 | if (c == Auteur[j] && j < strlen(Auteur) - 1) { | |
98 | c = fgetc(source); | |
99 | tokenValue[i] = c; | |
100 | i++; | |
101 | j++; | |
102 | goto MC1; | |
103 | } else { | |
104 | c = fgetc(source); | |
105 | tokenValue[i] = c; | |
106 | i++; | |
107 | goto MC2; | |
108 | } | |
109 | ||
110 | S1SS1: | |
4b580abd JB |
111 | if (c == '=') { |
112 | c = fgetc(source); | |
113 | tokenValue[i] = c; | |
114 | i++; | |
115 | goto SS2; | |
116 | } | |
d3eb30ef JB |
117 | if (isSeparator() || c == EOF) { |
118 | goto SECTION; | |
119 | } | |
120 | ||
4b580abd JB |
121 | SS2: |
122 | if (isSeparator() || c == EOF) { | |
d3eb30ef | 123 | goto SSECTION; |
4b580abd JB |
124 | } |
125 | ||
126 | SECTION: | |
127 | tokenType = SECTION; | |
128 | return 1; | |
129 | ||
d3eb30ef JB |
130 | SSECTION: |
131 | tokenType = SSECTION; | |
132 | return 1; | |
133 | ||
4b580abd JB |
134 | M1: |
135 | if (istAlpha()) { | |
136 | c = fgetc(source); | |
137 | tokenValue[i] = c; | |
138 | i++; | |
139 | goto M1; | |
140 | } | |
141 | if (isSeparator() || c == EOF) { | |
142 | goto MOT; | |
143 | } | |
144 | ||
145 | initLV1: | |
fde9417f | 146 | if (c == ' ' || c == '\t') { |
4b580abd JB |
147 | c = fgetc(source); |
148 | tokenValue[i] = c; | |
149 | i++; | |
150 | goto initLV1; | |
151 | } | |
fde9417f JB |
152 | if (c == '\n') { |
153 | c = fgetc(source); | |
154 | tokenValue[i] = c; | |
155 | i++; | |
156 | goto initLV1LV2; | |
157 | } | |
4b580abd JB |
158 | if (istAlpha()) { |
159 | c = fgetc(source); | |
160 | tokenValue[i] = c; | |
161 | i++; | |
162 | goto M1; | |
163 | } | |
164 | if (c == '=') { | |
165 | c = fgetc(source); | |
166 | tokenValue[i] = c; | |
167 | i++; | |
168 | goto S1SS1; | |
169 | } | |
170 | if (c == '>') { | |
171 | c = fgetc(source); | |
172 | tokenValue[i] = c; | |
173 | i++; | |
174 | goto MC1; | |
175 | } | |
4b580abd JB |
176 | if (c == EOF) { |
177 | goto FIN; | |
178 | } | |
179 | ||
180 | initLV1LV2: | |
181 | if (isSeparator()) { | |
182 | c = fgetc(source); | |
183 | tokenValue[i] = c; | |
184 | i++; | |
185 | goto initLV1LV2; | |
186 | } | |
fde9417f JB |
187 | if (istAlpha()) { |
188 | goto NPARA; | |
189 | } | |
4b580abd JB |
190 | if (c == '>') { |
191 | c = fgetc(source); | |
192 | tokenValue[i] = c; | |
193 | i++; | |
194 | goto MC1; | |
195 | } | |
196 | if (c == '=') { | |
197 | c = fgetc(source); | |
198 | tokenValue[i] = c; | |
199 | i++; | |
200 | goto S1SS1; | |
201 | } | |
d3eb30ef JB |
202 | if (c == EOF) { |
203 | goto FIN; | |
204 | } | |
4b580abd JB |
205 | |
206 | NPARA: | |
207 | tokenType = NPARA; | |
208 | return 1; | |
209 | ||
210 | MOT: | |
211 | tokenType = MOT; | |
212 | return 1; | |
213 | ||
214 | MC2: | |
215 | if (isSeparator() || c == EOF) { | |
216 | goto MOTCLE; | |
217 | } | |
218 | ||
219 | MOTCLE: | |
220 | tokenType = MOTCLE; | |
221 | return 1; | |
222 | ||
223 | FIN: | |
224 | tokenType = FIN; | |
225 | return 1; | |
226 | ||
227 | error: | |
228 | tokenType = FIN; | |
229 | return -1; | |
230 | } | |
96964f3e JB |
231 | |
232 | int main (int argc, char const *argv[]) { | |
233 | ||
234 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : | |
235 | source = fopen("test.txt", "r"); | |
236 | // Cree et ouvre un fichier target.html en lecture/ecriture | |
237 | // avec suppression du contenu au prealable : | |
238 | target = fopen("target.html", "w+"); | |
239 | ||
240 | if (source == NULL) { | |
241 | printf("Impossible d'ouvrir le fichier source\n"); | |
242 | return -1; | |
243 | } | |
244 | ||
245 | if (target == NULL) { | |
246 | printf("Impossible d'ouvrir le fichier target\n"); | |
247 | return -1; | |
248 | } | |
249 | ||
4b580abd | 250 | do { |
96964f3e | 251 | c = fgetc(source); // lecture du caractere suivant du fichier source |
4b580abd JB |
252 | tokenValue[i] = c; |
253 | i++; | |
254 | int scanrt = scanner(); | |
255 | if (scanrt == -1) { | |
256 | printf ("Scanner error with token value: %s\n", tokenValue); | |
257 | exit(EXIT_FAILURE); | |
258 | } | |
259 | if (c != EOF) { | |
260 | printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue); | |
261 | } else { | |
262 | printf ("Token type found: %s\n", tokenTypestr[tokenType]); | |
263 | } | |
fde9417f | 264 | // reinit tokenValue |
4b580abd JB |
265 | i = 0; |
266 | memset(tokenValue, 0, sizeof(tokenValue)); | |
532c982b | 267 | } while (c != EOF); // tant que la fin du fichier n'est pas atteinte |
96964f3e JB |
268 | |
269 | if (source != NULL) fclose(source); // fermeture du fichier source | |
270 | if (target != NULL) fclose(target); // fermeture du fichier target | |
271 | ||
272 | return 0; | |
273 | } |