Introduction of wchar.h to support more characters.
[TP_AL_C.git] / lexer / main.c
CommitLineData
96964f3e
JB
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
e9a51b68 6
6a19b8fe
JB
/* Maximum number of tokens the lexer can record. */
#define TOKEN_MAX 500

/*
 * One recognised token.
 *   type  - points at one of the token type names (not owned by the token).
 *   value - the characters consumed while recognising the token, kept as a
 *           NUL-terminated wide string. It is declared as wchar_t (not
 *           wint_t) because it is printed with the %ls conversion, which
 *           requires a pointer to wchar_t.
 */
struct token_s {
    const char* type;
    wchar_t value[50];
};

/* Token table; zero-initialised so every value starts as an empty string. */
struct token_s token[TOKEN_MAX] = {0};
dcfcd9ab
JB
15
/* Input stream that is tokenised. */
FILE *source = NULL;
/* Output stream opened alongside the input. */
FILE *target = NULL;
/* Most recently read wide character (WEOF once reading has stopped). */
wint_t c;
/* Number of tokens recorded so far; also the index of the token being built. */
unsigned int tokenFound = 0;
4b580abd
JB
/* Kinds of token the scanner can report. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
};

/* Kind of the token most recently recognised by the scanner. */
enum TokenType tokenType;

/* Printable name of each token kind, indexed by enum TokenType. */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};

/* Write position inside the value of the token currently being built. */
unsigned int i = 0;
4b580abd 29
91e46777 30/* It looks silly to check for each characters but for debugging, it's just the way to go */
4b580abd 31bool istAlpha() {
6a19b8fe
JB
32 if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
33 c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
34 c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
35 c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
36 c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
37 c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
38 c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
39 c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
40 c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
41 c == L'\''|| c == L'#' || \
42 c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
43 c == L'7' || c == L'8' || c == L'9' || \
44 c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
45 c == L'ù' || c == L'û' || \
46 c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
47 c == L'Ù' || c == L'Û') {
4b580abd
JB
48 return true;
49 }
50 return false;
51}
52
53bool isSeparator() {
6a19b8fe 54 if (c == L'\t' || c == L' ' || c == L'\n') {
4b580abd
JB
55 return true;
56 }
57 return false;
58}
59
60int scanner() {
6a19b8fe
JB
61 const wchar_t* Titre = L"Titre";
62 const wchar_t* Auteur = L"Auteur";
da74c1e0 63 unsigned int j = 0;
4b580abd
JB
64
65// The main loop get the next character
66init:
6a19b8fe
JB
67 if (c == L' ' || c == L'\t') {
68 c = fgetwc(source);
69 token[tokenFound].value[i] = c;
4b580abd
JB
70 i++;
71 goto init;
72 }
6a19b8fe
JB
73 if (c == L'\n') {
74 c = fgetwc(source);
75 token[tokenFound].value[i] = c;
fde9417f
JB
76 i++;
77 goto initLV1;
78 }
6a19b8fe
JB
79 if (c == L'>') {
80 c = fgetwc(source);
81 token[tokenFound].value[i] = c;
4b580abd
JB
82 i++;
83 goto MC1;
84 }
6a19b8fe
JB
85 if (c == L'=') {
86 c = fgetwc(source);
87 token[tokenFound].value[i] = c;
4b580abd
JB
88 i++;
89 goto S1SS1;
90 }
91 if (istAlpha()) {
6a19b8fe
JB
92 c = fgetwc(source);
93 token[tokenFound].value[i] = c;
4b580abd
JB
94 i++;
95 goto M1;
96 }
6a19b8fe 97 if (c == WEOF) {
4b580abd
JB
98 goto FIN;
99 } else {
100 goto error;
101 }
102
103MC1:
dcfcd9ab 104 // FIXME: Partial match need a rewind in the characters extraction from the file
6a19b8fe
JB
105 if (c == (wint_t)Titre[j] && j < wcslen(Titre) - 1) {
106 c = fgetwc(source);
107 token[tokenFound].value[i] = c;
4b580abd
JB
108 i++;
109 j++;
110 goto MC1;
111 }
6a19b8fe
JB
112 if (c == (wint_t)Auteur[j] && j < wcslen(Auteur) - 1) {
113 c = fgetwc(source);
114 token[tokenFound].value[i] = c;
4b580abd
JB
115 i++;
116 j++;
117 goto MC1;
118 } else {
6a19b8fe
JB
119 c = fgetwc(source);
120 token[tokenFound].value[i] = c;
4b580abd
JB
121 i++;
122 goto MC2;
123 }
124
125S1SS1:
6a19b8fe
JB
126 if (c == L'=') {
127 c = fgetwc(source);
128 token[tokenFound].value[i] = c;
4b580abd
JB
129 i++;
130 goto SS2;
131 }
6a19b8fe 132 if (isSeparator() || c == WEOF) {
d3eb30ef
JB
133 goto SECTION;
134 }
135
4b580abd 136SS2:
6a19b8fe 137 if (isSeparator() || c == WEOF) {
d3eb30ef 138 goto SSECTION;
4b580abd
JB
139 }
140
141SECTION:
142 tokenType = SECTION;
143 return 1;
144
d3eb30ef
JB
145SSECTION:
146 tokenType = SSECTION;
147 return 1;
148
4b580abd
JB
149M1:
150 if (istAlpha()) {
6a19b8fe
JB
151 c = fgetwc(source);
152 token[tokenFound].value[i] = c;
4b580abd
JB
153 i++;
154 goto M1;
155 }
6a19b8fe 156 if (isSeparator() || c == WEOF) {
4b580abd
JB
157 goto MOT;
158 }
159
160initLV1:
6a19b8fe
JB
161 if (c == L' ' || c == L'\t') {
162 c = fgetwc(source);
163 token[tokenFound].value[i] = c;
4b580abd
JB
164 i++;
165 goto initLV1;
166 }
6a19b8fe
JB
167 if (c == L'\n') {
168 c = fgetwc(source);
169 token[tokenFound].value[i] = c;
fde9417f
JB
170 i++;
171 goto initLV1LV2;
172 }
4b580abd 173 if (istAlpha()) {
6a19b8fe
JB
174 c = fgetwc(source);
175 token[tokenFound].value[i] = c;
4b580abd
JB
176 i++;
177 goto M1;
178 }
6a19b8fe
JB
179 if (c == L'=') {
180 c = fgetwc(source);
181 token[tokenFound].value[i] = c;
4b580abd
JB
182 i++;
183 goto S1SS1;
184 }
6a19b8fe
JB
185 if (c == L'>') {
186 c = fgetwc(source);
187 token[tokenFound].value[i] = c;
4b580abd
JB
188 i++;
189 goto MC1;
190 }
6a19b8fe 191 if (c == WEOF) {
4b580abd
JB
192 goto FIN;
193 }
194
195initLV1LV2:
196 if (isSeparator()) {
6a19b8fe
JB
197 c = fgetwc(source);
198 token[tokenFound].value[i] = c;
4b580abd
JB
199 i++;
200 goto initLV1LV2;
201 }
fde9417f
JB
202 if (istAlpha()) {
203 goto NPARA;
204 }
6a19b8fe
JB
205 if (c == L'>') {
206 c = fgetwc(source);
207 token[tokenFound].value[i] = c;
4b580abd
JB
208 i++;
209 goto MC1;
210 }
6a19b8fe
JB
211 if (c == L'=') {
212 c = fgetwc(source);
213 token[tokenFound].value[i] = c;
4b580abd
JB
214 i++;
215 goto S1SS1;
216 }
6a19b8fe 217 if (c == WEOF) {
d3eb30ef
JB
218 goto FIN;
219 }
4b580abd
JB
220
221NPARA:
222 tokenType = NPARA;
223 return 1;
224
225MOT:
226 tokenType = MOT;
227 return 1;
228
229MC2:
6a19b8fe 230 if (isSeparator() || c == WEOF) {
4b580abd
JB
231 goto MOTCLE;
232 }
233
234MOTCLE:
235 tokenType = MOTCLE;
236 return 1;
237
238FIN:
239 tokenType = FIN;
240 return 1;
241
242error:
243 tokenType = FIN;
244 return -1;
245}
96964f3e 246
6a19b8fe 247int main() {
96964f3e
JB
248
249 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
250 source = fopen("test.txt", "r");
251 // Cree et ouvre un fichier target.html en lecture/ecriture
252 // avec suppression du contenu au prealable :
253 target = fopen("target.html", "w+");
254
255 if (source == NULL) {
256 printf("Impossible d'ouvrir le fichier source\n");
257 return -1;
258 }
259
260 if (target == NULL) {
261 printf("Impossible d'ouvrir le fichier target\n");
262 return -1;
263 }
264
4b580abd 265 do {
6a19b8fe
JB
266 c = fgetwc(source); // lecture du caractere suivant du fichier source
267 token[tokenFound].value[i] = c;
4b580abd
JB
268 i++;
269 int scanrt = scanner();
270 if (scanrt == -1) {
6a19b8fe 271 wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
4b580abd
JB
272 exit(EXIT_FAILURE);
273 }
6a19b8fe
JB
274 if (c != WEOF) {
275 wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
4b580abd 276 } else {
6a19b8fe 277 wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]);
4b580abd 278 }
6a19b8fe 279 token[tokenFound].type = tokenTypestr[tokenType];
28280a4c 280 tokenFound++;
6a19b8fe 281 // reinit token.value array counter
4b580abd 282 i = 0;
6a19b8fe
JB
283 //} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte
284 } while (!feof(source)); // tant que la fin du fichier n'est pas atteinte
96964f3e
JB
285
286 if (source != NULL) fclose(source); // fermeture du fichier source
287 if (target != NULL) fclose(target); // fermeture du fichier target
288
289 return 0;
290}