Add a FIXME comment about multibyte character support.
[TP_AL_C.git] / lexer / main.c
CommitLineData
96964f3e
JB
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
e9a51b68 6
6a19b8fe
JB
7#define TOKEN_MAX 500
8
/*
 * One token produced by the scanner.
 *
 * type:  points at one of the static names in tokenTypestr (never owned).
 * value: NUL-terminated wide string holding the token text. It is stored as
 *        wchar_t (not wint_t) because it is filled with wcscpy() and printed
 *        with "%ls", both of which require a wchar_t string; wint_t is only
 *        meant for single values returned by fgetwc() and may have a
 *        different size.
 */
struct token_s {
    const char* type;
    wchar_t value[50];
};
13
14struct token_s token[TOKEN_MAX] = {NULL, 0};
dcfcd9ab
JB
15
16FILE *source = NULL, *target = NULL;
6a19b8fe
JB
17wint_t c;
18unsigned int tokenFound = 0;
4b580abd
JB
/* Kinds of token the scanner reports; tokenType holds the latest one. */
enum TokenType {
    MOTCLE   = 0,
    SECTION  = 1,
    SSECTION = 2,
    NPARA    = 3,
    MOT      = 4,
    FIN      = 5
} tokenType;

/* Printable name of each TokenType, indexed by the enumerator itself. */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};
28
91e46777 29/* It looks silly to check for each characters but for debugging, it's just the way to go */
4b580abd 30bool istAlpha() {
6a19b8fe
JB
31 if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
32 c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
33 c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
34 c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
35 c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
36 c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
37 c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
38 c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
39 c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
40 c == L'\''|| c == L'#' || \
41 c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
42 c == L'7' || c == L'8' || c == L'9' || \
fa60d3b4 43 // FIXME: Accentued characters (aka multibytes characters) support is still buggy
6a19b8fe
JB
44 c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
45 c == L'ù' || c == L'û' || \
46 c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
47 c == L'Ù' || c == L'Û') {
4b580abd
JB
48 return true;
49 }
50 return false;
51}
52
53bool isSeparator() {
6a19b8fe 54 if (c == L'\t' || c == L' ' || c == L'\n') {
4b580abd
JB
55 return true;
56 }
57 return false;
58}
59
60int scanner() {
62426b89
JB
61 unsigned int i = 0;
62 wchar_t m[6];
4b580abd 63
4b580abd 64init:
6a19b8fe
JB
65 if (c == L' ' || c == L'\t') {
66 c = fgetwc(source);
4b580abd
JB
67 goto init;
68 }
6a19b8fe
JB
69 if (c == L'\n') {
70 c = fgetwc(source);
fde9417f
JB
71 goto initLV1;
72 }
6a19b8fe
JB
73 if (c == L'>') {
74 c = fgetwc(source);
4b580abd
JB
75 goto MC1;
76 }
6a19b8fe
JB
77 if (c == L'=') {
78 c = fgetwc(source);
4b580abd
JB
79 goto S1SS1;
80 }
81 if (istAlpha()) {
6a19b8fe 82 token[tokenFound].value[i] = c;
4b580abd 83 i++;
62426b89 84 c = fgetwc(source);
4b580abd
JB
85 goto M1;
86 }
6a19b8fe 87 if (c == WEOF) {
4b580abd 88 goto FIN;
4b580abd 89 }
62426b89 90 goto error;
4b580abd
JB
91
92MC1:
62426b89
JB
93 if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) {
94 wcscpy((wchar_t*)token[tokenFound].value, L">Auteur");
6a19b8fe 95 c = fgetwc(source);
62426b89 96 goto MC2;
4b580abd 97 }
62426b89
JB
98 if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) {
99 wcscpy((wchar_t*)token[tokenFound].value, L">Titre");
6a19b8fe 100 c = fgetwc(source);
4b580abd
JB
101 goto MC2;
102 }
62426b89 103 goto error;
4b580abd
JB
104
105S1SS1:
6a19b8fe
JB
106 if (c == L'=') {
107 c = fgetwc(source);
4b580abd
JB
108 goto SS2;
109 }
6a19b8fe 110 if (isSeparator() || c == WEOF) {
d3eb30ef
JB
111 goto SECTION;
112 }
62426b89 113 goto error;
d3eb30ef 114
4b580abd 115SS2:
6a19b8fe 116 if (isSeparator() || c == WEOF) {
d3eb30ef 117 goto SSECTION;
4b580abd 118 }
62426b89 119 goto error;
4b580abd
JB
120
121SECTION:
122 tokenType = SECTION;
b5cd8f86 123 return EXIT_SUCCESS;
4b580abd 124
d3eb30ef
JB
125SSECTION:
126 tokenType = SSECTION;
b5cd8f86 127 return EXIT_SUCCESS;
d3eb30ef 128
4b580abd
JB
129M1:
130 if (istAlpha()) {
6a19b8fe 131 token[tokenFound].value[i] = c;
4b580abd 132 i++;
62426b89 133 c = fgetwc(source);
4b580abd
JB
134 goto M1;
135 }
6a19b8fe 136 if (isSeparator() || c == WEOF) {
4b580abd
JB
137 goto MOT;
138 }
62426b89 139 goto error;
4b580abd
JB
140
141initLV1:
6a19b8fe
JB
142 if (c == L' ' || c == L'\t') {
143 c = fgetwc(source);
4b580abd
JB
144 goto initLV1;
145 }
6a19b8fe
JB
146 if (c == L'\n') {
147 c = fgetwc(source);
fde9417f
JB
148 goto initLV1LV2;
149 }
4b580abd 150 if (istAlpha()) {
6a19b8fe 151 token[tokenFound].value[i] = c;
4b580abd 152 i++;
62426b89 153 c = fgetwc(source);
4b580abd
JB
154 goto M1;
155 }
6a19b8fe
JB
156 if (c == L'=') {
157 c = fgetwc(source);
4b580abd
JB
158 goto S1SS1;
159 }
6a19b8fe
JB
160 if (c == L'>') {
161 c = fgetwc(source);
4b580abd
JB
162 goto MC1;
163 }
6a19b8fe 164 if (c == WEOF) {
4b580abd
JB
165 goto FIN;
166 }
62426b89 167 goto error;
4b580abd
JB
168
169initLV1LV2:
170 if (isSeparator()) {
6a19b8fe 171 c = fgetwc(source);
4b580abd
JB
172 goto initLV1LV2;
173 }
fde9417f
JB
174 if (istAlpha()) {
175 goto NPARA;
176 }
6a19b8fe
JB
177 if (c == L'>') {
178 c = fgetwc(source);
4b580abd
JB
179 goto MC1;
180 }
6a19b8fe
JB
181 if (c == L'=') {
182 c = fgetwc(source);
4b580abd
JB
183 goto S1SS1;
184 }
6a19b8fe 185 if (c == WEOF) {
d3eb30ef
JB
186 goto FIN;
187 }
62426b89 188 goto error;
4b580abd
JB
189
190NPARA:
191 tokenType = NPARA;
b5cd8f86 192 return EXIT_SUCCESS;
4b580abd
JB
193
194MOT:
195 tokenType = MOT;
b5cd8f86 196 return EXIT_SUCCESS;
4b580abd
JB
197
198MC2:
6a19b8fe 199 if (isSeparator() || c == WEOF) {
4b580abd
JB
200 goto MOTCLE;
201 }
62426b89 202 goto error;
4b580abd
JB
203
204MOTCLE:
205 tokenType = MOTCLE;
b5cd8f86 206 return EXIT_SUCCESS;
4b580abd
JB
207
208FIN:
209 tokenType = FIN;
b5cd8f86 210 return EXIT_SUCCESS;
4b580abd
JB
211
212error:
213 tokenType = FIN;
b5cd8f86 214 return EXIT_FAILURE;
4b580abd 215}
96964f3e 216
6a19b8fe 217int main() {
96964f3e 218 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
62426b89 219 source = fopen("test.txt", "r+");
96964f3e
JB
220 // Cree et ouvre un fichier target.html en lecture/ecriture
221 // avec suppression du contenu au prealable :
222 target = fopen("target.html", "w+");
223
224 if (source == NULL) {
225 printf("Impossible d'ouvrir le fichier source\n");
226 return -1;
227 }
228
229 if (target == NULL) {
230 printf("Impossible d'ouvrir le fichier target\n");
231 return -1;
232 }
233
62426b89 234 c = fgetwc(source); // lecture du premier caractere
4b580abd 235 do {
4b580abd 236 int scanrt = scanner();
b5cd8f86 237 if (scanrt == EXIT_FAILURE) {
6a19b8fe 238 wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
4b580abd
JB
239 exit(EXIT_FAILURE);
240 }
3db137c9 241 if (tokenType == MOT || tokenType == MOTCLE) {
62426b89 242 wprintf(L"%20s: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
4b580abd 243 } else {
62426b89 244 wprintf(L"%20s\n", tokenTypestr[tokenType]);
4b580abd 245 }
6a19b8fe 246 token[tokenFound].type = tokenTypestr[tokenType];
28280a4c 247 tokenFound++;
62426b89 248 } while (tokenType != FIN); // tant que la fin du fichier n'est pas atteinte
96964f3e
JB
249
250 if (source != NULL) fclose(source); // fermeture du fichier source
251 if (target != NULL) fclose(target); // fermeture du fichier target
252
b5cd8f86 253 return EXIT_SUCCESS;
96964f3e 254}