Beautify the token list output.
[TP_AL_C.git] / lexer / main.c
CommitLineData
96964f3e
JB
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
e9a51b68 6
6a19b8fe
JB
/* Number of slots in the global token table. */
#define TOKEN_MAX 500

/* Number of wide characters (terminator included) a token value can hold. */
#define TOKEN_VALUE_MAX 50

/*
 * One lexical token.
 *   type  - points at the printable name of the token kind.
 *   value - wide-character text of the token; used as a NUL-terminated
 *           wide string by the code that prints it.
 */
struct token_s {
    const char *type;
    wint_t value[TOKEN_VALUE_MAX];
};

/*
 * Global token table. Fully braced initializer: the first element is spelled
 * out, every remaining element is zero-initialized, so each value starts out
 * as an empty string and each type as NULL.
 */
struct token_s token[TOKEN_MAX] = {{NULL, {0}}};
dcfcd9ab
JB
15
/* Stream the scanner reads characters from, and the output stream opened by main. */
FILE *source = NULL;
FILE *target = NULL;

/* Current look-ahead character, shared by the scanner and its predicates. */
wint_t c;

/* Number of tokens recorded so far; also the index of the token being built. */
unsigned int tokenFound = 0;

/* Kinds of token the scanner can report; tokenType holds the latest one. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/*
 * Printable name of each token kind, indexed by enum TokenType.
 * Designated initializers tie every name to its enumerator so the table
 * cannot silently drift if the enum is reordered.
 */
const char *tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};
28
/* It looks silly to check each character individually, but for debugging it's just the way to go */
4b580abd 30bool istAlpha() {
6a19b8fe
JB
31 if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
32 c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
33 c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
34 c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
35 c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
36 c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
37 c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
38 c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
39 c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
40 c == L'\''|| c == L'#' || \
41 c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
42 c == L'7' || c == L'8' || c == L'9' || \
43 c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
44 c == L'ù' || c == L'û' || \
45 c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
46 c == L'Ù' || c == L'Û') {
4b580abd
JB
47 return true;
48 }
49 return false;
50}
51
52bool isSeparator() {
6a19b8fe 53 if (c == L'\t' || c == L' ' || c == L'\n') {
4b580abd
JB
54 return true;
55 }
56 return false;
57}
58
59int scanner() {
62426b89
JB
60 unsigned int i = 0;
61 wchar_t m[6];
4b580abd 62
4b580abd 63init:
6a19b8fe
JB
64 if (c == L' ' || c == L'\t') {
65 c = fgetwc(source);
4b580abd
JB
66 goto init;
67 }
6a19b8fe
JB
68 if (c == L'\n') {
69 c = fgetwc(source);
fde9417f
JB
70 goto initLV1;
71 }
6a19b8fe
JB
72 if (c == L'>') {
73 c = fgetwc(source);
4b580abd
JB
74 goto MC1;
75 }
6a19b8fe
JB
76 if (c == L'=') {
77 c = fgetwc(source);
4b580abd
JB
78 goto S1SS1;
79 }
80 if (istAlpha()) {
6a19b8fe 81 token[tokenFound].value[i] = c;
4b580abd 82 i++;
62426b89 83 c = fgetwc(source);
4b580abd
JB
84 goto M1;
85 }
6a19b8fe 86 if (c == WEOF) {
4b580abd 87 goto FIN;
4b580abd 88 }
62426b89 89 goto error;
4b580abd
JB
90
91MC1:
62426b89
JB
92 if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) {
93 wcscpy((wchar_t*)token[tokenFound].value, L">Auteur");
6a19b8fe 94 c = fgetwc(source);
62426b89 95 goto MC2;
4b580abd 96 }
62426b89
JB
97 if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) {
98 wcscpy((wchar_t*)token[tokenFound].value, L">Titre");
6a19b8fe 99 c = fgetwc(source);
4b580abd
JB
100 goto MC2;
101 }
62426b89 102 goto error;
4b580abd
JB
103
104S1SS1:
6a19b8fe
JB
105 if (c == L'=') {
106 c = fgetwc(source);
4b580abd
JB
107 goto SS2;
108 }
6a19b8fe 109 if (isSeparator() || c == WEOF) {
d3eb30ef
JB
110 goto SECTION;
111 }
62426b89 112 goto error;
d3eb30ef 113
4b580abd 114SS2:
6a19b8fe 115 if (isSeparator() || c == WEOF) {
d3eb30ef 116 goto SSECTION;
4b580abd 117 }
62426b89 118 goto error;
4b580abd
JB
119
120SECTION:
121 tokenType = SECTION;
b5cd8f86 122 return EXIT_SUCCESS;
4b580abd 123
d3eb30ef
JB
124SSECTION:
125 tokenType = SSECTION;
b5cd8f86 126 return EXIT_SUCCESS;
d3eb30ef 127
4b580abd
JB
128M1:
129 if (istAlpha()) {
6a19b8fe 130 token[tokenFound].value[i] = c;
4b580abd 131 i++;
62426b89 132 c = fgetwc(source);
4b580abd
JB
133 goto M1;
134 }
6a19b8fe 135 if (isSeparator() || c == WEOF) {
4b580abd
JB
136 goto MOT;
137 }
62426b89 138 goto error;
4b580abd
JB
139
140initLV1:
6a19b8fe
JB
141 if (c == L' ' || c == L'\t') {
142 c = fgetwc(source);
4b580abd
JB
143 goto initLV1;
144 }
6a19b8fe
JB
145 if (c == L'\n') {
146 c = fgetwc(source);
fde9417f
JB
147 goto initLV1LV2;
148 }
4b580abd 149 if (istAlpha()) {
6a19b8fe 150 token[tokenFound].value[i] = c;
4b580abd 151 i++;
62426b89 152 c = fgetwc(source);
4b580abd
JB
153 goto M1;
154 }
6a19b8fe
JB
155 if (c == L'=') {
156 c = fgetwc(source);
4b580abd
JB
157 goto S1SS1;
158 }
6a19b8fe
JB
159 if (c == L'>') {
160 c = fgetwc(source);
4b580abd
JB
161 goto MC1;
162 }
6a19b8fe 163 if (c == WEOF) {
4b580abd
JB
164 goto FIN;
165 }
62426b89 166 goto error;
4b580abd
JB
167
168initLV1LV2:
169 if (isSeparator()) {
6a19b8fe 170 c = fgetwc(source);
4b580abd
JB
171 goto initLV1LV2;
172 }
fde9417f
JB
173 if (istAlpha()) {
174 goto NPARA;
175 }
6a19b8fe
JB
176 if (c == L'>') {
177 c = fgetwc(source);
4b580abd
JB
178 goto MC1;
179 }
6a19b8fe
JB
180 if (c == L'=') {
181 c = fgetwc(source);
4b580abd
JB
182 goto S1SS1;
183 }
6a19b8fe 184 if (c == WEOF) {
d3eb30ef
JB
185 goto FIN;
186 }
62426b89 187 goto error;
4b580abd
JB
188
189NPARA:
190 tokenType = NPARA;
b5cd8f86 191 return EXIT_SUCCESS;
4b580abd
JB
192
193MOT:
194 tokenType = MOT;
b5cd8f86 195 return EXIT_SUCCESS;
4b580abd
JB
196
197MC2:
6a19b8fe 198 if (isSeparator() || c == WEOF) {
4b580abd
JB
199 goto MOTCLE;
200 }
62426b89 201 goto error;
4b580abd
JB
202
203MOTCLE:
204 tokenType = MOTCLE;
b5cd8f86 205 return EXIT_SUCCESS;
4b580abd
JB
206
207FIN:
208 tokenType = FIN;
b5cd8f86 209 return EXIT_SUCCESS;
4b580abd
JB
210
211error:
212 tokenType = FIN;
b5cd8f86 213 return EXIT_FAILURE;
4b580abd 214}
96964f3e 215
6a19b8fe 216int main() {
96964f3e 217 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
62426b89 218 source = fopen("test.txt", "r+");
96964f3e
JB
219 // Cree et ouvre un fichier target.html en lecture/ecriture
220 // avec suppression du contenu au prealable :
221 target = fopen("target.html", "w+");
222
223 if (source == NULL) {
224 printf("Impossible d'ouvrir le fichier source\n");
225 return -1;
226 }
227
228 if (target == NULL) {
229 printf("Impossible d'ouvrir le fichier target\n");
230 return -1;
231 }
232
62426b89 233 c = fgetwc(source); // lecture du premier caractere
4b580abd 234 do {
4b580abd 235 int scanrt = scanner();
b5cd8f86 236 if (scanrt == EXIT_FAILURE) {
6a19b8fe 237 wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
4b580abd
JB
238 exit(EXIT_FAILURE);
239 }
3db137c9 240 if (tokenType == MOT || tokenType == MOTCLE) {
62426b89 241 wprintf(L"%20s: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
4b580abd 242 } else {
62426b89 243 wprintf(L"%20s\n", tokenTypestr[tokenType]);
4b580abd 244 }
6a19b8fe 245 token[tokenFound].type = tokenTypestr[tokenType];
28280a4c 246 tokenFound++;
62426b89 247 } while (tokenType != FIN); // tant que la fin du fichier n'est pas atteinte
96964f3e
JB
248
249 if (source != NULL) fclose(source); // fermeture du fichier source
250 if (target != NULL) fclose(target); // fermeture du fichier target
251
b5cd8f86 252 return EXIT_SUCCESS;
96964f3e 253}