No need to zero the array of token values ends.
[TP_AL_C.git] / lexer / lexical_analyzer.c
CommitLineData
9ed84d89
JB
1/* Lexical analyzer */
2
3#include <stdlib.h>
4#include <stdbool.h>
5
6#include "global_vars.h"
7#include "print_helper.h"
8
/*
 * Current lookahead character of the lexer. scanner() and the character
 * predicates in this file all read it; scanner() refills it with fgetwc().
 */
wint_t c;
10
11/* It looks silly to check for each characters but for debugging, it's just the way to go */
12static bool isAlphaNum() {
13 if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
14 c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
15 c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
16 c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
17 c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
18 c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
19 c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
20 c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
21 c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
22 c == L'\''|| c == L'#' || \
23 c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
24 c == L'7' || c == L'8' || c == L'9' || \
7cfc17fb 25 //FIXME: Accentued characters (aka multibytes characters) support is still buggy
9ed84d89
JB
26 c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
27 c == L'ù' || c == L'û' || \
28 c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
29 c == L'Ù' || c == L'Û') {
30 return true;
31 }
32 return false;
33}
34
35static bool isSeparator() {
36 if (c == L'\t' || c == L' ' || c == L'\n') {
37 return true;
38 }
39 return false;
40}
41
42static bool isEOF() {
43 if (c == WEOF) {
44 return true;
45 }
46 return false;
47}
48
49int scanner() {
15ad4b5a 50 tokenValue[0] = 0;
9ed84d89
JB
51 unsigned int i = 0;
52 wchar_t m[6];
53
54init:
55 if (c == L' ' || c == L'\t') {
56 c = fgetwc(source);
57 goto init;
58 }
59 if (c == L'\n') {
60 c = fgetwc(source);
61 goto initLV1;
62 }
63 if (c == L'>') {
64 c = fgetwc(source);
65 goto MC1;
66 }
67 if (c == L'=') {
68 c = fgetwc(source);
69 goto S1SS1;
70 }
71 if (isAlphaNum()) {
72 token[tokenFound].value[i] = c;
15ad4b5a 73 tokenValue[i] = c;
9ed84d89
JB
74 i++;
75 c = fgetwc(source);
76 goto M1;
77 }
78 if (isEOF()) {
79 goto FIN;
80 }
81 goto error;
82
83MC1:
84 if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) {
85 wcscpy((wchar_t*)token[tokenFound].value, L">Auteur");
15ad4b5a 86 wcscpy((wchar_t*)tokenValue, L">Auteur");
9ed84d89
JB
87 c = fgetwc(source);
88 goto MC2;
89 }
90 if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) {
91 wcscpy((wchar_t*)token[tokenFound].value, L">Titre");
15ad4b5a 92 wcscpy((wchar_t*)tokenValue, L">Titre");
9ed84d89
JB
93 c = fgetwc(source);
94 goto MC2;
95 }
96 goto error;
97
98S1SS1:
99 if (c == L'=') {
100 c = fgetwc(source);
101 goto SS2;
102 }
103 if (isSeparator() || isEOF()) {
104 goto SECTION;
105 }
106 goto error;
107
108SS2:
109 if (isSeparator() || isEOF()) {
110 goto SSECTION;
111 }
112 goto error;
113
114SECTION:
115 tokenType = SECTION;
116 return EXIT_SUCCESS;
117
118SSECTION:
119 tokenType = SSECTION;
120 return EXIT_SUCCESS;
121
122M1:
123 if (isAlphaNum()) {
124 token[tokenFound].value[i] = c;
15ad4b5a 125 tokenValue[i] = c;
9ed84d89
JB
126 i++;
127 c = fgetwc(source);
128 goto M1;
129 }
130 if (isSeparator() || isEOF()) {
131 goto MOT;
132 }
133 goto error;
134
135initLV1:
136 if (c == L' ' || c == L'\t') {
137 c = fgetwc(source);
138 goto initLV1;
139 }
140 if (c == L'\n') {
141 c = fgetwc(source);
142 goto initLV1LV2;
143 }
144 if (isAlphaNum()) {
145 token[tokenFound].value[i] = c;
15ad4b5a 146 tokenValue[i] = c;
9ed84d89
JB
147 i++;
148 c = fgetwc(source);
149 goto M1;
150 }
151 if (c == L'=') {
152 c = fgetwc(source);
153 goto S1SS1;
154 }
155 if (c == L'>') {
156 c = fgetwc(source);
157 goto MC1;
158 }
159 if (isEOF()) {
160 goto FIN;
161 }
162 goto error;
163
164initLV1LV2:
165 if (isSeparator()) {
166 c = fgetwc(source);
167 goto initLV1LV2;
168 }
169 if (isAlphaNum()) {
170 goto NPARA;
171 }
172 if (c == L'>') {
173 c = fgetwc(source);
174 goto MC1;
175 }
176 if (c == L'=') {
177 c = fgetwc(source);
178 goto S1SS1;
179 }
180 if (isEOF()) {
181 goto FIN;
182 }
183 goto error;
184
185NPARA:
186 tokenType = NPARA;
187 return EXIT_SUCCESS;
188
189MOT:
15ad4b5a 190 tokenValue[i] = 0;
9ed84d89
JB
191 tokenType = MOT;
192 return EXIT_SUCCESS;
193
194MC2:
195 if (isSeparator() || isEOF()) {
196 goto MOTCLE;
197 }
198 goto error;
199
200MOTCLE:
201 tokenType = MOTCLE;
202 return EXIT_SUCCESS;
203
204FIN:
205 tokenType = FIN;
206 return EXIT_SUCCESS;
207
208error:
209 if (tokenType == MOT || tokenType == MOTCLE) {
e70feb8c 210 fwprintf(stderr, L"%s error with token type: %s and value: %ls\n",
25696723
JB
211 __func__,
212 tokenTypestr[tokenType],
213 token[tokenFound].value);
9ed84d89 214 } else {
e70feb8c 215 fwprintf(stderr, L"%s error with token type: %s\n",
25696723
JB
216 __func__,
217 tokenTypestr[tokenType]);
9ed84d89 218 }
6c47be32 219 fflush(stderr);
9ed84d89
JB
220 tokenType = FIN;
221 exit(EXIT_FAILURE);
222}