Add includes to avoid missing prototype warning.
[TP_AL_C.git] / lexer / lexical_analyzer.c
CommitLineData
9ed84d89
JB
1/* Lexical analyzer */
2
3#include <stdlib.h>
4#include <stdbool.h>
5
6#include "global_vars.h"
7#include "print_helper.h"
2d2a5978 8#include "lexical_analyzer.h"
9ed84d89
JB
9
/* Look-ahead character: the predicates below and scanner() all test this variable. */
wint_t c;

/*
 * Tell whether the look-ahead character `c` may be part of a word.
 *
 * Accepted characters: ASCII letters and digits, the punctuation
 * . ? ! , ; : - ' # and a fixed list of accented French letters.
 * Every character is listed explicitly on purpose: it looks silly, but it
 * keeps the accepted set easy to inspect while debugging.
 *
 * Returns true when `c` is in the accepted set, false otherwise
 * (including for WEOF and for the NUL character).
 */
static bool isAlphaNum(void) {
    switch (c) {
    /* Lower-case ASCII letters */
    case L'a': case L'b': case L'c': case L'd': case L'e': case L'f': case L'g':
    case L'h': case L'i': case L'j': case L'k': case L'l': case L'm': case L'n':
    case L'o': case L'p': case L'q': case L'r': case L's': case L't': case L'u':
    case L'v': case L'w': case L'x': case L'y': case L'z':
    /* Upper-case ASCII letters */
    case L'A': case L'B': case L'C': case L'D': case L'E': case L'F': case L'G':
    case L'H': case L'I': case L'J': case L'K': case L'L': case L'M': case L'N':
    case L'O': case L'P': case L'Q': case L'R': case L'S': case L'T': case L'U':
    case L'V': case L'W': case L'X': case L'Y': case L'Z':
    /* Punctuation that is treated as part of a word */
    case L'.': case L'?': case L'!': case L',': case L';': case L':': case L'-':
    case L'\'': case L'#':
    /* Digits */
    case L'0': case L'1': case L'2': case L'3': case L'4': case L'5': case L'6':
    case L'7': case L'8': case L'9':
    /* FIXME: Accented characters (aka multibyte characters) support is still buggy */
    case L'à': case L'â': case L'ç': case L'è': case L'é': case L'î': case L'ô':
    case L'ù': case L'û':
    case L'À': case L'Â': case L'Ç': case L'È': case L'É': case L'Î': case L'Ô':
    case L'Ù': case L'Û':
        return true;
    default:
        return false;
    }
}

/* Tell whether the look-ahead character `c` is a tab, a space or a newline. */
static bool isSeparator(void) {
    return c == L'\t' || c == L' ' || c == L'\n';
}

/* Tell whether the look-ahead character `c` is the wide end-of-file marker. */
static bool isEOF(void) {
    return c == WEOF;
}
49
5aa93c8d 50int scanner(void) {
15ad4b5a 51 tokenValue[0] = 0;
9ed84d89
JB
52 unsigned int i = 0;
53 wchar_t m[6];
54
55init:
56 if (c == L' ' || c == L'\t') {
57 c = fgetwc(source);
58 goto init;
59 }
60 if (c == L'\n') {
61 c = fgetwc(source);
62 goto initLV1;
63 }
64 if (c == L'>') {
65 c = fgetwc(source);
66 goto MC1;
67 }
68 if (c == L'=') {
69 c = fgetwc(source);
70 goto S1SS1;
71 }
72 if (isAlphaNum()) {
15ad4b5a 73 tokenValue[i] = c;
9ed84d89
JB
74 i++;
75 c = fgetwc(source);
76 goto M1;
77 }
78 if (isEOF()) {
79 goto FIN;
80 }
81 goto error;
82
83MC1:
84 if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) {
15ad4b5a 85 wcscpy((wchar_t*)tokenValue, L">Auteur");
9ed84d89
JB
86 c = fgetwc(source);
87 goto MC2;
88 }
89 if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) {
15ad4b5a 90 wcscpy((wchar_t*)tokenValue, L">Titre");
9ed84d89
JB
91 c = fgetwc(source);
92 goto MC2;
93 }
94 goto error;
95
96S1SS1:
97 if (c == L'=') {
98 c = fgetwc(source);
99 goto SS2;
100 }
101 if (isSeparator() || isEOF()) {
102 goto SECTION;
103 }
104 goto error;
105
106SS2:
107 if (isSeparator() || isEOF()) {
108 goto SSECTION;
109 }
110 goto error;
111
112SECTION:
113 tokenType = SECTION;
114 return EXIT_SUCCESS;
115
116SSECTION:
117 tokenType = SSECTION;
118 return EXIT_SUCCESS;
119
120M1:
121 if (isAlphaNum()) {
15ad4b5a 122 tokenValue[i] = c;
9ed84d89
JB
123 i++;
124 c = fgetwc(source);
125 goto M1;
126 }
127 if (isSeparator() || isEOF()) {
128 goto MOT;
129 }
130 goto error;
131
132initLV1:
133 if (c == L' ' || c == L'\t') {
134 c = fgetwc(source);
135 goto initLV1;
136 }
137 if (c == L'\n') {
138 c = fgetwc(source);
139 goto initLV1LV2;
140 }
141 if (isAlphaNum()) {
15ad4b5a 142 tokenValue[i] = c;
9ed84d89
JB
143 i++;
144 c = fgetwc(source);
145 goto M1;
146 }
147 if (c == L'=') {
148 c = fgetwc(source);
149 goto S1SS1;
150 }
151 if (c == L'>') {
152 c = fgetwc(source);
153 goto MC1;
154 }
155 if (isEOF()) {
156 goto FIN;
157 }
158 goto error;
159
160initLV1LV2:
161 if (isSeparator()) {
162 c = fgetwc(source);
163 goto initLV1LV2;
164 }
165 if (isAlphaNum()) {
166 goto NPARA;
167 }
168 if (c == L'>') {
169 c = fgetwc(source);
170 goto MC1;
171 }
172 if (c == L'=') {
173 c = fgetwc(source);
174 goto S1SS1;
175 }
176 if (isEOF()) {
177 goto FIN;
178 }
179 goto error;
180
181NPARA:
182 tokenType = NPARA;
183 return EXIT_SUCCESS;
184
185MOT:
186 tokenType = MOT;
dfbc1df9
JB
187 tokenValue[i] = 0;
188 wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue);
9ed84d89
JB
189 return EXIT_SUCCESS;
190
191MC2:
192 if (isSeparator() || isEOF()) {
193 goto MOTCLE;
194 }
195 goto error;
196
197MOTCLE:
198 tokenType = MOTCLE;
dfbc1df9 199 wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue);
9ed84d89
JB
200 return EXIT_SUCCESS;
201
202FIN:
203 tokenType = FIN;
204 return EXIT_SUCCESS;
205
206error:
207 if (tokenType == MOT || tokenType == MOTCLE) {
e70feb8c 208 fwprintf(stderr, L"%s error with token type: %s and value: %ls\n",
25696723
JB
209 __func__,
210 tokenTypestr[tokenType],
dfbc1df9 211 tokenValue);
9ed84d89 212 } else {
e70feb8c 213 fwprintf(stderr, L"%s error with token type: %s\n",
25696723
JB
214 __func__,
215 tokenTypestr[tokenType]);
9ed84d89 216 }
6c47be32 217 fflush(stderr);
9ed84d89
JB
218 tokenType = FIN;
219 exit(EXIT_FAILURE);
220}