]>
Commit | Line | Data |
---|---|---|
9ed84d89 JB |
1 | /* Lexical analyzer */ |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <stdbool.h> | |
5 | ||
6 | #include "global_vars.h" | |
7 | #include "print_helper.h" | |
2d2a5978 | 8 | #include "lexical_analyzer.h" |
9ed84d89 JB |
9 | |
10 | wint_t c; /* Lookahead character shared by the whole lexer: scanner() refills it with fgetwc(source) and the is*() predicates below test it. NOTE(review): nothing in this file reads the first character, so the caller presumably primes it before the first scanner() call - confirm. */ | |
11 | ||
12 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ | |
13 | static bool isAlphaNum() { | |
14 | if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ | |
15 | c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ | |
16 | c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ | |
17 | c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ | |
18 | c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ | |
19 | c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ | |
20 | c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ | |
21 | c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ | |
22 | c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ | |
23 | c == L'\''|| c == L'#' || \ | |
24 | c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ | |
25 | c == L'7' || c == L'8' || c == L'9' || \ | |
7cfc17fb | 26 | //FIXME: Accentued characters (aka multibytes characters) support is still buggy |
9ed84d89 JB |
27 | c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ |
28 | c == L'ù' || c == L'û' || \ | |
29 | c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ | |
30 | c == L'Ù' || c == L'Û') { | |
31 | return true; | |
32 | } | |
33 | return false; | |
34 | } | |
35 | ||
36 | static bool isSeparator() { | |
37 | if (c == L'\t' || c == L' ' || c == L'\n') { | |
38 | return true; | |
39 | } | |
40 | return false; | |
41 | } | |
42 | ||
43 | static bool isEOF() { | |
44 | if (c == WEOF) { | |
45 | return true; | |
46 | } | |
47 | return false; | |
48 | } | |
49 | ||
5aa93c8d | 50 | int scanner(void) { |
15ad4b5a | 51 | tokenValue[0] = 0; |
9ed84d89 JB |
52 | unsigned int i = 0; |
53 | wchar_t m[6]; | |
54 | ||
55 | init: | |
56 | if (c == L' ' || c == L'\t') { | |
57 | c = fgetwc(source); | |
58 | goto init; | |
59 | } | |
60 | if (c == L'\n') { | |
61 | c = fgetwc(source); | |
62 | goto initLV1; | |
63 | } | |
64 | if (c == L'>') { | |
65 | c = fgetwc(source); | |
66 | goto MC1; | |
67 | } | |
68 | if (c == L'=') { | |
69 | c = fgetwc(source); | |
70 | goto S1SS1; | |
71 | } | |
72 | if (isAlphaNum()) { | |
15ad4b5a | 73 | tokenValue[i] = c; |
9ed84d89 JB |
74 | i++; |
75 | c = fgetwc(source); | |
76 | goto M1; | |
77 | } | |
78 | if (isEOF()) { | |
79 | goto FIN; | |
80 | } | |
81 | goto error; | |
82 | ||
83 | MC1: | |
84 | if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { | |
15ad4b5a | 85 | wcscpy((wchar_t*)tokenValue, L">Auteur"); |
9ed84d89 JB |
86 | c = fgetwc(source); |
87 | goto MC2; | |
88 | } | |
89 | if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { | |
15ad4b5a | 90 | wcscpy((wchar_t*)tokenValue, L">Titre"); |
9ed84d89 JB |
91 | c = fgetwc(source); |
92 | goto MC2; | |
93 | } | |
94 | goto error; | |
95 | ||
96 | S1SS1: | |
97 | if (c == L'=') { | |
98 | c = fgetwc(source); | |
99 | goto SS2; | |
100 | } | |
101 | if (isSeparator() || isEOF()) { | |
102 | goto SECTION; | |
103 | } | |
104 | goto error; | |
105 | ||
106 | SS2: | |
107 | if (isSeparator() || isEOF()) { | |
108 | goto SSECTION; | |
109 | } | |
110 | goto error; | |
111 | ||
112 | SECTION: | |
113 | tokenType = SECTION; | |
114 | return EXIT_SUCCESS; | |
115 | ||
116 | SSECTION: | |
117 | tokenType = SSECTION; | |
118 | return EXIT_SUCCESS; | |
119 | ||
120 | M1: | |
121 | if (isAlphaNum()) { | |
15ad4b5a | 122 | tokenValue[i] = c; |
9ed84d89 JB |
123 | i++; |
124 | c = fgetwc(source); | |
125 | goto M1; | |
126 | } | |
127 | if (isSeparator() || isEOF()) { | |
128 | goto MOT; | |
129 | } | |
130 | goto error; | |
131 | ||
132 | initLV1: | |
133 | if (c == L' ' || c == L'\t') { | |
134 | c = fgetwc(source); | |
135 | goto initLV1; | |
136 | } | |
137 | if (c == L'\n') { | |
138 | c = fgetwc(source); | |
139 | goto initLV1LV2; | |
140 | } | |
141 | if (isAlphaNum()) { | |
15ad4b5a | 142 | tokenValue[i] = c; |
9ed84d89 JB |
143 | i++; |
144 | c = fgetwc(source); | |
145 | goto M1; | |
146 | } | |
147 | if (c == L'=') { | |
148 | c = fgetwc(source); | |
149 | goto S1SS1; | |
150 | } | |
151 | if (c == L'>') { | |
152 | c = fgetwc(source); | |
153 | goto MC1; | |
154 | } | |
155 | if (isEOF()) { | |
156 | goto FIN; | |
157 | } | |
158 | goto error; | |
159 | ||
160 | initLV1LV2: | |
161 | if (isSeparator()) { | |
162 | c = fgetwc(source); | |
163 | goto initLV1LV2; | |
164 | } | |
165 | if (isAlphaNum()) { | |
166 | goto NPARA; | |
167 | } | |
168 | if (c == L'>') { | |
169 | c = fgetwc(source); | |
170 | goto MC1; | |
171 | } | |
172 | if (c == L'=') { | |
173 | c = fgetwc(source); | |
174 | goto S1SS1; | |
175 | } | |
176 | if (isEOF()) { | |
177 | goto FIN; | |
178 | } | |
179 | goto error; | |
180 | ||
181 | NPARA: | |
182 | tokenType = NPARA; | |
183 | return EXIT_SUCCESS; | |
184 | ||
185 | MOT: | |
186 | tokenType = MOT; | |
dfbc1df9 JB |
187 | tokenValue[i] = 0; |
188 | wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue); | |
9ed84d89 JB |
189 | return EXIT_SUCCESS; |
190 | ||
191 | MC2: | |
192 | if (isSeparator() || isEOF()) { | |
193 | goto MOTCLE; | |
194 | } | |
195 | goto error; | |
196 | ||
197 | MOTCLE: | |
198 | tokenType = MOTCLE; | |
dfbc1df9 | 199 | wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue); |
9ed84d89 JB |
200 | return EXIT_SUCCESS; |
201 | ||
202 | FIN: | |
203 | tokenType = FIN; | |
204 | return EXIT_SUCCESS; | |
205 | ||
206 | error: | |
207 | if (tokenType == MOT || tokenType == MOTCLE) { | |
e70feb8c | 208 | fwprintf(stderr, L"%s error with token type: %s and value: %ls\n", |
25696723 JB |
209 | __func__, |
210 | tokenTypestr[tokenType], | |
dfbc1df9 | 211 | tokenValue); |
9ed84d89 | 212 | } else { |
e70feb8c | 213 | fwprintf(stderr, L"%s error with token type: %s\n", |
25696723 JB |
214 | __func__, |
215 | tokenTypestr[tokenType]); | |
9ed84d89 | 216 | } |
6c47be32 | 217 | fflush(stderr); |
9ed84d89 JB |
218 | tokenType = FIN; |
219 | exit(EXIT_FAILURE); | |
220 | } |