Commit | Line | Data |
---|---|---|
9ed84d89 JB |
1 | /* Lexical analyzer */ |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <stdbool.h> | |
5 | ||
6 | #include "global_vars.h" | |
7 | #include "print_helper.h" | |
8 | ||
9 | wint_t c; | |
10 | ||
11 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ | |
12 | static bool isAlphaNum() { | |
13 | if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ | |
14 | c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ | |
15 | c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ | |
16 | c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ | |
17 | c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ | |
18 | c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ | |
19 | c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ | |
20 | c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ | |
21 | c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ | |
22 | c == L'\''|| c == L'#' || \ | |
23 | c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ | |
24 | c == L'7' || c == L'8' || c == L'9' || \ | |
7cfc17fb | 25 | //FIXME: Accentued characters (aka multibytes characters) support is still buggy |
9ed84d89 JB |
26 | c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ |
27 | c == L'ù' || c == L'û' || \ | |
28 | c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ | |
29 | c == L'Ù' || c == L'Û') { | |
30 | return true; | |
31 | } | |
32 | return false; | |
33 | } | |
34 | ||
35 | static bool isSeparator() { | |
36 | if (c == L'\t' || c == L' ' || c == L'\n') { | |
37 | return true; | |
38 | } | |
39 | return false; | |
40 | } | |
41 | ||
42 | static bool isEOF() { | |
43 | if (c == WEOF) { | |
44 | return true; | |
45 | } | |
46 | return false; | |
47 | } | |
48 | ||
49 | int scanner() { | |
15ad4b5a | 50 | tokenValue[0] = 0; |
9ed84d89 JB |
51 | unsigned int i = 0; |
52 | wchar_t m[6]; | |
53 | ||
54 | init: | |
55 | if (c == L' ' || c == L'\t') { | |
56 | c = fgetwc(source); | |
57 | goto init; | |
58 | } | |
59 | if (c == L'\n') { | |
60 | c = fgetwc(source); | |
61 | goto initLV1; | |
62 | } | |
63 | if (c == L'>') { | |
64 | c = fgetwc(source); | |
65 | goto MC1; | |
66 | } | |
67 | if (c == L'=') { | |
68 | c = fgetwc(source); | |
69 | goto S1SS1; | |
70 | } | |
71 | if (isAlphaNum()) { | |
72 | token[tokenFound].value[i] = c; | |
15ad4b5a | 73 | tokenValue[i] = c; |
9ed84d89 JB |
74 | i++; |
75 | c = fgetwc(source); | |
76 | goto M1; | |
77 | } | |
78 | if (isEOF()) { | |
79 | goto FIN; | |
80 | } | |
81 | goto error; | |
82 | ||
83 | MC1: | |
84 | if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { | |
85 | wcscpy((wchar_t*)token[tokenFound].value, L">Auteur"); | |
15ad4b5a | 86 | wcscpy((wchar_t*)tokenValue, L">Auteur"); |
9ed84d89 JB |
87 | c = fgetwc(source); |
88 | goto MC2; | |
89 | } | |
90 | if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { | |
91 | wcscpy((wchar_t*)token[tokenFound].value, L">Titre"); | |
15ad4b5a | 92 | wcscpy((wchar_t*)tokenValue, L">Titre"); |
9ed84d89 JB |
93 | c = fgetwc(source); |
94 | goto MC2; | |
95 | } | |
96 | goto error; | |
97 | ||
98 | S1SS1: | |
99 | if (c == L'=') { | |
100 | c = fgetwc(source); | |
101 | goto SS2; | |
102 | } | |
103 | if (isSeparator() || isEOF()) { | |
104 | goto SECTION; | |
105 | } | |
106 | goto error; | |
107 | ||
108 | SS2: | |
109 | if (isSeparator() || isEOF()) { | |
110 | goto SSECTION; | |
111 | } | |
112 | goto error; | |
113 | ||
114 | SECTION: | |
115 | tokenType = SECTION; | |
116 | return EXIT_SUCCESS; | |
117 | ||
118 | SSECTION: | |
119 | tokenType = SSECTION; | |
120 | return EXIT_SUCCESS; | |
121 | ||
122 | M1: | |
123 | if (isAlphaNum()) { | |
124 | token[tokenFound].value[i] = c; | |
15ad4b5a | 125 | tokenValue[i] = c; |
9ed84d89 JB |
126 | i++; |
127 | c = fgetwc(source); | |
128 | goto M1; | |
129 | } | |
130 | if (isSeparator() || isEOF()) { | |
131 | goto MOT; | |
132 | } | |
133 | goto error; | |
134 | ||
135 | initLV1: | |
136 | if (c == L' ' || c == L'\t') { | |
137 | c = fgetwc(source); | |
138 | goto initLV1; | |
139 | } | |
140 | if (c == L'\n') { | |
141 | c = fgetwc(source); | |
142 | goto initLV1LV2; | |
143 | } | |
144 | if (isAlphaNum()) { | |
145 | token[tokenFound].value[i] = c; | |
15ad4b5a | 146 | tokenValue[i] = c; |
9ed84d89 JB |
147 | i++; |
148 | c = fgetwc(source); | |
149 | goto M1; | |
150 | } | |
151 | if (c == L'=') { | |
152 | c = fgetwc(source); | |
153 | goto S1SS1; | |
154 | } | |
155 | if (c == L'>') { | |
156 | c = fgetwc(source); | |
157 | goto MC1; | |
158 | } | |
159 | if (isEOF()) { | |
160 | goto FIN; | |
161 | } | |
162 | goto error; | |
163 | ||
164 | initLV1LV2: | |
165 | if (isSeparator()) { | |
166 | c = fgetwc(source); | |
167 | goto initLV1LV2; | |
168 | } | |
169 | if (isAlphaNum()) { | |
170 | goto NPARA; | |
171 | } | |
172 | if (c == L'>') { | |
173 | c = fgetwc(source); | |
174 | goto MC1; | |
175 | } | |
176 | if (c == L'=') { | |
177 | c = fgetwc(source); | |
178 | goto S1SS1; | |
179 | } | |
180 | if (isEOF()) { | |
181 | goto FIN; | |
182 | } | |
183 | goto error; | |
184 | ||
185 | NPARA: | |
186 | tokenType = NPARA; | |
187 | return EXIT_SUCCESS; | |
188 | ||
189 | MOT: | |
15ad4b5a JB |
190 | token[tokenFound].value[i] = 0; |
191 | tokenValue[i] = 0; | |
9ed84d89 JB |
192 | tokenType = MOT; |
193 | return EXIT_SUCCESS; | |
194 | ||
195 | MC2: | |
196 | if (isSeparator() || isEOF()) { | |
197 | goto MOTCLE; | |
198 | } | |
199 | goto error; | |
200 | ||
201 | MOTCLE: | |
202 | tokenType = MOTCLE; | |
203 | return EXIT_SUCCESS; | |
204 | ||
205 | FIN: | |
206 | tokenType = FIN; | |
207 | return EXIT_SUCCESS; | |
208 | ||
209 | error: | |
210 | if (tokenType == MOT || tokenType == MOTCLE) { | |
e70feb8c | 211 | fwprintf(stderr, L"%s error with token type: %s and value: %ls\n", |
25696723 JB |
212 | __func__, |
213 | tokenTypestr[tokenType], | |
214 | token[tokenFound].value); | |
9ed84d89 | 215 | } else { |
e70feb8c | 216 | fwprintf(stderr, L"%s error with token type: %s\n", |
25696723 JB |
217 | __func__, |
218 | tokenTypestr[tokenType]); | |
9ed84d89 | 219 | } |
6c47be32 | 220 | fflush(stderr); |
9ed84d89 JB |
221 | tokenType = FIN; |
222 | exit(EXIT_FAILURE); | |
223 | } |