Commit | Line | Data |
---|---|---|
9ed84d89 JB |
1 | /* Lexical analyzer */ |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <stdbool.h> | |
5 | ||
6 | #include "global_vars.h" | |
7 | #include "print_helper.h" | |
8 | ||
/* Lookahead character of the lexer: scanner() refills it with fgetwc()
 * and the is*() predicates in this file classify it. */
wint_t c;

/*
 * Tell whether the lookahead character `c` may be part of a word.
 *
 * The accepted set is spelled out character by character on purpose (it
 * makes debugging easier): unaccented Latin letters, decimal digits, the
 * punctuation . ? ! , ; : - ' # and a handful of accented French letters.
 *
 * Returns true when `c` is in that set, false otherwise.
 */
static bool isAlphaNum() {
    /* FIXME: Accented characters (aka multibyte characters) support is still buggy */
    static const wchar_t wordChars[] =
        L"abcdefghijklmnopqrstuvwxyz"
        L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        L".?!,;:-'#"
        L"0123456789"
        L"àâçèéîôùû"
        L"ÀÂÇÈÉÎÔÙÛ";

    /* The loop stops at the terminator, so a NUL lookahead never matches. */
    for (const wchar_t *candidate = wordChars; *candidate != L'\0'; candidate++) {
        if (c == (wint_t)*candidate) {
            return true;
        }
    }
    return false;
}
34 | ||
35 | static bool isSeparator() { | |
36 | if (c == L'\t' || c == L' ' || c == L'\n') { | |
37 | return true; | |
38 | } | |
39 | return false; | |
40 | } | |
41 | ||
42 | static bool isEOF() { | |
43 | if (c == WEOF) { | |
44 | return true; | |
45 | } | |
46 | return false; | |
47 | } | |
48 | ||
49 | int scanner() { | |
15ad4b5a | 50 | tokenValue[0] = 0; |
9ed84d89 JB |
51 | unsigned int i = 0; |
52 | wchar_t m[6]; | |
53 | ||
54 | init: | |
55 | if (c == L' ' || c == L'\t') { | |
56 | c = fgetwc(source); | |
57 | goto init; | |
58 | } | |
59 | if (c == L'\n') { | |
60 | c = fgetwc(source); | |
61 | goto initLV1; | |
62 | } | |
63 | if (c == L'>') { | |
64 | c = fgetwc(source); | |
65 | goto MC1; | |
66 | } | |
67 | if (c == L'=') { | |
68 | c = fgetwc(source); | |
69 | goto S1SS1; | |
70 | } | |
71 | if (isAlphaNum()) { | |
15ad4b5a | 72 | tokenValue[i] = c; |
9ed84d89 JB |
73 | i++; |
74 | c = fgetwc(source); | |
75 | goto M1; | |
76 | } | |
77 | if (isEOF()) { | |
78 | goto FIN; | |
79 | } | |
80 | goto error; | |
81 | ||
82 | MC1: | |
83 | if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { | |
15ad4b5a | 84 | wcscpy((wchar_t*)tokenValue, L">Auteur"); |
9ed84d89 JB |
85 | c = fgetwc(source); |
86 | goto MC2; | |
87 | } | |
88 | if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { | |
15ad4b5a | 89 | wcscpy((wchar_t*)tokenValue, L">Titre"); |
9ed84d89 JB |
90 | c = fgetwc(source); |
91 | goto MC2; | |
92 | } | |
93 | goto error; | |
94 | ||
95 | S1SS1: | |
96 | if (c == L'=') { | |
97 | c = fgetwc(source); | |
98 | goto SS2; | |
99 | } | |
100 | if (isSeparator() || isEOF()) { | |
101 | goto SECTION; | |
102 | } | |
103 | goto error; | |
104 | ||
105 | SS2: | |
106 | if (isSeparator() || isEOF()) { | |
107 | goto SSECTION; | |
108 | } | |
109 | goto error; | |
110 | ||
111 | SECTION: | |
112 | tokenType = SECTION; | |
113 | return EXIT_SUCCESS; | |
114 | ||
115 | SSECTION: | |
116 | tokenType = SSECTION; | |
117 | return EXIT_SUCCESS; | |
118 | ||
119 | M1: | |
120 | if (isAlphaNum()) { | |
15ad4b5a | 121 | tokenValue[i] = c; |
9ed84d89 JB |
122 | i++; |
123 | c = fgetwc(source); | |
124 | goto M1; | |
125 | } | |
126 | if (isSeparator() || isEOF()) { | |
127 | goto MOT; | |
128 | } | |
129 | goto error; | |
130 | ||
131 | initLV1: | |
132 | if (c == L' ' || c == L'\t') { | |
133 | c = fgetwc(source); | |
134 | goto initLV1; | |
135 | } | |
136 | if (c == L'\n') { | |
137 | c = fgetwc(source); | |
138 | goto initLV1LV2; | |
139 | } | |
140 | if (isAlphaNum()) { | |
15ad4b5a | 141 | tokenValue[i] = c; |
9ed84d89 JB |
142 | i++; |
143 | c = fgetwc(source); | |
144 | goto M1; | |
145 | } | |
146 | if (c == L'=') { | |
147 | c = fgetwc(source); | |
148 | goto S1SS1; | |
149 | } | |
150 | if (c == L'>') { | |
151 | c = fgetwc(source); | |
152 | goto MC1; | |
153 | } | |
154 | if (isEOF()) { | |
155 | goto FIN; | |
156 | } | |
157 | goto error; | |
158 | ||
159 | initLV1LV2: | |
160 | if (isSeparator()) { | |
161 | c = fgetwc(source); | |
162 | goto initLV1LV2; | |
163 | } | |
164 | if (isAlphaNum()) { | |
165 | goto NPARA; | |
166 | } | |
167 | if (c == L'>') { | |
168 | c = fgetwc(source); | |
169 | goto MC1; | |
170 | } | |
171 | if (c == L'=') { | |
172 | c = fgetwc(source); | |
173 | goto S1SS1; | |
174 | } | |
175 | if (isEOF()) { | |
176 | goto FIN; | |
177 | } | |
178 | goto error; | |
179 | ||
180 | NPARA: | |
181 | tokenType = NPARA; | |
182 | return EXIT_SUCCESS; | |
183 | ||
184 | MOT: | |
185 | tokenType = MOT; | |
dfbc1df9 JB |
186 | tokenValue[i] = 0; |
187 | wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue); | |
9ed84d89 JB |
188 | return EXIT_SUCCESS; |
189 | ||
190 | MC2: | |
191 | if (isSeparator() || isEOF()) { | |
192 | goto MOTCLE; | |
193 | } | |
194 | goto error; | |
195 | ||
196 | MOTCLE: | |
197 | tokenType = MOTCLE; | |
dfbc1df9 | 198 | wcscpy((wchar_t*)token[tokenFound].value, (wchar_t*)tokenValue); |
9ed84d89 JB |
199 | return EXIT_SUCCESS; |
200 | ||
201 | FIN: | |
202 | tokenType = FIN; | |
203 | return EXIT_SUCCESS; | |
204 | ||
205 | error: | |
206 | if (tokenType == MOT || tokenType == MOTCLE) { | |
e70feb8c | 207 | fwprintf(stderr, L"%s error with token type: %s and value: %ls\n", |
25696723 JB |
208 | __func__, |
209 | tokenTypestr[tokenType], | |
dfbc1df9 | 210 | tokenValue); |
9ed84d89 | 211 | } else { |
e70feb8c | 212 | fwprintf(stderr, L"%s error with token type: %s\n", |
25696723 JB |
213 | __func__, |
214 | tokenTypestr[tokenType]); | |
9ed84d89 | 215 | } |
6c47be32 | 216 | fflush(stderr); |
9ed84d89 JB |
217 | tokenType = FIN; |
218 | exit(EXIT_FAILURE); | |
219 | } |