Commit | Line | Data |
---|---|---|
9ed84d89 JB |
1 | /* Lexical analyzer */ |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <stdbool.h> | |
5 | ||
6 | #include "global_vars.h" | |
7 | #include "print_helper.h" | |
8 | ||
9 | /* Current lookahead character: tested by isAlphaNum(), isSeparator() and isEOF(), and refreshed by scanner() with fgetwc(source). NOTE(review): scanner() tests c before its first read, so the caller presumably primes it - confirm. */ wint_t c; | |
10 | ||
11 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ | |
12 | static bool isAlphaNum() { | |
13 | if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ | |
14 | c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ | |
15 | c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ | |
16 | c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ | |
17 | c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ | |
18 | c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ | |
19 | c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ | |
20 | c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ | |
21 | c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ | |
22 | c == L'\''|| c == L'#' || \ | |
23 | c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ | |
24 | c == L'7' || c == L'8' || c == L'9' || \ | |
25 | // FIXME: Accentued characters (aka multibytes characters) support is still buggy | |
26 | c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ | |
27 | c == L'ù' || c == L'û' || \ | |
28 | c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ | |
29 | c == L'Ù' || c == L'Û') { | |
30 | return true; | |
31 | } | |
32 | return false; | |
33 | } | |
34 | ||
35 | static bool isSeparator() { | |
36 | if (c == L'\t' || c == L' ' || c == L'\n') { | |
37 | return true; | |
38 | } | |
39 | return false; | |
40 | } | |
41 | ||
42 | static bool isEOF() { | |
43 | if (c == WEOF) { | |
44 | return true; | |
45 | } | |
46 | return false; | |
47 | } | |
48 | ||
49 | int scanner() { | |
50 | unsigned int i = 0; | |
51 | wchar_t m[6]; | |
52 | ||
53 | init: | |
54 | if (c == L' ' || c == L'\t') { | |
55 | c = fgetwc(source); | |
56 | goto init; | |
57 | } | |
58 | if (c == L'\n') { | |
59 | c = fgetwc(source); | |
60 | goto initLV1; | |
61 | } | |
62 | if (c == L'>') { | |
63 | c = fgetwc(source); | |
64 | goto MC1; | |
65 | } | |
66 | if (c == L'=') { | |
67 | c = fgetwc(source); | |
68 | goto S1SS1; | |
69 | } | |
70 | if (isAlphaNum()) { | |
71 | token[tokenFound].value[i] = c; | |
72 | i++; | |
73 | c = fgetwc(source); | |
74 | goto M1; | |
75 | } | |
76 | if (isEOF()) { | |
77 | goto FIN; | |
78 | } | |
79 | goto error; | |
80 | ||
81 | MC1: | |
82 | if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { | |
83 | wcscpy((wchar_t*)token[tokenFound].value, L">Auteur"); | |
84 | c = fgetwc(source); | |
85 | goto MC2; | |
86 | } | |
87 | if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { | |
88 | wcscpy((wchar_t*)token[tokenFound].value, L">Titre"); | |
89 | c = fgetwc(source); | |
90 | goto MC2; | |
91 | } | |
92 | goto error; | |
93 | ||
94 | S1SS1: | |
95 | if (c == L'=') { | |
96 | c = fgetwc(source); | |
97 | goto SS2; | |
98 | } | |
99 | if (isSeparator() || isEOF()) { | |
100 | goto SECTION; | |
101 | } | |
102 | goto error; | |
103 | ||
104 | SS2: | |
105 | if (isSeparator() || isEOF()) { | |
106 | goto SSECTION; | |
107 | } | |
108 | goto error; | |
109 | ||
110 | SECTION: | |
111 | tokenType = SECTION; | |
112 | return EXIT_SUCCESS; | |
113 | ||
114 | SSECTION: | |
115 | tokenType = SSECTION; | |
116 | return EXIT_SUCCESS; | |
117 | ||
118 | M1: | |
119 | if (isAlphaNum()) { | |
120 | token[tokenFound].value[i] = c; | |
121 | i++; | |
122 | c = fgetwc(source); | |
123 | goto M1; | |
124 | } | |
125 | if (isSeparator() || isEOF()) { | |
126 | goto MOT; | |
127 | } | |
128 | goto error; | |
129 | ||
130 | initLV1: | |
131 | if (c == L' ' || c == L'\t') { | |
132 | c = fgetwc(source); | |
133 | goto initLV1; | |
134 | } | |
135 | if (c == L'\n') { | |
136 | c = fgetwc(source); | |
137 | goto initLV1LV2; | |
138 | } | |
139 | if (isAlphaNum()) { | |
140 | token[tokenFound].value[i] = c; | |
141 | i++; | |
142 | c = fgetwc(source); | |
143 | goto M1; | |
144 | } | |
145 | if (c == L'=') { | |
146 | c = fgetwc(source); | |
147 | goto S1SS1; | |
148 | } | |
149 | if (c == L'>') { | |
150 | c = fgetwc(source); | |
151 | goto MC1; | |
152 | } | |
153 | if (isEOF()) { | |
154 | goto FIN; | |
155 | } | |
156 | goto error; | |
157 | ||
158 | initLV1LV2: | |
159 | if (isSeparator()) { | |
160 | c = fgetwc(source); | |
161 | goto initLV1LV2; | |
162 | } | |
163 | if (isAlphaNum()) { | |
164 | goto NPARA; | |
165 | } | |
166 | if (c == L'>') { | |
167 | c = fgetwc(source); | |
168 | goto MC1; | |
169 | } | |
170 | if (c == L'=') { | |
171 | c = fgetwc(source); | |
172 | goto S1SS1; | |
173 | } | |
174 | if (isEOF()) { | |
175 | goto FIN; | |
176 | } | |
177 | goto error; | |
178 | ||
179 | NPARA: | |
180 | tokenType = NPARA; | |
181 | return EXIT_SUCCESS; | |
182 | ||
183 | MOT: | |
184 | tokenType = MOT; | |
185 | return EXIT_SUCCESS; | |
186 | ||
187 | MC2: | |
188 | if (isSeparator() || isEOF()) { | |
189 | goto MOTCLE; | |
190 | } | |
191 | goto error; | |
192 | ||
193 | MOTCLE: | |
194 | tokenType = MOTCLE; | |
195 | return EXIT_SUCCESS; | |
196 | ||
197 | FIN: | |
198 | tokenType = FIN; | |
199 | return EXIT_SUCCESS; | |
200 | ||
201 | error: | |
202 | if (tokenType == MOT || tokenType == MOTCLE) { | |
e70feb8c | 203 | fwprintf(stderr, L"%s error with token type: %s and value: %ls\n", |
224a9916 | 204 | __func__, |
9ed84d89 JB |
205 | tokenTypestr[tokenType], |
206 | token[tokenFound].value); | |
207 | } else { | |
e70feb8c | 208 | fwprintf(stderr, L"%s error with token type: %s\n", |
224a9916 | 209 | __func__, |
9ed84d89 JB |
210 | tokenTypestr[tokenType]); |
211 | } | |
6c47be32 | 212 | fflush(stderr); |
9ed84d89 JB |
213 | tokenType = FIN; |
214 | exit(EXIT_FAILURE); | |
215 | } |