Syntactic analyser implementation with HTML conversion code
[TP_AL_C.git] / lexer / lexical_analyzer.c
CommitLineData
9ed84d89
JB
1/* Lexical analyzer */
2
3#include <stdlib.h>
4#include <stdbool.h>
5
6#include "global_vars.h"
7#include "print_helper.h"
8
/* Lookahead character examined by isAlphaNum(), isSeparator(), isEOF() and scanner(); scanner() refreshes it with fgetwc(source). */
9wint_t c;
10
11/* It looks silly to check for each characters but for debugging, it's just the way to go */
12static bool isAlphaNum() {
13 if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
14 c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
15 c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
16 c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
17 c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
18 c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
19 c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
20 c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
21 c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
22 c == L'\''|| c == L'#' || \
23 c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
24 c == L'7' || c == L'8' || c == L'9' || \
25 // FIXME: Accentued characters (aka multibytes characters) support is still buggy
26 c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
27 c == L'ù' || c == L'û' || \
28 c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
29 c == L'Ù' || c == L'Û') {
30 return true;
31 }
32 return false;
33}
34
35static bool isSeparator() {
36 if (c == L'\t' || c == L' ' || c == L'\n') {
37 return true;
38 }
39 return false;
40}
41
42static bool isEOF() {
43 if (c == WEOF) {
44 return true;
45 }
46 return false;
47}
48
49int scanner() {
50 unsigned int i = 0;
51 wchar_t m[6];
52
53init:
54 if (c == L' ' || c == L'\t') {
55 c = fgetwc(source);
56 goto init;
57 }
58 if (c == L'\n') {
59 c = fgetwc(source);
60 goto initLV1;
61 }
62 if (c == L'>') {
63 c = fgetwc(source);
64 goto MC1;
65 }
66 if (c == L'=') {
67 c = fgetwc(source);
68 goto S1SS1;
69 }
70 if (isAlphaNum()) {
71 token[tokenFound].value[i] = c;
72 i++;
73 c = fgetwc(source);
74 goto M1;
75 }
76 if (isEOF()) {
77 goto FIN;
78 }
79 goto error;
80
81MC1:
82 if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) {
83 wcscpy((wchar_t*)token[tokenFound].value, L">Auteur");
84 c = fgetwc(source);
85 goto MC2;
86 }
87 if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) {
88 wcscpy((wchar_t*)token[tokenFound].value, L">Titre");
89 c = fgetwc(source);
90 goto MC2;
91 }
92 goto error;
93
94S1SS1:
95 if (c == L'=') {
96 c = fgetwc(source);
97 goto SS2;
98 }
99 if (isSeparator() || isEOF()) {
100 goto SECTION;
101 }
102 goto error;
103
104SS2:
105 if (isSeparator() || isEOF()) {
106 goto SSECTION;
107 }
108 goto error;
109
110SECTION:
111 tokenType = SECTION;
112 return EXIT_SUCCESS;
113
114SSECTION:
115 tokenType = SSECTION;
116 return EXIT_SUCCESS;
117
118M1:
119 if (isAlphaNum()) {
120 token[tokenFound].value[i] = c;
121 i++;
122 c = fgetwc(source);
123 goto M1;
124 }
125 if (isSeparator() || isEOF()) {
126 goto MOT;
127 }
128 goto error;
129
130initLV1:
131 if (c == L' ' || c == L'\t') {
132 c = fgetwc(source);
133 goto initLV1;
134 }
135 if (c == L'\n') {
136 c = fgetwc(source);
137 goto initLV1LV2;
138 }
139 if (isAlphaNum()) {
140 token[tokenFound].value[i] = c;
141 i++;
142 c = fgetwc(source);
143 goto M1;
144 }
145 if (c == L'=') {
146 c = fgetwc(source);
147 goto S1SS1;
148 }
149 if (c == L'>') {
150 c = fgetwc(source);
151 goto MC1;
152 }
153 if (isEOF()) {
154 goto FIN;
155 }
156 goto error;
157
158initLV1LV2:
159 if (isSeparator()) {
160 c = fgetwc(source);
161 goto initLV1LV2;
162 }
163 if (isAlphaNum()) {
164 goto NPARA;
165 }
166 if (c == L'>') {
167 c = fgetwc(source);
168 goto MC1;
169 }
170 if (c == L'=') {
171 c = fgetwc(source);
172 goto S1SS1;
173 }
174 if (isEOF()) {
175 goto FIN;
176 }
177 goto error;
178
179NPARA:
180 tokenType = NPARA;
181 return EXIT_SUCCESS;
182
183MOT:
184 tokenType = MOT;
185 return EXIT_SUCCESS;
186
187MC2:
188 if (isSeparator() || isEOF()) {
189 goto MOTCLE;
190 }
191 goto error;
192
193MOTCLE:
194 tokenType = MOTCLE;
195 return EXIT_SUCCESS;
196
197FIN:
198 tokenType = FIN;
199 return EXIT_SUCCESS;
200
201error:
202 if (tokenType == MOT || tokenType == MOTCLE) {
e70feb8c 203 fwprintf(stderr, L"%s error with token type: %s and value: %ls\n",
224a9916 204 __func__,
9ed84d89
JB
205 tokenTypestr[tokenType],
206 token[tokenFound].value);
207 } else {
e70feb8c 208 fwprintf(stderr, L"%s error with token type: %s\n",
224a9916 209 __func__,
9ed84d89
JB
210 tokenTypestr[tokenType]);
211 }
6c47be32 212 fflush(stderr);
9ed84d89
JB
213 tokenType = FIN;
214 exit(EXIT_FAILURE);
215}