Push the code of a basic lexical analyser for the markdown syntax.
[TP_AL_C.git] / lexer / main.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdbool.h>
5
/* Stream being tokenised and output stream; both are opened by main(). */
FILE *source = NULL, *target = NULL;

/*
 * Current lookahead character.
 * Declared int, not char, because it receives the result of fgetc(): only an
 * int can hold every byte value and still keep EOF distinct from all of them.
 * With a plain char, `c == EOF` is never true where char is unsigned, and the
 * byte 0xFF is mistaken for end of input where char is signed.
 */
int c;

/* Index of the next free slot in tokenValue. */
unsigned int i = 0;

/* Characters recorded for the token currently being scanned. */
char tokenValue[50];

/* Kinds of tokens the scanner can report. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/*
 * Printable name of each token kind, indexed by enum TokenType.
 * Designated initialisers keep each name attached to its enumerator even if
 * the enum is reordered later.
 */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};
19
20 bool istAlpha() {
21 if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
22 c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
23 c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
24 c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
25 c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
26 c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
27 c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
28 c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
29 c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
30 c == '\''|| c == '#' || \
31 c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
32 c == '7' || c == '8' || c == '9') {
33 return true;
34 }
35 return false;
36 }
37
38 bool isSeparator() {
39 if (c == '\t' || c == ' ' || c == '\n') {
40 return true;
41 }
42 return false;
43 }
44
45 int scanner() {
46 const char* Titre = "Titre";
47 const char* Auteur = "Auteur";
48 unsigned int j = 0;
49
50 // The main loop get the next character
51 init:
52 if (c == ' ' || c == '\t') {
53 c = fgetc(source);
54 tokenValue[i] = c;
55 i++;
56 goto init;
57 }
58 if (c == '>') {
59 c = fgetc(source);
60 tokenValue[i] = c;
61 i++;
62 goto MC1;
63 }
64 if (c == '=') {
65 c = fgetc(source);
66 tokenValue[i] = c;
67 i++;
68 goto S1SS1;
69 }
70 if (istAlpha()) {
71 c = fgetc(source);
72 tokenValue[i] = c;
73 i++;
74 goto M1;
75 }
76 if (c == '\n') {
77 c = fgetc(source);
78 tokenValue[i] = c;
79 i++;
80 goto initLV1;
81 }
82 if (c == EOF) {
83 goto FIN;
84 } else {
85 goto error;
86 }
87
88 MC1:
89 if (c == Titre[j] && j < strlen(Titre) - 1) {
90 c = fgetc(source);
91 tokenValue[i] = c;
92 i++;
93 j++;
94 goto MC1;
95 }
96 if (c == Auteur[j] && j < strlen(Auteur) - 1) {
97 c = fgetc(source);
98 tokenValue[i] = c;
99 i++;
100 j++;
101 goto MC1;
102 } else {
103 c = fgetc(source);
104 tokenValue[i] = c;
105 i++;
106 goto MC2;
107 }
108
109 S1SS1:
110 if (isSeparator() || c == EOF) {
111 goto SECTION;
112 }
113 if (c == '=') {
114 c = fgetc(source);
115 tokenValue[i] = c;
116 i++;
117 goto SS2;
118 }
119
120 SS2:
121 if (isSeparator() || c == EOF) {
122 goto SECTION;
123 }
124
125 SECTION:
126 tokenType = SECTION;
127 return 1;
128
129 M1:
130 if (istAlpha()) {
131 c = fgetc(source);
132 tokenValue[i] = c;
133 i++;
134 goto M1;
135 }
136 if (isSeparator() || c == EOF) {
137 goto MOT;
138 }
139
140 initLV1:
141 if (c == '\n' || c == '\t') {
142 c = fgetc(source);
143 tokenValue[i] = c;
144 i++;
145 goto initLV1;
146 }
147 if (istAlpha()) {
148 c = fgetc(source);
149 tokenValue[i] = c;
150 i++;
151 goto M1;
152 }
153 if (c == '=') {
154 c = fgetc(source);
155 tokenValue[i] = c;
156 i++;
157 goto S1SS1;
158 }
159 if (c == '>') {
160 c = fgetc(source);
161 tokenValue[i] = c;
162 i++;
163 goto MC1;
164 }
165 if (c == '\n') {
166 c = fgetc(source);
167 tokenValue[i] = c;
168 i++;
169 goto initLV1LV2;
170 }
171 if (c == EOF) {
172 goto FIN;
173 }
174
175 initLV1LV2:
176 if (isSeparator()) {
177 c = fgetc(source);
178 tokenValue[i] = c;
179 i++;
180 goto initLV1LV2;
181 }
182 if (c == '>') {
183 c = fgetc(source);
184 tokenValue[i] = c;
185 i++;
186 goto MC1;
187 }
188 if (c == '=') {
189 c = fgetc(source);
190 tokenValue[i] = c;
191 i++;
192 goto S1SS1;
193 }
194 if (c == EOF) {
195 goto FIN;
196 }
197 if (istAlpha()) {
198 goto NPARA;
199 }
200
201 NPARA:
202 tokenType = NPARA;
203 return 1;
204
205 MOT:
206 tokenType = MOT;
207 return 1;
208
209 MC2:
210 if (isSeparator() || c == EOF) {
211 goto MOTCLE;
212 }
213
214 MOTCLE:
215 tokenType = MOTCLE;
216 return 1;
217
218 FIN:
219 tokenType = FIN;
220 return 1;
221
222 error:
223 tokenType = FIN;
224 return -1;
225 }
226
227 int main (int argc, char const *argv[]) {
228
229 // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
230 source = fopen("test.txt", "r");
231 // Cree et ouvre un fichier target.html en lecture/ecriture
232 // avec suppression du contenu au prealable :
233 target = fopen("target.html", "w+");
234
235 if (source == NULL) {
236 printf("Impossible d'ouvrir le fichier source\n");
237 return -1;
238 }
239
240 if (target == NULL) {
241 printf("Impossible d'ouvrir le fichier target\n");
242 return -1;
243 }
244
245 do {
246 c = fgetc(source); // lecture du caractere suivant du fichier source
247 //fputc(c, target); // ecrire c dans le fichier target
248 tokenValue[i] = c;
249 i++;
250 int scanrt = scanner();
251 if (scanrt == -1) {
252 printf ("Scanner error with token value: %s\n", tokenValue);
253 exit(EXIT_FAILURE);
254 }
255 if (c != EOF) {
256 printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
257 } else {
258 printf ("Token type found: %s\n", tokenTypestr[tokenType]);
259 }
260 // reinit
261 i = 0;
262 memset(tokenValue, 0, sizeof(tokenValue));
263 } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
264
265 if (source != NULL) fclose(source); // fermeture du fichier source
266 if (target != NULL) fclose(target); // fermeture du fichier target
267
268 return 0;
269 }