#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
| 6 | |
/* Number of slots in the global `token` table. */
#define TOKEN_MAX 500

/*
 * One scanned token.
 *
 *   type  - name of the token kind; main() points it at an entry of
 *           tokenTypestr[] once the token has been recognised.
 *   value - NUL-terminated wide string holding the token text (at most 49
 *           characters plus the terminator). It is declared as wchar_t, not
 *           wint_t, because it is handed to wide-string functions (wcscpy)
 *           and to the %ls conversion of wprintf, which both require
 *           wchar_t storage.
 */
struct token_s {
    const char *type;
    wchar_t value[50];
};

/* Token table; every slot starts zeroed, so each value is an empty string. */
struct token_s token[TOKEN_MAX] = {{NULL, {0}}};
| 15 | |
/* Input stream read by the scanner; opened in main(). */
FILE *source = NULL;
/* Output stream; opened and closed in main(). */
FILE *target = NULL;
/* Current lookahead character: the last value returned by fgetwc(source). */
wint_t c = 0;
/* Index of the token slot currently being filled, i.e. tokens completed so far. */
unsigned int tokenFound = 0;
/* Kinds of token the scanner can report through the global `tokenType`. */
enum TokenType {
    MOTCLE,   /* keyword: ">Auteur" or ">Titre" */
    SECTION,  /* "="  followed by a separator or end of input */
    SSECTION, /* "==" followed by a separator or end of input */
    NPARA,    /* blank line(s) followed by the start of a word */
    MOT,      /* run of word characters */
    FIN       /* end of input (also set when the scanner fails) */
};

/* Kind of the token most recently produced by scanner(). */
enum TokenType tokenType;

/* Printable name of each token kind, indexed by enum TokenType. */
const char* tokenTypestr[] = {
    [MOTCLE]   = "MOTCLE",
    [SECTION]  = "SECTION",
    [SSECTION] = "SSECTION",
    [NPARA]    = "NPARA",
    [MOT]      = "MOT",
    [FIN]      = "FIN"
};
| 28 | |
| 29 | /* It looks silly to check for each characters but for debugging, it's just the way to go */ |
| 30 | bool istAlpha() { |
| 31 | if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \ |
| 32 | c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \ |
| 33 | c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \ |
| 34 | c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \ |
| 35 | c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \ |
| 36 | c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \ |
| 37 | c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \ |
| 38 | c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \ |
| 39 | c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \ |
| 40 | c == L'\''|| c == L'#' || \ |
| 41 | c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \ |
| 42 | c == L'7' || c == L'8' || c == L'9' || \ |
| 43 | // FIXME: Accentued characters (aka multibytes characters) support is still buggy |
| 44 | c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \ |
| 45 | c == L'ù' || c == L'û' || \ |
| 46 | c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \ |
| 47 | c == L'Ù' || c == L'Û') { |
| 48 | return true; |
| 49 | } |
| 50 | return false; |
| 51 | } |
| 52 | |
| 53 | bool isSeparator() { |
| 54 | if (c == L'\t' || c == L' ' || c == L'\n') { |
| 55 | return true; |
| 56 | } |
| 57 | return false; |
| 58 | } |
| 59 | |
| 60 | int scanner() { |
| 61 | unsigned int i = 0; |
| 62 | wchar_t m[6]; |
| 63 | |
| 64 | init: |
| 65 | if (c == L' ' || c == L'\t') { |
| 66 | c = fgetwc(source); |
| 67 | goto init; |
| 68 | } |
| 69 | if (c == L'\n') { |
| 70 | c = fgetwc(source); |
| 71 | goto initLV1; |
| 72 | } |
| 73 | if (c == L'>') { |
| 74 | c = fgetwc(source); |
| 75 | goto MC1; |
| 76 | } |
| 77 | if (c == L'=') { |
| 78 | c = fgetwc(source); |
| 79 | goto S1SS1; |
| 80 | } |
| 81 | if (istAlpha()) { |
| 82 | token[tokenFound].value[i] = c; |
| 83 | i++; |
| 84 | c = fgetwc(source); |
| 85 | goto M1; |
| 86 | } |
| 87 | if (c == WEOF) { |
| 88 | goto FIN; |
| 89 | } |
| 90 | goto error; |
| 91 | |
| 92 | MC1: |
| 93 | if (c == L'A' && !wcscmp(fgetws(m, 6, source), L"uteur")) { |
| 94 | wcscpy((wchar_t*)token[tokenFound].value, L">Auteur"); |
| 95 | c = fgetwc(source); |
| 96 | goto MC2; |
| 97 | } |
| 98 | if (c == L'T' && !wcscmp(fgetws(m, 5, source), L"itre")) { |
| 99 | wcscpy((wchar_t*)token[tokenFound].value, L">Titre"); |
| 100 | c = fgetwc(source); |
| 101 | goto MC2; |
| 102 | } |
| 103 | goto error; |
| 104 | |
| 105 | S1SS1: |
| 106 | if (c == L'=') { |
| 107 | c = fgetwc(source); |
| 108 | goto SS2; |
| 109 | } |
| 110 | if (isSeparator() || c == WEOF) { |
| 111 | goto SECTION; |
| 112 | } |
| 113 | goto error; |
| 114 | |
| 115 | SS2: |
| 116 | if (isSeparator() || c == WEOF) { |
| 117 | goto SSECTION; |
| 118 | } |
| 119 | goto error; |
| 120 | |
| 121 | SECTION: |
| 122 | tokenType = SECTION; |
| 123 | return EXIT_SUCCESS; |
| 124 | |
| 125 | SSECTION: |
| 126 | tokenType = SSECTION; |
| 127 | return EXIT_SUCCESS; |
| 128 | |
| 129 | M1: |
| 130 | if (istAlpha()) { |
| 131 | token[tokenFound].value[i] = c; |
| 132 | i++; |
| 133 | c = fgetwc(source); |
| 134 | goto M1; |
| 135 | } |
| 136 | if (isSeparator() || c == WEOF) { |
| 137 | goto MOT; |
| 138 | } |
| 139 | goto error; |
| 140 | |
| 141 | initLV1: |
| 142 | if (c == L' ' || c == L'\t') { |
| 143 | c = fgetwc(source); |
| 144 | goto initLV1; |
| 145 | } |
| 146 | if (c == L'\n') { |
| 147 | c = fgetwc(source); |
| 148 | goto initLV1LV2; |
| 149 | } |
| 150 | if (istAlpha()) { |
| 151 | token[tokenFound].value[i] = c; |
| 152 | i++; |
| 153 | c = fgetwc(source); |
| 154 | goto M1; |
| 155 | } |
| 156 | if (c == L'=') { |
| 157 | c = fgetwc(source); |
| 158 | goto S1SS1; |
| 159 | } |
| 160 | if (c == L'>') { |
| 161 | c = fgetwc(source); |
| 162 | goto MC1; |
| 163 | } |
| 164 | if (c == WEOF) { |
| 165 | goto FIN; |
| 166 | } |
| 167 | goto error; |
| 168 | |
| 169 | initLV1LV2: |
| 170 | if (isSeparator()) { |
| 171 | c = fgetwc(source); |
| 172 | goto initLV1LV2; |
| 173 | } |
| 174 | if (istAlpha()) { |
| 175 | goto NPARA; |
| 176 | } |
| 177 | if (c == L'>') { |
| 178 | c = fgetwc(source); |
| 179 | goto MC1; |
| 180 | } |
| 181 | if (c == L'=') { |
| 182 | c = fgetwc(source); |
| 183 | goto S1SS1; |
| 184 | } |
| 185 | if (c == WEOF) { |
| 186 | goto FIN; |
| 187 | } |
| 188 | goto error; |
| 189 | |
| 190 | NPARA: |
| 191 | tokenType = NPARA; |
| 192 | return EXIT_SUCCESS; |
| 193 | |
| 194 | MOT: |
| 195 | tokenType = MOT; |
| 196 | return EXIT_SUCCESS; |
| 197 | |
| 198 | MC2: |
| 199 | if (isSeparator() || c == WEOF) { |
| 200 | goto MOTCLE; |
| 201 | } |
| 202 | goto error; |
| 203 | |
| 204 | MOTCLE: |
| 205 | tokenType = MOTCLE; |
| 206 | return EXIT_SUCCESS; |
| 207 | |
| 208 | FIN: |
| 209 | tokenType = FIN; |
| 210 | return EXIT_SUCCESS; |
| 211 | |
| 212 | error: |
| 213 | tokenType = FIN; |
| 214 | return EXIT_FAILURE; |
| 215 | } |
| 216 | |
| 217 | int main() { |
| 218 | // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) : |
| 219 | source = fopen("test.txt", "r+"); |
| 220 | // Cree et ouvre un fichier target.html en lecture/ecriture |
| 221 | // avec suppression du contenu au prealable : |
| 222 | target = fopen("target.html", "w+"); |
| 223 | |
| 224 | if (source == NULL) { |
| 225 | printf("Impossible d'ouvrir le fichier source\n"); |
| 226 | return -1; |
| 227 | } |
| 228 | |
| 229 | if (target == NULL) { |
| 230 | printf("Impossible d'ouvrir le fichier target\n"); |
| 231 | return -1; |
| 232 | } |
| 233 | |
| 234 | c = fgetwc(source); // lecture du premier caractere |
| 235 | do { |
| 236 | int scanrt = scanner(); |
| 237 | if (scanrt == EXIT_FAILURE) { |
| 238 | wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value); |
| 239 | exit(EXIT_FAILURE); |
| 240 | } |
| 241 | if (tokenType == MOT || tokenType == MOTCLE) { |
| 242 | wprintf(L"%20s: %ls\n", tokenTypestr[tokenType], token[tokenFound].value); |
| 243 | } else { |
| 244 | wprintf(L"%20s\n", tokenTypestr[tokenType]); |
| 245 | } |
| 246 | token[tokenFound].type = tokenTypestr[tokenType]; |
| 247 | tokenFound++; |
| 248 | } while (tokenType != FIN); // tant que la fin du fichier n'est pas atteinte |
| 249 | |
| 250 | if (source != NULL) fclose(source); // fermeture du fichier source |
| 251 | if (target != NULL) fclose(target); // fermeture du fichier target |
| 252 | |
| 253 | return EXIT_SUCCESS; |
| 254 | } |