#include <stdio.h>
#include <string.h>
#include <stdbool.h>
+#include <wchar.h>
-#define TOKEN_MAX_LENGTH 50
-#define TOKEN_LIST_MAX 500
+#define TOKEN_MAX 500
+
/*
 * One scanned token.
 *   type  - name of the token type; main() stores a tokenTypestr[] entry here.
 *   value - characters read while scanning the token, fixed capacity of 50.
 * NOTE(review): value is an array of wint_t but main() prints it with %ls,
 * which expects a wchar_t string - confirm the two types are compatible on
 * the target platform.
 */
+struct token_s {
+ const char* type;
+ wint_t value[50];
+};
+
/*
 * File-scope scanner state.
 *   token      - table of scanned tokens, indexed by tokenFound. The
 *                initializer names only the first members of token[0]; the
 *                remaining members and elements are zero-initialized.
 *   source     - input stream; main() opens "test.txt" into it.
 *   target     - closed at the end of main() when non-NULL; no visible code
 *                opens it.
 *   c          - character currently being examined; read by istAlpha(),
 *                isSeparator() and scanner().
 *   tokenFound - index of the token being filled; main() increments it after
 *                each scanner() call.
 */
+struct token_s token[TOKEN_MAX] = {NULL, 0};
FILE *source = NULL, *target = NULL;
-char c;
-unsigned int i = 0;
-char tokenValue[TOKEN_MAX_LENGTH];
+wint_t c;
+unsigned int tokenFound = 0;
/*
 * Token categories; tokenType is used by main() as an index into
 * tokenTypestr[].
 * NOTE(review): only three enumerators are visible here while tokenTypestr[]
 * has six entries; this excerpt may omit lines - confirm that the enumerator
 * order matches the order of tokenTypestr[].
 */
enum TokenType {
MOTCLE,
SECTION,
FIN
} tokenType;
/* Printable token type names; main() indexes this table with tokenType. */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
-const char* tokenList[TOKEN_LIST_MAX];
/* Write position inside the current token's value array; main() resets it to 0 after each token. */
+unsigned int i = 0;
/*
 * Returns true when the global character c is accepted as part of a word:
 * an ASCII letter or digit, one of the punctuation marks . ? ! , ; : - ' #,
 * or (wide-character version only) one of the accented letters
 * à â ç è é î ô ù û and their uppercase forms. Returns false otherwise.
 *
 * Testing every character explicitly looks silly, but it keeps the function
 * easy to debug.
 *
 * NOTE(review): other accented letters such as ê, ë, ï and ü are not in the
 * list - confirm that this is intended.
 */
bool istAlpha() {
- if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
- c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
- c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
- c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
- c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
- c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
- c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
- c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
- c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
- c == '\''|| c == '#' || \
- c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
- c == '7' || c == '8' || c == '9') {
+ if (c == L'a' || c == L'b' || c == L'c' || c == L'd' || c == L'e' || c == L'f' || c == L'g' || \
+ c == L'h' || c == L'i' || c == L'j' || c == L'k' || c == L'l' || c == L'm' || c == L'n' || \
+ c == L'o' || c == L'p' || c == L'q' || c == L'r' || c == L's' || c == L't' || c == L'u' || \
+ c == L'v' || c == L'w' || c == L'x' || c == L'y' || c == L'z' || \
+ c == L'A' || c == L'B' || c == L'C' || c == L'D' || c == L'E' || c == L'F' || c == L'G' || \
+ c == L'H' || c == L'I' || c == L'J' || c == L'K' || c == L'L' || c == L'M' || c == L'N' || \
+ c == L'O' || c == L'P' || c == L'Q' || c == L'R' || c == L'S' || c == L'T' || c == L'U' || \
+ c == L'V' || c == L'W' || c == L'X' || c == L'Y' || c == L'Z' || \
+ c == L'.' || c == L'?' || c == L'!' || c == L',' || c == L';' || c == L':' || c == L'-' || \
+ c == L'\''|| c == L'#' || \
+ c == L'0' || c == L'1' || c == L'2' || c == L'3' || c == L'4' || c == L'5' || c == L'6' || \
+ c == L'7' || c == L'8' || c == L'9' || \
+ c == L'à' || c == L'â' || c == L'ç' || c == L'è' || c == L'é' || c == L'î' || c == L'ô' || \
+ c == L'ù' || c == L'û' || \
+ c == L'À' || c == L'Â' || c == L'Ç' || c == L'È' || c == L'É' || c == L'Î' || c == L'Ô' || \
+ c == L'Ù' || c == L'Û') {
return true;
}
return false;
}
/* Returns true when the global character c is a tab, a space or a newline; false otherwise. */
bool isSeparator() {
- if (c == '\t' || c == ' ' || c == '\n') {
+ if (c == L'\t' || c == L' ' || c == L'\n') {
return true;
}
return false;
}
/*
 * Scans one token starting from the character already held in the global c.
 *
 * Written as a goto-driven state machine: each label below is a state. Every
 * transition that consumes input reads the next character from source into c,
 * stores it at value[i] of the current token and increments i (whitespace
 * characters are stored as well).
 *
 * Visible return values: 1 when initLV1LV2 meets an unexpected character and
 * -1 when MC2 does; main() treats -1 as a scanner error.
 *
 * NOTE(review): the labels FIN, error, MOT, SECTION, SSECTION, NPARA and
 * MOTCLE are jumped to but are not defined in the visible lines, so what they
 * do and return cannot be documented from here.
 * NOTE(review): i is never checked against the capacity of the value array
 * (50 elements), so a long token would write past its end - confirm.
 */
int scanner() {
- const char* Titre = "Titre";
- const char* Auteur = "Auteur";
+ const wchar_t* Titre = L"Titre";
+ const wchar_t* Auteur = L"Auteur";
unsigned int j = 0;
// main() has already read the first character of the token into c.
// init: start state. Skips spaces and tabs, then dispatches on c:
// newline -> initLV1, '>' -> MC1, '=' -> S1SS1, word character -> M1,
// end of file -> FIN, anything else -> error.
init:
- if (c == ' ' || c == '\t') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L' ' || c == L'\t') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto init;
}
- if (c == '\n') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'\n') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto initLV1;
}
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'>') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto MC1;
}
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'=') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto S1SS1;
}
if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto M1;
}
- if (c == EOF) {
+ if (c == WEOF) {
goto FIN;
} else {
goto error;
// NOTE(review): the closing brace of the else above is not visible here;
// lines appear to be missing from this excerpt between this point and MC1.
// MC1: reached after '>'. Compares c with position j of "Titre" and then of
// "Auteur". Because of the "- 1" in both conditions, the last character of a
// keyword is not consumed by the matching branches: it, like any
// non-matching character, is consumed by the final else, which moves to MC2.
// NOTE(review): each character may match either keyword at position j, so a
// mix of the two keywords is also accepted - confirm whether that is intended.
MC1:
// FIXME: a partial keyword match needs to rewind the characters already read from the file
- if (c == Titre[j] && j < strlen(Titre) - 1) {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == (wint_t)Titre[j] && j < wcslen(Titre) - 1) {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
j++;
goto MC1;
}
- if (c == Auteur[j] && j < strlen(Auteur) - 1) {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == (wint_t)Auteur[j] && j < wcslen(Auteur) - 1) {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
j++;
goto MC1;
} else {
- c = fgetc(source);
- tokenValue[i] = c;
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto MC2;
}
// S1SS1: reached after one '='. A second '=' leads to SS2; a separator or
// end of file leads to SECTION. Any other character falls through into SS2.
S1SS1:
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'=') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto SS2;
}
- if (isSeparator() || c == EOF) {
+ if (isSeparator() || c == WEOF) {
goto SECTION;
}
// SS2: reached after "==". A separator or end of file leads to SSECTION;
// any other character falls through into M1.
SS2:
- if (isSeparator() || c == EOF) {
+ if (isSeparator() || c == WEOF) {
goto SSECTION;
}
// M1: inside a word. Consumes word characters; a separator or end of file
// leads to MOT. Any other character falls through into initLV1.
M1:
if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto M1;
}
- if (isSeparator() || c == EOF) {
+ if (isSeparator() || c == WEOF) {
goto MOT;
}
// initLV1: reached after one newline. Skips spaces and tabs; a second
// newline leads to initLV1LV2; otherwise dispatches like init. A character
// matching none of the tests falls through into initLV1LV2.
initLV1:
- if (c == ' ' || c == '\t') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L' ' || c == L'\t') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto initLV1;
}
- if (c == '\n') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'\n') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto initLV1LV2;
}
if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto M1;
}
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'=') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto S1SS1;
}
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'>') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto MC1;
}
- if (c == EOF) {
+ if (c == WEOF) {
goto FIN;
}
// initLV1LV2: reached after two newlines. Skips all separators; a word
// character leads to NPARA without being consumed; '>' -> MC1, '=' -> S1SS1,
// end of file -> FIN. Any other character makes the function return 1.
initLV1LV2:
if (isSeparator()) {
- c = fgetc(source);
- tokenValue[i] = c;
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto initLV1LV2;
}
if (istAlpha()) {
goto NPARA;
}
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'>') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto MC1;
}
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
+ if (c == L'=') {
+ c = fgetwc(source);
+ token[tokenFound].value[i] = c;
i++;
goto S1SS1;
}
- if (c == EOF) {
+ if (c == WEOF) {
goto FIN;
}
return 1;
// MC2: reached from the else branch of MC1. A separator or end of file leads
// to MOTCLE; anything else makes the function return -1.
MC2:
- if (isSeparator() || c == EOF) {
+ if (isSeparator() || c == WEOF) {
goto MOTCLE;
}
return -1;
}
-int main (int argc, char const *argv[]) {
+int main() {
// Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
source = fopen("test.txt", "r");
return -1;
}
- int tokenFound = 0;
-
do {
- c = fgetc(source); // lecture du caractere suivant du fichier source
- tokenValue[i] = c;
+ c = fgetwc(source); // lecture du caractere suivant du fichier source
+ token[tokenFound].value[i] = c;
i++;
int scanrt = scanner();
if (scanrt == -1) {
- printf ("Scanner error with token value: %s\n", tokenValue);
+ wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
exit(EXIT_FAILURE);
}
- if (c != EOF) {
- printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
+ if (c != WEOF) {
+ wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
} else {
- printf ("Token type found: %s\n", tokenTypestr[tokenType]);
+ wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]);
}
- tokenList[tokenFound] = tokenTypestr[tokenType];
+ token[tokenFound].type = tokenTypestr[tokenType];
tokenFound++;
- // reinit tokenValue
+ // reinit token.value array counter
i = 0;
- memset(tokenValue, 0, sizeof(tokenValue));
- } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
+ //} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte
+ } while (!feof(source)); // tant que la fin du fichier n'est pas atteinte
if (source != NULL) fclose(source); // fermeture du fichier source
if (target != NULL) fclose(target); // fermeture du fichier target