[TP_AL_C.git] / lexer / main.c

#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

/* Maximum number of tokens the global token table can hold. */
#define TOKEN_MAX 500
/* Capacity, in wide characters, of one token's text (terminator included). */
#define TOKEN_VALUE_MAX 50

/*
 * One lexical token.
 *  - type:  printable name of the token kind (one of the tokenTypestr entries).
 *  - value: text collected for the token. It is stored as wchar_t so that it
 *           can be handed directly to the wide string functions and to the
 *           "%ls" conversion, both of which require an array of wchar_t.
 */
struct token_s {
    const char* type;
    wchar_t value[TOKEN_VALUE_MAX];
};

/* Token table; zero-initialised, so every value starts as an empty string. */
struct token_s token[TOKEN_MAX] = {{NULL, {0}}};

/* Kinds of token the scanner reports; FIN marks the end of the input. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable name of each TokenType, indexed by the enum value (same order). */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };

/* Input stream the scanner reads from. */
FILE *source = NULL;
/* Output stream opened by main(). */
FILE *target = NULL;

/* Most recently read wide character. */
wint_t c;

/* Index in the token table of the token currently being built. */
unsigned int tokenFound = 0;

/* Next write position inside the value of the token currently being built. */
unsigned int i = 0;

/*
 * Tells whether the current character `c` may be part of a word.
 * Accepted characters are listed explicitly: ASCII letters, digits, the
 * punctuation marks . ? ! , ; : - ' #, and a fixed set of French accented
 * letters in both cases. Anything else (including WEOF) is rejected.
 */
bool istAlpha() {
    static const wchar_t wordChars[] =
        L"abcdefghijklmnopqrstuvwxyz"
        L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        L".?!,;:-'#"
        L"0123456789"
        L"àâçèéîôùû"
        L"ÀÂÇÈÉÎÔÙÛ";

    /* The scan stops at the terminator, so c == 0 never matches. */
    for (const wchar_t *p = wordChars; *p != L'\0'; p++) {
        if (c == *p) {
            return true;
        }
    }
    return false;
}

/* Tells whether the current character `c` is a tab, a space or a newline. */
bool isSeparator() {
    switch (c) {
    case L'\t':
    case L' ':
    case L'\n':
        return true;
    default:
        return false;
    }
}

/* True when the character currently in `c` was also appended to the token value. */
static bool scannerLookaheadStored = false;

/*
 * Reads the next wide character of `source` into `c` and appends it to the
 * value of the token being built. The character is only stored while the
 * token table and the value buffer have room; the last slot of the buffer is
 * never written so the value always stays L'\0'-terminated. WEOF is not stored.
 */
static void scannerAdvance(void) {
    const size_t tokenCapacity = sizeof token / sizeof token[0];
    const size_t valueCapacity = sizeof token[0].value / sizeof token[0].value[0];

    c = fgetwc(source);
    scannerLookaheadStored = false;
    if (c != WEOF && tokenFound < tokenCapacity && (size_t)i + 1 < valueCapacity) {
        token[tokenFound].value[i] = c;
        i++;
        scannerLookaheadStored = true;
    }
}

/*
 * Gives the lookahead character in `c` back to `source`, so that the next
 * token starts on it, and removes it from the current token value.
 * `c` itself is left untouched so the caller can still test it against WEOF.
 */
static void scannerUnread(void) {
    if (c == WEOF) {
        return; /* end of input: nothing to push back */
    }
    if (ungetwc(c, source) == WEOF) {
        return; /* push-back refused: keep the value as collected */
    }
    if (scannerLookaheadStored) {
        i--;
        token[tokenFound].value[i] = 0;
        scannerLookaheadStored = false;
    }
}

/*
 * Recognises one token, written as a goto-based finite automaton.
 *
 * On entry `c` holds the first character of the token (the caller has already
 * read it and stored it in the token value). Further characters are read from
 * `source` and appended to token[tokenFound].value.
 *
 * Recognised tokens:
 *   MOTCLE   '>' followed by exactly "Titre" or "Auteur", then a separator/EOF
 *   SECTION  '='  followed by a separator/EOF
 *   SSECTION '==' followed by a separator/EOF
 *   MOT      a run of istAlpha() characters followed by a separator/EOF
 *   NPARA    two newlines (blanks allowed in between) followed by a word character
 *   FIN      end of input
 *
 * The character that terminates a token is a lookahead: it is pushed back to
 * `source` and dropped from the value, so it is seen again by the next call.
 *
 * Returns 1 when a token was recognised (tokenType is set accordingly) and -1
 * when the input matches no transition of the automaton (tokenType is FIN).
 */
int scanner(void) {
    static const wchar_t Titre[] = L"Titre";
    static const wchar_t Auteur[] = L"Auteur";
    const wchar_t *keyword = NULL;
    unsigned int j = 0;

    scannerLookaheadStored = false;

/* Start state: skip blanks, then dispatch on the first significant character. */
init:
    if (c == L' ' || c == L'\t') {
        scannerAdvance();
        goto init;
    }
    if (c == L'\n') {
        scannerAdvance();
        goto initLV1;
    }
    if (c == L'>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == L'=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (c == WEOF) {
        goto acceptEnd;
    }
    goto error;

/* After '>': the whole keyword must match, character by character. */
MC1:
    /* The two keywords start with different letters, so the first one decides. */
    if (c == (wint_t)Titre[0]) {
        keyword = Titre;
    } else if (c == (wint_t)Auteur[0]) {
        keyword = Auteur;
    } else {
        goto error;
    }
    for (j = 0; keyword[j] != L'\0'; j++) {
        if (c != (wint_t)keyword[j]) {
            goto error; /* also taken on WEOF in the middle of a keyword */
        }
        scannerAdvance();
    }
    goto MC2;

/* After a complete keyword: it must end here. */
MC2:
    if (isSeparator() || c == WEOF) {
        goto acceptKeyword;
    }
    goto error;

/* After one '=': either a second '=' or the end of a section marker. */
S1SS1:
    if (c == L'=') {
        scannerAdvance();
        goto SS2;
    }
    if (isSeparator() || c == WEOF) {
        goto acceptSection;
    }
    goto error;

/* After '==': the sub-section marker must end here. */
SS2:
    if (isSeparator() || c == WEOF) {
        goto acceptSubsection;
    }
    goto error;

/* Inside a word. */
M1:
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (isSeparator() || c == WEOF) {
        goto acceptWord;
    }
    goto error;

/* One newline seen so far. */
initLV1:
    if (c == L' ' || c == L'\t') {
        scannerAdvance();
        goto initLV1;
    }
    if (c == L'\n') {
        scannerAdvance();
        goto initLV1LV2;
    }
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (c == L'=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (c == L'>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == WEOF) {
        goto acceptEnd;
    }
    goto error;

/* Two newlines seen: a word character now opens a new paragraph. */
initLV1LV2:
    if (isSeparator()) {
        scannerAdvance();
        goto initLV1LV2;
    }
    if (istAlpha()) {
        goto acceptParagraph;
    }
    if (c == L'>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == L'=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (c == WEOF) {
        goto acceptEnd;
    }
    goto error;

acceptSection:
    scannerUnread();
    tokenType = SECTION;
    return 1;

acceptSubsection:
    scannerUnread();
    tokenType = SSECTION;
    return 1;

acceptParagraph:
    /* The word character belongs to the next token, not to the break. */
    scannerUnread();
    tokenType = NPARA;
    return 1;

acceptWord:
    scannerUnread();
    tokenType = MOT;
    return 1;

acceptKeyword:
    scannerUnread();
    tokenType = MOTCLE;
    return 1;

acceptEnd:
    tokenType = FIN;
    return 1;

error:
    tokenType = FIN;
    return -1;
}

int main() {

    // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
    source = fopen("test.txt", "r");
    // Cree et ouvre un fichier target.html en lecture/ecriture
    // avec suppression du contenu au prealable :
    target = fopen("target.html", "w+");

    if (source == NULL) {
        printf("Impossible d'ouvrir le fichier source\n");
        return -1;
    }

    if (target == NULL) {
        printf("Impossible d'ouvrir le fichier target\n");
        return -1;
    }

    do {
        c = fgetwc(source); // lecture du caractere suivant du fichier source
        token[tokenFound].value[i] = c;
        i++;
        int scanrt = scanner();
        if (scanrt == -1) {
            wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
            exit(EXIT_FAILURE);
        }
        if (c != WEOF) {
            wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
        } else {
            wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]);
        }
        token[tokenFound].type = tokenTypestr[tokenType];
        tokenFound++;
        // reinit token.value array counter
        i = 0;
    //} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte
    } while (!feof(source)); // tant que la fin du fichier n'est pas atteinte

    if (source != NULL) fclose(source); // fermeture du fichier source
    if (target != NULL) fclose(target); // fermeture du fichier target

    return 0;
}
Commit	Line	Data
96964f3e JB	1	#include <stdlib.h>
	2	#include <stdio.h>
	3	#include <string.h>
4b580abd	4	#include <stdbool.h>
6a19b8fe	5	#include <wchar.h>
e9a51b68	6
6a19b8fe JB	7	#define TOKEN_MAX 500
	8
	9	struct token_s {
	10	const char* type;
	11	wint_t value[50];
	12	};
	13
	14	struct token_s token[TOKEN_MAX] = {NULL, 0};
dcfcd9ab JB	15
dcfcd9ab JB	16	FILE source = NULL, target = NULL;
6a19b8fe JB	17	wint_t c;
6a19b8fe JB	18	unsigned int tokenFound = 0;
4b580abd JB	19	enum TokenType {
	20	MOTCLE,
	21	SECTION,
	22	SSECTION,
	23	NPARA,
	24	MOT,
	25	FIN
	26	} tokenType;
	27	const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
6a19b8fe	28	unsigned int i = 0;
4b580abd	29
91e46777	30	/* It looks silly to check for each characters but for debugging, it's just the way to go */
4b580abd	31	bool istAlpha() {
6a19b8fe JB	32	if (c == L'a' \|\| c == L'b' \|\| c == L'c' \|\| c == L'd' \|\| c == L'e' \|\| c == L'f' \|\| c == L'g' \|\| \
	33	c == L'h' \|\| c == L'i' \|\| c == L'j' \|\| c == L'k' \|\| c == L'l' \|\| c == L'm' \|\| c == L'n' \|\| \
	34	c == L'o' \|\| c == L'p' \|\| c == L'q' \|\| c == L'r' \|\| c == L's' \|\| c == L't' \|\| c == L'u' \|\| \
	35	c == L'v' \|\| c == L'w' \|\| c == L'x' \|\| c == L'y' \|\| c == L'z' \|\| \
	36	c == L'A' \|\| c == L'B' \|\| c == L'C' \|\| c == L'D' \|\| c == L'E' \|\| c == L'F' \|\| c == L'G' \|\| \
	37	c == L'H' \|\| c == L'I' \|\| c == L'J' \|\| c == L'K' \|\| c == L'L' \|\| c == L'M' \|\| c == L'N' \|\| \
	38	c == L'O' \|\| c == L'P' \|\| c == L'Q' \|\| c == L'R' \|\| c == L'S' \|\| c == L'T' \|\| c == L'U' \|\| \
	39	c == L'V' \|\| c == L'W' \|\| c == L'X' \|\| c == L'Y' \|\| c == L'Z' \|\| \
	40	c == L'.' \|\| c == L'?' \|\| c == L'!' \|\| c == L',' \|\| c == L';' \|\| c == L':' \|\| c == L'-' \|\| \
	41	c == L'\''\|\| c == L'#' \|\| \
	42	c == L'0' \|\| c == L'1' \|\| c == L'2' \|\| c == L'3' \|\| c == L'4' \|\| c == L'5' \|\| c == L'6' \|\| \
	43	c == L'7' \|\| c == L'8' \|\| c == L'9' \|\| \
	44	c == L'à' \|\| c == L'â' \|\| c == L'ç' \|\| c == L'è' \|\| c == L'é' \|\| c == L'î' \|\| c == L'ô' \|\| \
	45	c == L'ù' \|\| c == L'û' \|\| \
	46	c == L'À' \|\| c == L'Â' \|\| c == L'Ç' \|\| c == L'È' \|\| c == L'É' \|\| c == L'Î' \|\| c == L'Ô' \|\| \
	47	c == L'Ù' \|\| c == L'Û') {
4b580abd JB	48	return true;
	49	}
	50	return false;
	51	}
	52
	53	bool isSeparator() {
6a19b8fe	54	if (c == L'\t' \|\| c == L' ' \|\| c == L'\n') {
4b580abd JB	55	return true;
	56	}
	57	return false;
	58	}
	59
	60	int scanner() {
6a19b8fe JB	61	const wchar_t* Titre = L"Titre";
6a19b8fe JB	62	const wchar_t* Auteur = L"Auteur";
da74c1e0	63	unsigned int j = 0;
4b580abd JB	64
	65	// The main loop get the next character
	66	init:
6a19b8fe JB	67	if (c == L' ' \|\| c == L'\t') {
	68	c = fgetwc(source);
	69	token[tokenFound].value[i] = c;
4b580abd JB	70	i++;
	71	goto init;
	72	}
6a19b8fe JB	73	if (c == L'\n') {
	74	c = fgetwc(source);
	75	token[tokenFound].value[i] = c;
fde9417f JB	76	i++;
	77	goto initLV1;
	78	}
6a19b8fe JB	79	if (c == L'>') {
	80	c = fgetwc(source);
	81	token[tokenFound].value[i] = c;
4b580abd JB	82	i++;
	83	goto MC1;
	84	}
6a19b8fe JB	85	if (c == L'=') {
	86	c = fgetwc(source);
	87	token[tokenFound].value[i] = c;
4b580abd JB	88	i++;
	89	goto S1SS1;
	90	}
	91	if (istAlpha()) {
6a19b8fe JB	92	c = fgetwc(source);
6a19b8fe JB	93	token[tokenFound].value[i] = c;
4b580abd JB	94	i++;
	95	goto M1;
	96	}
6a19b8fe	97	if (c == WEOF) {
4b580abd JB	98	goto FIN;
	99	} else {
	100	goto error;
	101	}
	102
	103	MC1:
dcfcd9ab	104	// FIXME: Partial match need a rewind in the characters extraction from the file
6a19b8fe JB	105	if (c == (wint_t)Titre[j] && j < wcslen(Titre) - 1) {
	106	c = fgetwc(source);
	107	token[tokenFound].value[i] = c;
4b580abd JB	108	i++;
	109	j++;
	110	goto MC1;
	111	}
6a19b8fe JB	112	if (c == (wint_t)Auteur[j] && j < wcslen(Auteur) - 1) {
	113	c = fgetwc(source);
	114	token[tokenFound].value[i] = c;
4b580abd JB	115	i++;
	116	j++;
	117	goto MC1;
	118	} else {
6a19b8fe JB	119	c = fgetwc(source);
6a19b8fe JB	120	token[tokenFound].value[i] = c;
4b580abd JB	121	i++;
	122	goto MC2;
	123	}
	124
	125	S1SS1:
6a19b8fe JB	126	if (c == L'=') {
	127	c = fgetwc(source);
	128	token[tokenFound].value[i] = c;
4b580abd JB	129	i++;
	130	goto SS2;
	131	}
6a19b8fe	132	if (isSeparator() \|\| c == WEOF) {
d3eb30ef JB	133	goto SECTION;
	134	}
	135
4b580abd	136	SS2:
6a19b8fe	137	if (isSeparator() \|\| c == WEOF) {
d3eb30ef	138	goto SSECTION;
4b580abd JB	139	}
	140
	141	SECTION:
	142	tokenType = SECTION;
	143	return 1;
	144
d3eb30ef JB	145	SSECTION:
	146	tokenType = SSECTION;
	147	return 1;
	148
4b580abd JB	149	M1:
4b580abd JB	150	if (istAlpha()) {
6a19b8fe JB	151	c = fgetwc(source);
6a19b8fe JB	152	token[tokenFound].value[i] = c;
4b580abd JB	153	i++;
	154	goto M1;
	155	}
6a19b8fe	156	if (isSeparator() \|\| c == WEOF) {
4b580abd JB	157	goto MOT;
	158	}
	159
	160	initLV1:
6a19b8fe JB	161	if (c == L' ' \|\| c == L'\t') {
	162	c = fgetwc(source);
	163	token[tokenFound].value[i] = c;
4b580abd JB	164	i++;
	165	goto initLV1;
	166	}
6a19b8fe JB	167	if (c == L'\n') {
	168	c = fgetwc(source);
	169	token[tokenFound].value[i] = c;
fde9417f JB	170	i++;
	171	goto initLV1LV2;
	172	}
4b580abd	173	if (istAlpha()) {
6a19b8fe JB	174	c = fgetwc(source);
6a19b8fe JB	175	token[tokenFound].value[i] = c;
4b580abd JB	176	i++;
	177	goto M1;
	178	}
6a19b8fe JB	179	if (c == L'=') {
	180	c = fgetwc(source);
	181	token[tokenFound].value[i] = c;
4b580abd JB	182	i++;
	183	goto S1SS1;
	184	}
6a19b8fe JB	185	if (c == L'>') {
	186	c = fgetwc(source);
	187	token[tokenFound].value[i] = c;
4b580abd JB	188	i++;
	189	goto MC1;
	190	}
6a19b8fe	191	if (c == WEOF) {
4b580abd JB	192	goto FIN;
	193	}
	194
	195	initLV1LV2:
	196	if (isSeparator()) {
6a19b8fe JB	197	c = fgetwc(source);
6a19b8fe JB	198	token[tokenFound].value[i] = c;
4b580abd JB	199	i++;
	200	goto initLV1LV2;
	201	}
fde9417f JB	202	if (istAlpha()) {
	203	goto NPARA;
	204	}
6a19b8fe JB	205	if (c == L'>') {
	206	c = fgetwc(source);
	207	token[tokenFound].value[i] = c;
4b580abd JB	208	i++;
	209	goto MC1;
	210	}
6a19b8fe JB	211	if (c == L'=') {
	212	c = fgetwc(source);
	213	token[tokenFound].value[i] = c;
4b580abd JB	214	i++;
	215	goto S1SS1;
	216	}
6a19b8fe	217	if (c == WEOF) {
d3eb30ef JB	218	goto FIN;
d3eb30ef JB	219	}
4b580abd JB	220
	221	NPARA:
	222	tokenType = NPARA;
	223	return 1;
	224
	225	MOT:
	226	tokenType = MOT;
	227	return 1;
	228
	229	MC2:
6a19b8fe	230	if (isSeparator() \|\| c == WEOF) {
4b580abd JB	231	goto MOTCLE;
	232	}
	233
	234	MOTCLE:
	235	tokenType = MOTCLE;
	236	return 1;
	237
	238	FIN:
	239	tokenType = FIN;
	240	return 1;
	241
	242	error:
	243	tokenType = FIN;
	244	return -1;
	245	}
96964f3e	246
6a19b8fe	247	int main() {
96964f3e JB	248
	249	// Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
	250	source = fopen("test.txt", "r");
	251	// Cree et ouvre un fichier target.html en lecture/ecriture
	252	// avec suppression du contenu au prealable :
	253	target = fopen("target.html", "w+");
	254
	255	if (source == NULL) {
	256	printf("Impossible d'ouvrir le fichier source\n");
	257	return -1;
	258	}
	259
	260	if (target == NULL) {
	261	printf("Impossible d'ouvrir le fichier target\n");
	262	return -1;
	263	}
	264
4b580abd	265	do {
6a19b8fe JB	266	c = fgetwc(source); // lecture du caractere suivant du fichier source
6a19b8fe JB	267	token[tokenFound].value[i] = c;
4b580abd JB	268	i++;
	269	int scanrt = scanner();
	270	if (scanrt == -1) {
6a19b8fe	271	wprintf(L"Scanner error with token value: %ls\n", token[tokenFound].value);
4b580abd JB	272	exit(EXIT_FAILURE);
4b580abd JB	273	}
6a19b8fe JB	274	if (c != WEOF) {
6a19b8fe JB	275	wprintf(L"Token type found: %s with value: %ls\n", tokenTypestr[tokenType], token[tokenFound].value);
4b580abd	276	} else {
6a19b8fe	277	wprintf(L"Token type found: %s\n", tokenTypestr[tokenType]);
4b580abd	278	}
6a19b8fe	279	token[tokenFound].type = tokenTypestr[tokenType];
28280a4c	280	tokenFound++;
6a19b8fe	281	// reinit token.value array counter
4b580abd	282	i = 0;
6a19b8fe JB	283	//} while (c != WEOF); // tant que la fin du fichier n'est pas atteinte
6a19b8fe JB	284	} while (!feof(source)); // tant que la fin du fichier n'est pas atteinte
96964f3e JB	285
	286	if (source != NULL) fclose(source); // fermeture du fichier source
	287	if (target != NULL) fclose(target); // fermeture du fichier target
	288
	289	return 0;
	290	}