[TP_AL_C.git] / lexer / main.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/* Input stream read by the scanner, and the output stream opened next to it. */
FILE *source = NULL, *target = NULL;

/*
 * Current lookahead character.
 * Declared int rather than char because it receives the result of fgetc():
 * only an int can hold every byte value AND the distinct EOF marker. With a
 * plain char, byte 0xFF compares equal to EOF where char is signed, and EOF
 * can never be detected where char is unsigned.
 */
int c;

/* Number of characters currently stored in tokenValue. */
unsigned int i = 0;

/* Text of the token being scanned (file-scope, hence zero-initialised). */
char tokenValue[50];

/* Kinds of token the scanner can report; tokenType holds the latest one. */
enum TokenType {
    MOTCLE,
    SECTION,
    SSECTION,
    NPARA,
    MOT,
    FIN
} tokenType;

/* Printable names, indexed by enum TokenType (keep both in the same order). */
const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };

/*
 * Tell whether the current lookahead character `c` is a "word" character:
 * an unaccented letter, a decimal digit, or one of . ? ! , ; : - ' #
 * (The letter tests use ranges, which assumes an ASCII-like character set
 * where a-z and A-Z are contiguous.)
 */
bool istAlpha() {
    const bool lower = (c >= 'a' && c <= 'z');
    const bool upper = (c >= 'A' && c <= 'Z');
    const bool digit = (c >= '0' && c <= '9');

    if (lower || upper || digit) {
        return true;
    }

    /* Punctuation that is allowed to be part of a word. */
    switch (c) {
    case '.':
    case '?':
    case '!':
    case ',':
    case ';':
    case ':':
    case '-':
    case '\'':
    case '#':
        return true;
    default:
        return false;
    }
}

/* Tell whether the current lookahead character `c` is a space, a tab or a newline. */
bool isSeparator() {
    return c == ' ' || c == '\t' || c == '\n';
}

/*
 * Consume one character: read the next byte of `source` into the global
 * lookahead `c` and append it to `tokenValue`.
 *
 * Nothing is appended at end of input, nor once the buffer already holds
 * sizeof(tokenValue) - 1 characters: an over-long token is truncated instead
 * of being written past the end of the array, and the last byte of the
 * buffer is never overwritten by the scanner.
 */
static void scannerAdvance(void) {
    c = fgetc(source);
    if (c != EOF && i < sizeof(tokenValue) - 1) {
        tokenValue[i] = (char)c;
        i++;
    }
}

/*
 * Recognise one token, starting from the global lookahead `c`.
 *
 * On entry `c` already holds the first character to examine. The function is
 * a goto-driven state machine (one label per state) that reads further
 * characters with scannerAdvance() and records its verdict in the global
 * `tokenType`:
 *
 *   MOTCLE    '>' immediately followed by exactly "Titre" or "Auteur"
 *   SECTION   "="
 *   SSECTION  "=="
 *   MOT       a run of istAlpha() characters
 *   NPARA     newline, optional spaces/tabs, newline, optional separators,
 *             then a word character
 *   FIN       end of input
 *
 * MOTCLE, SECTION, SSECTION and MOT must be followed by a separator or by
 * end of input; that terminating character is left in `c`. Any character a
 * state does not expect leads to the error state instead of silently falling
 * through into whichever label happens to come next in the source.
 *
 * Returns 1 when a token was recognised, -1 on an unexpected character
 * (tokenType is then set to FIN).
 */
int scanner(void) {
    static const char Titre[] = "Titre";
    static const char Auteur[] = "Auteur";
    /* Keyword lengths computed once instead of calling strlen() per character. */
    const size_t titreLen = sizeof(Titre) - 1;
    const size_t auteurLen = sizeof(Auteur) - 1;
    size_t j = 0;             /* number of keyword characters matched so far */
    bool maybeTitre = true;   /* input read so far is still a prefix of "Titre" */
    bool maybeAuteur = true;  /* input read so far is still a prefix of "Auteur" */

/* Start state: skip leading blanks, then dispatch on the first character. */
init:
    if (c == ' ' || c == '\t') {
        scannerAdvance();
        goto init;
    }
    if (c == '>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == '=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (c == '\n') {
        scannerAdvance();
        goto initLV1;
    }
    if (c == EOF) {
        goto FIN;
    }
    goto error;

/* After '>': match the input against both keywords in parallel. */
MC1:
    {
        const bool stillTitre = maybeTitre && j < titreLen && c == Titre[j];
        const bool stillAuteur = maybeAuteur && j < auteurLen && c == Auteur[j];

        if (stillTitre || stillAuteur) {
            maybeTitre = stillTitre;
            maybeAuteur = stillAuteur;
            scannerAdvance();
            j++;
            goto MC1;
        }
    }
    /* Only a completely matched keyword is accepted, not a mere prefix. */
    if ((maybeTitre && j == titreLen) || (maybeAuteur && j == auteurLen)) {
        goto MC2;
    }
    goto error;

/* A complete keyword must be followed by a separator or end of input. */
MC2:
    if (isSeparator() || c == EOF) {
        goto MOTCLE;
    }
    goto error;

/* After one '=': a second '=' makes a sub-section, a separator ends a section. */
S1SS1:
    if (c == '=') {
        scannerAdvance();
        goto SS2;
    }
    if (isSeparator() || c == EOF) {
        goto SECTION;
    }
    goto error;

/* After "==". */
SS2:
    if (isSeparator() || c == EOF) {
        goto SSECTION;
    }
    goto error;

/* Inside a word. */
M1:
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (isSeparator() || c == EOF) {
        goto MOT;
    }
    goto error;

/*
 * After one newline. Blanks are skipped here; a newline must NOT be skipped,
 * otherwise the '\n' transition to initLV1LV2 below could never be taken.
 */
initLV1:
    if (c == ' ' || c == '\t') {
        scannerAdvance();
        goto initLV1;
    }
    if (istAlpha()) {
        scannerAdvance();
        goto M1;
    }
    if (c == '=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (c == '>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == '\n') {
        scannerAdvance();
        goto initLV1LV2;
    }
    if (c == EOF) {
        goto FIN;
    }
    goto error;

/* After a second newline: skip every separator, then look at what follows. */
initLV1LV2:
    if (isSeparator()) {
        scannerAdvance();
        goto initLV1LV2;
    }
    if (c == '>') {
        scannerAdvance();
        goto MC1;
    }
    if (c == '=') {
        scannerAdvance();
        goto S1SS1;
    }
    if (istAlpha()) {
        goto NPARA;
    }
    if (c == EOF) {
        goto FIN;
    }
    goto error;

NPARA:
    /*
     * `c` is the first character of the new paragraph's first word. The
     * driver loop reads a fresh character before each scanner() call, so
     * hand this one back to the stream; otherwise it would be lost.
     */
    ungetc(c, source);
    tokenType = NPARA;
    return 1;

MOT:
    tokenType = MOT;
    return 1;

MOTCLE:
    tokenType = MOTCLE;
    return 1;

SECTION:
    tokenType = SECTION;
    return 1;

SSECTION:
    tokenType = SSECTION;
    return 1;

FIN:
    tokenType = FIN;
    return 1;

error:
    tokenType = FIN;
    return -1;
}

/*
 * Program entry point: scan test.txt token by token and print every token
 * found on standard output.
 *
 * Returns EXIT_SUCCESS when the whole input was scanned, EXIT_FAILURE when a
 * file could not be opened or the scanner reported an error.
 */
int main (int argc, char const *argv[]) {
    int status = EXIT_SUCCESS;

    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* Open test.txt read-only (the file must exist). It is checked before
       target.html is touched, so a missing input does not truncate it. */
    source = fopen("test.txt", "r");
    if (source == NULL) {
        printf("Impossible d'ouvrir le fichier source\n");
        return EXIT_FAILURE;
    }

    /* Create target.html, or empty it if it exists, open for reading and writing. */
    target = fopen("target.html", "w+");
    if (target == NULL) {
        printf("Impossible d'ouvrir le fichier target\n");
        fclose(source); /* do not leak the stream that did open */
        return EXIT_FAILURE;
    }

    do {
        /* Read the first character of the next token; scanner() continues from it. */
        c = fgetc(source);
        tokenValue[i] = c;
        i++;

        if (scanner() == -1) {
            printf ("Scanner error with token value: %s\n", tokenValue);
            status = EXIT_FAILURE;
            break; /* leave through the common cleanup below */
        }
        if (c != EOF) {
            printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
        } else {
            printf ("Token type found: %s\n", tokenTypestr[tokenType]);
        }

        /* Reset the token buffer for the next token. */
        i = 0;
        memset(tokenValue, 0, sizeof(tokenValue));
    } while (c != EOF); /* until the end of the input is reached */

    fclose(source);
    fclose(target);

    return status;
}
Commit	Line	Data
	1	#include <stdlib.h>
	2	#include <stdio.h>
	3	#include <string.h>
	4	#include <stdbool.h>
	5
	6	FILE *source, *target = NULL;
	7	char c;
	8	unsigned int i = 0;
	9	char tokenValue[50];
	10	enum TokenType {
	11	MOTCLE,
	12	SECTION,
	13	SSECTION,
	14	NPARA,
	15	MOT,
	16	FIN
	17	} tokenType;
	18	const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
	19
	20	/* This looks silly to check for each characters but for debugging, it's just the way to go */
	21	bool istAlpha() {
	22	if (c == 'a' \|\| c == 'b' \|\| c == 'c' \|\| c == 'd' \|\| c == 'e' \|\| c == 'f' \|\| c == 'g' \|\| \
	23	c == 'h' \|\| c == 'i' \|\| c == 'j' \|\| c == 'k' \|\| c == 'l' \|\| c == 'm' \|\| c == 'n' \|\| \
	24	c == 'o' \|\| c == 'p' \|\| c == 'q' \|\| c == 'r' \|\| c == 's' \|\| c == 't' \|\| c == 'u' \|\| \
	25	c == 'v' \|\| c == 'w' \|\| c == 'x' \|\| c == 'y' \|\| c == 'z' \|\| \
	26	c == 'A' \|\| c == 'B' \|\| c == 'C' \|\| c == 'D' \|\| c == 'E' \|\| c == 'F' \|\| c == 'G' \|\| \
	27	c == 'H' \|\| c == 'I' \|\| c == 'J' \|\| c == 'K' \|\| c == 'L' \|\| c == 'M' \|\| c == 'N' \|\| \
	28	c == 'O' \|\| c == 'P' \|\| c == 'Q' \|\| c == 'R' \|\| c == 'S' \|\| c == 'T' \|\| c == 'U' \|\| \
	29	c == 'V' \|\| c == 'W' \|\| c == 'X' \|\| c == 'Y' \|\| c == 'Z' \|\| \
	30	c == '.' \|\| c == '?' \|\| c == '!' \|\| c == ',' \|\| c == ';' \|\| c == ':' \|\| c == '-' \|\| \
	31	c == '\''\|\| c == '#' \|\| \
	32	c == '0' \|\| c == '1' \|\| c == '2' \|\| c == '3' \|\| c == '4' \|\| c == '5' \|\| c == '6' \|\| \
	33	c == '7' \|\| c == '8' \|\| c == '9') {
	34	return true;
	35	}
	36	return false;
	37	}
	38
	39	bool isSeparator() {
	40	if (c == '\t' \|\| c == ' ' \|\| c == '\n') {
	41	return true;
	42	}
	43	return false;
	44	}
	45
	46	int scanner() {
	47	const char* Titre = "Titre";
	48	const char* Auteur = "Auteur";
	49	unsigned int j = 0;
	50
	51	// The main loop get the next character
	52	init:
	53	if (c == ' ' \|\| c == '\t') {
	54	c = fgetc(source);
	55	tokenValue[i] = c;
	56	i++;
	57	goto init;
	58	}
	59	if (c == '>') {
	60	c = fgetc(source);
	61	tokenValue[i] = c;
	62	i++;
	63	goto MC1;
	64	}
	65	if (c == '=') {
	66	c = fgetc(source);
	67	tokenValue[i] = c;
	68	i++;
	69	goto S1SS1;
	70	}
	71	if (istAlpha()) {
	72	c = fgetc(source);
	73	tokenValue[i] = c;
	74	i++;
	75	goto M1;
	76	}
	77	if (c == '\n') {
	78	c = fgetc(source);
	79	tokenValue[i] = c;
	80	i++;
	81	goto initLV1;
	82	}
	83	if (c == EOF) {
	84	goto FIN;
	85	} else {
	86	goto error;
	87	}
	88
	89	MC1:
	90	if (c == Titre[j] && j < strlen(Titre) - 1) {
	91	c = fgetc(source);
	92	tokenValue[i] = c;
	93	i++;
	94	j++;
	95	goto MC1;
	96	}
	97	if (c == Auteur[j] && j < strlen(Auteur) - 1) {
	98	c = fgetc(source);
	99	tokenValue[i] = c;
	100	i++;
	101	j++;
	102	goto MC1;
	103	} else {
	104	c = fgetc(source);
	105	tokenValue[i] = c;
	106	i++;
	107	goto MC2;
	108	}
	109
	110	S1SS1:
	111	if (c == '=') {
	112	c = fgetc(source);
	113	tokenValue[i] = c;
	114	i++;
	115	goto SS2;
	116	}
	117	if (isSeparator() \|\| c == EOF) {
	118	goto SECTION;
	119	}
	120
	121
	122	SS2:
	123	if (isSeparator() \|\| c == EOF) {
	124	goto SSECTION;
	125	}
	126
	127	SECTION:
	128	tokenType = SECTION;
	129	return 1;
	130
	131	SSECTION:
	132	tokenType = SSECTION;
	133	return 1;
	134
	135	M1:
	136	if (istAlpha()) {
	137	c = fgetc(source);
	138	tokenValue[i] = c;
	139	i++;
	140	goto M1;
	141	}
	142	if (isSeparator() \|\| c == EOF) {
	143	goto MOT;
	144	}
	145
	146	initLV1:
	147	if (c == '\n' \|\| c == '\t') {
	148	c = fgetc(source);
	149	tokenValue[i] = c;
	150	i++;
	151	goto initLV1;
	152	}
	153	if (istAlpha()) {
	154	c = fgetc(source);
	155	tokenValue[i] = c;
	156	i++;
	157	goto M1;
	158	}
	159	if (c == '=') {
	160	c = fgetc(source);
	161	tokenValue[i] = c;
	162	i++;
	163	goto S1SS1;
	164	}
	165	if (c == '>') {
	166	c = fgetc(source);
	167	tokenValue[i] = c;
	168	i++;
	169	goto MC1;
	170	}
	171	if (c == '\n') {
	172	c = fgetc(source);
	173	tokenValue[i] = c;
	174	i++;
	175	goto initLV1LV2;
	176	}
	177	if (c == EOF) {
	178	goto FIN;
	179	}
	180
	181	initLV1LV2:
	182	if (isSeparator()) {
	183	c = fgetc(source);
	184	tokenValue[i] = c;
	185	i++;
	186	goto initLV1LV2;
	187	}
	188	if (c == '>') {
	189	c = fgetc(source);
	190	tokenValue[i] = c;
	191	i++;
	192	goto MC1;
	193	}
	194	if (c == '=') {
	195	c = fgetc(source);
	196	tokenValue[i] = c;
	197	i++;
	198	goto S1SS1;
	199	}
	200	if (istAlpha()) {
	201	goto NPARA;
	202	}
	203	if (c == EOF) {
	204	goto FIN;
	205	}
	206
	207	NPARA:
	208	tokenType = NPARA;
	209	return 1;
	210
	211	MOT:
	212	tokenType = MOT;
	213	return 1;
	214
	215	MC2:
	216	if (isSeparator() \|\| c == EOF) {
	217	goto MOTCLE;
	218	}
	219
	220	MOTCLE:
	221	tokenType = MOTCLE;
	222	return 1;
	223
	224	FIN:
	225	tokenType = FIN;
	226	return 1;
	227
	228	error:
	229	tokenType = FIN;
	230	return -1;
	231	}
	232
	233	int main (int argc, char const *argv[]) {
	234
	235	// Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
	236	source = fopen("test.txt", "r");
	237	// Cree et ouvre un fichier target.html en lecture/ecriture
	238	// avec suppression du contenu au prealable :
	239	target = fopen("target.html", "w+");
	240
	241	if (source == NULL) {
	242	printf("Impossible d'ouvrir le fichier source\n");
	243	return -1;
	244	}
	245
	246	if (target == NULL) {
	247	printf("Impossible d'ouvrir le fichier target\n");
	248	return -1;
	249	}
	250
	251	do {
	252	c = fgetc(source); // lecture du caractere suivant du fichier source
	253	//fputc(c, target); // ecrire c dans le fichier target
	254	tokenValue[i] = c;
	255	i++;
	256	int scanrt = scanner();
	257	if (scanrt == -1) {
	258	printf ("Scanner error with token value: %s\n", tokenValue);
	259	exit(EXIT_FAILURE);
	260	}
	261	if (c != EOF) {
	262	printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
	263	} else {
	264	printf ("Token type found: %s\n", tokenTypestr[tokenType]);
	265	}
	266	// reinit
	267	i = 0;
	268	memset(tokenValue, 0, sizeof(tokenValue));
	269	} while (c != EOF); // tant que la fin du fichier n'est pas atteinte
	270
	271	if (source != NULL) fclose(source); // fermeture du fichier source
	272	if (target != NULL) fclose(target); // fermeture du fichier target
	273
	274	return 0;
	275	}