#include <stdio.h>
#include <string.h>
#include <stdbool.h>
+#include <getopt.h>
-#define TOKEN_MAX_LENGTH 50
-#define TOKEN_LIST_MAX 500
+#include "global_vars.h"
+#include "print_helper.h"
+#include "lexical_analyzer.h"
+#include "syntactic_analyzer.h"
-FILE *source = NULL, *target = NULL;
-char c;
-unsigned int i = 0;
-char tokenValue[TOKEN_MAX_LENGTH];
-enum TokenType {
- MOTCLE,
- SECTION,
- SSECTION,
- NPARA,
- MOT,
- FIN
-} tokenType;
-const char* tokenTypestr[] = { "MOTCLE", "SECTION", "SSECTION", "NPARA", "MOT", "FIN" };
-const char* tokenList[TOKEN_LIST_MAX];
-
-/* It looks silly to check for each characters but for debugging, it's just the way to go */
-bool istAlpha() {
- if (c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' || c == 'g' || \
- c == 'h' || c == 'i' || c == 'j' || c == 'k' || c == 'l' || c == 'm' || c == 'n' || \
- c == 'o' || c == 'p' || c == 'q' || c == 'r' || c == 's' || c == 't' || c == 'u' || \
- c == 'v' || c == 'w' || c == 'x' || c == 'y' || c == 'z' || \
- c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F' || c == 'G' || \
- c == 'H' || c == 'I' || c == 'J' || c == 'K' || c == 'L' || c == 'M' || c == 'N' || \
- c == 'O' || c == 'P' || c == 'Q' || c == 'R' || c == 'S' || c == 'T' || c == 'U' || \
- c == 'V' || c == 'W' || c == 'X' || c == 'Y' || c == 'Z' || \
- c == '.' || c == '?' || c == '!' || c == ',' || c == ';' || c == ':' || c == '-' || \
- c == '\''|| c == '#' || \
- c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || \
- c == '7' || c == '8' || c == '9') {
- return true;
- }
- return false;
+/*
+ * Lexical-only mode.
+ *
+ * Reads the first character of `source` into the global `c`, then repeatedly
+ * calls scanner(), prints the current token to `target` with wprint_token()
+ * and stores the token type name in token[tokenFound].type.  The loop stops
+ * once the token whose type is FIN has been printed and recorded
+ * (FIN marks the end of the file according to the original comment).
+ *
+ * NOTE(review): scanner() presumably updates the global tokenType, and
+ * token[] is indexed without a bounds check -- confirm its capacity against
+ * the declaration in the project headers.
+ */
+static void do_lexical_analysis() {
+    c = fgetwc(source); /* prime the scanner with the first character */
+    for (;;) {
+        scanner();
+        wprint_token(target);
+        /* remember the type name of the token that was just printed */
+        token[tokenFound++].type = tokenTypestr[tokenType];
+        /* FIN itself is printed and recorded before leaving the loop */
+        if (tokenType == FIN) {
+            break;
+        }
+    }
}
-bool isSeparator() {
- if (c == '\t' || c == ' ' || c == '\n') {
- return true;
- }
- return false;
+/*
+ * Full (syntactic) mode.
+ *
+ * Writes the XHTML 1.0 Transitional doctype and the opening <html> tag to
+ * `target`, reads the first character of `source` into the global `c`, calls
+ * scanner() once and then analyze_AXIOME(), and finally writes the closing
+ * </html> tag.
+ */
+static void do_syntactic_analysis() {
+    /* Adjacent wide literals are concatenated into one prologue string. */
+    fputws(L"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
+           L"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+           L"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"fr_FR\" lang=\"fr_FR\">\n",
+           target);
+
+    /* Read the first character, then scan once before analyze_AXIOME(). */
+    c = fgetwc(source);
+    scanner();
+    analyze_AXIOME();
+
+    fputws(L"</html>\n", target);
}
-int scanner() {
- const char* Titre = "Titre";
- const char* Auteur = "Auteur";
- unsigned int j = 0;
+/**
+ * Print the command-line help on standard output and flush it.
+ *
+ * main() declares -i/--input and -o/--output with *optional* arguments, and
+ * getopt_long() only recognises an optional value when it is attached to the
+ * option: "-i<filename>" or "--input=<filename>".  The help text spells out
+ * exactly that syntax so users do not type a form that is silently ignored.
+ *
+ * @param name program name shown in the "Usage:" line (main() passes argv[0]).
+ */
+static void print_usage(const char* name) {
+    fprintf(stdout,
+            "Usage: %s [options]\n"
+            "Where [options] are:\n"
+            " -h, --help: display this help message\n"
+            " -l, --lexical-only: do only the lexical analysis\n"
+            " -i<filename>, --input=<filename>: use <filename> as input file instead of standard input\n"
+            " -o<filename>, --output=<filename>: use <filename> as output file instead of standard output\n",
+            name);
+    fflush(stdout);
+}
-// The main loop get the next character
-init:
- if (c == ' ' || c == '\t') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto init;
- }
- if (c == '\n') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto initLV1;
- }
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto MC1;
- }
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto S1SS1;
- }
- if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto M1;
- }
- if (c == EOF) {
- goto FIN;
- } else {
- goto error;
+/**
+ * Program entry point.
+ *
+ * Parses the command line with getopt_long(), then:
+ *   - if help was requested (or an invalid option was seen), prints the usage
+ *     text and exits WITHOUT opening any file, so the output file is never
+ *     truncated by a help request;
+ *   - otherwise binds `source` to the input file (read-only) or stdin and
+ *     `target` to the output file or stdout, and runs either the lexical-only
+ *     pass (-l) or the full syntactic pass.
+ *
+ * @param argc argument count
+ * @param argv argument vector; argv[0] is used in messages and usage text
+ * @return EXIT_SUCCESS on normal completion (including help),
+ *         EXIT_FAILURE when the input or output file cannot be opened.
+ *
+ * NOTE(review): the analysis passes read with fgetwc(); no setlocale() call is
+ * visible in this file, so non-ASCII input may fail to decode in the default
+ * "C" locale -- confirm whether the locale is set elsewhere.
+ */
+int main(int argc, char **argv) {
+    /* Input and output file names; NULL selects stdin / stdout. */
+    const char* in_file = NULL;
+    const char* out_file = NULL;
+    /* static: their addresses initialise the static long_options table */
+    static int hflag = 0;
+    static int lflag = 0;
+
+    /* getopt_long stores the option index here. */
+    int option_index = 0;
+
+    static struct option long_options[] =
+    {
+        {"help", no_argument, &hflag, 1},
+        {"input", optional_argument, NULL, 'i'},
+        {"lexical-only", no_argument, &lflag, 1},
+        {"output", optional_argument, NULL, 'o'},
+        {0, 0, 0, 0}
+    };
+
+    int c_in;
+
+    while ((c_in = getopt_long(argc, argv, "hi::lo::", long_options,
+                               &option_index)) != -1) {
+        switch (c_in) {
+        case 'h':
+            hflag = 1;
+            break;
+        case 'i':
+            /* optional argument: only set when attached (-iFILE, --input=FILE) */
+            if (optarg != NULL) {
+                in_file = optarg;
+            }
+            break;
+        case 'l':
+            lflag = 1;
+            break;
+        case 'o':
+            /* optional argument: only set when attached (-oFILE, --output=FILE) */
+            if (optarg != NULL) {
+                out_file = optarg;
+            }
+            break;
+        case 0:
+            /* getopt_long() set a variable, just keep going */
+            break;
+        case ':':
+            /* missing option argument (getopt only returns ':' when the
+             * option string starts with ':'; kept for completeness) */
+            pr_error("%s: option '-%c' requires an argument\n",
+                     argv[0], optopt);
+            break;
+        case '?':
+        default:
+            /* invalid option */
+            pr_error("%s: option '-%c' is invalid: ignored\n",
+                     argv[0], optopt);
+            /* print the help message for invalid options */
+            hflag = 1;
+            break;
+        }
}
-MC1:
- // FIXME: Partial match need a rewind in the characters extraction from the file
- if (c == Titre[j] && j < strlen(Titre) - 1) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- j++;
- goto MC1;
- }
- if (c == Auteur[j] && j < strlen(Auteur) - 1) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- j++;
- goto MC1;
+    /* Help needs no stream: answer it before any file is opened, so that
+     * the output file is not created/truncated just to show the usage. */
+    if (hflag) {
+        print_usage(argv[0]);
+        return EXIT_SUCCESS;
+    }
+    if (in_file != NULL) {
+        /* Open the source file read-only (the file must exist).  "r" rather
+         * than "r+": the input is never written, and "r+" would refuse
+         * files the user can read but not modify. */
+        source = fopen(in_file, "r");
+        if (source == NULL) {
+            pr_error("Fail to open file %s\n", in_file);
+            return EXIT_FAILURE;
+        }
} else {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto MC2;
+        source = stdin;
}
-S1SS1:
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto SS2;
- }
- if (isSeparator() || c == EOF) {
- goto SECTION;
- }
-
-SS2:
- if (isSeparator() || c == EOF) {
- goto SSECTION;
- }
-
-SECTION:
- tokenType = SECTION;
- return 1;
-
-SSECTION:
- tokenType = SSECTION;
- return 1;
-
-M1:
- if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto M1;
- }
- if (isSeparator() || c == EOF) {
- goto MOT;
- }
-
-initLV1:
- if (c == ' ' || c == '\t') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto initLV1;
- }
- if (c == '\n') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto initLV1LV2;
- }
- if (istAlpha()) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto M1;
- }
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto S1SS1;
- }
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto MC1;
- }
- if (c == EOF) {
- goto FIN;
- }
-
-initLV1LV2:
- if (isSeparator()) {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto initLV1LV2;
- }
- if (istAlpha()) {
- goto NPARA;
- }
- if (c == '>') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto MC1;
- }
- if (c == '=') {
- c = fgetc(source);
- tokenValue[i] = c;
- i++;
- goto S1SS1;
- }
- if (c == EOF) {
- goto FIN;
- }
-
-NPARA:
- tokenType = NPARA;
- return 1;
-
-MOT:
- tokenType = MOT;
- return 1;
-
-MC2:
- if (isSeparator() || c == EOF) {
- goto MOTCLE;
- }
-
-MOTCLE:
- tokenType = MOTCLE;
- return 1;
-
-FIN:
- tokenType = FIN;
- return 1;
-
-error:
- tokenType = FIN;
- return -1;
-}
-
-int main (int argc, char const *argv[]) {
-
- // Ouvre le fichier test.txt en lecture seulement (le fichier doit exister) :
- source = fopen("test.txt", "r");
- // Cree et ouvre un fichier target.html en lecture/ecriture
- // avec suppression du contenu au prealable :
- target = fopen("target.html", "w+");
-
- if (source == NULL) {
- printf("Impossible d'ouvrir le fichier source\n");
- return -1;
+    if (out_file != NULL) {
+        /* Create and open the target file for reading and writing,
+         * discarding any previous content. */
+        target = fopen(out_file, "w+");
+        if (target == NULL) {
+            pr_error("Fail to open file %s\n", out_file);
+            /* do not leave the already opened source stream behind */
+            fclose(source);
+            return EXIT_FAILURE;
+        }
+    } else {
+        target = stdout;
}
- if (target == NULL) {
- printf("Impossible d'ouvrir le fichier target\n");
- return -1;
+    /* Run the requested pass on the now-bound source/target streams. */
+    if (lflag) {
+        do_lexical_analysis();
+    } else {
+        do_syntactic_analysis();
}
- int tokenFound = 0;
-
- do {
- c = fgetc(source); // lecture du caractere suivant du fichier source
- tokenValue[i] = c;
- i++;
- int scanrt = scanner();
- if (scanrt == -1) {
- printf ("Scanner error with token value: %s\n", tokenValue);
- exit(EXIT_FAILURE);
- }
- if (c != EOF) {
- printf ("Token type found: %s with value: %s\n", tokenTypestr[tokenType], tokenValue);
- } else {
- printf ("Token type found: %s\n", tokenTypestr[tokenType]);
- }
- tokenFound++;
- tokenList[tokenFound] = tokenTypestr[tokenType];
- // reinit tokenValue
- i = 0;
- memset(tokenValue, 0, sizeof(tokenValue));
- } while (c != EOF); // tant que la fin du fichier n'est pas atteinte
-
if (source != NULL) fclose(source); // fermeture du fichier source
if (target != NULL) fclose(target); // fermeture du fichier target
- return 0;
+    return EXIT_SUCCESS;
}