diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index d4d58d2c7..4f4bba989 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -205,19 +205,9 @@ struct Dictionary_s Category * category; /* Word lists - indexed by category number */ bool generate_walls; /* Generate walls too for wildcard words */ - /* Private data elements that come in play only while file-backed - * dictionaries are being read, and are not otherwise used. - * TODO: These should be moved to a private structure, accessible - * only to the file backend. - */ - const char * input; - const char * pin; - bool recursive_error; - bool is_special; - int already_got_it; /* For char, but needs to hold EOF */ + /* File I/O cruft */ int line_number; char current_idiom[IDIOM_LINK_SZ]; - char token[MAX_TOKEN_LENGTH]; }; bool is_stem(const char *); diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 120b7a66b..9feedee2a 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -228,13 +228,8 @@ dictionary_six_str(const char * lang, /*align*/false, /*exact*/false); /* Read dictionary from the input string. */ - - dict->input = input; - dict->pin = dict->input; - if (!read_dictionary(dict)) - { + if (!read_dictionary(dict, input)) goto failure; - } if (NULL == affix_name) { diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 2daaa261f..5a315f33b 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -121,58 +121,54 @@ automatically generated (currently only for idioms). */ -static bool link_advance(Dictionary dict); - -void dict_error2(Dictionary dict, const char * s, const char *s2) +struct FileCursor_s +{ + Dictionary dict; + const char * input; + const char * pin; + bool recursive_error; + bool is_special; + int already_got_it; /* For char, but needs to hold EOF */ + char token[MAX_TOKEN_LENGTH]; +}; +typedef struct FileCursor_s * FileCursor; + +static bool link_advance(FileCursor); + +static void dict_error2(FileCursor fcurs, const char * s, const char *s2) { -#define ERRBUFLEN 1024 - char tokens[ERRBUFLEN], t[ERRBUFLEN]; - int pos = 1; - int i; - - if (IS_DYNAMIC_DICT(dict)) - { - if (s2) - { - prt_error("Error: While handling storage-node\n \"%s\":\n" - "%s \"%s\"\n", dict->name, s, s2); - } - else - { - prt_error("Error: While handling storage-node\n \"%s\":\n" - "%s\n", dict->name, s); - } - return; - } - /* The link_advance used to print the error message can * throw more errors while printing... */ - if (dict->recursive_error) return; - dict->recursive_error = true; + if (fcurs->recursive_error) return; + fcurs->recursive_error = true; + Dictionary dict = fcurs->dict; char token[MAX_TOKEN_LENGTH]; - strcpy(token, dict->token); - bool save_is_special = dict->is_special; - const char * save_input = dict->input; - const char * save_pin = dict->pin; - int save_already_got_it = dict->already_got_it; + strcpy(token, fcurs->token); + bool save_is_special = fcurs->is_special; + const char * save_input = fcurs->input; + const char * save_pin = fcurs->pin; + int save_already_got_it = fcurs->already_got_it; int save_line_number = dict->line_number; +#define ERRBUFLEN 1024 + char tokens[ERRBUFLEN], t[ERRBUFLEN]; + int pos = 1; tokens[0] = '\0'; - for (i=0; i<5 && dict->token[0] != '\0'; i++) + for (int i=0; i<5 && fcurs->token[0] != '\0'; i++) { - pos += snprintf(t, ERRBUFLEN, "\"%s\" ", dict->token); + pos += snprintf(t, ERRBUFLEN, "\"%s\" ", fcurs->token); strncat(tokens, t, ERRBUFLEN-1-pos); - if (!link_advance(dict)) break; + if (!link_advance(fcurs)) break; } tokens[pos] = '\0'; - strcpy(dict->token, token); - dict->is_special = save_is_special; - dict->input = save_input; - dict->pin = save_pin; - dict->already_got_it = save_already_got_it; - dict->line_number = save_line_number; + strcpy(fcurs->token, token); + fcurs->is_special = save_is_special; + fcurs->input = save_input; + fcurs->pin = save_pin; + fcurs->already_got_it = save_already_got_it; + dict->line_number = save_line_number; if (s2) { @@ -186,19 +182,19 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) "%s\n\t Line %d, next tokens: %s\n", dict->name, s, dict->line_number, tokens); } - dict->recursive_error = false; + fcurs->recursive_error = false; } -static void dict_error(Dictionary dict, const char * s) +static void dict_error(FileCursor fcurs, const char * s) { - dict_error2(dict, s, NULL); + dict_error2(fcurs, s, NULL); } -static void warning(Dictionary dict, const char * s) +static void warning(FileCursor fcurs, const char * s) { prt_error("Warning: %s\n" "\tline %d, current token = \"%s\"\n", - s, dict->line_number, dict->token); + s, fcurs->dict->line_number, fcurs->token); } /** @@ -208,18 +204,19 @@ static void warning(Dictionary dict, const char * s) */ #define MAXUTFLEN 7 typedef char utf8char[MAXUTFLEN]; -static bool get_character(Dictionary dict, int quote_mode, utf8char uc) +static bool get_character(FileCursor fcurs, int quote_mode, utf8char uc) { - int i = 0; + Dictionary dict = fcurs->dict; + int i = 0; while (1) { - char c = *(dict->pin++); + char c = *(fcurs->pin++); /* Skip over all comments */ if ((c == '%') && (!quote_mode)) { - while ((c != 0x0) && (c != '\n')) c = *(dict->pin++); + while ((c != 0x0) && (c != '\n')) c = *(fcurs->pin++); if (c == 0x0) break; dict->line_number++; continue; @@ -241,18 +238,18 @@ static bool get_character(Dictionary dict, int quote_mode, utf8char uc) i = 1; while (i < MAXUTFLEN-1) { - c = *(dict->pin++); + c = *(fcurs->pin++); /* If we're onto the next char, we're done. */ if (((c & 0x80) == 0x0) || ((c & 0xc0) == 0xc0)) { - dict->pin--; + fcurs->pin--; uc[i] = 0x0; return true; } uc[i] = c; i++; } - dict_error(dict, "UTF8 char is too long."); + dict_error(fcurs, "UTF8 char is too long."); return false; } uc[0] = 0x0; @@ -289,39 +286,37 @@ static bool char_is_special(char c) * Return 1 if a character was read, else return 0 (and print a warning). */ NO_SAN_DICT -static bool link_advance(Dictionary dict) +static bool link_advance(FileCursor fcurs) { - utf8char c; - int nr, i; bool quote_mode = false; + fcurs->is_special = false; - dict->is_special = false; - - if (dict->already_got_it != '\0') + if (fcurs->already_got_it != '\0') { - dict->is_special = char_is_special(dict->already_got_it); - if (dict->already_got_it == EOF) { - dict->token[0] = '\0'; + fcurs->is_special = char_is_special(fcurs->already_got_it); + if (fcurs->already_got_it == EOF) { + fcurs->token[0] = '\0'; } else { - dict->token[0] = (char)dict->already_got_it; /* specials are one byte */ - dict->token[1] = '\0'; + fcurs->token[0] = (char)fcurs->already_got_it; /* specials are one byte */ + fcurs->token[1] = '\0'; } - dict->already_got_it = '\0'; + fcurs->already_got_it = '\0'; return true; } + utf8char c; do { - bool ok = get_character(dict, false, c); + bool ok = get_character(fcurs, false, c); if (!ok) return false; } while (lg_isspace((unsigned char)c[0])); - i = 0; + int i = 0; for (;;) { if (i > MAX_TOKEN_LENGTH-3) { - dict_error(dict, "Token too long."); + dict_error(fcurs, "Token too long."); return false; } @@ -337,56 +332,55 @@ static bool link_advance(Dictionary dict) * when a quoted string is used with a #define statement. */ if (quote_mode) { - if (c[0] == '"' && /* Check the next character too, to allow " in words */ - (*dict->pin == ':' || *dict->pin == ';' || - lg_isspace((unsigned char)*dict->pin))) { + (*fcurs->pin == ':' || *fcurs->pin == ';' || + lg_isspace((unsigned char)*fcurs->pin))) { - dict->token[i] = '\0'; + fcurs->token[i] = '\0'; return true; } if (c[0] == '\0') { - dict_error(dict, "EOF while reading quoted token."); + dict_error(fcurs, "EOF while reading quoted token."); return false; } /* Copy all of the UTF8 bytes. */ - nr = 0; - while (c[nr]) {dict->token[i] = c[nr]; i++; nr++; } + int nr = 0; + while (c[nr]) {fcurs->token[i] = c[nr]; i++; nr++; } } else { if ('\0' == c[1] && char_is_special(c[0])) { if (i == 0) { - dict->token[0] = c[0]; /* special toks are one char always */ - dict->token[1] = '\0'; - dict->is_special = true; + fcurs->token[0] = c[0]; /* special toks are one char always */ + fcurs->token[1] = '\0'; + fcurs->is_special = true; return true; } - dict->token[i] = '\0'; - dict->already_got_it = c[0]; + fcurs->token[i] = '\0'; + fcurs->already_got_it = c[0]; return true; } if (c[0] == 0x0) { - if (i != 0) dict->already_got_it = '\0'; - dict->token[0] = '\0'; + if (i != 0) fcurs->already_got_it = '\0'; + fcurs->token[0] = '\0'; return true; } if (lg_isspace((unsigned char)c[0])) { - dict->token[i] = '\0'; + fcurs->token[i] = '\0'; return true; } if (c[0] == '\"') { quote_mode = true; } else { - nr = 0; - while (c[nr]) {dict->token[i] = c[nr]; i++; nr++; } + int nr = 0; + while (c[nr]) {fcurs->token[i] = c[nr]; i++; nr++; } } } - bool ok = get_character(dict, quote_mode, c); + bool ok = get_character(fcurs, quote_mode, c); if (!ok) return false; } /* unreachable */ @@ -395,11 +389,11 @@ static bool link_advance(Dictionary dict) /** * Returns true if this token is a special token and it is equal to c */ -static int is_equal(Dictionary dict, char c) +static int is_equal(FileCursor fcurs, char c) { - return (dict->is_special && - c == dict->token[0] && - dict->token[1] == '\0'); + return (fcurs->is_special && + c == fcurs->token[0] && + fcurs->token[1] == '\0'); } /** @@ -407,30 +401,30 @@ static int is_equal(Dictionary dict, char c) * Return true if the connector is valid, else return false, * and print an appropriate warning message. */ -static bool check_connector(Dictionary dict, const char * s) +static bool check_connector(FileCursor fcurs, const char * s) { int i; i = strlen(s); if (i < 1) { - dict_error(dict, "Expecting a connector."); + dict_error(fcurs, "Expecting a connector."); return false; } i = s[i-1]; /* the last character of the token */ if ((i != '+') && (i != '-') && (i != ANY_DIR)) { - dict_error(dict, "A connector must end in a \"+\", \"-\" or \"$\"."); + dict_error(fcurs, "A connector must end in a \"+\", \"-\" or \"$\"."); return false; } if (*s == '@') s++; if (('h' == *s) || ('d' == *s)) s++; if (!is_connector_name_char(*s)) { - dict_error2(dict, "Invalid character in connector " + dict_error2(fcurs, "Invalid character in connector " "(connectors must start with an uppercase letter " "after an optional \"h\" or \"d\"):", (char[]){*s, '\0'}); return false; } if (*s == '_') { - dict_error(dict, "Invalid character in connector " + dict_error(fcurs, "Invalid character in connector " "(an initial \"_\" is reserved for internal use)."); return false; } @@ -439,7 +433,7 @@ static bool check_connector(Dictionary dict, const char * s) do { s++; } while (is_connector_name_char(*s)); while (s[1]) { if (!is_connector_subscript_char(*s) && (*s != WILD_TYPE)) { - dict_error2(dict, "Invalid character in connector subscript " + dict_error2(fcurs, "Invalid character in connector subscript " "(only lowercase letters, digits, and \"*\" are allowed):", (char[]){*s, '\0'}); return false; @@ -456,20 +450,20 @@ static bool check_connector(Dictionary dict, const char * s) * * Assumes the current token is the connector. */ -static Exp * make_dir_connector(Dictionary dict, int i) +static Exp * make_dir_connector(Dictionary dict, FileCursor fcurs, int i) { char *constring; bool multi = false; - char dir = dict->token[i]; - dict->token[i] = '\0'; /* get rid of the + or - */ - if (dict->token[0] == '@') + char dir = fcurs->token[i]; + fcurs->token[i] = '\0'; /* get rid of the + or - */ + if (fcurs->token[0] == '@') { - constring = dict->token+1; + constring = fcurs->token+1; multi = true; } else - constring = dict->token; + constring = fcurs->token; return make_connector_node(dict, dict->Exp_pool, constring, dir, multi); @@ -504,32 +498,31 @@ static unsigned int exptag_macro_add(Dictionary dict, const char *tag) * * Assumes the current token is a connector or dictionary word. */ -static Exp * make_connector(Dictionary dict) +static Exp * make_connector(FileCursor fcurs) { + Dictionary dict = fcurs->dict; Exp * n; - Dict_node *dn; - int i; - i = strlen(dict->token) - 1; /* this must be +, - or $ if a connector */ - if ((dict->token[i] != '+') && - (dict->token[i] != '-') && - (dict->token[i] != ANY_DIR)) + int i = strlen(fcurs->token) - 1; /* this must be +, - or $ if a connector */ + if ((fcurs->token[i] != '+') && + (fcurs->token[i] != '-') && + (fcurs->token[i] != ANY_DIR)) { /* If we are here, token is a word */ - patch_subscript(dict->token); - dn = strict_lookup_list(dict, dict->token); + patch_subscript(fcurs->token); + Dict_node * dn = strict_lookup_list(dict, fcurs->token); if (dn == NULL) { - dict_error2(dict, "Perhaps missing + or - in a connector.\n" + dict_error2(fcurs, "Perhaps missing + or - in a connector.\n" "Or perhaps you forgot the subscript on a word.\n" "Or perhaps the word is used before it is defined:", - dict->token); + fcurs->token); return NULL; } if (dn->right != NULL) { dict_node_free_list(dn); - dict_error2(dict, "Referencing a duplicate word:", dict->token); + dict_error2(fcurs, "Referencing a duplicate word:", fcurs->token); /* Note: A word which becomes duplicate latter evades this check. */ return NULL; } @@ -544,38 +537,38 @@ static Exp * make_connector(Dictionary dict) else { /* If we are here, token is a connector */ - if (!check_connector(dict, dict->token)) + if (!check_connector(fcurs, fcurs->token)) { return NULL; } - if ((dict->token[i] == '+') || (dict->token[i] == '-')) + if ((fcurs->token[i] == '+') || (fcurs->token[i] == '-')) { /* A simple, unidirectional connector. Just make that. */ - n = make_dir_connector(dict, i); + n = make_dir_connector(dict, fcurs, i); if (NULL == n) return NULL; } - else if (dict->token[i] == ANY_DIR) + else if (fcurs->token[i] == ANY_DIR) { Exp *plu, *min; /* If we are here, then it's a bi-directional connector. * Make both a + and a - version, and or them together. */ - dict->token[i] = '+'; - plu = make_dir_connector(dict, i); + fcurs->token[i] = '+'; + plu = make_dir_connector(dict, fcurs, i); if (NULL == plu) return NULL; - dict->token[i] = '-'; - min = make_dir_connector(dict, i); + fcurs->token[i] = '-'; + min = make_dir_connector(dict, fcurs, i); if (NULL == min) return NULL; n = make_or_node(dict->Exp_pool, plu, min); } else { - dict_error(dict, "Unknown connector direction type."); + dict_error(fcurs, "Unknown connector direction type."); return NULL; } } - if (!link_advance(dict)) + if (!link_advance(fcurs)) { free(n); return NULL; @@ -605,8 +598,10 @@ static bool is_number(const char * str) * with the current token. At the end, the token is the first one not * part of this expression. */ -static Exp *make_expression(Dictionary dict) +static Exp *make_expression(FileCursor fcurs) { + Dictionary dict = fcurs->dict; + Exp *nl = NULL; Exp *e_head = NULL; Exp *e_tail = NULL; /* last part of the expression */ @@ -614,55 +609,55 @@ static Exp *make_expression(Dictionary dict) while (true) { - if (is_equal(dict, '(')) + if (is_equal(fcurs, '(')) { - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(dict, ')')) { - dict_error(dict, "Expecting a \")\"."); + if (!is_equal(fcurs, ')')) { + dict_error(fcurs, "Expecting a \")\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } } - else if (is_equal(dict, '{')) + else if (is_equal(fcurs, '{')) { - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(dict, '}')) { - dict_error(dict, "Expecting a \"}\"."); + if (!is_equal(fcurs, '}')) { + dict_error(fcurs, "Expecting a \"}\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } nl = make_optional_node(dict->Exp_pool, nl); } - else if (is_equal(dict, '[')) + else if (is_equal(fcurs, '[')) { - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(dict, ']')) { - dict_error(dict, "Expecting a \"]\"."); + if (!is_equal(fcurs, ']')) { + dict_error(fcurs, "Expecting a \"]\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } @@ -672,32 +667,32 @@ static Exp *make_expression(Dictionary dict) * is used as an expression tag. Else, the cost of a * square bracket is 1.0. */ - if (is_number(dict->token)) + if (is_number(fcurs->token)) { float cost; - if (strtofC(dict->token, &cost)) + if (strtofC(fcurs->token, &cost)) { nl->cost += cost; } else { - warning(dict, "Invalid cost (using 1.0)\n"); + warning(fcurs, "Invalid cost (using 1.0)\n"); nl->cost += 1.0F; } - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } } - else if ((strcmp(dict->token, "or") != 0) && - (strcmp(dict->token, "and") != 0) && - isalpha((unsigned char)dict->token[0])) + else if ((strcmp(fcurs->token, "or") != 0) && + (strcmp(fcurs->token, "and") != 0) && + isalpha((unsigned char)fcurs->token[0])) { - const char *bad = valid_dialect_name(dict->token); + const char *bad = valid_dialect_name(fcurs->token); if (bad != NULL) { char badchar[] = { *bad, '\0' }; - dict_error2(dict, "Invalid character in dialect tag name:", + dict_error2(fcurs, "Invalid character in dialect tag name:", badchar); return NULL; } @@ -705,9 +700,9 @@ static Exp *make_expression(Dictionary dict) { nl = make_unary_node(dict->Exp_pool, nl); } - nl->tag_id = exptag_dialect_add(dict, dict->token); + nl->tag_id = exptag_dialect_add(dict, fcurs->token); nl->tag_type = Exptag_dialect; - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } } @@ -716,21 +711,21 @@ static Exp *make_expression(Dictionary dict) nl->cost += 1.0F; } } - else if (!dict->is_special) + else if (!fcurs->is_special) { - nl = make_connector(dict); + nl = make_connector(fcurs); if (nl == NULL) { return NULL; } } - else if (is_equal(dict, ')') || is_equal(dict, ']')) + else if (is_equal(fcurs, ')') || is_equal(fcurs, ']')) { /* allows "()" or "[]" */ nl = make_zeroary_node(dict->Exp_pool); } else { - dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected."); + dict_error(fcurs, "Connector, \"(\", \"[\", or \"{\" expected."); return NULL; } @@ -763,17 +758,17 @@ static Exp *make_expression(Dictionary dict) Exp_type op; /* Non-commuting AND */ - if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0)) + if (is_equal(fcurs, '&') || (strcmp(fcurs->token, "and") == 0)) { op = AND_type; } /* Commuting OR */ - else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0)) + else if (is_equal(fcurs, '|') || (strcmp(fcurs->token, "or") == 0)) { op = OR_type; } /* Commuting AND */ - else if (is_equal(dict, SYM_AND) || (strcmp(dict->token, "sym") == 0)) + else if (is_equal(fcurs, SYM_AND) || (strcmp(fcurs->token, "sym") == 0)) { /* Part 1/2 of SYM_AND processing */ op = AND_type; /* allow mixing with ordinary ands at the same level */ @@ -796,12 +791,12 @@ static Exp *make_expression(Dictionary dict) { if (e_head->type != op) { - dict_error(dict, "\"and\" and \"or\" at the same level in an expression."); + dict_error(fcurs, "\"and\" and \"or\" at the same level in an expression."); return NULL; } } - if (!link_advance(dict)) { + if (!link_advance(fcurs)) { return NULL; } @@ -940,18 +935,15 @@ void insert_list(Dictionary dict, Dict_node * p, int l) * and is terminated by a semi-colon. * Add these words to the dictionary. */ -static bool read_entry(Dictionary dict) +static bool read_entry(FileCursor fcurs) { - Exp *n; - int i; - Dict_node *dnx, *dn = NULL; - while (!is_equal(dict, ':')) + while (!is_equal(fcurs, ':')) { - if (dict->is_special) + if (fcurs->is_special) { - dict_error(dict, "I expected a word but didn\'t get it."); + dict_error(fcurs, "I expected a word but didn\'t get it."); goto syntax_error; } @@ -959,95 +951,74 @@ static bool read_entry(Dictionary dict) /* However, be careful to reject "/.v" which is the division symbol * used in equations (.v means verb-like). Also reject an affix regex * specification (may appear only in the affix file). */ - if ((dict->token[0] == '/') && - (dict->token[1] != '.') && (get_affix_regex_cg(dict->token) < 0)) + if ((fcurs->token[0] == '/') && + (fcurs->token[1] != '.') && (get_affix_regex_cg(fcurs->token) < 0)) { - Dict_node *new_dn = read_word_file(dict, dn, dict->token); + Dict_node *new_dn = read_word_file(fcurs->dict, dn, fcurs->token); if (new_dn == NULL) { - prt_error("Error: Cannot open word file \"%s\".\n", dict->token); + prt_error("Error: Cannot open word file \"%s\".\n", fcurs->token); goto syntax_error; /* not a syntax error, but need to free dn */ } dn = new_dn; } - else if (0 == strcmp(dict->token, "#include")) + else if (0 == strcmp(fcurs->token, "#include")) { - bool rc; - char* instr; - char* dict_name; - const char * save_name; - bool save_is_special; - const char * save_input; - const char * save_pin; - int save_already_got_it; - int save_line_number; - size_t skip_slash; - - if (!link_advance(dict)) goto syntax_error; - - skip_slash = ('/' == dict->token[0]) ? 1 : 0; - dict_name = strdupa(dict->token); - save_name = dict->name; - save_is_special = dict->is_special; - save_input = dict->input; - save_pin = dict->pin; - save_already_got_it = dict->already_got_it; - save_line_number = dict->line_number; + if (!link_advance(fcurs)) goto syntax_error; /* OK, token contains the filename to read ... */ - instr = get_file_contents(dict_name + skip_slash); + char* dict_name = strdupa(fcurs->token); + size_t skip_slash = ('/' == fcurs->token[0]) ? 1 : 0; + char* instr = get_file_contents(dict_name + skip_slash); if (NULL == instr) { + Dictionary dict = fcurs->dict; prt_error("Error: While parsing dictionary \"%s\":\n" "\t Line %d: Could not open subdictionary \"%s\"\n", dict->name, dict->line_number-1, dict_name); goto syntax_error; } - dict->input = instr; - dict->pin = dict->input; - /* The line number and dict name are used for error reporting */ - dict->line_number = 1; + /* The dict name and line-number are used for error reporting */ + Dictionary dict = fcurs->dict; + const char * save_name = dict->name; + int save_line_number = dict->line_number; dict->name = dict_name; /* Now read the thing in. */ - rc = read_dictionary(dict); + bool rc = read_dictionary(dict, instr); - dict->name = save_name; - dict->is_special = save_is_special; - dict->input = save_input; - dict->pin = save_pin; - dict->already_got_it = save_already_got_it; - dict->line_number = save_line_number; + dict->name = save_name; + dict->line_number = save_line_number; free_file_contents(instr); if (!rc) goto syntax_error; /* when we return, point to the next entry */ - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; /* If a semicolon follows the include, that's OK... ignore it. */ - if (';' == dict->token[0]) + if (';' == fcurs->token[0]) { - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; } return true; } - else if (0 == strcmp(dict->token, "#define")) + else if (0 == strcmp(fcurs->token, "#define")) { - if (!link_advance(dict)) goto syntax_error; - const char *name = strdupa(dict->token); + if (!link_advance(fcurs)) goto syntax_error; + const char *name = strdupa(fcurs->token); /* Get the value. */ - if (!link_advance(dict)) goto syntax_error; - add_define(dict, name, dict->token); + if (!link_advance(fcurs)) goto syntax_error; + add_define(fcurs->dict, name, fcurs->token); - if (!link_advance(dict)) goto syntax_error; - if (!is_equal(dict, ';')) + if (!link_advance(fcurs)) goto syntax_error; + if (!is_equal(fcurs, ';')) { - dict_error(dict, "Expecting \";\" at the end of #define."); + dict_error(fcurs, "Expecting \";\" at the end of #define."); goto syntax_error; } } @@ -1062,53 +1033,53 @@ static bool read_entry(Dictionary dict) /* Note: The following patches a dot in regexes appearing in * the affix file... It is corrected later. */ - patch_subscript(dict->token); - dn->string = string_set_add(dict->token, dict->string_set); + patch_subscript(fcurs->token); + dn->string = string_set_add(fcurs->token, fcurs->dict->string_set); } /* Advance to next entry, unless error */ - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; } /* pass the : */ - if (!link_advance(dict)) + if (!link_advance(fcurs)) { goto syntax_error; } - n = make_expression(dict); + Exp * n = make_expression(fcurs); if (n == NULL) - { goto syntax_error; - } - if (!is_equal(dict, ';')) + if (!is_equal(fcurs, ';')) { - dict_error(dict, "Expecting \";\" at the end of an entry."); + dict_error(fcurs, "Expecting \";\" at the end of an entry."); goto syntax_error; } if (dn == NULL) { - dict_error(dict, "Expecting a token before \":\"."); + dict_error(fcurs, "Expecting a token before \":\"."); goto syntax_error; } /* At this point, dn points to a list of Dict_nodes connected by * their left pointers. These are to be inserted into the dictionary. */ - i = 0; + int i = 0; for (dnx = dn; dnx != NULL; dnx = dnx->left) { dnx->exp = n; i++; } + + Dictionary dict = fcurs->dict; if (IS_GENERATION(dict)) add_category(dict, n, dn, i); dict->insert_entry(dict, dn, i); /* pass the ; */ - if (!link_advance(dict)) + if (!link_advance(fcurs)) { /* Avoid freeing dn, since it is already inserted into the dict. */ return false; @@ -1121,24 +1092,22 @@ static bool read_entry(Dictionary dict) return false; } -bool read_dictionary(Dictionary dict) +static bool fread_dict(FileCursor fcurs) { - if (!link_advance(dict)) - { + if (!link_advance(fcurs)) return false; - } + /* The last character of a dictionary is NUL. * Note: At the end of reading a dictionary, dict->pin points to one * character after the input. Referring its [-1] element is safe even if * the dict file size is 0. */ - while ('\0' != dict->pin[-1]) + while ('\0' != fcurs->pin[-1]) { - if (!read_entry(dict)) - { + if (!read_entry(fcurs)) return false; - } } + Dictionary dict = fcurs->dict; if (dict->category != NULL) { /* Create a category element which contains 0 words, to signify the @@ -1155,4 +1124,20 @@ bool read_dictionary(Dictionary dict) return true; } +bool read_dictionary(Dictionary dict, const char * input) +{ + FileCursor fcurs = alloca(sizeof(struct FileCursor_s)); + + dict->line_number = 1; + fcurs->dict = dict; + fcurs->input = input; + fcurs->pin = fcurs->input; + fcurs->recursive_error = false; + fcurs->is_special = false; + fcurs->already_got_it = false; + fcurs->token[0] = 0; + + return fread_dict(fcurs); +} + /* ======================================================================= */ diff --git a/link-grammar/dict-file/read-dict.h b/link-grammar/dict-file/read-dict.h index 82a79efad..03e98aed1 100644 --- a/link-grammar/dict-file/read-dict.h +++ b/link-grammar/dict-file/read-dict.h @@ -19,8 +19,7 @@ Dictionary dictionary_six(const char *lang, const char *dict_name, const char *pp_name, const char *cons_name, const char *affix_name, const char *regex_name); Dictionary dictionary_create_from_file(const char *lang); -bool read_dictionary(Dictionary dict); -void dict_error2(Dictionary dict, const char *s, const char *s2); +bool read_dictionary(Dictionary dict, const char *input); void insert_list(Dictionary dict, Dict_node * p, int l); void free_insert_list(Dict_node *ilist); diff --git a/link-grammar/dict-ram/dict-ram.c b/link-grammar/dict-ram/dict-ram.c index eed2d0aa9..48e0bd508 100644 --- a/link-grammar/dict-ram/dict-ram.c +++ b/link-grammar/dict-ram/dict-ram.c @@ -17,7 +17,6 @@ #include "dict-common/dict-internals.h" #include "dict-common/dict-utils.h" // patch_subscript #include "dict-common/idiom.h" -#include "dict-file/read-dict.h" // dict_error2 #include "string-id.h" #include "string-set.h" @@ -591,7 +590,6 @@ static int dup_word_status(Dictionary dict, const Dict_node *newnode) static bool dup_word_error(Dictionary dict, Dict_node *newnode) { - if (dup_word_status(dict, newnode) == 1) return false; if (dict->allow_duplicate_words == 0) @@ -606,8 +604,19 @@ static bool dup_word_error(Dictionary dict, Dict_node *newnode) if (dup_word_status(dict, newnode) == 1) return false; } - dict_error2(dict, "Ignoring word which has been multiply defined:", - newnode->string); + + if (IS_DYNAMIC_DICT(dict)) + { + prt_error("Error: While handling storage-node\n \"%s\":\n" + "Ignoring word which has been multiply defined: \"%s\"\n", + dict->name, newnode->string); + } else { + // File-backed dictionary sets a line number, for debugging. + prt_error("Error: While parsing dictionary \"%s\":\n" + "Ignoring word which has been multiply defined: \"%s\"\n" + "\t Line %d\n", + dict->name, newnode->string, dict->line_number); + } /* Too late to skip insertion - insert it with a null expression. */ newnode->exp = make_zeroary_node(dict->Exp_pool);