Skip to content

Commit

Permalink
max-disjuncts parse option gets default from dict.
Browse files Browse the repository at this point in the history
The max-disjuncts to use will typically be dictionary-dependent.
But it behaves like a parse option, controlling parsing. So the
dictionary provides a default. Users can override it at run-time, if
they really want to.
  • Loading branch information
linas committed Mar 27, 2024
1 parent 6169be7 commit fdeb108
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 22 deletions.
9 changes: 8 additions & 1 deletion data/demo-atomese/storage.dict
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#define dictionary-lang en;
#define dictionary-locale en_US.UTF-8;
#define dictionary-version-number 5.12.3;
#define dictionary-version-number 5.12.4;

% Never use LG built-in capitalization and downcasing rules.
#define disable-downcasing true;
Expand Down Expand Up @@ -190,6 +190,13 @@
% dynamically, from pairs.
#define max-disjunct-cost 3.0;

% No more than this many disjuncts will be created for a word. If the
% dictionary encodes for more than these, then disjuncts will be
% selected randomly, to not exceed this limit. The selection is applied
% after the above cost-max limit, and after some pre-pruning that avoids
% the selection of disjuncts that cannot possibly be used.
#define max-disjuncts 75000;

% -----------------------
% If disjuncts are not directly available, they can be created on the
% fly, from word-pairs. The settings below are analogous to those above,
Expand Down
18 changes: 13 additions & 5 deletions link-grammar/dict-common/dict-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@
* is used. */
static const float UNINITIALIZED_MAX_DISJUNCT_COST = -10000.0f;
static const float DEFAULT_MAX_DISJUNCT_COST = 2.7f;
/* Sentinel meaning "the max-disjuncts parse option has not been set".
 * Declared as an int (not a float): it is assigned to, and compared
 * against, the integer max-disjuncts count, so an integer type avoids
 * needless int<->float conversions at every use. */
static const int UNINITIALIZED_MAX_DISJUNCTS = -1;

/* We need some of these as literal strings. */
#define LG_DISJUNCT_COST "max-disjunct-cost"
#define LG_DICTIONARY_VERSION_NUMBER "dictionary-version-number"
#define LG_DICTIONARY_LOCALE "dictionary-locale"
#define LG_DISABLE_DOWNCASING "disable-downcasing"
#define LG_DISJUNCT_COST "max-disjunct-cost"
#define LG_MAX_DISJUNCTS "max-disjuncts"

/* Forward decls */
typedef struct Afdict_class_struct Afdict_class;
Expand Down Expand Up @@ -126,11 +128,16 @@ struct Dictionary_s
locale_t lctype; /* Locale argument for the *_l() functions */

int num_entries;
float default_max_disjunct_cost;
dfine_s dfine; /* Name-value definitions */

/* Parse options for which defaults are provided by the dictionary. */
float default_max_disjunct_cost; /* Dictionary-specific scale. */
int default_max_disjuncts; /* Max number of disjuncts. */

const char * zzz_connector;

/* Dictionary-defined parameters. Control behavior of how
* words are looked up in the dictionary. */
bool use_unknown_word;
bool unknown_word_defined;
bool left_wall_defined;
Expand Down Expand Up @@ -183,13 +190,14 @@ struct Dictionary_s
void (*clear_cache)(Dictionary);
void (*close)(Dictionary);

pp_knowledge * base_knowledge; /* Core post-processing rules */
pp_knowledge * hpsg_knowledge; /* Head-Phrase Structure rules */
String_set * string_set; /* Set of link names in the dictionary */
Word_file * word_file_header;
ConTable contable;
Pool_desc * Exp_pool;

Pool_desc * Exp_pool;
/* Post-processing */
pp_knowledge * base_knowledge; /* Core post-processing rules */
pp_knowledge * hpsg_knowledge; /* Head-Phrase Structure rules */

/* Sentence generation */
unsigned int num_categories;
Expand Down
30 changes: 16 additions & 14 deletions link-grammar/dict-common/dict-locale.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,24 +393,21 @@ void dictionary_setup_locale(Dictionary dict)

static bool dictionary_setup_max_disjunct_cost(Dictionary dict)
{
const char *disjunct_cost_str =
linkgrammar_get_dict_define(dict, LG_DISJUNCT_COST);
if (NULL == disjunct_cost_str)
const char *valstr = linkgrammar_get_dict_define(dict, LG_DISJUNCT_COST);
if (NULL == valstr)
{
dict->default_max_disjunct_cost = DEFAULT_MAX_DISJUNCT_COST;
return true;
}
else

float value;
if (!strtofC(valstr, &value))
{
float disjunct_cost_value;
if (!strtofC(disjunct_cost_str, &disjunct_cost_value))
{
prt_error("Error: %s: Invalid cost \"%s\"\n", LG_DISJUNCT_COST,
disjunct_cost_str);
return false;
}
dict->default_max_disjunct_cost = disjunct_cost_value;
prt_error("Error: %s: Invalid cost \"%s\"\n",
LG_DISJUNCT_COST, valstr);
return false;
}

dict->default_max_disjunct_cost = value;
return true;
}

Expand Down Expand Up @@ -454,8 +451,13 @@ bool dictionary_setup_defines(Dictionary dict)
if (NULL != ddn && 0 != strcmp(ddn, "false") && 0 != strcmp(ddn, "0"))
dict->disable_downcasing = true;

if (!dictionary_setup_max_disjunct_cost(dict)) return false;
/* Parse options that have default values in dictionaries */
dict->default_max_disjuncts = 0;
const char *mdstr = linkgrammar_get_dict_define(dict, LG_MAX_DISJUNCTS);
if (mdstr)
dict->default_max_disjuncts = atoi(mdstr);

if (!dictionary_setup_max_disjunct_cost(dict)) return false;
return true;
}

Expand Down
2 changes: 1 addition & 1 deletion link-grammar/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Parse_Options parse_options_create(void)

/* Disable cap on number of disjuncts. Individual dicts might
* over-ride. Atomese dicts often have crazy-large numbers. */
po->max_disjuncts = 0;
po->max_disjuncts = UNINITIALIZED_MAX_DISJUNCTS;

/* Set disjunct_cost to a bogus value of -10000. The dict-common
* code will set this to a more reasonable default. */
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/prepare/build-disjuncts.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
/* */
/*************************************************************************/

/* stuff for transforming a dictionary entry into a disjunct list */
/* Code that transforms a dictionary entry into a disjunct list */

#include "build-disjuncts.h"
#include "connectors.h"
Expand Down
3 changes: 3 additions & 0 deletions link-grammar/sentence.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ int sentence_parse(Sentence sent, Parse_Options opts)
if (opts->disjunct_cost == UNINITIALIZED_MAX_DISJUNCT_COST)
opts->disjunct_cost = dict->default_max_disjunct_cost;

if (opts->max_disjuncts == UNINITIALIZED_MAX_DISJUNCTS)
opts->max_disjuncts = dict->default_max_disjuncts;

sent->num_valid_linkages = 0;

/* If the sentence has not yet been split, do so now.
Expand Down

0 comments on commit fdeb108

Please sign in to comment.