Skip to content

Commit

Permalink
Merge pull request #1535 from ampli/connector-hash-num
Browse files Browse the repository at this point in the history
Move the condesc_t::more data to a new element condesc_more_t
  • Loading branch information
linas authored May 25, 2024
2 parents d1b9d40 + 63d3b24 commit ad13436
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 45 deletions.
81 changes: 45 additions & 36 deletions link-grammar/connectors.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,27 +288,27 @@ static void connector_encode_lc(const char *lc_string, condesc_t *desc)
*
* Note: check_connector() has already validated the connector string.
*/
void calculate_connector_info(hdesc_t *hdesc)
void calculate_connector_info(condesc_t *condesc)
{
const char *s;
condesc_more_t *m = condesc->more;

s = hdesc->string;
s = m->string;
if (islower((unsigned char)*s))
{
dassert((hdesc->string[0] == 'h') || (hdesc->string[0] == 'd'),
"\'%hdesc\': Bad head/dependent character", hdesc->string[0]);
dassert((s[0] == 'h') || (s[0] == 'd'), "'%s': Bad head/dependent", s);

if ((*s == 'h') || (*s == 'd')) hdesc->flags |= CD_HEAD_DEPENDENT;
if (*s == 'h') hdesc->flags |= CD_HEAD;
if ((s[0] == 'h') || (s[0] == 'd')) m->flags |= CD_HEAD_DEPENDENT;
if (s[0] == 'h') m->flags |= CD_HEAD;
s++; /* Ignore head-dependent indicator. */
}

hdesc->uc_start = (uint8_t)(s - hdesc->string);
m->uc_start = (uint8_t)(s - m->string);
/* Skip the uppercase part. */
do { s++; } while (is_connector_name_char(*s));
hdesc->uc_length = (uint8_t)(s - hdesc->string - hdesc->uc_start);
m->uc_length = (uint8_t)(s - m->string - m->uc_start);

connector_encode_lc(s, hdesc->desc);
connector_encode_lc(s, condesc);
}

/* ================= Connector descriptor table. ====================== */
Expand Down Expand Up @@ -407,35 +407,36 @@ int condesc_by_uc_constring(const void * a, const void * b)
*/
static bool sort_condesc_by_uc_constring(Dictionary dict)
{
if ((0 == dict->contable.num_con) && !IS_DYNAMIC_DICT(dict))
ConTable *ct = &dict->contable;

if ((0 == ct->num_con) && !IS_DYNAMIC_DICT(dict))
{
prt_error("Error: Dictionary %s: No connectors found.\n", dict->name);
return false;
}

/* An SQL dict without <UNKNOWN-WORD> may have 0 connectors here. */
if (0 == dict->contable.num_con)
if (0 == ct->num_con)
return true;

condesc_t **sdesc = malloc(dict->contable.num_con * sizeof(condesc_t *));
condesc_t **sdesc = malloc(ct->num_con * sizeof(condesc_t *));
size_t i = 0;
for (size_t n = 0; n < dict->contable.size; n++)
for (size_t n = 0; n < ct->size; n++)
{
condesc_t *condesc = dict->contable.hdesc[n].desc;
condesc_t *condesc = ct->hdesc[n].desc;

if (NULL == condesc) continue;
calculate_connector_info(&dict->contable.hdesc[n]);
sdesc[i++] = dict->contable.hdesc[n].desc;
calculate_connector_info(condesc);
sdesc[i++] = condesc;
}

qsort(sdesc, dict->contable.num_con, sizeof(*dict->contable.sdesc),
condesc_by_uc_constring);
qsort(sdesc, ct->num_con, sizeof(*ct->sdesc), condesc_by_uc_constring);

/* Enumerate the connectors according to their UC part. */
int uc_num = 0;

sdesc[0]->uc_num = uc_num;
for (size_t n = 1; n < dict->contable.num_con; n++)
for (size_t n = 1; n < ct->num_con; n++)
{
condesc_t **condesc = &sdesc[n];

Expand All @@ -461,10 +462,10 @@ static bool sort_condesc_by_uc_constring(Dictionary dict)

lgdebug(+11, "Dictionary %s: %zu different connectors "
"(%d with a different UC part)\n",
dict->name, dict->contable.num_con, uc_num+1);
dict->name, ct->num_con, uc_num+1);

dict->contable.sdesc = sdesc;
dict->contable.num_uc = uc_num + 1;
ct->sdesc = sdesc;
ct->num_uc = uc_num + 1;

/* hdesc is not freed here because it is needed for finding ZZZ.
* It could be freed here if we have ZZZ cached in the dict structure. */
Expand All @@ -476,7 +477,8 @@ void condesc_delete(Dictionary dict)
ConTable *ct = &dict->contable;

free(ct->hdesc);
pool_delete(ct->mempool);
pool_delete(ct->desc_pool);
pool_delete(ct->more_pool);
condesc_length_limit_def_delete(ct);
}

Expand All @@ -487,15 +489,16 @@ void condesc_reuse(Dictionary dict)
ct->num_con = 0;
ct->num_uc = 0;
memset(ct->hdesc, 0, ct->size * sizeof(hdesc_t));
pool_reuse(ct->mempool);
pool_reuse(ct->desc_pool);
pool_reuse(ct->more_pool);
}

static hdesc_t *condesc_find(ConTable *ct, const char *constring, uint32_t hash)
{
uint32_t i = hash & (ct->size-1);

while ((NULL != ct->hdesc[i].desc) &&
!string_set_cmp(constring, ct->hdesc[i].string))
!string_set_cmp(constring, ct->hdesc[i].desc->more->string))
{
i = (i + 1) & (ct->size-1);
}
Expand All @@ -522,18 +525,18 @@ static bool condesc_grow(ConTable *ct)

for (size_t i = 0; i < old_size; i++)
{
hdesc_t *old_h = &old_hdesc[i];
if (NULL == old_h->desc) continue;
hdesc_t *new_h = condesc_find(ct, old_h->string, old_h->str_hash);
condesc_t *old_desc = old_hdesc[i].desc;
if (NULL == old_desc) continue;
hdesc_t *new_hdesc =
condesc_find(ct, old_desc->more->string, old_desc->more->str_hash);

if (NULL != new_h->desc)
if (NULL != new_hdesc->desc)
{
prt_error("Fatal Error: condesc_grow(): Internal error\n");
free(old_hdesc);
return false;
}
*new_h = *old_h;
new_h->desc->more = new_h;
new_hdesc->desc = old_desc;
}

free(old_hdesc);
Expand All @@ -548,12 +551,15 @@ condesc_t *condesc_add(ConTable *ct, const char *constring)
if (NULL == h->desc)
{
lgdebug(+11, "Creating connector '%s' (%zu)\n", constring, ct->num_con);
h->desc = pool_alloc(ct->mempool);
h->string = constring;

h->desc = pool_alloc(ct->desc_pool);
h->desc->uc_num = UINT32_MAX;
h->str_hash = hash;
h->desc->more = h;
h->desc->con_num = ct->num_con;

/* The "more" record is a condesc_more_t, so take it from more_pool
 * (whose element size is sizeof(condesc_more_t)) and not from desc_pool,
 * which holds condesc_t elements. */
condesc_more_t *m = h->desc->more = pool_alloc(ct->more_pool);
m->string = constring;
m->str_hash = hash;

ct->num_con++;

if ((8 * ct->num_con) > (3 * ct->size))
Expand All @@ -570,9 +576,12 @@ void condesc_init(Dictionary dict, size_t num_con)
{
ConTable *ct = &dict->contable;

ct->mempool = pool_new(__func__, "ConTable",
ct->desc_pool = pool_new(__func__, "condesc_t",
/*num_elements*/num_con, sizeof(condesc_t),
/*zero_out*/true, /*align*/true, /*exact*/false);
ct->more_pool = pool_new(__func__, "condesc_more_t",
/*num_elements*/num_con, sizeof(condesc_more_t),
/*zero_out*/true, /*align*/true, /*exact*/false);

// Connector hash table must be an exact power of two.
int nbits = 0;
Expand Down
18 changes: 12 additions & 6 deletions link-grammar/connectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ typedef struct condesc_struct condesc_t;
typedef struct hdesc
{
condesc_t *desc;
const char *string; /* The connector name w/o the direction mark, e.g. AB */
// float *cost; // Array of cost by connector length (cost[0]: default)
} hdesc_t;

typedef struct
{
const char *string; /* The connector name w/o the direction mark, e.g. ABc */
connector_uc_hash_t str_hash;
uint8_t length_limit; /* If not 0, it gives the limit of the length of the
* link that can be used on this connector type. The
Expand All @@ -100,7 +103,9 @@ typedef struct hdesc
/* For connector match speedup when sorting the connector table. */
uint8_t uc_length; /* uc part length */
uint8_t uc_start; /* uc start position */
} hdesc_t;

// float *cost; // Array of cost by connector length (cost[0]: default)
} condesc_more_t;

/* Each connector type has a connector descriptor. The size of this
* struct is 32 bytes, to facilitate CPU memory caching during parsing.
Expand All @@ -114,7 +119,7 @@ struct condesc_struct
{
lc_enc_t lc_letters;
lc_enc_t lc_mask;
hdesc_t *more; /* More information, for keeping small struct size. */
condesc_more_t *more;/* More information, for keeping small struct size. */
connector_uc_hash_t uc_num; /* uc part enumeration. */
uint32_t con_num; /* Connector ordinal number. */
};
Expand All @@ -135,7 +140,8 @@ typedef struct
size_t num_con; /* Number of connector types */
size_t num_uc; /* Number of connector types with different UC part */
size_t last_num; /* All condescs up to here have been done already. */
Pool_desc *mempool;
Pool_desc *desc_pool; /* For condesc_t elements. */
Pool_desc *more_pool; /* For condesc_t::more. */
length_limit_def_t *length_limit_def;
length_limit_def_t **length_limit_def_next;
} ConTable;
Expand Down Expand Up @@ -223,7 +229,7 @@ static inline unsigned int connector_num(const Connector * c)
Connector * connector_new(Pool_desc *, const condesc_t *);
void set_connector_farthest_word(Exp *, int, int, Parse_Options);
void free_connectors(Connector *);
void calculate_connector_info(hdesc_t *);
void calculate_connector_info(condesc_t *);
int condesc_by_uc_constring(const void *, const void *);

/**
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/dict-atomese/lookup-atomese.cc
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ static void update_condesc(Dictionary dict)
if (NULL == condesc) continue;
if (UINT32_MAX != condesc->uc_num) continue;

calculate_connector_info(&ct->hdesc[n]);
calculate_connector_info(condesc);
condesc->more->length_limit = UNLIMITED_LEN;
sdesc[i++] = condesc;
}
Expand Down
4 changes: 2 additions & 2 deletions link-grammar/parse/prune.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,14 +448,14 @@ static void clean_table(unsigned int size, C_list **t)
{
/* Table entry tombstone. */
#define UC_NUM_TOMBSTONE ((connector_uc_hash_t)-1)
static hdesc_t hdesc_no_match =
static condesc_more_t cm_no_match =
{
.string = "TOMBSTONE",
};
static condesc_t desc_no_match =
{
.uc_num = UC_NUM_TOMBSTONE, /* get_power_table_entry() will skip. */
.more = &hdesc_no_match
.more = &cm_no_match
};
static Connector con_no_match =
{
Expand Down

0 comments on commit ad13436

Please sign in to comment.