diff --git a/include/adt/common.h b/include/adt/common.h index 8604edacd..a83604c97 100644 --- a/include/adt/common.h +++ b/include/adt/common.h @@ -36,7 +36,7 @@ /* If non-zero, expand the timer macros defined below, otherwise * they compile away. */ #ifndef TRACK_TIMES -#define TRACK_TIMES 0 +#define TRACK_TIMES (0 && !BUILD_FOR_FUZZER) #endif #if EXPENSIVE_CHECKS && TRACK_TIMES diff --git a/include/fsm/walk.h b/include/fsm/walk.h index e1ab5f29e..cb97e989a 100644 --- a/include/fsm/walk.h +++ b/include/fsm/walk.h @@ -90,6 +90,11 @@ fsm_walk_edges(const struct fsm *fsm, void *opaque, * functionally equivalent cases makes testing dramatically faster, * but exploring every edge could be added later. * + * If randomized is zero then it will generate the first label in the + * label set, otherwise a label from the set will be chosen using rand() + * (favoring printable characters). The caller can use srand() + * beforehand to set a PRNG seed. + * * Note: fsm is non-const because it calls fsm_trim on the FSM * internally. 
This records the shortest distance from each state to an * end state, which is used to prune branches that would not produce @@ -114,7 +119,7 @@ fsm_generate_matches_cb(const struct fsm *fsm, const char *input, size_t input_length, fsm_state_t end_state, void *opaque); int -fsm_generate_matches(struct fsm *fsm, size_t max_length, +fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized, fsm_generate_matches_cb *cb, void *opaque); /* Callback provided for the most basic use case for diff --git a/src/fsm/main.c b/src/fsm/main.c index da65791dd..f9d1bb3b0 100644 --- a/src/fsm/main.c +++ b/src/fsm/main.c @@ -770,7 +770,7 @@ main(int argc, char *argv[]) } if (generate_bounds > 0) { - r = fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt); + r = fsm_generate_matches(fsm, generate_bounds, 0, fsm_generate_cb_printf_escaped, &opt); } fsm_free(fsm); diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c index 559230175..fc7c68ba4 100644 --- a/src/libfsm/determinise.c +++ b/src/libfsm/determinise.c @@ -185,8 +185,6 @@ fsm_determinise_with_config(struct fsm *nfa, } ac_env.output_count = 0; - - /* All elements in sclosures[] are interned, so they will be freed later. 
*/ } while ((curr = stack_pop(stack))); { diff --git a/src/libfsm/gen.c b/src/libfsm/gen.c index 26a77be1c..8b8551489 100644 --- a/src/libfsm/gen.c +++ b/src/libfsm/gen.c @@ -77,6 +77,7 @@ struct gen_ctx { fsm_generate_matches_cb *cb; bool done; + bool randomized; size_t buf_ceil; size_t buf_used; @@ -106,7 +107,7 @@ struct gen_ctx { static bool gen_init_outer(struct fsm *fsm, size_t max_length, fsm_generate_matches_cb *cb, void *opaque, - bool randomized, unsigned seed); + bool randomized); static bool gen_init(struct gen_ctx *ctx, struct fsm *fsm); @@ -139,7 +140,7 @@ static bool grow_stack(struct gen_ctx *ctx); int -fsm_generate_matches(struct fsm *fsm, size_t max_length, +fsm_generate_matches(struct fsm *fsm, size_t max_length, int randomized, fsm_generate_matches_cb *cb, void *opaque) { if (max_length == 0) { @@ -147,9 +148,13 @@ fsm_generate_matches(struct fsm *fsm, size_t max_length, return 0; } + if (!fsm_has(fsm, fsm_isend)) { + return 1; /* no end state -> nothing to do */ + } + INIT_TIMERS(); TIME(&pre); - int res = gen_init_outer(fsm, max_length, cb, opaque, false, 0); + int res = gen_init_outer(fsm, max_length, cb, opaque, randomized != 0); TIME(&post); DIFF_MSEC("fsm_generate_matches", pre, post, NULL); @@ -199,7 +204,7 @@ fsm_generate_cb_printf(const struct fsm *fsm, static bool gen_init_outer(struct fsm *fsm, size_t max_length, fsm_generate_matches_cb *cb, void *opaque, - bool randomized, unsigned seed) + bool randomized) { int res = false; if (fsm == NULL || cb == NULL || max_length == 0) { @@ -208,9 +213,6 @@ gen_init_outer(struct fsm *fsm, size_t max_length, assert(fsm_all(fsm, fsm_isdfa)); /* DFA-only */ - assert(!randomized); /* not yet supported */ - (void)seed; - #if LOG_GEN > 1 fprintf(stderr, "%s: %u states\n", __func__, fsm_countstates(fsm)); #endif @@ -224,6 +226,7 @@ gen_init_outer(struct fsm *fsm, size_t max_length, .max_length = max_length, .cb = cb, .opaque = opaque, + .randomized = randomized, }; if (!gen_init(&ctx, fsm)) { @@ -524,6 
+527,58 @@ first_symbol(const uint64_t *symbols) return 0; } +/* Choose a label from the 256-bit symbol set using rand(): a printable + * character when the set has one, otherwise a non-printable non-zero + * character, and 0x00 only as a last resort. The set must be non-empty. */ +static unsigned char +random_symbol(const uint64_t *symbols) +{ + bool has_zero = false; + unsigned i = 0; + + /* printable and non-printable character choices */ + size_t choice_count = 0; + unsigned char choices[256]; + size_t np_choice_count = 0; + unsigned char np_choices[256]; + + while (i < 256) { + const uint64_t w = symbols[i/64]; + if ((i & 63) == 0 && w == 0) { + i += 64; + continue; + } + if (w & (1ULL << (i & 63))) { + if (i == 0) { + has_zero = true; + } else if (isprint(i)) { + choices[choice_count++] = (unsigned char)i; + } else { + np_choices[np_choice_count++] = (unsigned char)i; + } + } + i++; + } + + if (choice_count > 0) { + const size_t c = rand() % choice_count; + return choices[c]; + } + + if (np_choice_count > 0) { + const size_t c = rand() % np_choice_count; + return np_choices[c]; + } + + /* Prefer anything besides 0x00 if present, since that will truncate the string. */ + if (has_zero) { + return 0; + } + + assert(!"empty set"); + return 0; +} + #if DUMP_EDGES static void dump_edges(fsm_state_t state, struct edge_set *edges) @@ -585,7 +640,9 @@ sfs_step_edges(struct gen_ctx *ctx, struct gen_stack_frame *sf) struct edge_group_iter_info eg; if (iter_next_transition(ctx, sf, &eg)) { - const unsigned char symbol = first_symbol(eg.symbols); + const unsigned char symbol = ctx->randomized + ?
random_symbol(eg.symbols) + : first_symbol(eg.symbols); const fsm_state_t state = eg.to; LOG(2, "sfs_step_edges: got edge 0x%x ('%c')\n", diff --git a/src/libfsm/trim.c b/src/libfsm/trim.c index 4f45607bd..c37965fd8 100644 --- a/src/libfsm/trim.c +++ b/src/libfsm/trim.c @@ -462,6 +462,10 @@ integrity_check(const char *descr, const struct fsm *fsm) return; #endif +#if !EXPENSIVE_CHECKS + return; +#endif + if (LOG_TRIM > 1) { fprintf(stderr, "integrity check: %s...\n", descr); } diff --git a/src/re/main.c b/src/re/main.c index 62e51f78d..9059af96e 100644 --- a/src/re/main.c +++ b/src/re/main.c @@ -1047,7 +1047,7 @@ main(int argc, char *argv[]) } if (generate_bounds > 0) { - if (!fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt)) { + if (!fsm_generate_matches(fsm, generate_bounds, 0, fsm_generate_cb_printf_escaped, &opt)) { exit(EXIT_FAILURE); } diff --git a/tests/gen/gen1.c b/tests/gen/gen1.c index 25b30b82b..b25ab2bbc 100644 --- a/tests/gen/gen1.c +++ b/tests/gen/gen1.c @@ -34,6 +34,7 @@ int main(void) { assert(fsm != NULL); if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1 /* for \0 */, + 0, gtest_matches_cb, &matches)) { fprintf(stderr, "fsm_generate_matches: error\n"); exit(EXIT_FAILURE); diff --git a/tests/gen/gen2.c b/tests/gen/gen2.c index 02faa5e50..a475e5395 100644 --- a/tests/gen/gen2.c +++ b/tests/gen/gen2.c @@ -28,7 +28,7 @@ int main(void) { struct fsm *fsm = gtest_fsm_of_matches(&matches); assert(fsm != NULL); - if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1, gtest_matches_cb, &matches)) { + if (!fsm_generate_matches(fsm, MAX_EXP_MATCH + 1, 0, gtest_matches_cb, &matches)) { fprintf(stderr, "fsm_generate_matches: error\n"); exit(EXIT_FAILURE); } diff --git a/tests/gen/gen3.c b/tests/gen/gen3.c index f24217622..7aa0aebce 100644 --- a/tests/gen/gen3.c +++ b/tests/gen/gen3.c @@ -146,7 +146,7 @@ int main(void) { struct fsm *fsm = build(); assert(fsm != NULL); - if (!fsm_generate_matches(fsm, 11, matches_cb, NULL)) { + if 
(!fsm_generate_matches(fsm, 11, 0, matches_cb, NULL)) { fprintf(stderr, "fsm_generate_matches: error\n"); exit(EXIT_FAILURE); }