Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cdata codegen, with eager output support #30

Merged
merged 15 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions fuzz/target.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,8 @@ fuzz_eager_output(const uint8_t *data, size_t size)

size_t max_pattern_length = 0;

const unsigned seed = size == 0 ? 0 : data[0];
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so I guess we'll srand() here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll add that before I switch from a draft PR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now done in e133a74 on #29.


/* chop data into a series of patterns */
{
size_t prev = 0;
Expand Down Expand Up @@ -645,7 +647,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
* Use the combined DFA to generate matches, check that the
* match behavior agrees with the individual DFA copies. */
env.current_pattern = (size_t)-1;
if (!fsm_generate_matches(env.combined, max_pattern_length, gen_combined_check_individual_cb, &env)) {
if (!fsm_generate_matches(env.combined, max_pattern_length, seed, gen_combined_check_individual_cb, &env)) {
goto cleanup;
}

Expand All @@ -655,7 +657,7 @@ fuzz_eager_output(const uint8_t *data, size_t size)
/* check behavior against the combined DFA. */
for (size_t i = 0; i < env.pattern_count; i++) {
env.current_pattern = i;
if (!fsm_generate_matches(env.combined, max_pattern_length, gen_individual_check_combined_cb, &env)) {
if (!fsm_generate_matches(env.combined, max_pattern_length, seed, gen_individual_check_combined_cb, &env)) {
goto cleanup;
}
}
Expand Down
2 changes: 2 additions & 0 deletions include/fsm/print.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ enum fsm_print_lang {
FSM_PRINT_VMC, /* ISO C90 code, VM style */
FSM_PRINT_VMDOT, /* Graphviz Dot format, showing VM opcodes */

FSM_PRINT_CDATA, /* C data tables and small interpreter */

FSM_PRINT_VMOPS_C, /* VM opcodes as a datastructure */
FSM_PRINT_VMOPS_H,
FSM_PRINT_VMOPS_MAIN
Expand Down
6 changes: 5 additions & 1 deletion include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ fsm_walk_edges(const struct fsm *fsm, void *opaque,
* functionally equivalent cases makes testing dramatically faster,
* but exploring every edge could be added later.
*
* If seed is zero, the first label in each label set is generated.
* Otherwise srand(seed) is called and a label is chosen from the set
* using rand(); printable characters are chosen whenever the set
* contains any.
*
* Note: fsm is non-const because it calls fsm_trim on the FSM
* internally. This records the shortest distance from each state to an
* end state, which is used to prune branches that would not produce
Expand All @@ -114,7 +118,7 @@ fsm_generate_matches_cb(const struct fsm *fsm,
const char *input, size_t input_length,
fsm_state_t end_state, void *opaque);
int
fsm_generate_matches(struct fsm *fsm, size_t max_length,
fsm_generate_matches(struct fsm *fsm, size_t max_length, unsigned seed,
fsm_generate_matches_cb *cb, void *opaque);

/* Callback provided for the most basic use case for
Expand Down
2 changes: 1 addition & 1 deletion src/fsm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ main(int argc, char *argv[])
}

if (generate_bounds > 0) {
r = fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt);
r = fsm_generate_matches(fsm, generate_bounds, 0, fsm_generate_cb_printf_escaped, &opt);
}

fsm_free(fsm);
Expand Down
65 changes: 60 additions & 5 deletions src/libfsm/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct gen_ctx {
fsm_generate_matches_cb *cb;

bool done;
bool randomized;

size_t buf_ceil;
size_t buf_used;
Expand Down Expand Up @@ -139,7 +140,7 @@ static bool
grow_stack(struct gen_ctx *ctx);

int
fsm_generate_matches(struct fsm *fsm, size_t max_length,
fsm_generate_matches(struct fsm *fsm, size_t max_length, unsigned seed,
fsm_generate_matches_cb *cb, void *opaque)
{
if (max_length == 0) {
Expand All @@ -153,7 +154,7 @@ fsm_generate_matches(struct fsm *fsm, size_t max_length,

INIT_TIMERS();
TIME(&pre);
int res = gen_init_outer(fsm, max_length, cb, opaque, false, 0);
int res = gen_init_outer(fsm, max_length, cb, opaque, seed != 0, seed);
TIME(&post);

DIFF_MSEC("fsm_generate_matches", pre, post, NULL);
Expand Down Expand Up @@ -212,8 +213,9 @@ gen_init_outer(struct fsm *fsm, size_t max_length,

assert(fsm_all(fsm, fsm_isdfa)); /* DFA-only */

assert(!randomized); /* not yet supported */
(void)seed;
if (randomized) {
srand(seed);
}

#if LOG_GEN > 1
fprintf(stderr, "%s: %u states\n", __func__, fsm_countstates(fsm));
Expand All @@ -228,6 +230,7 @@ gen_init_outer(struct fsm *fsm, size_t max_length,
.max_length = max_length,
.cb = cb,
.opaque = opaque,
.randomized = randomized,
};

if (!gen_init(&ctx, fsm)) {
Expand Down Expand Up @@ -528,6 +531,55 @@ first_symbol(const uint64_t *symbols)
return 0;
}

/*
 * Pick one symbol at random from a label set.
 *
 * symbols is read as four 64-bit words forming a 256-bit set, where
 * bit (c & 63) of word (c / 64) marks byte value c as a member.
 *
 * Members are sorted into printable (per isprint) and non-printable
 * candidates, with 0x00 tracked separately. A printable member is
 * returned if there is one; failing that, a non-printable non-zero
 * member; 0x00 is returned only when it is the sole member. The pick
 * within a group uses rand(), so it follows whatever srand() state is
 * in effect when this is called.
 *
 * The set must not be empty (asserted).
 */
static unsigned char
random_symbol(const uint64_t *symbols)
{
	unsigned char printable[256];
	unsigned char unprintable[256];
	size_t printable_count = 0;
	size_t unprintable_count = 0;
	bool nul_present = false;

	for (unsigned word_id = 0; word_id < 256/64; word_id++) {
		const uint64_t word = symbols[word_id];
		if (word == 0) {
			continue;	/* no members among these 64 values */
		}

		for (unsigned bit = 0; bit < 64; bit++) {
			if ((word & ((uint64_t)1 << bit)) == 0) {
				continue;
			}

			/* Candidates are collected in ascending order. */
			const unsigned c = 64*word_id + bit;
			if (c == 0) {
				nul_present = true;
			} else if (isprint(c)) {
				printable[printable_count++] = (unsigned char)c;
			} else {
				unprintable[unprintable_count++] = (unsigned char)c;
			}
		}
	}

	if (printable_count > 0) {
		return printable[rand() % printable_count];
	}

	if (unprintable_count > 0) {
		return unprintable[rand() % unprintable_count];
	}

	/* Only 0x00 can remain. It is the last resort because a NUL
	 * would truncate the generated string. */
	if (nul_present) {
		return 0;
	}

	assert(!"empty set");
	return 0;
}

#if DUMP_EDGES
static void
dump_edges(fsm_state_t state, struct edge_set *edges)
Expand All @@ -542,6 +594,7 @@ dump_edges(fsm_state_t state, struct edge_set *edges)
size_t i = 0;
while (edge_set_group_iter_next(&ei, &eg)) {
const unsigned char symbol = first_symbol(eg.symbols);
const unsigned char symbol = random_symbol(eg.symbols);
fprintf(stderr, "%s: %d -- %zu/%zu -- 0x%02x (%c) -> %d\n",
__func__, state, i, count,
symbol, isprint(symbol) ? symbol : '.', eg.to);
Expand Down Expand Up @@ -589,7 +642,9 @@ sfs_step_edges(struct gen_ctx *ctx, struct gen_stack_frame *sf)
struct edge_group_iter_info eg;

if (iter_next_transition(ctx, sf, &eg)) {
const unsigned char symbol = first_symbol(eg.symbols);
const unsigned char symbol = ctx->randomized
? random_symbol(eg.symbols)
: first_symbol(eg.symbols);
const fsm_state_t state = eg.to;

LOG(2, "sfs_step_edges: got edge 0x%x ('%c')\n",
Expand Down
2 changes: 2 additions & 0 deletions src/libfsm/print.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ fsm_print(FILE *f, const struct fsm *fsm,
case FSM_PRINT_VMC: print_vm = fsm_print_vmc; break;
case FSM_PRINT_VMDOT: print_vm = fsm_print_vmdot; break;

case FSM_PRINT_CDATA: print_ir = fsm_print_cdata; break;

case FSM_PRINT_VMOPS_C: print_vm = fsm_print_vmops_c; break;
case FSM_PRINT_VMOPS_H: print_vm = fsm_print_vmops_h; break;
case FSM_PRINT_VMOPS_MAIN: print_vm = fsm_print_vmops_main; break;
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/print.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ vm_print_f fsm_print_llvm;
vm_print_f fsm_print_rust;
vm_print_f fsm_print_sh;
vm_print_f fsm_print_vmc;
ir_print_f fsm_print_cdata;

vm_print_f fsm_print_vmdot;
vm_print_f fsm_print_vmops_c;
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/print/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SRC += src/libfsm/print/irdot.c
SRC += src/libfsm/print/irjson.c
SRC += src/libfsm/print/json.c
SRC += src/libfsm/print/llvm.c
SRC += src/libfsm/print/cdata.c
SRC += src/libfsm/print/rust.c
SRC += src/libfsm/print/sh.c
SRC += src/libfsm/print/vmasm.c
Expand Down
Loading