Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Max Leske <[email protected]>
  • Loading branch information
fzipi and theseion authored Sep 19, 2024
1 parent 32bd0e9 commit d2fba4c
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 38 deletions.
14 changes: 7 additions & 7 deletions cmd/quantitative.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,26 @@ import (
func NewQuantitativeCmd() *cobra.Command {
runCmd := &cobra.Command{
Use: "quantitative",
Short: "Run Quantitative Tests",
Short: "Run quantitative tests",
Long: `Run all quantitative tests`,
RunE: runQuantitativeE,
}

runCmd.Flags().BoolP("markdown", "m", false, "Markdown table output mode")
runCmd.Flags().IntP("fast", "x", 0, "Process 1 in every X lines of input ('fast run' mode)")
runCmd.Flags().IntP("sample", "s", 0, "Process every s-th line of input (s % of lines)")
runCmd.Flags().IntP("lines", "l", 0, "Number of lines of input to process before stopping")
runCmd.Flags().IntP("paranoia-level", "P", 1, "Paranoia level used to run the quantitative tests")
runCmd.Flags().IntP("number", "n", 0, "Number is the payload line from the corpus to exclusively send")
runCmd.Flags().IntP("corpus-line", "n", 0, "Number is the payload line from the corpus to exclusively send")
runCmd.Flags().StringP("payload", "p", "", "Payload is a string you want to test using quantitative tests. Will not use the corpus.")
runCmd.Flags().IntP("rule", "r", 0, "Rule ID of interest: only show false positives for specified rule ID")
runCmd.Flags().StringP("corpus", "c", "leipzig", "Corpus to use for the quantitative tests")
runCmd.Flags().StringP("corpus-lang", "L", "eng", "Corpus language to use for the quantitative tests.")
runCmd.Flags().StringP("corpus-size", "s", "100K", "Corpus size to use for the quantitative tests. Most corpus will have a size like \"100K\", \"1M\", etc.")
runCmd.Flags().StringP("corpus-lang", "L", "eng", "Corpus language to use for the quantitative tests")
runCmd.Flags().StringP("corpus-size", "s", "100K", "Corpus size to use for the quantitative tests. Most corpora will have sizes like \"100K\", \"1M\", etc.")
runCmd.Flags().StringP("corpus-year", "y", "2023", "Corpus year to use for the quantitative tests. Most corpus will have a year like \"2023\", \"2022\", etc.")
runCmd.Flags().StringP("corpus-source", "S", "news", "Corpus source to use for the quantitative tests. Most corpus will have a source like \"news\", \"web\", \"wikipedia\", etc.")
runCmd.Flags().StringP("directory", "d", ".", "Directory where the CRS rules are stored")
runCmd.Flags().StringP("file", "f", "", "output file path for quantitative tests. Prints to standard output by default.")
runCmd.Flags().StringP("output", "o", "normal", "output type for quantitative tests. \"normal\" is the default.")
runCmd.Flags().StringP("file", "f", "", "Output file path for quantitative tests. Prints to standard output by default.")
runCmd.Flags().StringP("output", "o", "normal", "Output type for quantitative tests. \"normal\" is the default.")

return runCmd
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/quantitative_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (s *quantitativeCmdTestSuite) TearDownTest() {
func (s *quantitativeCmdTestSuite) TestQuantitativeCommand() {
s.rootCmd.SetArgs([]string{"quantitative", "-d", s.tempDir})
cmd, err := s.rootCmd.ExecuteContextC(context.Background())
s.Require().NoError(err, "quantitative command should not return an error")
s.Require().NoError(err, "quantitative command should not return error")
s.Equal("quantitative", cmd.Name(), "quantitative command should have the name 'quantitative'")
s.Require().NoError(err)
}
33 changes: 18 additions & 15 deletions experimental/corpus/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
// The corpus package provides an interface for working with corpora, as well as a set of built-in corpora
// that can be used for detecting which text will generate false positives in WAF rules.
//
// This interface includes methods for getting the URL of the corpus, getting the file from the remote URL,
// getting an iterator for the corpus, getting the payload given a line from the corpus iterator. Each corpus
// This interface includes methods for retrieving the URL of a corpus, fetching the file from the remote URL,
// creating an iterator for the corpus, and retrieving the payload of a given line from the corpus iterator. Each corpus
// will have a size, year, source, and language.
// The iterator interface includes methods for getting the next sentence from the corpus and checking if there
// The iterator interface includes methods for fetching the next sentence from the corpus and checking whether there
// is another sentence in the corpus.
// Each corpus will need its own implementation of the Corpus interface. As this is an experimental package, this
// Each corpus must implement the corpus interface. As this is an experimental package, this
// interface is subject to change.
package corpus

Expand All @@ -23,17 +23,16 @@ type CorpusFile struct {
FilePath string
}

// Corpus is the interface that needs to be implemented for getting the payload from a corpus
// Corpus is the interface that must be implemented to make a corpus available to clients
type Corpus interface {
// URL returns the URL of the corpus
URL() string

// WithURL sets the URL of the corpus
WithURL(url string) Corpus

// GetCorpusFile gets the file from the remote url.
// It returns the local file path were the corpus is stored.
GetCorpusFile() CorpusFile
// FetchCorpusFile fetches the corpus file from the remote URL and returns a CorpusFile for interaction with the file.
FetchCorpusFile() CorpusFile

// GetIterator returns an iterator for the corpus
GetIterator(c CorpusFile) Iterator
Expand All @@ -43,34 +42,38 @@ type Corpus interface {

// Size returns the size of the corpus
Size() string

// WithSize sets the size of the corpus
// Most corpus will have a size like "100K", "1M", etc., related to the amount of sentences in the corpus
// Most corpora will have sizes like "100K", "1M", etc., related to the number of sentences in the corpus
WithSize(size string) Corpus

// Year returns the year of the corpus
Year() string

// WithYear sets the year of the corpus
// Most corpus will have a year like "2023", "2022", etc.
// Most corpora will have a year like "2023", "2022", etc.
WithYear(year string) Corpus

// Source returns the source of the corpus
Source() string

// WithSource sets the source of the corpus
// Most corpus will have a source like "news", "web", "wikipedia", etc.
// Most corpora will have a source like "news", "web", "wikipedia", etc.
WithSource(source string) Corpus

// Lang returns the language of the corpus
Lang() string
// Language returns the language of the corpus
Language() string

// WithLanguage sets the language of the corpus
// Most corpus will have a language like "eng", "de", etc.
// Most corpora will have a language like "eng", "de", etc.
WithLanguage(lang string) Corpus
}

// Iterator is an interface for iterating over a corpus
type Iterator interface {
// Next returns the next sentence from the corpus
Next() string
// HasNext returns true if there is another sentence in the corpus
// HasNext returns false once the end of the corpus has been reached,
// true otherwise
HasNext() bool
}
11 changes: 6 additions & 5 deletions internal/quantitative/leipzig/corpus.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ const (
defaultCorpusType = "sentences.txt"
)

// LeipzigCorpus is a corpus of text data
// LeipzigCorpus is a corpus of text data.
// Implements the Corpus interface.
type LeipzigCorpus struct {
// url_ is the URL of the corpus
url_ string
Expand All @@ -33,7 +34,7 @@ type LeipzigCorpus struct {
// corpusFile is the original file name that contains the corpus file
corpusFile string
// Filename is the file name of the corpus
File string
Filename string
// size is the size of the corpus
size string
// source is the source of the corpus
Expand Down Expand Up @@ -149,7 +150,7 @@ func (c *LeipzigCorpus) GetPayload(line string) string {
}

// GetCorpusFile gets the file from the remote url.
// We assume that the file is compressed somehow, and we want to get a file inside it.
// We assume that the file is compressed somehow, and we want to get a file from the container.
func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile {
home, err := os.UserHomeDir()
if err != nil {
Expand All @@ -160,7 +161,7 @@ func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile {

cacheDir := path.Join(home, ".ftw")

log.Debug().Msgf("Downloading corpus file from %s", url)
log.Debug().Msgf("Preparing download of corpus file from %s", url)
dest := path.Join(cacheDir, "extracted")
if err := os.MkdirAll(dest, os.ModePerm); err != nil {
log.Fatal().Err(err).Msg("Could not create destination directory")
Expand Down Expand Up @@ -205,7 +206,7 @@ func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile {
newPath := filepath.Join(cacheDir, info.Name())
err = os.Rename(path, newPath)
if err != nil {
fmt.Println("Error renaming:", err)
fmt.Println("Error moving:", err)
return err
}
fmt.Println("Moved", path, "to", newPath)
Expand Down
2 changes: 1 addition & 1 deletion internal/quantitative/leipzig/iterator.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package leipzig

import "bufio"

// LeipzigIterator implements the Iterator interface.
type LeipzigIterator struct {
// scanner is the underlying bufio.Scanner held by the iterator.
// NOTE(review): presumably it scans the corpus file line by line; confirm against the methods.
scanner *bufio.Scanner
}
Expand Down
8 changes: 4 additions & 4 deletions internal/quantitative/local_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ func NewEngine(prefix string, paranoia int) *LocalEngine {
return eng
}

// CRSCall benchmarks the CRS WAF with a GET request
// CRSCall benchmarks the CRS WAF with a GET request
// payload: the string to be passed in the request body
// returns the status of the transaction and a map of the matched rules with their IDs and the data that matched.
// returns the status of the HTTP response and a map of the matched rules with their IDs and the data that matched.
func (e *LocalEngine) CRSCall(payload string) (int, map[int]string) {
var status = http.StatusOK
var matchedRules = make(map[int]string)
Expand Down Expand Up @@ -105,10 +105,10 @@ func (e *LocalEngine) CRSCall(payload string) (int, map[int]string) {
return status, matchedRules
}

// crsWAF creates a WAF with the CRS rules
// crsWAF creates a WAF with the CRS rules
// prefix: the path to the CRS rules
// paranoiaLevel: 1 - 4 should be added as a template to the crs-setup.conf file
// If you want to run your own waf rules instead of crs, create a similar function to crsWAF
// If you want to run your own WAF rules instead of CRS, create a similar function to crsWAF
func crsWAF(prefix string, paranoiaLevel int) coraza.WAF {
if prefix == "" {
prefix = defaultPrefix
Expand Down
4 changes: 2 additions & 2 deletions internal/quantitative/local_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"testing"
)

const crsURL = "https://github.com/coreruleset/coreruleset/releases/download/v4.6.0/coreruleset-4.6.0-minimal.tar.gz"
const crsUrl = "https://github.com/coreruleset/coreruleset/releases/download/v4.6.0/coreruleset-4.6.0-minimal.tar.gz"

type localEngineTestSuite struct {
suite.Suite
Expand Down Expand Up @@ -42,7 +42,7 @@ func (s *localEngineTestSuite) TeardownTest() {
}

// TestCrsCall requires the Core Rule Set repository to be cloned in the same parent directory as the project.
func (s *localEngineTestSuite) TestCRSCall() {
func (s *localEngineTestSuite) TestCrsCall() {
// simple payload, no matches
status, matchedRules := s.engine.CRSCall("this is a test")
s.Require().Equal(http.StatusOK, status)
Expand Down
6 changes: 3 additions & 3 deletions internal/quantitative/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"time"
)

// QuantitativeParams is the parameters for the quantitative tests
// QuantitativeParams holds the parameters for the quantitative tests
type QuantitativeParams struct {
// Lines is the number of lines of input to process before stopping
Lines int
Expand Down Expand Up @@ -83,14 +83,14 @@ func RunQuantitativeTests(params QuantitativeParams, out *output.Output) error {

// Are we using the corpus at all?
if params.Payload != "" {
// CRSCall with payload
// CRSCall with payload
doEngineCall(runner, params.Payload, params.Rule, stats)
} else { // iterate over the corpus
for iter := corpusRunner.GetIterator(lc); iter.HasNext(); {
line := iter.Next()
stats.Run++
log.Trace().Msgf("Line: %s", line)
// check if we look for a specific payload line #
// check if we are looking for a specific payload line #
if needSpecificPayload(params.Number, stats.Run) {
continue
}
Expand Down

0 comments on commit d2fba4c

Please sign in to comment.