diff --git a/cmd/quantitative.go b/cmd/quantitative.go index 65980fb..396551b 100644 --- a/cmd/quantitative.go +++ b/cmd/quantitative.go @@ -4,10 +4,13 @@ package cmd import ( + "fmt" + "os" + + "github.com/spf13/cobra" + "github.com/coreruleset/go-ftw/internal/quantitative" "github.com/coreruleset/go-ftw/output" - "github.com/spf13/cobra" - "os" ) // NewQuantitativeCmd @@ -20,8 +23,6 @@ func NewQuantitativeCmd() *cobra.Command { RunE: runQuantitativeE, } - runCmd.Flags().BoolP("markdown", "m", false, "Markdown table output mode") - runCmd.Flags().IntP("sample", "s", 0, "Process every s-th line of input (s % of lines)") runCmd.Flags().IntP("lines", "l", 0, "Number of lines of input to process before stopping") runCmd.Flags().IntP("paranoia-level", "P", 1, "Paranoia level used to run the quantitative tests") runCmd.Flags().IntP("corpus-line", "n", 0, "Number is the payload line from the corpus to exclusively send") @@ -50,7 +51,6 @@ func runQuantitativeE(cmd *cobra.Command, _ []string) error { directory, _ := cmd.Flags().GetString("directory") fast, _ := cmd.Flags().GetInt("fast") lines, _ := cmd.Flags().GetInt("lines") - markdown, _ := cmd.Flags().GetBool("markdown") outputFilename, _ := cmd.Flags().GetString("file") paranoiaLevel, _ := cmd.Flags().GetInt("paranoia-level") payload, _ := cmd.Flags().GetString("payload") @@ -58,6 +58,10 @@ func runQuantitativeE(cmd *cobra.Command, _ []string) error { rule, _ := cmd.Flags().GetInt("rule") wantedOutput, _ := cmd.Flags().GetString("output") + if paranoiaLevel > 1 && rule > 0 { + return fmt.Errorf("paranoia level and rule ID cannot be used together") + } + // use outputFile to write to file var outputFile *os.File var err error @@ -80,7 +84,6 @@ func runQuantitativeE(cmd *cobra.Command, _ []string) error { Directory: directory, Fast: fast, Lines: lines, - Markdown: markdown, ParanoiaLevel: paranoiaLevel, Number: number, Payload: payload, diff --git a/cmd/quantitative_test.go b/cmd/quantitative_test.go index 0a6dd7d..84a0058 100644 --- a/cmd/quantitative_test.go +++ b/cmd/quantitative_test.go @@ -5,16 +5,17 @@ package cmd import ( "context" - "github.com/spf13/cobra" - "github.com/stretchr/testify/suite" "io/fs" "os" "path" "testing" + + "github.com/spf13/cobra" + "github.com/stretchr/testify/suite" ) -var crsSetupFileContents = `# CRS Setup Configuration File` -var emptyRulesFile = `# Empty Rules File` +var crsSetupFileContents = `# CRS Setup Configuration filename` +var emptyRulesFile = `# Empty Rules filename` type quantitativeCmdTestSuite struct { suite.Suite diff --git a/experimental/corpus/types.go b/experimental/corpus/types.go index bfa4866..e0ba21e 100644 --- a/experimental/corpus/types.go +++ b/experimental/corpus/types.go @@ -1,3 +1,6 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + // Package corpus provides functionality for creating and managing corpora. // // A corpus is a collection of text documents that are used for training and testing machine learning models. @@ -15,12 +18,27 @@ // interface is subject to change. package corpus -// CorpusFile contains the cache directory and file name -type CorpusFile struct { +// Define an enum for CorpusType +type Type string + +const ( + Leipzig Type = "leipzig" +) + +// File interface is used to interact with Corpus files. +// It provides methods for setting the cache directory and file path. +type File interface { // CacheDir is the directory where files are cached - CacheDir string + CacheDir() string + // FilePath is the path to the cached file - FilePath string + FilePath() string + + // WithCacheDir sets the cache directory + WithCacheDir(cacheDir string) File + + // WithFilePath sets the file path + WithFilePath(filePath string) File } // Corpus is the interface that must be implemented to make a corpus available to clients @@ -32,24 +50,21 @@ type Corpus interface { WithURL(url string) Corpus // FetchCorpusFile fetches the corpus file from the remote URL and returns a CorpusFile for interaction with the file. - FetchCorpusFile() CorpusFile + FetchCorpusFile() File // GetIterator returns an iterator for the corpus - GetIterator(c CorpusFile) Iterator - - // GetPayload returns the payload given a line from the Corpus Iterator - GetPayload(line string) string + GetIterator(c File) Iterator // Size returns the size of the corpus Size() string - + // WithSize sets the size of the corpus // Most corpora will have a sizes like "100K", "1M", etc., related to the amount of sentences in the corpus WithSize(size string) Corpus // Year returns the year of the corpus Year() string - + // WithYear sets the year of the corpus // Most corpora will have a year like "2023", "2022", etc. WithYear(year string) Corpus @@ -72,8 +87,15 @@ type Corpus interface { // Iterator is an interface for iterating over a corpus type Iterator interface { // Next returns the next sentence from the corpus - Next() string + Next() Payload // HasNext returns true unless the end of the corpus has been reached // false otherwise HasNext() bool } + +type Payload interface { + // LineNumber returns the payload given a line from the Corpus Iterator + LineNumber() int + // Content returns the payload given a line from the Corpus Iterator + Content() string +} diff --git a/internal/quantitative/leipzig/corpus.go b/internal/quantitative/leipzig/corpus.go index 62cb496..a0e2c23 100644 --- a/internal/quantitative/leipzig/corpus.go +++ b/internal/quantitative/leipzig/corpus.go @@ -1,15 +1,19 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package leipzig import ( "bufio" "fmt" - "github.com/coreruleset/go-ftw/experimental/corpus" - "github.com/hashicorp/go-getter" - "github.com/rs/zerolog/log" "os" "path" "path/filepath" - "strings" + + "github.com/hashicorp/go-getter" + "github.com/rs/zerolog/log" + + "github.com/coreruleset/go-ftw/experimental/corpus" ) // LeipzigCorpus represents a corpus of text data @@ -33,8 +37,8 @@ type LeipzigCorpus struct { lang string // corpusFile is the original file name that contains the corpus file corpusFile string - // File is the file name of the corpus - Filename string + // filename is the file name of the corpus + filename string // size is the size of the corpus size string // source is the source of the corpus @@ -47,7 +51,7 @@ func (c *LeipzigCorpus) regenerateFileNames() { c.corpusFile = fmt.Sprintf("%s_%s_%s_%s.%s", c.lang, c.source, c.year, c.size, defaultCorpusExt) - c.File = fmt.Sprintf("%s_%s_%s_%s-%s", + c.filename = fmt.Sprintf("%s_%s_%s_%s-%s", c.lang, c.source, c.year, c.size, defaultCorpusType) } @@ -57,7 +61,7 @@ func NewLeipzigCorpus() corpus.Corpus { leipzig := &LeipzigCorpus{ url_: defaultCorpusSite, corpusFile: "", - File: "", + filename: "", lang: defaultCorpusLanguage, source: defaultCorpusSource, year: defaultCorpusYear, @@ -115,7 +119,7 @@ func (c *LeipzigCorpus) WithSource(source string) corpus.Corpus { } // Lang returns the language of the corpus -func (c *LeipzigCorpus) Lang() string { +func (c *LeipzigCorpus) Language() string { return c.lang } @@ -126,14 +130,15 @@ func (c *LeipzigCorpus) WithLanguage(lang string) corpus.Corpus { } // GetIterator returns an iterator for the corpus -func (c *LeipzigCorpus) GetIterator(cache corpus.CorpusFile) corpus.Iterator { +func (c *LeipzigCorpus) GetIterator(cache corpus.File) corpus.Iterator { // open cache file - if cache.FilePath == "" { + cached := cache.FilePath() + if cached == "" { log.Fatal().Msg("Cache file path is empty") } - file, err := os.Open(cache.FilePath) + file, err := os.Open(cached) if err != nil { - log.Fatal().Err(err).Msgf("Could not open the file %s", cache.FilePath) + log.Fatal().Err(err).Msgf("Could not open the file %s", cached) } scanner := bufio.NewScanner(file) it := &LeipzigIterator{ @@ -142,16 +147,9 @@ func (c *LeipzigCorpus) GetIterator(cache corpus.CorpusFile) corpus.Iterator { return it } -// GetPayload returns the payload from the line -// We assume that the first word is the line number, -// and we want the rest -func (c *LeipzigCorpus) GetPayload(line string) string { - return strings.Join(strings.Split(line, "\t")[1:], " ") -} - -// GetCorpusFile gets the file from the remote url. +// FetchCorpusFile gets the file from the remote url. // We assume that the file is compressed somehow, and we want to get a file from the container. -func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile { +func (c *LeipzigCorpus) FetchCorpusFile() corpus.File { home, err := os.UserHomeDir() if err != nil { log.Fatal().Err(err).Msg("Could not get home directory") @@ -167,14 +165,11 @@ func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile { log.Fatal().Err(err).Msg("Could not create destination directory") } - cache := corpus.CorpusFile{ - CacheDir: cacheDir, - FilePath: "", - } + cache := NewFile().WithCacheDir(cacheDir) - if info, err := os.Stat(path.Join(home, ".ftw", c.File)); err == nil { - log.Debug().Msgf("File %s already exists", info.Name()) - cache.FilePath = path.Join(home, ".ftw", c.File) + if info, err := os.Stat(path.Join(home, ".ftw", cache.FilePath())); err == nil { + log.Debug().Msgf("filename %s already exists", info.Name()) + cache = cache.WithFilePath(path.Join(home, ".ftw", c.filename)) return cache } @@ -202,7 +197,7 @@ func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile { log.Trace().Msgf("Checking file %s", info.Name()) - if info.Name() == c.File { + if info.Name() == c.filename { newPath := filepath.Join(cacheDir, info.Name()) err = os.Rename(path, newPath) if err != nil { @@ -210,7 +205,7 @@ func (c *LeipzigCorpus) GetCorpusFile() corpus.CorpusFile { return err } fmt.Println("Moved", path, "to", newPath) - cache.FilePath = newPath + cache = cache.WithFilePath(newPath) } return nil diff --git a/internal/quantitative/leipzig/corpus_test.go b/internal/quantitative/leipzig/corpus_test.go index cd55f98..a80f1f6 100644 --- a/internal/quantitative/leipzig/corpus_test.go +++ b/internal/quantitative/leipzig/corpus_test.go @@ -1,15 +1,20 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package leipzig import ( - "github.com/coreruleset/go-ftw/experimental/corpus" - "github.com/stretchr/testify/suite" "testing" + + "github.com/stretchr/testify/suite" + + "github.com/coreruleset/go-ftw/experimental/corpus" ) type leipzigCorpusTestSuite struct { suite.Suite corpus corpus.Corpus - cache corpus.CorpusFile + cache corpus.File iter corpus.Iterator } @@ -20,7 +25,7 @@ func TestLeipzigCorpusTestSuite(t *testing.T) { func (s *leipzigCorpusTestSuite) SetupTest() { s.corpus = NewLeipzigCorpus() s.Require().Equal("https://downloads.wortschatz-leipzig.de/corpora", s.corpus.URL()) - s.Require().Equal("eng", s.corpus.Lang()) + s.Require().Equal("eng", s.corpus.Language()) s.Require().Equal("100K", s.corpus.Size()) s.Require().Equal("news", s.corpus.Source()) s.Require().Equal("2023", s.corpus.Year()) @@ -33,20 +38,15 @@ func (s *leipzigCorpusTestSuite) TestWithSize() { func (s *leipzigCorpusTestSuite) TestGetIterator() { s.corpus.WithSize("10K") - s.cache = s.corpus.GetCorpusFile() + s.cache = s.corpus.FetchCorpusFile() s.iter = s.corpus.GetIterator(s.cache) } func (s *leipzigCorpusTestSuite) TestNextSentenceFromCorpus() { - s.cache = s.corpus.GetCorpusFile() - s.iter = s.corpus.GetIterator(s.cache) - s.Require().True(s.iter.HasNext()) - s.Require().Equal("1\t$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", s.iter.Next()) -} - -func (s *leipzigCorpusTestSuite) TestGetPayloadFromString() { - s.cache = s.corpus.GetCorpusFile() + s.cache = s.corpus.FetchCorpusFile() s.iter = s.corpus.GetIterator(s.cache) s.Require().True(s.iter.HasNext()) - s.Require().Equal("1\t$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", s.iter.Next()) + payload := s.iter.Next() + s.Require().Equal(1, payload.LineNumber()) + s.Require().Equal("$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", payload.Content()) } diff --git a/internal/quantitative/leipzig/file.go b/internal/quantitative/leipzig/file.go new file mode 100644 index 0000000..3d67737 --- /dev/null +++ b/internal/quantitative/leipzig/file.go @@ -0,0 +1,39 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + +package leipzig + +import "github.com/coreruleset/go-ftw/experimental/corpus" + +// File implements the corpus.File interface. +type File struct { + cacheDir string + filePath string +} + +// NewFile returns a new File +func NewFile() corpus.File { + return File{} +} + +// CacheDir is the directory where files are cached +func (f File) CacheDir() string { + return f.cacheDir +} + +// FilePath is the path to the cached file +func (f File) FilePath() string { + return f.filePath +} + +// WithCacheDir sets the cache directory +func (f File) WithCacheDir(cacheDir string) corpus.File { + f.cacheDir = cacheDir + return f +} + +// WithFilePath sets the file path +func (f File) WithFilePath(filePath string) corpus.File { + f.filePath = filePath + return f +} diff --git a/internal/quantitative/leipzig/iterator.go b/internal/quantitative/leipzig/iterator.go index 4ac1d77..4ec71fc 100644 --- a/internal/quantitative/leipzig/iterator.go +++ b/internal/quantitative/leipzig/iterator.go @@ -1,9 +1,18 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package leipzig -import "bufio" +import ( + "bufio" + + "github.com/coreruleset/go-ftw/experimental/corpus" +) + // Implements the Iterator interface. type LeipzigIterator struct { scanner *bufio.Scanner + line int } // HasNext returns true if there is another sentence in the corpus @@ -12,6 +21,8 @@ func (c *LeipzigIterator) HasNext() bool { } // Next returns the next sentence from the corpus -func (c *LeipzigIterator) Next() string { - return c.scanner.Text() +func (c *LeipzigIterator) Next() corpus.Payload { + p := c.scanner.Text() + c.line++ + return NewPayload(p) } diff --git a/internal/quantitative/leipzig/payload.go b/internal/quantitative/leipzig/payload.go new file mode 100644 index 0000000..9d60e2f --- /dev/null +++ b/internal/quantitative/leipzig/payload.go @@ -0,0 +1,41 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + +package leipzig + +import ( + "strconv" + "strings" +) + +// Payload implements the corpus.Payload interface. +type Payload struct { + line int + payload string +} + +// NewPayload returns a new Payload +func NewPayload(line string) *Payload { + split := strings.Split(line, "\t") + // convert to int + num, err := strconv.Atoi(split[0]) + if err != nil { + num = -1 + } + p := strings.Join(split[1:], " ") + return &Payload{ + line: num, + payload: p, + } +} + +// LineNumber returns the payload given a line from the Corpus Iterator +// If the line number is not a number, it will return -1 +func (p *Payload) LineNumber() int { + return p.line +} + +// Content returns the payload given a line from the Corpus Iterator +func (p *Payload) Content() string { + return p.payload +} diff --git a/internal/quantitative/leipzig/payload_test.go b/internal/quantitative/leipzig/payload_test.go new file mode 100644 index 0000000..52df43f --- /dev/null +++ b/internal/quantitative/leipzig/payload_test.go @@ -0,0 +1,138 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + +package leipzig + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/suite" +) + +type payloadTestSuite struct { + suite.Suite +} + +func TestPayloadTestSuite(t *testing.T) { + suite.Run(t, new(payloadTestSuite)) +} + +func (s *payloadTestSuite) TestNewPayload() { + type args struct { + line string + } + tests := []struct { + name string + args args + want *Payload + }{ + { + name: "TestNewPayload", + args: args{ + line: "1\t$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", + }, + want: &Payload{ + line: 1, + payload: "$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", + }, + }, + { + name: "TestAdditional", + args: args{ + line: "2000\tThis is an additional payload", + }, + want: &Payload{ + line: 2000, + payload: "This is an additional payload", + }, + }, + } + for _, tt := range tests { + s.Run(tt.name, func() { + if got := NewPayload(tt.args.line); !reflect.DeepEqual(got, tt.want) { + s.Require().Equal(got, tt.want) + } + }) + } +} + +func (s *payloadTestSuite) TestPayload_Content() { + type fields struct { + line int + payload string + } + tests := []struct { + name string + fields fields + want string + }{ + { + name: "TestContent", + fields: fields{ + line: 1, + payload: "$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", + }, + want: "$156,834 for The Pathway to Excellence in Practice program through Neighborhood Place of Puna.", + }, + { + name: "TestContent2", + fields: fields{ + line: 2000, + payload: "This is another test payload", + }, + want: "This is another test payload", + }, + } + for _, tt := range tests { + s.Run(tt.name, func() { + p := &Payload{ + line: tt.fields.line, + payload: tt.fields.payload, + } + if got := p.Content(); got != tt.want { + s.Require().Equal(got, tt.want) + } + }) + } +} + +func (s *payloadTestSuite) TestPayload_LineNumber() { + type fields struct { + line int + payload string + } + tests := []struct { + name string + fields fields + want int + }{ + { + name: "TestLineNumber", + fields: fields{ + line: 1, + payload: "This is a test payload", + }, + want: 1, + }, + { + name: "TestLineNumber2", + fields: fields{ + line: 2000, + payload: "This is another test payload", + }, + want: 2000, + }, + } + for _, tt := range tests { + s.Run(tt.name, func() { + p := &Payload{ + line: tt.fields.line, + payload: tt.fields.payload, + } + if got := p.LineNumber(); got != tt.want { + s.Require().Equal(got, tt.want) + } + }) + } +} diff --git a/internal/quantitative/local_engine.go b/internal/quantitative/local_engine.go index 15a17b8..51724ce 100644 --- a/internal/quantitative/local_engine.go +++ b/internal/quantitative/local_engine.go @@ -6,15 +6,16 @@ package quantitative import ( "bytes" "fmt" - "github.com/corazawaf/coraza/v3" - "github.com/corazawaf/coraza/v3/types" - "github.com/rs/zerolog/log" "net/http" "net/url" "os" "strconv" "strings" "text/template" + + "github.com/corazawaf/coraza/v3" + "github.com/corazawaf/coraza/v3/types" + "github.com/rs/zerolog/log" ) const ( diff --git a/internal/quantitative/local_engine_test.go b/internal/quantitative/local_engine_test.go index 68238bf..15af9e6 100644 --- a/internal/quantitative/local_engine_test.go +++ b/internal/quantitative/local_engine_test.go @@ -1,12 +1,16 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package quantitative import ( - "github.com/hashicorp/go-getter" - "github.com/stretchr/testify/suite" "net/http" "os" "path" "testing" + + "github.com/hashicorp/go-getter" + "github.com/stretchr/testify/suite" ) const crsUrl = "https://github.com/coreruleset/coreruleset/releases/download/v4.6.0/coreruleset-4.6.0-minimal.tar.gz" @@ -26,7 +30,7 @@ func (s *localEngineTestSuite) SetupTest() { s.Require().NoError(os.MkdirAll(s.dir, 0755)) client := &getter.Client{ Mode: getter.ClientModeAny, - Src: crsURL, + Src: crsUrl, Dst: s.dir, } diff --git a/internal/quantitative/runner.go b/internal/quantitative/runner.go index 2a71566..345be6c 100644 --- a/internal/quantitative/runner.go +++ b/internal/quantitative/runner.go @@ -1,12 +1,17 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package quantitative import ( + "net/http" + "time" + + "github.com/rs/zerolog/log" + "github.com/coreruleset/go-ftw/experimental/corpus" "github.com/coreruleset/go-ftw/internal/quantitative/leipzig" "github.com/coreruleset/go-ftw/output" - "github.com/rs/zerolog/log" - "net/http" - "time" ) // QuantitativeParams holds the parameters for the quantitative tests @@ -23,8 +28,6 @@ type QuantitativeParams struct { Number int // Directory is the directory where the CRS rules are stored Directory string - // Markdown is the Markdown table output mode - Markdown bool // ParanoiaLevel is the paranoia level in where to run the quantitative tests ParanoiaLevel int // CorpusSize is the corpus size to use for the quantitative tests @@ -40,19 +43,19 @@ type QuantitativeParams struct { } // NewCorpus creates a new corpus -func NewCorpus(name string) corpus.Corpus { - switch name { - case "leipzig": +func NewCorpus(corpusType corpus.Type) corpus.Corpus { + switch corpusType { + case corpus.Leipzig: return leipzig.NewLeipzigCorpus() default: - log.Fatal().Msgf("Unknown corpus %s", name) + log.Fatal().Msgf("Unknown corpus implementation: %s", corpusType) return nil } } // RunQuantitativeTests runs all quantitative tests func RunQuantitativeTests(params QuantitativeParams, out *output.Output) error { - log.Info().Msg("Running quantitative tests") + out.Println("Running quantitative tests") log.Trace().Msgf("Lines: %d", params.Lines) log.Trace().Msgf("Fast: %d", params.Fast) @@ -60,7 +63,6 @@ func RunQuantitativeTests(params QuantitativeParams, out *output.Output) error { log.Trace().Msgf("Payload: %s", params.Payload) log.Trace().Msgf("Read Corpus Line: %d", params.Number) log.Trace().Msgf("Directory: %s", params.Directory) - log.Trace().Msgf("Markdown: %t", params.Markdown) log.Trace().Msgf("Paranoia level: %d", params.ParanoiaLevel) log.Trace().Msgf("Corpus size: %s", params.CorpusSize) log.Trace().Msgf("Corpus lang: %s", params.CorpusLang) @@ -68,14 +70,14 @@ func RunQuantitativeTests(params QuantitativeParams, out *output.Output) error { startTime := time.Now() // create a new corpusRunner - corpusRunner := NewCorpus(params.Corpus). + corpusRunner := NewCorpus(corpus.Leipzig). WithSize(params.CorpusSize). WithYear(params.CorpusYear). WithSource(params.CorpusSource). WithLanguage(params.CorpusLang) // download the corpusRunner file - lc := corpusRunner.GetCorpusFile() + lc := corpusRunner.FetchCorpusFile() // create the results stats := NewQuantitativeStats() @@ -83,30 +85,31 @@ func RunQuantitativeTests(params QuantitativeParams, out *output.Output) error { // Are we using the corpus at all? if params.Payload != "" { + log.Trace().Msgf("Payload received from cmdline: %s", params.Payload) // CrsCall with payload doEngineCall(runner, params.Payload, params.Rule, stats) } else { // iterate over the corpus + log.Trace().Msgf("Iterating over corpus") for iter := corpusRunner.GetIterator(lc); iter.HasNext(); { - line := iter.Next() - stats.Run++ - log.Trace().Msgf("Line: %s", line) + p := iter.Next() + stats.incrementRun() + payload := p.Content() + log.Trace().Msgf("Line: %s", payload) // check if we are looking for a specific payload line # - if needSpecificPayload(params.Number, stats.Run) { + if needSpecificPayload(params.Number, stats.Count()) { continue } - // ask the corpus to get the payload - payload := corpusRunner.GetPayload(line) - log.Trace().Msgf("Payload: %s", payload) // check if we only want to process a specific number of lines - if params.Lines > 0 && stats.Run >= params.Lines { + if params.Lines > 0 && stats.Count() >= params.Lines { break } + doEngineCall(runner, payload, params.Rule, stats) } } - stats.TotalTime = time.Since(startTime) + stats.SetTotalTime(time.Since(startTime)) stats.printSummary(out) return nil } diff --git a/internal/quantitative/stats.go b/internal/quantitative/stats.go index 8a2e6d4..6969598 100644 --- a/internal/quantitative/stats.go +++ b/internal/quantitative/stats.go @@ -1,53 +1,90 @@ +// Copyright 2024 OWASP CRS Project +// SPDX-License-Identifier: Apache-2.0 + package quantitative import ( "encoding/json" - "github.com/coreruleset/go-ftw/output" - "github.com/rs/zerolog/log" "time" + + "github.com/rs/zerolog/log" + + "github.com/coreruleset/go-ftw/output" ) // RunStats accumulates test statistics. type QuantitativeRunStats struct { - // Run is the amount of tests executed in this run. - Run int `json:"run"` - // TotalTime is the duration over all runs, the sum of all individual run times. - TotalTime time.Duration - // FalsePositives is the total false positives detected - FalsePositives int `json:"falsePositives"` - // FalsePositivesPerRule is the aggregated false positives per rule - FalsePositivesPerRule map[int]int `json:"falsePositivesPerRule"` + // count_ is the amount of tests executed in this run. + count_ int + // totalTime is the duration over all runs, the sum of all individual run times. + totalTime time.Duration + // falsePositives is the total false positives detected + falsePositives int + // falsePositivesPerRule is the aggregated false positives per rule + falsePositivesPerRule map[int]int } // NewQuantitativeStats returns a new empty stats func NewQuantitativeStats() *QuantitativeRunStats { return &QuantitativeRunStats{ - Run: 0, - FalsePositives: 0, - FalsePositivesPerRule: make(map[int]int), - TotalTime: 0, + count_: 0, + falsePositives: 0, + falsePositivesPerRule: make(map[int]int), + totalTime: 0, } } // print final statistics func (s *QuantitativeRunStats) printSummary(out *output.Output) { log.Debug().Msg("Printing Stats summary") - if s.FalsePositives > 0 { + if s.falsePositives > 0 { if out.IsJson() { b, _ := json.Marshal(s) out.RawPrint(string(b)) } else { - ratio := float64(s.FalsePositives) / float64(s.Run) - out.Println("Run %d payloads in %s", s.Run, s.TotalTime) - out.Println("Total False positive ratio: %d/%d = %.4f", s.FalsePositives, s.Run, ratio) - out.Println("False positives per rule: %+v", s.FalsePositivesPerRule) + ratio := float64(s.falsePositives) / float64(s.count_) + out.Println("Run %d payloads in %s", s.count_, s.totalTime) + out.Println("Total False positive ratio: %d/%d = %.4f", s.falsePositives, s.count_, ratio) + out.Println("False positives per rule: %+v", s.falsePositivesPerRule) // echo "| Freq. | ID # | Paranoia Level |" // echo "| ------ | ------ | -------------- |" } } } +// addFalsePositive increments the false positive count and the false positive count for the rule. func (s *QuantitativeRunStats) addFalsePositive(rule int) { - s.FalsePositives++ - s.FalsePositivesPerRule[rule]++ + s.falsePositives++ + s.falsePositivesPerRule[rule]++ +} + +// incrementRun increments the amount of tests executed in this run. +func (s *QuantitativeRunStats) incrementRun() { + s.count_++ +} + +// Count returns the amount of tests executed in this run. +func (s *QuantitativeRunStats) Count() int { + return s.count_ +} + +// TotalTime returns the duration over all runs, the sum of all individual run times. +func (s *QuantitativeRunStats) TotalTime() time.Duration { + return s.totalTime +} + +// SetTotalTime sets the duration over all runs, the sum of all individual run times. +func (s *QuantitativeRunStats) SetTotalTime(totalTime time.Duration) { + s.totalTime = totalTime +} + +// MarshalJSON marshals the stats to JSON. +func (s *QuantitativeRunStats) MarshalJSON() ([]byte, error) { + // Custom marshaling logic here + return json.Marshal(map[string]interface{}{ + "count": s.count_, + "totalTime": s.totalTime, + "falsePositives": s.falsePositives, + "falsePositivesPerRule": s.falsePositivesPerRule, + }) }