Skip to content

Commit

Permalink
Match Multipart mail
Browse files Browse the repository at this point in the history
  • Loading branch information
Mizuho32 committed Oct 19, 2024
1 parent 9768cb3 commit 21ed331
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 3 deletions.
4 changes: 4 additions & 0 deletions message/part.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ func (p *Part) String() string {
return fmt.Sprintf("&Part{%s/%s offsets %d/%d/%d/%d lines %d decodedsize %d next %d last %d bound %q parts %v}", p.MediaType, p.MediaSubType, p.BoundaryOffset, p.HeaderOffset, p.BodyOffset, p.EndOffset, p.RawLineCount, p.DecodedSize, p.nextBoundOffset, p.lastBoundOffset, p.bound, p.Parts)
}

// GetBound returns the part's unexported bound field converted to a string.
// NOTE(review): presumably this is the multipart boundary delimiter for the
// part; confirm against the parser code that sets it.
func (p *Part) GetBound() string {
	bound := string(p.bound)
	return bound
}

// newPart parses a new part, which can be the top-level message.
// offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part.
// If an error occurs, p's exported values can still be relevant. EnsurePart uses these values.
Expand Down
21 changes: 18 additions & 3 deletions store/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,26 @@ func (ws WordSearch) matchPart(log mlog.Log, p *message.Part, headerToo bool, se
}

if len(p.Parts) == 0 {
var tp io.Reader
if p.MediaType != "TEXT" {
// todo: for other types we could try to find a library for parsing and search in there too.
return false, nil
if p.MediaType == "MULTIPART" {
// Decode and make io.Reader
// todo: avoid loading the entire content into memory
content, err := io.ReadAll(p.RawReader())
if err != nil {
return false, err
}
tp, err = decodeMultiPart(string(content), p.GetBound())
if err != nil {
return false, err
}
} else {
// todo: for other types we could try to find a library for parsing and search in there too.
return false, nil
}
} else {
tp = p.ReaderUTF8OrBinary()
}
tp := p.ReaderUTF8OrBinary()
// todo: for html and perhaps other types, we could try to parse as text and filter on the text.
miss, err := ws.searchReader(log, tp, seen)
if miss || err != nil || ws.isQuickHit(seen) {
Expand Down
48 changes: 48 additions & 0 deletions store/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@ package store

import (
"fmt"
"io"
"log/slog"
"os"
"strings"
"testing"

"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
)

func TestSubjectMatch(t *testing.T) {
Expand Down Expand Up @@ -36,3 +43,44 @@ func TestSubjectMatch(t *testing.T) {
}
}
}

// TestMultipartMailDecode checks that a word search finds its word in a
// multipart message.
//
// The raw message and the search word are read from fixture files addressed
// relative to the package directory. The test fails if either file cannot be
// read, if the message does not parse, if matching returns an error, or if
// the word is not found.
func TestMultipartMailDecode(t *testing.T) {
	log := mlog.New("search", nil)

	const (
		msgPath  = "../../data/mail_raw.txt" // multipart mail raw data
		wordPath = "../../data/word.txt"     // word expected to be found in the mail
	)

	// Open the raw message; it stays open while the message reader uses it.
	msgFile, err := os.Open(msgPath)
	if err != nil {
		t.Fatalf("opening message file %q: %v", msgPath, err)
	}
	defer msgFile.Close()

	// Load the search word, dropping surrounding whitespace such as a
	// trailing newline.
	wordFile, err := os.Open(wordPath)
	if err != nil {
		t.Fatalf("opening word file %q: %v", wordPath, err)
	}
	defer wordFile.Close()
	rawWord, err := io.ReadAll(wordFile)
	if err != nil {
		t.Fatalf("reading search word from %q: %v", wordPath, err)
	}
	searchWord := strings.TrimSpace(string(rawWord))

	// Parse the message.
	mr := FileMsgReader([]byte{}, msgFile)
	p, err := message.Parse(log.Logger, false, mr)
	if err != nil {
		t.Fatalf("parsing message %q: %v", msgPath, err)
	}

	// Match. Report a search error separately from a plain miss, so a
	// failure points at the right cause.
	ws := PrepareWordSearch([]string{searchWord}, []string{})
	ok, err := ws.MatchPart(log, &p, true)
	if err != nil {
		t.Fatalf("matching word %q: %v", searchWord, err)
	}
	if !ok {
		t.Fatalf("word %q not found in message %q", searchWord, msgPath)
	}
	log.Debug("Check match", slog.String("word", searchWord), slog.Bool("ok", ok))
}

0 comments on commit 21ed331

Please sign in to comment.