Skip to content

Commit

Permalink
Refactor RDF parser to use an adapter
Browse files Browse the repository at this point in the history
Avoid tight coupling between `model.Feed` and the original XML RDF feed.
  • Loading branch information
fguillot committed Mar 13, 2024
1 parent ee3486a commit 6bc4b35
Show file tree
Hide file tree
Showing 5 changed files with 443 additions and 335 deletions.
20 changes: 2 additions & 18 deletions internal/reader/dublincore/dublincore.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,13 @@

package dublincore // import "miniflux.app/v2/internal/reader/dublincore"

import (
"strings"

"miniflux.app/v2/internal/reader/sanitizer"
)

// DublinCoreFeedElement represents Dublin Core feed XML elements.
type DublinCoreFeedElement struct {
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}

func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
return strings.TrimSpace(sanitizer.StripTags(feed.DublinCoreCreator))
type DublinCoreChannelElement struct {
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
}

// DublinCoreItemElement represents Dublin Core entry XML elements.
//
// Each field is populated from the child element named in its xml struct tag,
// where the tag is written as "<namespace URI> <local name>".
type DublinCoreItemElement struct {
// DublinCoreTitle maps to the "title" element in the Dublin Core 1.1 namespace.
DublinCoreTitle string `xml:"http://purl.org/dc/elements/1.1/ title"`
// DublinCoreDate maps to the "date" element in the Dublin Core 1.1 namespace.
// It is kept as a string; no date parsing happens in this struct.
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
// DublinCoreCreator maps to the "creator" element in the Dublin Core 1.1 namespace.
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
// DublinCoreContent maps to the "encoded" element. Despite the field name,
// its tag uses the RSS 1.0 content module namespace, not the Dublin Core one.
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

// GetSanitizedCreator returns the item's DublinCoreCreator after passing it
// through sanitizer.StripTags and trimming leading and trailing whitespace.
func (item *DublinCoreItemElement) GetSanitizedCreator() string {
	creator := sanitizer.StripTags(item.DublinCoreCreator)
	return strings.TrimSpace(creator)
}
115 changes: 115 additions & 0 deletions internal/reader/rdf/adapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package rdf // import "miniflux.app/v2/internal/reader/rdf"

import (
"html"
"log/slog"
"strings"
"time"

"miniflux.app/v2/internal/crypto"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
)

// RDFAdapter wraps a decoded RDF document so that it can be converted into a
// model.Feed by BuildFeed.
type RDFAdapter struct {
// rdf is the document handed to NewRDFAdapter; BuildFeed reads its channel
// and items.
rdf *RDF
}

// NewRDFAdapter returns an adapter bound to the given RDF document.
// The pointer is stored as-is; the document is not copied.
func NewRDFAdapter(rdf *RDF) *RDFAdapter {
	return &RDFAdapter{rdf: rdf}
}

// BuildFeed converts the wrapped RDF document into a model.Feed.
//
// feedURL is stored as the feed URL, replaces the title when the channel
// title is empty after sanitizing, and is the base passed to
// urllib.AbsoluteURL when computing the site URL from the channel link.
//
// One entry is appended per element of r.rdf.Items, in iteration order. For
// each entry:
//   - URL: the item link made absolute against the site URL; the raw trimmed
//     link if that fails; the site URL if the item has no link.
//   - Title: the first non-blank of the item title and the Dublin Core title,
//     HTML-unescaped; the entry URL if both are blank.
//   - Content: the content:encoded value when present, else the description.
//   - Hash: computed from the trimmed link, or from title+description when
//     the link is empty.
//   - Date: the parsed Dublin Core date, or the current time when the date is
//     missing or cannot be parsed.
//   - Author: the sanitized item creator, or the sanitized channel creator
//     when the item one is empty after sanitizing.
func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
	feed := &model.Feed{
		Title:   stripTags(r.rdf.Channel.Title),
		FeedURL: feedURL,
	}

	if feed.Title == "" {
		feed.Title = feedURL
	}

	// Keep the raw channel link when it cannot be made absolute.
	if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil {
		feed.SiteURL = r.rdf.Channel.Link
	} else {
		feed.SiteURL = siteURL
	}

	// The channel creator is identical for every item, so sanitize it once.
	channelCreator := stripTags(r.rdf.Channel.DublinCoreCreator)

	for _, item := range r.rdf.Items {
		entry := model.NewEntry()
		itemLink := strings.TrimSpace(item.Link)

		// Populate the entry URL.
		if itemLink == "" {
			entry.URL = feed.SiteURL // Fallback to the site URL if the entry URL is empty.
		} else if entryURL, err := urllib.AbsoluteURL(feed.SiteURL, itemLink); err == nil {
			entry.URL = entryURL
		} else {
			entry.URL = itemLink
		}

		// Populate the entry title: the plain title wins over the Dublin Core one.
		for _, title := range []string{item.Title, item.DublinCoreTitle} {
			title = strings.TrimSpace(title)
			if title != "" {
				entry.Title = html.UnescapeString(title)
				break
			}
		}

		// If the entry title is empty, we use the entry URL as a fallback.
		if entry.Title == "" {
			entry.Title = entry.URL
		}

		// Populate the entry content.
		if item.DublinCoreContent != "" {
			entry.Content = item.DublinCoreContent
		} else {
			entry.Content = item.Description
		}

		// Generate the entry hash.
		hashValue := itemLink
		if hashValue == "" {
			hashValue = item.Title + item.Description // Fallback to the title and description if the link is empty.
		}

		entry.Hash = crypto.Hash(hashValue)

		// Populate the entry date. The current time is the default and is kept
		// when the item has no date or the date cannot be parsed.
		entry.Date = time.Now()
		if item.DublinCoreDate != "" {
			if itemDate, err := date.Parse(item.DublinCoreDate); err != nil {
				slog.Debug("Unable to parse date from RDF feed",
					slog.String("date", item.DublinCoreDate),
					slog.String("link", itemLink),
					slog.Any("error", err),
				)
			} else {
				entry.Date = itemDate
			}
		}

		// Populate the entry author. The decision is made on the sanitized
		// values: an item creator that is only whitespace (or is empty after
		// sanitizing) must not block the channel-level fallback.
		if author := stripTags(item.DublinCoreCreator); author != "" {
			entry.Author = author
		} else if channelCreator != "" {
			entry.Author = channelCreator
		}

		feed.Entries = append(feed.Entries, entry)
	}

	return feed
}

// stripTags runs value through sanitizer.StripTags and then trims leading
// and trailing whitespace from the result.
func stripTags(value string) string {
	stripped := sanitizer.StripTags(value)
	return strings.TrimSpace(stripped)
}
6 changes: 3 additions & 3 deletions internal/reader/rdf/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import (

// Parse returns a normalized feed struct from a RDF feed.
func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
feed := new(rdfFeed)
if err := xml.NewXMLDecoder(data).Decode(feed); err != nil {
xmlFeed := new(RDF)
if err := xml.NewXMLDecoder(data).Decode(xmlFeed); err != nil {
return nil, fmt.Errorf("rdf: unable to parse feed: %w", err)
}

return feed.Transform(baseURL), nil
return NewRDFAdapter(xmlFeed).BuildFeed(baseURL), nil
}
Loading

0 comments on commit 6bc4b35

Please sign in to comment.