Skip to content

Commit

Permalink
Merge pull request jakopako#268 from jakopako/feature/config-dir
Browse files Browse the repository at this point in the history
Feature/config dir
  • Loading branch information
jakopako authored Jan 1, 2024
2 parents 55b9a20 + f460226 commit 8be1391
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 24 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ config.yml
goskyr.class
goskyr.model
.release-env
.local-env
.local-env
config-test-dir
14 changes: 7 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func worker(sc chan scraper.Scraper, ic chan map[string]interface{}, gc *scraper
func main() {
singleScraper := flag.String("s", "", "The name of the scraper to be run.")
toStdout := flag.Bool("stdout", false, "If set to true the scraped data will be written to stdout despite any other existing writer configurations. In combination with the -generate flag the newly generated config will be written to stdout instead of to a file.")
configFile := flag.String("c", "./config.yml", "The location of the configuration file.")
configLoc := flag.String("c", "./config.yml", "The location of the configuration. Can be a directory containing config files or a single config file.")
printVersion := flag.Bool("v", false, "The version of goskyr.")
generateConfig := flag.String("g", "", "Automatically generate a config file for the given url.")
m := flag.Int("m", 20, "The minimum number of items on a page. This is needed to filter out noise. Works in combination with the -g flag.")
Expand Down Expand Up @@ -76,7 +76,7 @@ func main() {
if *toStdout {
fmt.Println(string(yamlData))
} else {
f, err := os.Create(*configFile)
f, err := os.Create(*configLoc)
if err != nil {
log.Fatalf("ERROR while trying to open file: %v", err)
}
Expand All @@ -85,7 +85,7 @@ func main() {
if err != nil {
log.Fatalf("ERROR while trying to write to file: %v", err)
}
log.Printf("successfully wrote config to file %s", *configFile)
log.Printf("successfully wrote config to file %s", *configLoc)
}
return
}
Expand All @@ -97,7 +97,7 @@ func main() {
return
}

config, err := scraper.NewConfig(*configFile)
config, err := scraper.NewConfig(*configLoc)
if err != nil {
log.Fatal(err)
}
Expand All @@ -118,11 +118,11 @@ func main() {
writer = &output.StdoutWriter{}
} else {
switch config.Writer.Type {
case "stdout":
case output.STDOUT_WRITER_TYPE:
writer = &output.StdoutWriter{}
case "api":
case output.API_WRITER_TYPE:
writer = output.NewAPIWriter(&config.Writer)
case "file":
case output.FILE_WRITER_TYPE:
writer = output.NewFileWriter(&config.Writer)
default:
log.Fatalf("writer of type %s not implemented", config.Writer.Type)
Expand Down
33 changes: 20 additions & 13 deletions output/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,12 @@ func (f *APIWriter) Write(items chan map[string]interface{}) {
apiPassword := f.writerConfig.Password

deletedSources := map[string]bool{}
nrItems := 0
nrItemsWritten := 0
batch := []map[string]interface{}{}

// This code assumes that within one source, items are ordered
// by date ascending.
for item := range items {
nrItems++
currentSrc := item["sourceUrl"].(string)
if _, found := deletedSources[currentSrc]; !found {
deletedSources[currentSrc] = true
Expand All @@ -55,33 +54,41 @@ func (f *APIWriter) Write(items chan map[string]interface{}) {
req.SetBasicAuth(apiUser, apiPassword)
resp, err := client.Do(req)
if err != nil {
log.Printf("error while sending event %+v to the api: %v", item, err)
log.Printf("error while deleting items from the api: %v\n", err)
continue
}
if resp.StatusCode != 200 {
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
}
log.Fatalf("error while deleting items. Status Code: %d\nUrl: %s Response: %s", resp.StatusCode, deleteURL, body)
log.Fatalf("error while deleting items. Status Code: %d\nUrl: %s Response: %s\n", resp.StatusCode, deleteURL, body)
}
resp.Body.Close()
}
batch = append(batch, item)
if len(batch) == 100 {
postBatch(client, batch, apiURL, apiUser, apiPassword)
if err := postBatch(client, batch, apiURL, apiUser, apiPassword); err != nil {
fmt.Printf("%v\n", err)
} else {
nrItemsWritten = nrItemsWritten + 100
}
batch = []map[string]interface{}{}
}
}
postBatch(client, batch, apiURL, apiUser, apiPassword)
if err := postBatch(client, batch, apiURL, apiUser, apiPassword); err != nil {
fmt.Printf("%v\n", err)
} else {
nrItemsWritten = nrItemsWritten + len(batch)
}

log.Printf("wrote %d items from %d sources to the api", nrItems, len(deletedSources))
log.Printf("wrote %d items from %d sources to the api", nrItemsWritten, len(deletedSources))
}

func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiUser, apiPassword string) {
func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiUser, apiPassword string) error {
concertJSON, err := json.Marshal(batch)
if err != nil {
log.Fatal(err)
return err
}
req, _ := http.NewRequest("POST", apiURL, bytes.NewBuffer(concertJSON))
req.Header = map[string][]string{
Expand All @@ -90,16 +97,16 @@ func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiU
req.SetBasicAuth(apiUser, apiPassword)
resp, err := client.Do(req)
if err != nil {
log.Printf("error while sending post request: %v", err)
return
return fmt.Errorf("error while sending post request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != 201 {
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Printf("error while reading post request respones: %v", err)
return fmt.Errorf("error while reading post request respones: %v", err)
} else {
log.Printf("error while adding new events. Status Code: %d Response: %s", resp.StatusCode, body)
return fmt.Errorf("error while adding new events. Status Code: %d Response: %s", resp.StatusCode, body)
}
}
return nil
}
8 changes: 7 additions & 1 deletion output/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@ type Writer interface {
// which is responsible for writing the scraped data to a specific output
// eg. stdout.
type WriterConfig struct {
Type string `yaml:"type" env:"WRITER_TYPE" env-default:"stdout"`
Type string `yaml:"type" env:"WRITER_TYPE"`
Uri string `yaml:"uri" env:"WRITER_URI"`
User string `yaml:"user" env:"WRITER_USER"`
Password string `yaml:"password" env:"WRITER_PASSWORD"`
FilePath string `yaml:"filepath" env:"WRITER_FILEPATH"`
}

const (
STDOUT_WRITER_TYPE = "stdout"
FILE_WRITER_TYPE = "file"
API_WRITER_TYPE = "api"
)
39 changes: 37 additions & 2 deletions scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ import (
"bytes"
"errors"
"fmt"
"io/fs"
"log"
"net/url"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -41,8 +44,40 @@ type Config struct {

func NewConfig(configPath string) (*Config, error) {
var config Config
err := cleanenv.ReadConfig(configPath, &config)
return &config, err
fileInfo, err := os.Stat(configPath)
if err != nil {
return nil, err
}
if fileInfo.IsDir() {
err := filepath.WalkDir(configPath, func(path string, d fs.DirEntry, err error) error {
if !d.IsDir() {
var configTmp Config
if err := cleanenv.ReadConfig(path, &configTmp); err != nil {
return err
}
config.Scrapers = append(config.Scrapers, configTmp.Scrapers...)
if configTmp.Writer.Type != "" {
if config.Writer.Type == "" {
config.Writer = configTmp.Writer
} else {
return fmt.Errorf("ERROR: config files must only contain max. one writer")
}
}
}
return nil // skipping everything that is not a file
})
if err != nil {
return nil, err
}
} else {
if err := cleanenv.ReadConfig(configPath, &config); err != nil {
return nil, err
}
}
if config.Writer.Type == "" {
config.Writer.Type = output.STDOUT_WRITER_TYPE
}
return &config, nil
}

// RegexConfig is used for extracting a substring from a string based on the
Expand Down

0 comments on commit 8be1391

Please sign in to comment.