Skip to content

Commit

Permalink
Add full html response as output
Browse files Browse the repository at this point in the history
  • Loading branch information
yuseferi committed Dec 6, 2020
1 parent 2afc831 commit 443676a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
1 change: 1 addition & 0 deletions app/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type Config struct {
BaseUrl string `env:"TRANSLATOR_URL" envDefault:"https://www.ldoceonline.com/dictionary/"`
CSVWordInputFile string `env:"WORD_CSV_INPUT_FILE" envDefault:"words.csv"`
CSVWordOutputFile string `env:"WORD_CSV_OUT_PUTFILE" envDefault:"words_translated.csv"`
DesireOutPut string `env:"OUTPUT" envDefault:"full_html"`
}

func NewConfig() (cfg *Config, err error) {
Expand Down
16 changes: 14 additions & 2 deletions app/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@ import (
"io"
"net/http"
"os"
"strings"
)

// ScrapeWord scrapes a word from the translator website.
func (app *Application) ScrapeWord(word string) (string, error) {

word = strings.ReplaceAll(word, " ", "-")
var wordTranslate = ""
var WordUrl = fmt.Sprintf("%s/%s", app.Config.BaseUrl, word)
res, err := http.Get(WordUrl)
var wordUrl = fmt.Sprintf("%s/%s", app.Config.BaseUrl, word)
res, err := http.Get(wordUrl)
if err != nil || res.StatusCode != 200 {
app.Logger.Error("err to get the word", zap.Error(err), zap.Any("word", word), zap.Any("status_code", res.StatusCode))
return "", err
Expand All @@ -29,6 +32,15 @@ func (app *Application) ScrapeWord(word string) (string, error) {
return "", err
}

// If the user wants the full HTML of the page
if app.Config.DesireOutPut == "full_html" {
fullPageMarkup, err := doc.Html()
if err != nil {
app.Logger.Error("err on get full page markup", zap.Error(err), zap.Any("wordUrl", wordUrl))
}
return fullPageMarkup, nil
}

doc.Find(".entry_content").Each(func(i int, s *goquery.Selection) {
wordTranslate = s.Text()

Expand Down

0 comments on commit 443676a

Please sign in to comment.