Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
yuseferi committed Dec 6, 2020
0 parents commit 2afc831
Show file tree
Hide file tree
Showing 12 changed files with 460 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .ENV.dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
LOG_LEVEL=info
TRANSLATOR_URL=https://www.ldoceonline.com/dictionary/
WORD_CSV_INPUT_FILE=words.csv
WORD_CSV_OUT_PUTFILE=words_translated.csv
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
words.csv
words_translated.csv
# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/
.idea
.ENV
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Yusef Mohamadi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# longman-csv-translator
Translate the words in a CSV file by fetching their translations from Longman.
The aim of this project is educational.
Longman is one of the best English dictionaries in the world. I've been using it a lot.
I remember memorizing words with the G5 solution, but now it's time to refresh my vocabulary with the AnkiDroid tool.


#### What does this project do?
It reads a CSV file of words, fetches the corresponding translation of each word from Longman, and stores the results in a CSV file.

The next step will be importing the CSV file into your AnkiDroid deck.

#### Requirement
This project is written in Go, so you need to have Go installed on your machine.
Copy the `.ENV.dist` file to `.ENV`, change the values as you wish, and then run the project with `go run main.go`.
#### Contribution
Feel free to open a PR to help develop this project.

Released under the [MIT License](https://github.com/yuseferi/longman-csv-translator/blob/main/LICENSE).
57 changes: 57 additions & 0 deletions app/app.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package app

import (
"context"
_ "github.com/jackc/pgx/v4/stdlib"
"go.uber.org/zap"
"math"
"net/http"
"sync"
)

// Application bundles the shared state of the program: its configuration,
// logger, root context and a few coordination primitives.
type Application struct {
	// Config holds the settings the application was created with.
	Config *Config
	// Ctx is the root context created by New.
	Ctx context.Context
	// Error is a buffered channel of errors allocated by New.
	// NOTE(review): nothing in this file sends on or receives from it.
	Error chan error
	// Http is an HTTP handler slot.
	// NOTE(review): it is not assigned anywhere in this file.
	Http http.Handler
	// Logger is the structured logger used throughout the application.
	Logger *zap.Logger
	// WaitGroup is allocated by New.
	// NOTE(review): presumably meant for tracking goroutines; unused in this file.
	WaitGroup *sync.WaitGroup
	// ctxCancel is the cancel function paired with Ctx.
	ctxCancel context.CancelFunc
}

// New builds an Application from config.
//
// It creates the logger from config.Level first, so that a logger failure
// returns early without leaving a live context behind, and then sets up the
// root context, the error channel and the wait group.
//
// config must be non-nil. On failure the returned *Application is nil and err
// describes the problem; no cleanup is required by the caller.
func New(config *Config) (app *Application, err error) {
	logger, err := NewLogger(config.Level)
	if err != nil {
		// Nothing has been allocated yet, so there is nothing to close.
		// (Calling Close here would run on a nil/half-built Application.)
		return nil, err
	}

	ctx, cancel := context.WithCancel(context.Background())
	app = &Application{
		Config:    config,
		Ctx:       ctx,
		Error:     make(chan error, math.MaxUint8),
		Logger:    logger,
		WaitGroup: new(sync.WaitGroup),
		ctxCancel: cancel,
	}
	app.Logger.Debug("debug mode on")

	return app, nil
}

// Close releases the resources held by the Application: it cancels the root
// context (if one was created) and logs that the application is stopping.
//
// Close is safe to call on a nil or partially initialised Application, which
// makes it usable from error paths during construction.
func (app *Application) Close() {
	if app == nil {
		return
	}
	// Cancel the context so anything derived from app.Ctx is released.
	if app.ctxCancel != nil {
		app.ctxCancel()
	}
	if app.Logger != nil {
		app.Logger.Debug("Application stops")
	}
}

// Run executes the scraping job by calling ScrapeAll.
//
// If ScrapeAll returns an error, it is logged through Logger.Panic, which in
// zap logs the message and then panics.
func (app *Application) Run() {
	if err := app.ScrapeAll(); err != nil {
		// The failing operation is the scrape job, not an HTTP server start.
		app.Logger.Panic("scraping words failed", zap.Error(err))
	}
}


17 changes: 17 additions & 0 deletions app/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package app

import (
"github.com/caarlos0/env/v6"
)

// Config is the application configuration. Each field is populated from the
// environment variable named in its `env` tag, falling back to `envDefault`.
type Config struct {
	// Level is the log level name passed to NewLogger.
	Level string `env:"LOG_LEVEL" envDefault:"info"`
	// BaseUrl is the dictionary URL to which the looked-up word is appended.
	BaseUrl string `env:"TRANSLATOR_URL" envDefault:"https://www.ldoceonline.com/dictionary/"`
	// CSVWordInputFile is the path of the CSV file containing the words to look up.
	CSVWordInputFile string `env:"WORD_CSV_INPUT_FILE" envDefault:"words.csv"`
	// CSVWordOutputFile is the path of the CSV file the results are written to.
	CSVWordOutputFile string `env:"WORD_CSV_OUT_PUTFILE" envDefault:"words_translated.csv"`
}

// NewConfig allocates a Config and fills it by parsing the process
// environment with env.Parse. The Config pointer is returned together with
// whatever error env.Parse reported.
func NewConfig() (cfg *Config, err error) {
	cfg = &Config{}
	err = env.Parse(cfg)
	return cfg, err
}
31 changes: 31 additions & 0 deletions app/logger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package app

import (
"fmt"
"go.uber.org/zap"
)

// Logger wraps a *zap.Logger so it can be used through the Printf/Println
// style methods defined on it.
type Logger struct {
	// logger is the underlying zap logger that receives every message.
	logger *zap.Logger
}

// NewLogger builds a production-style zap logger whose minimum level is the
// one named by level (for example "debug", "info", "warn" or "error").
//
// It returns an error if level cannot be parsed as a zap level or if the
// logger cannot be built; in both cases the returned logger is nil.
func NewLogger(level string) (logger *zap.Logger, err error) {
	atom := zap.NewAtomicLevel()
	if err = atom.UnmarshalText([]byte(level)); err != nil {
		return nil, err
	}

	// Start from the production defaults and apply the requested level;
	// zap.NewProduction() alone would always log at its default level and
	// silently ignore the configured one.
	cfg := zap.NewProductionConfig()
	cfg.Level = atom

	logger, err = cfg.Build()
	if err != nil {
		return nil, err
	}

	return logger, nil
}

// Printf formats its arguments with fmt.Sprintf and emits the resulting
// message on the wrapped zap logger at warn level.
func (l *Logger) Printf(format string, args ...interface{}) {
	message := fmt.Sprintf(format, args...)
	l.logger.Warn(message)
}

// Println formats its operands the way fmt.Println does (a space between
// every pair of operands) and emits the message on the wrapped zap logger at
// warn level. The trailing newline is dropped because the logger terminates
// its own entries.
func (l *Logger) Println(v ...interface{}) {
	// fmt.Sprint would join adjacent string operands without a space;
	// Sprintln always separates operands and always appends exactly one "\n",
	// so slicing off the last byte is safe.
	message := fmt.Sprintln(v...)
	l.logger.Warn(message[:len(message)-1])
}
77 changes: 77 additions & 0 deletions app/scraper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package app

import (
"encoding/csv"
"fmt"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
"io"
"net/http"
"os"
)

// ScrapeWord downloads the dictionary page for word from the configured base
// URL and returns the text of the page's ".entry_content" element. When the
// page contains several such elements the text of the last one is returned;
// when it contains none the result is the empty string.
//
// An error is returned (and logged) when the request fails, when the server
// answers with a status other than 200 OK, or when the HTML cannot be parsed.
func (app *Application) ScrapeWord(word string) (string, error) {
	// Drop trailing slashes so a base URL that already ends in "/" (as the
	// default does) does not yield a "//" in the request path.
	base := app.Config.BaseUrl
	for len(base) > 0 && base[len(base)-1] == '/' {
		base = base[:len(base)-1]
	}
	wordURL := fmt.Sprintf("%s/%s", base, word)

	res, err := http.Get(wordURL)
	if err != nil {
		// res is nil here, so it must not be touched.
		app.Logger.Error("err to get the word", zap.Error(err), zap.Any("word", word))
		return "", err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// Report the bad status as an error so callers do not store an empty
		// translation as if the lookup had succeeded.
		err = fmt.Errorf("get %q: unexpected status code %d", wordURL, res.StatusCode)
		app.Logger.Error("err to get the word", zap.Error(err), zap.Any("word", word), zap.Any("status_code", res.StatusCode))
		return "", err
	}

	// Load the HTML document
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		app.Logger.Error("err on load html", zap.Error(err), zap.Any("word", word))
		return "", err
	}

	wordTranslate := ""
	doc.Find(".entry_content").Each(func(_ int, s *goquery.Selection) {
		// Each match overwrites the previous one, so the last match wins.
		wordTranslate = s.Text()
	})
	return wordTranslate, nil
}

// ScrapeAll reads the '|'-separated input CSV named in the configuration,
// looks up the first field of every record with ScrapeWord and writes
// "word,translation" rows to the configured output CSV.
//
// Records that cannot be parsed and words that cannot be scraped are logged
// and skipped. ScrapeAll returns an error when the input or output file
// cannot be opened, when reading the input fails for a reason other than a
// malformed record, or when writing/flushing the output fails.
func (app *Application) ScrapeAll() error {
	inputFile, err := os.Open(app.Config.CSVWordInputFile)
	if err != nil {
		return err
	}
	defer inputFile.Close()

	reader := csv.NewReader(inputFile)
	reader.Comma = '|'

	outputFile, err := os.Create(app.Config.CSVWordOutputFile)
	if err != nil {
		return err
	}
	defer outputFile.Close()

	writer := csv.NewWriter(outputFile)
	// Covers the early-return paths; the success path flushes explicitly
	// below so the flush error can be reported.
	defer writer.Flush()

	// Process CSV file line by line
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Only malformed records are skippable; any other read error
			// would repeat on every call, so stop instead of spinning.
			if _, malformed := err.(*csv.ParseError); !malformed {
				return err
			}
			// record may be nil on a parse error, so index it only when
			// it actually has a field.
			fields := []zap.Field{zap.Error(err)}
			if len(record) > 0 {
				fields = append(fields, zap.Any("word", record[0]))
			}
			app.Logger.Error("err on load a word from csv", fields...)
			continue
		}
		if len(record) == 0 {
			continue
		}

		word := record[0]
		app.Logger.Info("start to translate", zap.Any("word", word))

		translation, err := app.ScrapeWord(word)
		if err != nil {
			app.Logger.Error("error on get word from translator website", zap.Error(err), zap.Any("word", word))
			continue
		}

		if err := writer.Write([]string{word, translation}); err != nil {
			return err
		}
	}

	// Flush buffers the write error internally; surface it to the caller.
	writer.Flush()
	return writer.Error()
}
15 changes: 15 additions & 0 deletions app/signals.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package app

import (
"os"
"os/signal"
"syscall"
)

// WaitExit registers for SIGINT and SIGTERM and returns the channel on which
// those signals are delivered. It does not block itself; callers wait for
// termination (for example Ctrl+C) by receiving from the returned channel.
func WaitExit() chan os.Signal {
	// Buffer of one so a signal arriving before the caller starts receiving
	// is not dropped.
	exit := make(chan os.Signal, 1)
	watched := []os.Signal{syscall.SIGINT, syscall.SIGTERM}
	signal.Notify(exit, watched...)
	return exit
}
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module github.com/yuseferi/longman-csv-translator

go 1.15

require (
github.com/PuerkitoBio/goquery v1.6.0
github.com/caarlos0/env/v6 v6.4.0
github.com/jackc/pgx/v4 v4.10.0
go.uber.org/zap v1.16.0
)
Loading

0 comments on commit 2afc831

Please sign in to comment.