-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 2afc831
Showing
12 changed files
with
460 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
LOG_LEVEL=info | ||
TRANSLATOR_URL=https://www.ldoceonline.com/dictionary/ | ||
WORD_CSV_INPUT_FILE=words.csv | ||
WORD_CSV_OUT_PUTFILE=words_translated.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Binaries for programs and plugins | ||
*.exe | ||
*.exe~ | ||
*.dll | ||
*.so | ||
*.dylib | ||
words.csv | ||
words_translated.csv | ||
# Test binary, built with `go test -c` | ||
*.test | ||
|
||
# Output of the go coverage tool, specifically when used with LiteIDE | ||
*.out | ||
|
||
# Dependency directories (remove the comment below to include it) | ||
# vendor/ | ||
.idea | ||
.ENV |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2020 Yusef Mohamadi | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# longman-csv-translator | ||
Translates the words in a CSV file by fetching their translations from Longman. | ||
The aim of this project is educational. | ||
Longman is one of the best English dictionaries in the world. I've been using it a lot. | ||
I remember memorizing words with the G5 solution, but now it's time to refresh my vocabulary with the AnkiDroid tool. | ||
|
||
|
||
#### What does this project do? | ||
It reads a CSV file of words, gets the equivalent translation of each word from Longman, and stores the results in an output CSV file. | ||
|
||
The next step will be storing the CSV file in your AnkiDroid deck. | ||
|
||
#### Requirement | ||
This project is written in Go, so you need to have Go installed on your machine. | ||
Then copy the `.ENV.dist` file to `.ENV`, change the values as you wish, and run the project with `go run main.go`. | ||
#### Contribution | ||
Feel free to open a PR to help develop this project. | ||
|
||
Released under the [MIT License](https://github.com/yuseferi/longman-csv-translator/blob/main/LICENSE). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package app | ||
|
||
import ( | ||
"context" | ||
_ "github.com/jackc/pgx/v4/stdlib" | ||
"go.uber.org/zap" | ||
"math" | ||
"net/http" | ||
"sync" | ||
) | ||
|
||
type Application struct { | ||
Config *Config | ||
Ctx context.Context | ||
Error chan error | ||
Http http.Handler | ||
Logger *zap.Logger | ||
WaitGroup *sync.WaitGroup | ||
ctxCancel context.CancelFunc | ||
|
||
} | ||
|
||
func New(config *Config) (app *Application, err error) { | ||
app = &Application{ | ||
Error: make(chan error, math.MaxUint8), | ||
WaitGroup: new(sync.WaitGroup), | ||
Config: config, | ||
} | ||
|
||
app.Ctx, app.ctxCancel = context.WithCancel(context.Background()) | ||
defer func() { | ||
if err != nil { | ||
app.Close() | ||
} | ||
}() | ||
|
||
app.Logger, err = NewLogger(app.Config.Level) | ||
if err != nil { | ||
return nil, err | ||
} | ||
app.Logger.Debug("debug mode on") | ||
|
||
return app, nil | ||
} | ||
|
||
func (app *Application) Close() { | ||
app.Logger.Debug("Application stops") | ||
} | ||
|
||
func (app *Application) Run() { | ||
// Run Importer | ||
if err := app.ScrapeAll(); err != nil { | ||
app.Logger.Panic("HTTP Server start error", zap.Error(err)) | ||
} | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package app | ||
|
||
import ( | ||
"github.com/caarlos0/env/v6" | ||
) | ||
|
||
// Config carries the runtime settings of the application. Each field is
// populated from the environment variable named in its `env` tag and falls
// back to the `envDefault` value when that variable is unset.
type Config struct {
	// Level is the log level name passed to NewLogger.
	Level string `env:"LOG_LEVEL" envDefault:"info"`
	// BaseUrl is the dictionary URL that scraped words are appended to.
	BaseUrl string `env:"TRANSLATOR_URL" envDefault:"https://www.ldoceonline.com/dictionary/"`
	// CSVWordInputFile is the path of the CSV file the words are read from.
	CSVWordInputFile string `env:"WORD_CSV_INPUT_FILE" envDefault:"words.csv"`
	// CSVWordOutputFile is the path of the CSV file the results are written to.
	CSVWordOutputFile string `env:"WORD_CSV_OUT_PUTFILE" envDefault:"words_translated.csv"`
}
|
||
func NewConfig() (cfg *Config, err error) { | ||
cfg = new(Config) | ||
return cfg, env.Parse(cfg) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package app | ||
|
||
import ( | ||
"fmt" | ||
"go.uber.org/zap" | ||
) | ||
|
||
type Logger struct { | ||
logger *zap.Logger | ||
} | ||
|
||
func NewLogger(level string) (logger *zap.Logger, err error) { | ||
|
||
atom := zap.NewAtomicLevel() | ||
err = atom.UnmarshalText([]byte(level)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
logger, _ = zap.NewProduction() | ||
defer logger.Sync() | ||
|
||
return logger, err | ||
} | ||
|
||
func (l *Logger) Printf(format string, args ...interface{}) { | ||
l.logger.Warn(fmt.Sprintf(format, args...)) | ||
} | ||
|
||
func (l *Logger) Println(v ...interface{}) { | ||
l.logger.Warn(fmt.Sprint(v...)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package app | ||
|
||
import ( | ||
"encoding/csv" | ||
"fmt" | ||
"github.com/PuerkitoBio/goquery" | ||
"go.uber.org/zap" | ||
"io" | ||
"net/http" | ||
"os" | ||
) | ||
|
||
// Scrape a word from translator website | ||
func (app *Application) ScrapeWord(word string) (string, error) { | ||
var wordTranslate = "" | ||
var WordUrl = fmt.Sprintf("%s/%s", app.Config.BaseUrl, word) | ||
res, err := http.Get(WordUrl) | ||
if err != nil || res.StatusCode != 200 { | ||
app.Logger.Error("err to get the word", zap.Error(err), zap.Any("word", word), zap.Any("status_code", res.StatusCode)) | ||
return "", err | ||
} | ||
|
||
defer res.Body.Close() | ||
|
||
// Load the HTML document | ||
doc, err := goquery.NewDocumentFromReader(res.Body) | ||
if err != nil { | ||
app.Logger.Error("err on load html", zap.Error(err), zap.Any("word", word)) | ||
return "", err | ||
} | ||
|
||
doc.Find(".entry_content").Each(func(i int, s *goquery.Selection) { | ||
wordTranslate = s.Text() | ||
|
||
}) | ||
return wordTranslate, nil | ||
} | ||
|
||
func (app *Application) ScrapeAll() error { | ||
inputFile, err := os.Open(app.Config.CSVWordInputFile) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer inputFile.Close() | ||
reader := csv.NewReader(inputFile) | ||
reader.Comma = '|' | ||
|
||
// File writer | ||
outputFile, err := os.Create(app.Config.CSVWordOutputFile) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer outputFile.Close() | ||
writer := csv.NewWriter(outputFile) | ||
defer writer.Flush() | ||
// Process CSV file line by line | ||
for { | ||
record, err := reader.Read() | ||
if err == io.EOF { | ||
break | ||
} else if err != nil { | ||
app.Logger.Error("err on load a word from csv", zap.Error(err), zap.Any("word", record[0])) | ||
continue | ||
} | ||
app.Logger.Info("start to translate", zap.Any("word", record[0])) | ||
translatedWorld, err := app.ScrapeWord(record[0]) | ||
if err != nil { | ||
app.Logger.Error("error on get word from translator website", zap.Error(err), zap.Any("word", record[0])) | ||
continue | ||
} | ||
err = writer.Write([]string{record[0], translatedWorld}) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package app | ||
|
||
import ( | ||
"os" | ||
"os/signal" | ||
"syscall" | ||
) | ||
|
||
// WaitExit registers for SIGINT (Ctrl+C) and SIGTERM and returns the channel
// on which those signals are delivered. It does not block itself; the caller
// waits by receiving from the returned channel. The channel has a buffer of
// one.
func WaitExit() chan os.Signal {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	return ch
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
module github.com/yuseferi/longman-csv-translator | ||
|
||
go 1.15 | ||
|
||
require ( | ||
github.com/PuerkitoBio/goquery v1.6.0 | ||
github.com/caarlos0/env/v6 v6.4.0 | ||
github.com/jackc/pgx/v4 v4.10.0 | ||
go.uber.org/zap v1.16.0 | ||
) |
Oops, something went wrong.