Skip to content

Commit

Permalink
🐉 Homepage language improvements (#295)
Browse files Browse the repository at this point in the history
* Add language utils

* Add new property to schema

* Try to predict language for each video

* Generate new mappings
  • Loading branch information
WRadoslaw authored Feb 8, 2024
1 parent 8e8aecb commit 33df6e2
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 6 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
"graphql-tools": "^8.3.11",
"handlebars": "^4.7.7",
"haversine-distance": "^1.2.1",
"languagedetect": "^2.0.0",
"lodash": "^4.17.21",
"node-cache": "^5.1.2",
"node-schedule": "^2.1.1",
Expand Down
5 changes: 4 additions & 1 deletion schema/videos.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ type Video @entity {
thumbnailPhoto: StorageDataObject

"Video's main language"
language: String @index
language: String

"Video's orion language"
orionLanguage: String @index

"Whether or not Video contains marketing"
hasMarketing: Boolean
Expand Down
4 changes: 4 additions & 0 deletions src/mappings/content/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
processNft,
} from './utils'
import { generateAppActionCommitment } from '@joystream/js/utils'
import { predictLanguage } from '../../utils/language'

export async function processVideoCreatedEvent({
overlay,
Expand Down Expand Up @@ -114,6 +115,9 @@ export async function processVideoCreatedEvent({
}
}

const languageText = [video.title ?? '', video.description ?? ''].join(' ')
video.orionLanguage = predictLanguage(languageText) ?? video.language

channel.totalVideosCreated += 1

const eventEntity = overlay.getRepository(Event).new({
Expand Down
22 changes: 22 additions & 0 deletions src/utils/language.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import LanguageDetect from 'languagedetect'

/**
 * Normalises free-form text before it is handed to the language detector.
 *
 * Steps, in order:
 *  1. NFC-normalise, so a decomposed accent ("e" + U+0301) becomes a single
 *     letter ("é") and survives the mark stripping in step 2. Without this the
 *     result depended on how the input happened to be encoded.
 *  2. Delete apostrophes and any remaining combining marks (`\p{M}`, e.g.
 *     emoji variation selectors) without leaving a gap, so contractions such
 *     as "don't" stay a single word.
 *  3. Replace every run of punctuation (`\p{P}`), symbols (`\p{S}`, which
 *     includes most emoji) and numeric characters (`\p{N}`) with one space, so
 *     neighbouring words ("hello,world") are not glued together.
 *  4. Collapse whitespace, trim the ends and lower-case the result.
 *
 * The `u` flag is required for the `\p{…}` Unicode property escapes.
 *
 * @param input Raw text, e.g. a title and description joined together.
 * @returns The lower-cased text with only letters and single spaces between
 *   words; an empty string when nothing but noise was supplied.
 */
function cleanString(input: string): string {
  return input
    .normalize('NFC')
    .replace(/['\u2019\p{M}]/gu, '')
    .replace(/[\p{P}\p{S}\p{N}]+/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim()
    .toLowerCase()
}

// Single detector instance, created once when this module is loaded and used by predictLanguage.
const lngDetector = new LanguageDetect()
// Select the 'iso2' language type. NOTE(review): presumably this makes detect() report
// two-letter ISO 639-1 codes instead of language names — confirm against the languagedetect docs.
lngDetector.setLanguageType('iso2')

/**
 * Predicts the language of a piece of text.
 *
 * The text is first passed through `cleanString`, then the shared detector is
 * asked for at most one candidate.
 *
 * @param text Raw text to analyse.
 * @returns The first element of the top-ranked detector entry, or `undefined`
 *   when the detector returns no candidates.
 */
export const predictLanguage = (text: string): string | undefined => {
  const [bestMatch] = lngDetector.detect(cleanString(text), 1)
  return bestMatch?.[0]
}

0 comments on commit 33df6e2

Please sign in to comment.