diff --git a/articles/get_entities.html b/articles/get_entities.html index a53ab881..e337d704 100644 --- a/articles/get_entities.html +++ b/articles/get_entities.html @@ -176,7 +176,7 @@
tagger_ner <- load_tagger_ner("ner")
-#> 2023-11-29 09:41:52,148 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Flair NLP operates under the PyTorch framework. As such, we can use
the $to
method to set the device for the Flair Python
library. The flair_device(“cpu”) allows you to select whether to use the
@@ -368,7 +368,7 @@
print(batch_process_results)
#> doc_id entity tag text_id
diff --git a/articles/get_pos.html b/articles/get_pos.html
index 57e23fc0..b4ba2a9e 100644
--- a/articles/get_pos.html
+++ b/articles/get_pos.html
@@ -172,7 +172,7 @@ Generic Approach Using Pa
Hugging Face.
tagger_pos <- load_tagger_pos("pos")
-#> 2023-11-29 09:42:49,554 SequenceTagger predicts: Dictionary with 53 tags: <unk>, O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD
+#> 2023-11-29 12:38:08,493 SequenceTagger predicts: Dictionary with 53 tags: <unk>, O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD
Flair NLP operates under the PyTorch framework. As such, we can use
the $to
method to set the device for the Flair Python
library. The flair_device(“cpu”) allows you to select whether to use the
diff --git a/articles/highlight_text.html b/articles/highlight_text.html
index 24c62550..0575950a 100644
--- a/articles/highlight_text.html
+++ b/articles/highlight_text.html
@@ -168,7 +168,7 @@
# initiate TransformerWordEmbeddings
embedding <- flair_embeddings.TransformerDocumentEmbeddings('bert-base-uncased')
-#> 2023-11-29 09:43:54,925 Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max
+#> 2023-11-29 12:39:14,535 Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max
# create a sentence
sentence <- flair_data.Sentence('The grass is green .')
@@ -698,7 +698,7 @@
library(flaiR)
tagger_pos <- load_tagger_pos("pos-fast")
-#> 2023-11-29 09:43:58,053 SequenceTagger predicts: Dictionary with 53 tags: <unk>, O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD
results <- get_pos(texts, doc_ids, tagger_pos)
head(results, n = 10)
@@ -727,7 +727,7 @@
library(flaiR)
tagger_ner <- load_tagger_ner("ner")
-#> 2023-11-29 09:43:59,225 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
results <- get_entities(texts, doc_ids, tagger_ner)
head(results, n = 10)
diff --git a/articles/tutorial.html b/articles/tutorial.html
index 62a46587..2b7dfb00 100644
--- a/articles/tutorial.html
+++ b/articles/tutorial.html
@@ -621,7 +621,7 @@
corpus <- Corpus(train=train,
# dev=test,
test=test)
-#> 2023-11-29 09:45:11,080 No dev split found. Using 0% (i.e. 471 samples) of the train split as dev data
+#> 2023-11-29 12:40:29,096 No dev split found. Using 0% (i.e. 471 samples) of the train split as dev data
sprintf("Corpus object sizes - Train: %d | Test: %d | Dev: %d",
length(corpus$train),
length(corpus$test),
@@ -659,7 +659,7 @@
# load the NER tagger
Classifier <- flair_nn()$Classifier
tagger <- Classifier$load('ner')
-#> 2023-11-29 09:45:12,595 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
+#> 2023-11-29 12:40:30,777 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
# run NER over sentence
tagger$predict(sentence)
# downsize to 0.05
corpus = IMDB()
-#> 2023-11-29 09:45:22,978 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced
-#> 2023-11-29 09:45:22,978 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt
-#> 2023-11-29 09:45:22,978 Dev: None
-#> 2023-11-29 09:45:22,978 Test: None
-#> 2023-11-29 09:45:23,578 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data
-#> 2023-11-29 09:45:23,593 No dev split found. Using 0% (i.e. 4500 samples) of the train split as dev data
-#> 2023-11-29 09:45:23,593 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment')
+#> 2023-11-29 12:40:41,193 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced
+#> 2023-11-29 12:40:41,193 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt
+#> 2023-11-29 12:40:41,193 Dev: None
+#> 2023-11-29 12:40:41,193 Test: None
+#> 2023-11-29 12:40:41,794 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data
+#> 2023-11-29 12:40:41,809 No dev split found. Using 0% (i.e. 4500 samples) of the train split as dev data
+#> 2023-11-29 12:40:41,809 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment')
corpus$downsample(0.05)
-#> <flair.datasets.document_classification.IMDB object at 0x2f99f1bd0>
+#> <flair.datasets.document_classification.IMDB object at 0x2f46f3670>
Print the sizes in the corpus object as follows - test: %d | train: %d | dev: %d”
@@ -1262,8 +1262,8 @@Loading a Tagged Corpus
lbl_type = 'sentiment' label_dict = corpus$make_label_dictionary(label_type=lbl_type) -#> 2023-11-29 09:45:23,699 Computing label dictionary. Progress: -#> 2023-11-29 09:45:27,449 Dictionary created for label 'sentiment' with 2 values: POSITIVE (seen 1014 times), NEGATIVE (seen 1011 times)
{flaiR}
is an R wrapper for the {flairNLP/flair} library in Python, designed specifically for R users, especially those in the social sciences. It provides easy access to the main functionalities of {flairNLP}
. Developed by Developed by Zalando Research in Berlin, Flair NLP offers intuitive interfaces and exceptional multilingual support, particularly for various embedding frameworks, transformers and state-of-the-art natural language processing tasks to analyze your text, such as named entity recognition, sentiment analysis, part-of-speech tagging, biomedical data, sense disambiguation, and classification, with support for a rapidly growing number of languages in the community. For a comprehensive understanding of the {flairNLP/flair}
architecture, you can refer to the research article ‘Contextual String Embeddings for Sequence Labeling’ and the official manual written for its Python implementation.
+ {flaiR} is an R wrapper for the {flairNLP/flair} Python library, specifically tailored for R users, particularly in political science and the social sciences. flaiR provides easy access to the main functionalities of {Flair NLP}
. Developed by Developed by Zalando Research in Berlin, flair NLP is a straightforward framework for state-of-the-art Natural Language Processing (NLP) and is compatible with Hugging Face. Flair offers intuitive interfaces and exceptional multilingual support, particularly for various embedding models, transformers and state-of-the-art NLP tasks to analyze texts, such as named entity recognition, sentiment analysis, part-of-speech tagging, with support for a rapidly growing number of language models in the community.
For a comprehensive understanding of the {flairNLP/flair}
architecture and NLP tagging models by Zalando Research, you can refer to the research article ‘Contextual String Embeddings for Sequence Labeling’ and the official manual written for its Python implementation. The community support for more languages is rapidly expanding. This unofficial platform provides R users with documentation, examples, and tutorials for using Flair NLP. The goal is to make it easier for R users to access the powerful NLP tools provided by Flair NLP.
GitHub
Anaconda or miniconda (highly recommended)
We have tested flaiR using CI/CD with GitHub Actions, conducting integration tests across various operating syste These tests include intergration between R versions 4.2.1, 4.3.2, and 4.2.0 and Python 3.10.x. The testing also covers environments with flair NLP and PyTorch (given that Flair NLP is built on Torch). For stable usage, we strongly recommend installing these specific versions.
-When first installed, {flaiR
} automatically detects whether you have Python 3.8 or higher. If not, it will skip the automatic installation of Python and flair NLP. In this case, you will need to manually install it yourself and reload {flaiR
} again. If you have correct Python installed, the {flaiR
} will automatically install flair Python NLP in your global environment. If you are using {reticulate}, {flaiR} will typically assume the r-reticulate environment by default. At the same time, you can use py_config()
to check the location of your environment. Please note that flaiR will directly install flair NLP in the Python environment that your R is using. This environment can be adjusted through RStudio by navigating to Tools -> Global Options -> Python
. If there are any issues with the installation, feel free to ask in the Discussion .
When first installed, {flaiR
} automatically detects whether you have Python 3.8 or higher. If not, it will skip the automatic installation of Python and flair NLP. In this case, you will need to manually install it yourself and reload {flaiR
} again. If you have correct Python installed, the {flaiR
} will automatically install flair Python NLP in your global environment. If you are using {reticulate}, {flaiR} will typically assume the r-reticulate environment by default. At the same time, you can use py_config()
to check the location of your environment. Please note that flaiR will directly install flair NLP in the Python environment that your R is using. This environment can be adjusted through RStudio by navigating to Tools -> Global Options -> Python
. If there are any issues with the installation, feel free to ask in the Discussion .
First, understanding which Python environment your RStudio is using is very important. We advise you to confirm which Python environment RStudio is using. You can do this by checking with reticulate::py_config()
or manually via Tools -> Global Options -> Python.
install.packages("reticulate")
@@ -250,15 +251,15 @@
Step 2 Preprocess Data and Corpus Object
corpus <- Corpus(train=train, test=test)
-#> 2023-11-29 09:32:41,983 No dev split found. Using 0% (i.e. 282 samples) of the train split as dev data
+#> 2023-11-29 12:28:25,704 No dev split found. Using 0% (i.e. 282 samples) of the train split as dev data
Step 3 Create Classifier Using Transformer
document_embeddings <- TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=TRUE)
First, $make_label_dictionary
function is used to automatically create a label dictionary for the classification task. The label dictionary is a mapping from label to index, which is used to map the labels to a tensor of label indices. expcept classifcation task, flair also supports other label types for training custom model, such as ner
, pos
and sentiment
.
label_dict <- corpus$make_label_dictionary(label_type="classification")
-#> 2023-11-29 09:32:43,454 Computing label dictionary. Progress:
-#> 2023-11-29 09:32:43,504 Dictionary created for label 'classification' with 2 values: 0 (seen 1321 times), 1 (seen 1213 times)
Alternatively, you can also create a label dictionary manually. The following code creates a label dictionary with two labels, 0
and 1
, and maps them to the indices 0
and 1
respectively.
# load Dictionary object from flair_data
@@ -409,14 +410,14 @@
classifier$predict(sentence)
print(sentence)
-#> Sentence[55]: "Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes!" → 1 (0.6431)
+#> Sentence[55]: "Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes!" → 1 (0.5151)
sentence$labels
is a list of labels, each of which has a value and a score. The value is the label itself, and the score is the probability of the label. The label with the highest score is the predicted label.
sentence$labels[[1]]$value
#> [1] "1"
sentence$labels[[1]]$score
-#> [1] 0.6430542
Step 7 Reload the Model with the Best Performance
When you train the model with save_final_model=TRUE
, the model with the best performance on the development set will be saved in the output directory. You can reload the model with the best performance using the load
function.
@@ -471,7 +472,6 @@#> 5 Blaming Political Opponents 1 0.999097466468811
Secondly, to facilitate more efficient use for social science research, {flairR
} expands {flairNLP/flair
}’s core functionality for working with three major functions to extract features in a tidy and fast format– data.table in R.
Flair NLP also provides a set of functions to perform NLP tasks, such as named entity recognition, sentiment analysis, and part-of-speech tagging.
-First, we load the data and the model and perform NER task on the text below.
+First, we load the data and the model to perform NER task on the text below.
@@ -489,7 +489,7 @@Yesterday, Dr. Jane Smith spoke at the United Nations in New York. She discussed climate change and its impact on global economies. The event was attended by representatives from various countries including France and Japan. Dr. Smith mentioned that by 2050, the world could see a rise in sea level by approximately 2 feet. The World Health Organization (WHO) has pledged $50 million to combat the health effects of global warming. In an interview with The New York Times, Dr. Smith emphasized the urgent need for action. Later that day, she flew back to London, arriving at 10:00 PM GMT.
Alternatively, the expanded features in flaiR
can be used to perform and extract features from the sentence object in a tidy format.
Alternatively, to facilitate more efficient use for social science research, {flairR
} expands {flairNLP/flair
}’s core functionality for working with three major functions to extract features in a tidy and fast format– data.table in R.
The expanded features in flaiR
can be used to perform and extract features from the sentence object in a tidy format.
For example, we can use the get_entities
function and load_tagger_ner("ner")
in flaiR to extract the named entities from the sentence object in a tidy format.
tagger_ner <- load_tagger_ner("ner")
-#> 2023-11-29 09:32:49,075 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
+#> 2023-11-29 12:28:33,472 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
results <- get_entities(text = text,
doc_ids = "example text",
tagger_ner)
@@ -549,7 +550,7 @@
#> 10 This will properly manage the adverse environmental effects of a… New Zealand
tagger_ner <- load_tagger_ner("ner")
-#> 2023-11-29 09:32:51,701 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
+#> 2023-11-29 12:28:36,082 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
results <- get_entities(text = examples$text,
doc_ids = examples$countryname,
tagger_ner)
@@ -582,7 +583,7 @@ Contribution and Open Source
Links
diff --git a/pkgdown.yml b/pkgdown.yml
index 3bf72272..f34ef9ad 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -10,7 +10,7 @@ articles:
quickstart: quickstart.html
transformer_wordembeddings: transformer_wordembeddings.html
tutorial: tutorial.html
-last_built: 2023-11-29T09:41Z
+last_built: 2023-11-29T12:36Z
urls:
reference: https://davidycliao.github.io/flaiR/reference
article: https://davidycliao.github.io/flaiR/articles
diff --git a/reference/figures/README-unnamed-chunk-12-1.png b/reference/figures/README-unnamed-chunk-12-1.png
index 7a242526..0941f2d2 100644
Binary files a/reference/figures/README-unnamed-chunk-12-1.png and b/reference/figures/README-unnamed-chunk-12-1.png differ
diff --git a/reference/figures/README-unnamed-chunk-15-1.png b/reference/figures/README-unnamed-chunk-15-1.png
index 188f5c6d..bdc962d4 100644
Binary files a/reference/figures/README-unnamed-chunk-15-1.png and b/reference/figures/README-unnamed-chunk-15-1.png differ
diff --git a/search.json b/search.json
index 071c717c..8c041965 100644
--- a/search.json
+++ b/search.json
@@ -1 +1 @@
-[{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"ner-models","dir":"Articles","previous_headings":"","what":"NER Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/tagging-entities ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"pos-models","dir":"Articles","previous_headings":"","what":"POS Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/part--speech-tagging ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"sentiment-models","dir":"Articles","previous_headings":"","what":"Sentiment Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/get_entities.html","id":"generic-approach-using-pre-trained-ner-english-model","dir":"Articles","previous_headings":"","what":"Generic Approach Using Pre-trained NER English Model","title":"Tagging Named Entities with Flair Standard Models","text":"Use load_tagger_ner call NER pretrained model. model downloaded Flair’s Hugging Face repo. Thus, ensure internet connection. downloaded, model stored .flair cache device. , ’ve downloaded hasn’t manually removed, executing command trigger download. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. 
CUDA, please visit: https://developer.nvidia.com/cuda-zone want computation run faster, recommended keep show.text_id set FALSE default.","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 10) tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 09:41:52,148 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , tagger_pos$to(flair_device(\"mps\")) SequenceTagger( (embeddings): StackedEmbeddings( (list_embedding_0): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) (list_embedding_1): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) ) (word_dropout): WordDropout(p=0.05) (locked_dropout): LockedDropout(p=0.5) (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True) (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True) (linear): Linear(in_features=512, out_features=53, bias=True) (loss_function): ViterbiLoss() (crf): CRF() ) results <- get_entities(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE ) print(results) #> doc_id entity tag #> 1: Philip Hollobone Conservative ORG #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG #> 3: Philip Hollobone Back Benches MISC #> 4: Philip Hollobone Kettering LOC #> 5: Philip Hollobone Sikh MISC #> 6: Philip Hollobone Kettering LOC #> 7: Philip Hollobone Kettering LOC #> 8: Philip Hollobone British MISC #> 9: Philip Hollobone United Kingdom LOC #> 10: Philip Hollobone Norman MISC #> 11: Philip Hollobone United Kingdom LOC #> 12: Stewart Jackson Friend PER #> 13: Stewart Jackson Archbishop of Canterbury ORG #> 14: Stewart 
Jackson Carey PER #> 15: Philip Hollobone Friend PER #> 16: Philip Hollobone United Kingdom LOC #> 17: Philip Hollobone UK LOC #> 18: Philip Hollobone Europe LOC #> 19: Philip Hollobone Malta LOC #> 20: Stewart Jackson Barking LOC #> 21: Stewart Jackson Dagenham LOC #> 22: Stewart Jackson British National ORG #> 23: Stewart Jackson Conservative ORG #> 24: Stewart Jackson Friend PER #> 25: Stewart Jackson Folkestone LOC #> 26: Stewart Jackson Hythe LOC #> 27: Stewart Jackson Howard PER #> 28: Philip Hollobone Friend PER #> 29: Philip Hollobone Shipley PER #> 30: Philip Hollobone Philip Davies PER #> 31: Philip Hollobone Solihull LOC #> 32: Philip Hollobone Lorely Burt ORG #> 33: Philip Hollobone Peterborough LOC #> 34: Philip Hollobone Jackson PER #> 35: Philip Hollobone Friend PER #> 36: Philip Davies Friend PER #> 37: Philip Davies Government ORG #> 38: Philip Hollobone Kettering LOC #> 39: Philip Hollobone Government ORG #> 40: Philip Hollobone Kettering LOC #> 41: Philip Hollobone Kettering LOC #> 42: Philip Hollobone Migrationwatch UK ORG #> 43: Philip Hollobone Carshalton LOC #> 44: Philip Hollobone Wallington LOC #> 45: Philip Hollobone Tom Brake PER #> 46: Philip Hollobone #> 47: Phil Woolas Gentleman PER #> 48: Phil Woolas Carshalton LOC #> 49: Phil Woolas Wallington LOC #> 50: Phil Woolas Tom Brake PER #> doc_id entity tag print(results) #> doc_id entity tag #> 1: Philip Hollobone Conservative ORG #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG #> 3: Philip Hollobone Back Benches MISC #> 4: Philip Hollobone Kettering LOC #> 5: Philip Hollobone Sikh MISC #> 6: Philip Hollobone Kettering LOC #> 7: Philip Hollobone Kettering LOC #> 8: Philip Hollobone British MISC #> 9: Philip Hollobone United Kingdom LOC #> 10: Philip Hollobone Norman MISC #> 11: Philip Hollobone United Kingdom LOC #> 12: Stewart Jackson Friend PER #> 13: Stewart Jackson Archbishop of Canterbury ORG #> 14: Stewart Jackson Carey PER #> 15: Philip Hollobone Friend PER #> 16: Philip 
Hollobone United Kingdom LOC #> 17: Philip Hollobone UK LOC #> 18: Philip Hollobone Europe LOC #> 19: Philip Hollobone Malta LOC #> 20: Stewart Jackson Barking LOC #> 21: Stewart Jackson Dagenham LOC #> 22: Stewart Jackson British National ORG #> 23: Stewart Jackson Conservative ORG #> 24: Stewart Jackson Friend PER #> 25: Stewart Jackson Folkestone LOC #> 26: Stewart Jackson Hythe LOC #> 27: Stewart Jackson Howard PER #> 28: Philip Hollobone Friend PER #> 29: Philip Hollobone Shipley PER #> 30: Philip Hollobone Philip Davies PER #> 31: Philip Hollobone Solihull LOC #> 32: Philip Hollobone Lorely Burt ORG #> 33: Philip Hollobone Peterborough LOC #> 34: Philip Hollobone Jackson PER #> 35: Philip Hollobone Friend PER #> 36: Philip Davies Friend PER #> 37: Philip Davies Government ORG #> 38: Philip Hollobone Kettering LOC #> 39: Philip Hollobone Government ORG #> 40: Philip Hollobone Kettering LOC #> 41: Philip Hollobone Kettering LOC #> 42: Philip Hollobone Migrationwatch UK ORG #> 43: Philip Hollobone Carshalton LOC #> 44: Philip Hollobone Wallington LOC #> 45: Philip Hollobone Tom Brake PER #> 46: Philip Hollobone #> 47: Phil Woolas Gentleman PER #> 48: Phil Woolas Carshalton LOC #> 49: Phil Woolas Wallington LOC #> 50: Phil Woolas Tom Brake PER #> doc_id entity tag"},{"path":"https://davidycliao.github.io/flaiR/articles/get_entities.html","id":"batch-processing","dir":"Articles","previous_headings":"","what":"Batch Processing","title":"Tagging Named Entities with Flair Standard Models","text":"Processing texts individually can inefficient memory-intensive. hand, processing texts simultaneously surpass memory constraints, especially document dataset sizable. Parsing documents smaller batches may provide optimal compromise two scenarios. Batch processing can enhance efficiency aid memory management. default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. 
can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_time <- system.time({ batch_process_results <- get_entities_batch(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE, batch_size = 5) gc() }) #> CPU is used. #> Processing batch 1 out of 2... #> Processing batch 2 out of 2... print(batch_process_time) #> user system elapsed #> 23.521 0.211 23.593 print(batch_process_results) #> doc_id entity tag text_id #> 1: Philip Hollobone Conservative ORG NA #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG NA #> 3: Philip Hollobone Back Benches MISC NA #> 4: Philip Hollobone Kettering LOC NA #> 5: Philip Hollobone Sikh MISC NA #> 6: Philip Hollobone Kettering LOC NA #> 7: Philip Hollobone Kettering LOC NA #> 8: Philip Hollobone British MISC NA #> 9: Philip Hollobone United Kingdom LOC NA #> 10: Philip Hollobone Norman MISC NA #> 11: Philip Hollobone United Kingdom LOC NA #> 12: Stewart Jackson Friend PER NA #> 13: Stewart Jackson Archbishop of Canterbury ORG NA #> 14: Stewart Jackson Carey PER NA #> 15: Philip Hollobone Friend PER NA #> 16: Philip Hollobone United Kingdom LOC NA #> 17: Philip Hollobone UK LOC NA #> 18: Philip Hollobone Europe LOC NA #> 19: Philip Hollobone Malta LOC NA #> 20: Stewart Jackson Barking LOC NA #> 21: Stewart Jackson Dagenham LOC NA #> 22: Stewart Jackson British National ORG NA #> 23: Stewart Jackson Conservative ORG NA #> 24: Stewart Jackson Friend PER NA #> 25: Stewart Jackson Folkestone LOC NA #> 26: Stewart Jackson Hythe LOC NA #> 27: Stewart Jackson Howard PER NA #> 28: Philip Hollobone Friend PER NA #> 29: Philip Hollobone Shipley PER NA #> 30: Philip Hollobone Philip Davies PER NA #> 31: Philip 
Hollobone Solihull LOC NA #> 32: Philip Hollobone Lorely Burt ORG NA #> 33: Philip Hollobone Peterborough LOC NA #> 34: Philip Hollobone Jackson PER NA #> 35: Philip Hollobone Friend PER NA #> 36: Philip Davies Friend PER NA #> 37: Philip Davies Government ORG NA #> 38: Philip Hollobone Kettering LOC NA #> 39: Philip Hollobone Government ORG NA #> 40: Philip Hollobone Kettering LOC NA #> 41: Philip Hollobone Kettering LOC NA #> 42: Philip Hollobone Migrationwatch UK ORG NA #> 43: Philip Hollobone Carshalton LOC NA #> 44: Philip Hollobone Wallington LOC NA #> 45: Philip Hollobone Tom Brake PER NA #> 46: Philip Hollobone NA #> 47: Phil Woolas Gentleman PER NA #> 48: Phil Woolas Carshalton LOC NA #> 49: Phil Woolas Wallington LOC NA #> 50: Phil Woolas Tom Brake PER NA #> doc_id entity tag text_id"},{"path":"https://davidycliao.github.io/flaiR/articles/get_pos.html","id":"generic-approach-using-part-of-speech-tagging","dir":"Articles","previous_headings":"","what":"Generic Approach Using Part-of-Speech Tagging","title":"Tagging Part-of-Speech Tagging with Flair Standard Models","text":"Download de-pos part--speech tagging model FlairNLP Hugging Face. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. 
CUDA, please visit: https://developer.nvidia.com/cuda-zone","code":"library(flaiR) data(\"de_immigration\") uk_immigration <- head(uk_immigration, 2) tagger_pos <- load_tagger_pos(\"pos\") #> 2023-11-29 09:42:49,554 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD tagger_pos$to(flair_device(\"mps\")) SequenceTagger( (embeddings): StackedEmbeddings( (list_embedding_0): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) (list_embedding_1): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) ) (word_dropout): WordDropout(p=0.05) (locked_dropout): LockedDropout(p=0.5) (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True) (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True) (linear): Linear(in_features=512, out_features=53, bias=True) (loss_function): ViterbiLoss() (crf): CRF() ) results <- get_pos(uk_immigration$text, uk_immigration$speaker, tagger_pos, show.text_id = FALSE, gc.active = FALSE) print(results) #> doc_id token_id text_id token tag precision #> 1: Philip Hollobone 0 NA I PRP 1.0000 #> 2: Philip Hollobone 1 NA thank VBP 0.9996 #> 3: Philip Hollobone 2 NA Mr. NNP 1.0000 #> 4: Philip Hollobone 3 NA Speaker NNP 1.0000 #> 5: Philip Hollobone 4 NA for IN 1.0000 #> --- #> 440: Stewart Jackson 66 NA parties NNS 1.0000 #> 441: Stewart Jackson 67 NA in IN 1.0000 #> 442: Stewart Jackson 68 NA this DT 1.0000 #> 443: Stewart Jackson 69 NA country NN 1.0000 #> 444: Stewart Jackson 70 NA ? . 
0.9949"},{"path":"https://davidycliao.github.io/flaiR/articles/get_pos.html","id":"batch-processing","dir":"Articles","previous_headings":"","what":"Batch Processing","title":"Tagging Part-of-Speech Tagging with Flair Standard Models","text":"default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_results <- get_pos_batch(uk_immigration$text, uk_immigration$speaker, tagger_pos, show.text_id = FALSE, batch_size = 10, verbose = TRUE) #> CPU is used. #> Processing batch starting at index: 1 print(batch_process_results) #> doc_id token_id text_id token tag precision #> 1: Philip Hollobone 0 NA I PRP 1.0000 #> 2: Philip Hollobone 1 NA thank VBP 0.9996 #> 3: Philip Hollobone 2 NA Mr. NNP 1.0000 #> 4: Philip Hollobone 3 NA Speaker NNP 1.0000 #> 5: Philip Hollobone 4 NA for IN 1.0000 #> --- #> 448: 0 NA NA NNP 0.8859 #> 449: 0 NA NA NNP 0.8859 #> 450: 0 NA NA NNP 0.8859 #> 451: 0 NA NA NNP 0.8859 #> 452: 0 NA NA NNP 0.8859"},{"path":"https://davidycliao.github.io/flaiR/articles/get_sentiments.html","id":"an-example-using-sentiment-model-pre-trained-english-model","dir":"Articles","previous_headings":"","what":"An Example Using sentiment Model (Pre-trained English Model)","title":"Tagging Sentiment with Flair Standard Models","text":"Download English sentiment model FlairNLP Hugging Face. Currently, also supports large English sentiment model German pre-trained model. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. 
flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. CUDA, please visit: https://developer.nvidia.com/cuda-zone","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 5) tagger_sent <- load_tagger_sentiments(\"sentiment\") tagger_sent$to(flair_device(\"mps\")) TextClassifier( (embeddings): TransformerDocumentEmbeddings( (model): DistilBertModel( (embeddings): Embeddings( (word_embeddings): Embedding(30522, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (transformer): Transformer( (layer): ModuleList( (0-5): 6 x TransformerBlock( (attention): MultiHeadSelfAttention( (dropout): Dropout(p=0.1, inplace=False) (q_lin): Linear(in_features=768, out_features=768, bias=True) (k_lin): Linear(in_features=768, out_features=768, bias=True) (v_lin): Linear(in_features=768, out_features=768, bias=True) (out_lin): Linear(in_features=768, out_features=768, bias=True) ) (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (ffn): FFN( (dropout): Dropout(p=0.1, inplace=False) (lin1): Linear(in_features=768, out_features=3072, bias=True) (lin2): Linear(in_features=3072, out_features=768, bias=True) (activation): GELUActivation() ) (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) ) ) ) ) ) (decoder): Linear(in_features=768, out_features=2, bias=True) (dropout): Dropout(p=0.0, inplace=False) (locked_dropout): LockedDropout(p=0.0) (word_dropout): WordDropout(p=0.0) (loss_function): CrossEntropyLoss() ) results <- get_sentiments(uk_immigration$text, seq_len(nrow(uk_immigration)), tagger_sent) print(results) #> doc_id sentiment score #> 1: 1 POSITIVE 0.8097584 #> 2: 2 POSITIVE 0.9990165 #> 3: 3 
POSITIVE 0.8827485 #> 4: 4 NEGATIVE 0.9997155 #> 5: 5 POSITIVE 0.8604343"},{"path":"https://davidycliao.github.io/flaiR/articles/get_sentiments.html","id":"batch-processing-in-english-sentiment-model","dir":"Articles","previous_headings":"","what":"Batch Processing in English Sentiment Model","title":"Tagging Sentiment with Flair Standard Models","text":"Processing texts individually can inefficient memory-intensive. hand, processing texts simultaneously surpass memory constraints, especially document dataset sizable. Parsing documents smaller batches may provide optimal compromise two scenarios. Batch processing can enhance efficiency aid memory management. default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_results <- get_sentiments_batch(uk_immigration$text, uk_immigration$speaker, tagger_sent, show.text_id = FALSE, batch_size = 2, verbose = TRUE) #> CPU is used. #> Processing batch 1 out of 3... #> Processing batch 2 out of 3... #> Processing batch 3 out of 3... 
print(batch_process_results) #> doc_id sentiment score #> 1: Philip Hollobone POSITIVE 0.8097585 #> 2: Stewart Jackson POSITIVE 0.9990165 #> 3: Philip Hollobone POSITIVE 0.8827485 #> 4: Stewart Jackson NEGATIVE 0.9997155 #> 5: Philip Hollobone POSITIVE 0.8604343"},{"path":"https://davidycliao.github.io/flaiR/articles/highlight_text.html","id":"create-text-with-named-entities","dir":"Articles","previous_headings":"","what":"Create Text with Named Entities","title":"Highlight Entities with Colors","text":" ","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- uk_immigration[30,] tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 09:43:31,686 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , result <- get_entities(uk_immigration$text, tagger = tagger_ner, show.text_id = FALSE ) #> Warning in check_texts_and_ids(texts, doc_ids): doc_ids is NULL. #> Auto-assigning doc_ids."},{"path":"https://davidycliao.github.io/flaiR/articles/highlight_text.html","id":"highlight-text-with-entities","dir":"Articles","previous_headings":"","what":"Highlight Text with Entities","title":"Highlight Entities with Colors","text":"","code":"highlighted_text <- highlight_text(text = uk_immigration$text, entities_mapping = map_entities(result)) highlighted_text"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"why-write-flair-to-access-fair-nlp-in-python","dir":"Articles","previous_headings":"","what":"Why Write flaiR to Access fair NLP in Python?","title":"Quick Start","text":"Python, Flair ( developed Zalando Research) stands notably feature-rich user-friendly NLP framework. Flair NLP provides intuitive interfaces exceptional multilingual support, especially various embedding frameworks like Glove, transformer-based models BERT. Flair also comes equipped pre-trained models context-aware capabilities. 
addition, Flair also establishes dependencies primary NLP Python libraries (gensim, torch, transformer, ); installing {flaiR} subsequently install related NLP packages Python. installing {flaiR} package R, users can seamlessly access Python-based Flair library within R. integration allows use basic Python libraries, like NumPy, well modern NLP deep learning frameworks PyTorch, reticulate interface R environment. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"install-flair-with-using-remotes","dir":"Articles","previous_headings":"","what":"Install flaiR with Using remotes","title":"Quick Start","text":"flaiR built top reticulate package incorporates key functions access core features FlairNLP, returning data tidy clean data.table. installation consists two parts: firstly, install Python 3.8 (available ; opt stable version, currently goes 3.11, avoid pre-release versions). Secondly, install R (version 3.6.3 higher), along RStudio. Additionally, ’ll also need Anaconda assist pip safely stably collecting dependencies setting Python environment. System Requirement: Python (>= 3.10.x) R (>= 4.2.0) RStudio (GUI interface allows users adjust manage Python environment R) Anaconda (highly recommended) first installing loading {flaiR} package, utilizes system environment tool undergoes three evaluation stages automatic installation Flair. Initially, flaiR requires least Python 3 installed device. Python 3 present, able successfully install flaiR R. point, essential check correct version Python installed. recommend installing Python 3.8 slightly higher version, avoid installing pre-release versions. installation consists two parts: First, install Python 3.8 higher, R 3.6.3 higher. Although tested Github Action R 3.6.2, strongly recommend installing R 4.0.0 ensure compatibility R environment Python. first installed, {flaiR} automatically detects whether Python 3.8 higher. , skip automatic installation Python flair NLP. 
case, need mannually install reload {flaiR} . Python 3.8 higher alreadt installed, installer {flaiR} automatically install flair Python NLP global environment. using {reticulate}, {flaiR} typically assume r-reticulate environment default. time, can use py_config() check location environment. Please note flaiR directly install flair NLP Python environment R using. environment can adjusted RStudio navigating Tools -> Global Options -> Python. issues installation, feel free ask Discussion process, observe numerous messages related installation Python environment Python flair module. Notably, flair numerous dependencies, including libraries related transformers (like torch, tokeniser, transformers, gensim, flair, etc). Thus, installation might take time complete. ’s also another scenario consider. {flaiR} unable automatically install Flair PyTorch, attempt force installation . However, attempt fails, ’ll encounter message: “Failed install Flair. {flaiR} requires Flair NLP. Please ensure Flair NLP installed Python manually.” ’re using Apple operating environment, ’s essential point check compatibility M1/M2 chip Python Torch. issues installation, feel free ask Discussion. copy command , generally asked upgrade package. package operates {reticulate}, packages R outdated, RStudio likely display “packages recent versions available.” prompt update. recommend update. ","code":"install.packages(\"remotes\") remotes::install_github(\"davidycliao/flaiR\", force = TRUE) library(flaiR)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"class-and-object-in-r-via-flair","dir":"Articles","previous_headings":"","what":"Class and Object in R via {flaiR}","title":"Quick Start","text":"R users, {flairR} built top {reticulate}, enabling interact directly Python modules R providing seamless support documents R community. 
{flairR} architecture, use simplest S3 method wrap modules methods within modules, allowing R users conveniently access use Python functionalities. main modules methods (functions) wrapped {flairR}: Please note following basic examples derived official Flair NLP Python documentation tutorial. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tag-entities-in-text","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Tag Entities in Text","title":"Quick Start","text":"Let’s run named entity recognition following example sentence: “love Berlin New York. , need make Sentence text, load pre-trained model use predict tags sentence object. print: Use loop print POS tag. ’s important note Python indexed 0. Therefore, R environment, must use seq_along(sentence$get_labels()) - 1. ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('ner') #> 2023-11-29 09:43:49,703 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"Berlin\"/LOC, \"New York\"/LOC] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Span[2:3]: \"Berlin\"'/'LOC' (0.9812) #> 'Span[4:6]: \"New York\"'/'LOC' (0.9957)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tag-part-of-speech-in-text","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Tag Part-of-Speech in Text","title":"Quick Start","text":"use flair/pos-english POS tagging standard models Hugging Face. print: Use loop print pos tag. 
","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('pos') #> 2023-11-29 09:43:50,275 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"I\"/PRP, \"love\"/VBP, \"Berlin\"/NNP, \"and\"/CC, \"New\"/NNP, \"York\"/NNP, \".\"/.] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Token[0]: \"I\"'/'PRP' (1.0) #> 'Token[1]: \"love\"'/'VBP' (1.0) #> 'Token[2]: \"Berlin\"'/'NNP' (0.9999) #> 'Token[3]: \"and\"'/'CC' (1.0) #> 'Token[4]: \"New\"'/'NNP' (1.0) #> 'Token[5]: \"York\"'/'NNP' (1.0) #> 'Token[6]: \".\"'/'.' (1.0)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"detect-sentiment","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Detect Sentiment","title":"Quick Start","text":"Let’s run sentiment analysis sentence determine whether POSITIVE NEGATIVE. can essentially code . 
Just instead loading ‘ner’ model, now load ‘sentiment’ model: ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the flair_nn.classifier_load tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('sentiment') # run sentiment analysis over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → POSITIVE (0.9982)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"embeddings","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Embeddings","title":"Quick Start","text":"Embeddings Words Transformers Let’s use standard BERT model (bert-base-uncased) embed sentence “grass green”. Simply instantate flair_embeddings.TransformerWordEmbeddings()use $embed() sentence object: cause word sentence embedded. can iterate words get embedding like : Embeddings Documents Transformers Sometimes want embedding whole document, individual words. case, use one DocumentEmbeddings classes Flair. Let’s use standard BERT model get embedding entire sentence: Use $embedding method extract entire embedding sentence print embedding follows: Stack Embeddings Flair allows combine embeddings “embedding stacks”. fine-tuning, using combinations embeddings often gives best results! Use StackedEmbeddings class instantiate passing list embeddings wish combine. instance, lets combine classic GloVe embeddings forward backward Flair embeddings. First, instantiate two embeddings wish combine: Now, instantiate StackedEmbeddings class pass list containing two embeddings. R Python list functionality. Let’s create StackedEmbedding object combines GloVe forward/backward Flair embeddings. Next, use $embed() method transform text vectors sentences. Words now embedded using concatenation three different embeddings. 
means resulting embedding vector still single PyTorch vector. ","code":"# attach flaiR in R library(flaiR) # initiate TransformerWordEmbeddings TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') # create a sentence sentence <- flair_data.Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) #> [[1]] #> Sentence[5]: \"The grass is green .\" for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 15 elements of the vector. token_embedding <- sentence$tokens[[1]]$embedding print(head(token_embedding, 15)) } #> Token: Token[0]: \"The\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[1]: \"grass\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[2]: \"is\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[3]: \"green\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[4]: \".\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) # initiate TransformerWordEmbeddings embedding <- flair_embeddings.TransformerDocumentEmbeddings('bert-base-uncased') #> 2023-11-29 09:43:54,925 Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max # create a sentence sentence <- flair_data.Sentence('The grass is green .') # embed words in sentence 
embedding$embed(sentence) #> [[1]] #> Sentence[5]: \"The grass is green .\" print(head(sentence$embedding, n = 20)) #> tensor([-0.2858, -0.3261, -0.1122, 0.0343, -0.2689, -0.0302, -0.0390, 0.0157, #> -0.2828, 0.1436, 0.0426, -0.2203, -0.0023, -0.5525, 0.1092, -0.0211, #> -0.0151, 0.0724, -0.3034, -0.2250]) # init standard GloVe embedding glove_embedding <- flair_embeddings.WordEmbeddings('glove') # init Flair forward and backwards embeddings flair_embedding_forward <- flair_embeddings.FlairEmbeddings('news-forward') flair_embedding_backward <- flair_embeddings.FlairEmbeddings('news-backward') stacked_embeddings <- flair_embeddings()$StackedEmbeddings( list(glove_embedding, flair_embedding_forward, flair_embedding_backward)) # make a sentence sentence <- flair_data.Sentence('I love Berlin and New York.') # just embed a sentence using the StackedEmbedding as you would with any single embedding. stacked_embeddings$embed(sentence) for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 15 elements of the vector. 
token_embedding <- sentence$tokens[[1]]$embedding print(head(token_embedding, 15)) } #> Token: Token[0]: \"I\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[1]: \"love\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[2]: \"Berlin\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[3]: \"and\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[4]: \"New\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[5]: \"York\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[6]: \".\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100])"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"expanded-feats-in-flair","dir":"Articles","previous_headings":"","what":"Expanded Feats in flaiR","title":"Quick Start","text":"enhance efficient utilization social science research, {flairR} extends FlairNLP three principal functions extract features neat format data.table. featured functions, don’t write loops format parsed output ; {flairR} automatically neat format. main features include part--speech tagging, transformer-based sentiment analysis, named entity recognition. 
addition, handle load RAM dealing larger corpus, {flairR} supports batch processing handle texts batches, especially useful dealing large datasets, optimize memory usage performance. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-parts-of-speech-with-flair-models","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Parts-of-Speech with Flair Models","title":"Quick Start","text":"can load pre-trained model \"pos-fast\". pre-trained models, see https://flairnlp.github.io/docs/tutorial-basics/part--speech-tagging#--english. ","code":"texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research and in 1994 Group.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") library(flaiR) tagger_pos <- load_tagger_pos(\"pos-fast\") #> 2023-11-29 09:43:58,053 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD results <- get_pos(texts, doc_ids, tagger_pos) head(results, n = 10) #> doc_id token_id text_id token tag precision #> 1: doc1 0 NA UCD NNP 0.9967 #> 2: doc1 1 NA is VBZ 1.0000 #> 3: doc1 2 NA one CD 0.9993 #> 4: doc1 3 NA of IN 1.0000 #> 5: doc1 4 NA the DT 1.0000 #> 6: doc1 5 NA best JJS 0.9988 #> 7: doc1 6 NA universities NNS 0.9997 #> 8: doc1 7 NA in IN 1.0000 #> 9: doc1 8 NA Ireland NNP 1.0000 #> 10: doc1 9 NA . . 
0.9998"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-entities-with-flair-models","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Entities with Flair Models","title":"Quick Start","text":"Load pretrained model ner. pretrained models, see https://flairnlp.github.io/docs/tutorial-basics/tagging-entities. ","code":"library(flaiR) tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 09:43:59,225 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(texts, doc_ids, tagger_ner) head(results, n = 10) #> doc_id entity tag #> 1: doc1 UCD ORG #> 2: doc1 Ireland LOC #> 3: doc2 UCD ORG #> 4: doc2 Dublin LOC #> 5: doc3 Essex ORG #> 6: doc4 Essex ORG #> 7: doc4 Russell Group ORG #> 8: doc5 TCD ORG #> 9: doc5 Ireland LOC #> 10: doc6 TCD ORG"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-sentiment","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Sentiment","title":"Quick Start","text":"Load pretrained model “sentiment”. pre-trained models “sentiment”, “sentiment-fast”, “de-offensive-language” currently available. pre-trained models, see https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment. 
","code":"library(flaiR) tagger_sent <- load_tagger_sentiments(\"sentiment\") results <- get_sentiments(texts, doc_ids, tagger_sent) head(results, n = 10) #> doc_id sentiment score #> 1: doc1 POSITIVE 0.9970598 #> 2: doc2 NEGATIVE 0.8472329 #> 3: doc3 POSITIVE 0.9928006 #> 4: doc4 POSITIVE 0.9901404 #> 5: doc5 POSITIVE 0.9952670 #> 6: doc6 POSITIVE 0.9291795"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"how-to-contribute","dir":"Articles","previous_headings":"","what":"How to Contribute","title":"Quick Start","text":"R developers want contribute {flaiR} welcome – {flaiR} open source project. warmly invite R users share similar interests join contributing package. Please feel free shoot email collaborate task. Contributions – whether comments, code suggestions, tutorial examples, forking repository – greatly appreciated. Please note flaiR released Contributor Code Conduct. contributing project, agree abide terms. primary communication channel R users can found . Please feel free share insights Discussion page report issues related R interface Issue section.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"the-overview-of-embedding-in-flair-nlp","dir":"Articles","previous_headings":"","what":"The Overview of Embedding in Flair NLP","title":"WordEmbeddings Supported in Flair NLP","text":"word embedding classes inherit TokenEmbeddings class call embed() method embed text. cases using Flair, various complex embedding processes hidden behind interface. Users simply need instantiate necessary embedding class call embed() embed text. 
types embeddings currently supported FlairNLP: ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"byte-pair-embeddings","dir":"Articles","previous_headings":"","what":"Byte Pair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Please note ihis document R conversion Flair NLP document implemented Python. BytePairEmbeddings word embeddings precomputed subword-level. means able embed word splitting words subwords looking embeddings. BytePairEmbeddings proposed computed Heinzerling Strube (2018) found offer nearly accuracy word embeddings, fraction model size. great choice want train small models. initialize language code (275 languages supported), number ‘syllables’ (one ) number dimensions (one 50, 100, 200 300). following initializes uses byte pair embeddings English: information can found byte pair embeddings web page. BytePairEmbeddings also multilingual model capable embedding word language. can instantiate : can also load custom BytePairEmbeddings specifying path model_file_path embedding_file_path arguments. correspond respectively SentencePiece model file embedding file (Word2Vec plain text GenSim binary). example:","code":"library(flaiR) ## flaiR: An R Wrapper for Accessing Flair NLP 0.13.0 # initialize embedding BytePairEmbeddings <- flair_embeddings()$BytePairEmbeddings embedding <- BytePairEmbeddings('en') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" embedding <- BytePairEmbeddings('multi')"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"flair-embeddings","dir":"Articles","previous_headings":"","what":"Flair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"following example manual translated R Flair NLP Zalando Research. Flair, use embedding quite straightforward. 
’s example code snippet use Flair’s contextual string embeddings: Source: https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md#flair-embeddings , want load embeddings German forward LM model, instantiate method follows: want load embeddings Bulgarian backward LM model, instantiate method follows: ","code":"library(flaiR) FlairEmbeddings <- flair_embeddings()$FlairEmbeddings # init embedding flair_embedding_forward <- FlairEmbeddings('news-forward') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence flair_embedding_forward$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" flair_de_forward <- FlairEmbeddings('de-forward') flair_bg_backward <- FlairEmbeddings('bg-backward')"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"recommended-flair-usage-in-flair-in-r","dir":"Articles","previous_headings":"","what":"Recommended Flair Usage in {flaiR} in R","title":"WordEmbeddings Supported in Flair NLP","text":"recommend combining forward backward Flair embeddings. Depending task, also recommend adding standard word embeddings mix. , recommended StackedEmbedding English tasks : ’s ! Now just use embedding like embeddings, .e. call embed() method sentences. Words now embedded using concatenation three different embeddings. combination often gives state---art accuracy. 
","code":"FlairEmbeddings <- flair_embeddings()$FlairEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings StackedEmbeddings <- flair_embeddings()$StackedEmbeddings # create a StackedEmbedding object that combines glove and forward/backward flair embeddings stacked_embeddings <- StackedEmbeddings(list(WordEmbeddings(\"glove\"), FlairEmbeddings(\"news-forward\"), FlairEmbeddings(\"news-backward\"))) # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # just embed a sentence using the StackedEmbedding as you would with any single embedding. stacked_embeddings$embed(sentence) # now check out the embedded tokens. # Note that Python is indexing from 0. In an R for loop, using seq_along(sentence) - 1 achieves the same effect. for (i in seq_along(sentence)-1) { print(sentence[i]) print(sentence[i]$embedding) } ## Token[0]: \"The\" ## tensor([-0.0382, -0.2449, 0.7281, ..., -0.0065, -0.0053, 0.0090]) ## Token[1]: \"grass\" ## tensor([-0.8135, 0.9404, -0.2405, ..., 0.0354, -0.0255, -0.0143]) ## Token[2]: \"is\" ## tensor([-5.4264e-01, 4.1476e-01, 1.0322e+00, ..., -5.3691e-04, ## -9.6750e-03, -2.7541e-02]) ## Token[3]: \"green\" ## tensor([-0.6791, 0.3491, -0.2398, ..., -0.0007, -0.1333, 0.0161]) ## Token[4]: \".\" ## tensor([-0.3398, 0.2094, 0.4635, ..., 0.0005, -0.0177, 0.0032])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"pooled-flair-embeddings","dir":"Articles","previous_headings":"","what":"Pooled Flair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"also developed pooled variant FlairEmbeddings. embeddings differ constantly evolve time, even prediction time (.e. training complete). means words sentence two different points time may different embeddings. PooledFlairEmbeddings manage ‘global’ representation distinct word using pooling operation past occurences. details works may found Akbik et al. (2019). 
can instantiate use PooledFlairEmbeddings like embedding: Note get best results PooledFlairEmbeddings ineffective memory-wise since keep past embeddings words memory. many cases, regular FlairEmbeddings nearly good much lower memory requirements. ","code":"# initiate embedding from Flair NLP PooledFlairEmbeddings <- flair_embeddings()$PooledFlairEmbeddings flair_embedding_forward <- PooledFlairEmbeddings('news-forward') # create a sentence object sentence <- Sentence('The grass is green .') # embed words in sentence flair_embedding_forward$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"transformer-embeddings","dir":"Articles","previous_headings":"","what":"Transformer Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Please note content examples section extensively revised TransformerWordEmbeddings official documentation. Flair supports various Transformer-based architectures like BERT XLNet HuggingFace, two classes TransformerWordEmbeddings (embed words tokens) TransformerDocumentEmbeddings (embed documents). 
","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"embeddings-words-with-transformers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Embeddings Words with Transformers","title":"WordEmbeddings Supported in Flair NLP","text":"instance, load standard BERT transformer model, : instead want use RoBERTa, : {flaiR} interacts Flair NLP (Zalando Research), allowing use pre-trained models HuggingFace , can search models use.","code":"library(flaiR) # initiate embedding and load BERT model from HugginFaces TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('roberta-base') sentence <- Sentence('The grass is green .') embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"embedding-documents-with-transformers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Embedding Documents with Transformers","title":"WordEmbeddings Supported in Flair NLP","text":"embed whole sentence one (instead word sentence), simply use TransformerDocumentEmbeddings instead:","code":"TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings embedding <- TransformerDocumentEmbeddings('roberta-base') sentence <- Sentence('The grass is green .') embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"arguments","dir":"Articles","previous_headings":"Transformer 
Embeddings","what":"Arguments","title":"WordEmbeddings Supported in Flair NLP","text":"several options can set init TransformerWordEmbeddings TransformerDocumentEmbeddings classes:","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"layers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Layers","title":"WordEmbeddings Supported in Flair NLP","text":"layers argument controls transformer layers used embedding. set value ‘-1,-2,-3,-4’, top 4 layers used make embedding. set ‘-1’, last layer used. set “”, layers used. affects length embedding, since layers just concatenated. ’s example might done: can directly import torch reticulate since already installed flair dependency installed flair Python. Notice L numbers list? ensures R treats numbers integers. ’re generating numbers dynamically (e.g., computation), might want ensure integers attempting create tensor. .e. size embedding increases mode layers use (layer_mean set False, otherwise length always ).","code":"Sentence <- flair_data()$Sentence TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings sentence = Sentence('The grass is green.') # use only last layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers='-1', layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([768]) sentence$clear_embeddings() sentence <- Sentence('The grass is green.') # use only last layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers = \"-1\", layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([768]) sentence$clear_embeddings() # use last two layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers='-1,-2', layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" 
print(sentence[0]$embedding$size()) ## torch.Size([1536]) sentence$clear_embeddings() # use ALL layers embeddings = TransformerWordEmbeddings('bert-base-uncased', layers='all', layer_mean=FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([9984]) # You can directly import torch from reticulate since it has already been installed through the flair dependency when you installed flair in Python. torch <- reticulate::import('torch') # Attempting to create a tensor with integer dimensions torch$Size(list(768L)) ## torch.Size([768]) torch$Size(list(1536L)) ## torch.Size([1536]) torch$Size(list(9984L)) ## torch.Size([9984])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"pooling-operation","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Pooling Operation","title":"WordEmbeddings Supported in Flair NLP","text":"Transformer-based models use subword tokenization. E.g. following token puppeteer tokenized subwords: pupp, ##ete ##er. implement different pooling operations subwords generate final token representation: first: embedding first subword used last: embedding last subword used first_last: embeddings first last subwords concatenated used mean: torch.mean subword embeddings calculated used can choose one use passing constructor:","code":"# use first and last subtoken for each word embeddings = TransformerWordEmbeddings('bert-base-uncased', subtoken_pooling='first_last') embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([9984])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"layer-mean","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Layer Mean","title":"WordEmbeddings Supported in Flair NLP","text":"Transformer-based models certain number layers. 
default, layers select concatenated explained . Alternatively, can set layer_mean=True mean selected layers. resulting vector always dimensionality single layer:","code":"# initiate embedding from transformer. This model will be downloaded from Flair NLP huggingface. embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers=\"all\", layer_mean=TRUE) # create a sentence object sentence = Sentence(\"The Oktoberfest is the world's largest Volksfest .\") # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[9]: \"The Oktoberfest is the world's largest Volksfest .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"fine-tuneable-or-not","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Fine-tuneable or Not","title":"WordEmbeddings Supported in Flair NLP","text":"’s example might done: setups, may wish fine-tune transformer embeddings. case, set fine_tune=True init method. fine-tuning, also use topmost layer, best set layers='-1'. 
print tensor now gradient function can fine-tuned use training routine.","code":"# use first and last subtoken for each word TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embeddings <- TransformerWordEmbeddings('bert-base-uncased', fine_tune=TRUE, layers='-1') embeddings$embed(sentence) ## [[1]] ## Sentence[9]: \"The Oktoberfest is the world's largest Volksfest .\" print(sentence[0]$embedding) ## tensor([-6.5871e-01, 1.0410e-01, 3.4632e-01, -3.3775e-01, -2.1013e-01, ## -1.3037e-02, 5.1998e-01, 1.6574e+00, -5.2520e-02, -4.8633e-02, ## -7.8968e-01, -9.5547e-01, -1.9723e-01, 9.4999e-01, -1.0336e+00, ## 8.6669e-02, 9.8103e-02, 5.6511e-02, 3.1075e-02, 2.4157e-01, ## -1.1427e-01, -2.3692e-01, -2.0700e-01, 7.7985e-01, 2.5459e-01, ## -5.0824e-03, -2.4110e-01, 2.2436e-01, -7.3249e-02, -8.1094e-01, ## -1.8778e-01, 2.1219e-01, -5.9514e-01, 6.3129e-02, -4.8880e-01, ## -3.2300e-02, -1.9123e-02, -1.0991e-01, -1.5603e-02, 4.3068e-01, ## -1.7968e-01, -5.4499e-01, 7.0608e-01, -4.0512e-01, 1.7761e-01, ## -8.5820e-01, 2.3438e-02, -1.4981e-01, -9.0368e-01, -2.1097e-01, ## -3.3535e-01, 1.4919e-01, -7.4522e-03, 1.0239e+00, -6.1776e-02, ## 3.3913e-01, 8.5811e-02, 6.9401e-01, -7.7483e-02, 3.1484e-01, ## -4.3921e-01, 1.2933e+00, 5.7990e-03, -7.0992e-01, 2.7525e-01, ## 8.8792e-01, 2.6309e-03, 1.3640e+00, 5.6885e-01, -2.4904e-01, ## -4.5157e-02, -1.7575e-01, -3.4729e-01, 5.8363e-02, -2.0346e-01, ## -1.2505e+00, -3.0592e-01, -3.6104e-02, -2.4066e-01, -5.1250e-01, ## 2.6930e-01, 1.4068e-01, 3.4056e-01, 7.3297e-01, 2.6848e-01, ## 2.4304e-01, -9.4885e-01, -9.0367e-01, -1.3184e-01, 6.7348e-01, ## -3.2995e-02, 4.7660e-01, -7.1623e-03, -3.4141e-01, 6.8473e-01, ## -4.4869e-01, -4.9831e-01, -8.0143e-01, 1.4073e+00, 5.3251e-01, ## 2.4643e-01, -4.2528e-01, 9.1615e-02, 6.4495e-01, 1.7931e-01, ## -2.1473e-01, 1.5447e-01, -3.2978e-01, 1.0799e-01, -1.9402e+00, ## -5.0380e-01, -2.7636e-01, -1.1227e-01, 1.1576e-01, 2.5885e-01, ## -1.7916e-01, 6.6166e-01, -9.6098e-01, -5.1242e-01, 
-3.5424e-01, ## 2.1383e-01, 6.6456e-01, 2.5498e-01, 3.7250e-01, -1.1821e+00, ## -4.9551e-01, -2.0858e-01, 1.1511e+00, -1.0366e-02, -1.0682e+00, ## 3.7277e-01, 6.4048e-01, 2.3308e-01, -9.3824e-01, 9.5014e-02, ## 5.7904e-01, 6.3969e-01, 8.2359e-02, -1.4075e-01, 3.0107e-01, ## 3.5823e-03, -4.4684e-01, -2.6913e+00, -3.3933e-01, 2.8733e-03, ## -1.3639e-01, -7.1054e-01, -1.1048e+00, 2.2374e-01, 1.1830e-01, ## 4.8416e-01, -2.9110e-01, -6.7650e-01, 2.3202e-01, -1.0123e-01, ## -1.9174e-01, 4.9960e-02, 5.2067e-01, 1.3272e+00, 6.8250e-01, ## 5.5332e-01, -1.0886e+00, 4.5160e-01, -1.5010e-01, -9.8074e-01, ## 8.5111e-02, 1.6498e-01, 6.6032e-01, 1.0815e-02, 1.8952e-01, ## -5.6608e-01, -1.3743e-02, 9.1171e-01, 2.7812e-01, 2.9551e-01, ## -3.5637e-01, 3.2030e-01, 5.6738e-01, -1.5707e-01, 3.5326e-01, ## -4.7747e-01, 7.8646e-01, 1.3765e-01, 2.2440e-01, 4.2422e-01, ## -2.6504e-01, 2.2014e-02, -6.7154e-01, -8.7999e-02, 1.4284e-01, ## 4.0983e-01, 1.0933e-02, -1.0704e+00, -1.9350e-01, 6.0051e-01, ## 5.0545e-02, 1.1434e-02, -8.0243e-01, -6.6871e-01, 5.3953e-01, ## -5.9856e-01, -1.6915e-01, -3.5307e-01, 4.4568e-01, -7.2761e-01, ## 1.1629e+00, -3.1553e-01, -7.9747e-01, -2.0582e-01, 3.7320e-01, ## 5.9379e-01, -3.1898e-01, -1.6932e-01, -6.2492e-01, 5.7047e-01, ## -2.9779e-01, -5.9106e-01, 8.5436e-02, -2.1839e-01, -2.2214e-01, ## 7.9233e-01, 8.0537e-01, -5.9785e-01, 4.0474e-01, 3.9265e-01, ## 5.8169e-01, -5.2506e-01, 6.9786e-01, 1.1163e-01, 8.7434e-02, ## 1.7549e-01, 9.1438e-02, 5.8816e-01, 6.4338e-01, -2.7138e-01, ## -5.3449e-01, -1.0168e+00, -5.1337e-02, 3.0099e-01, -7.6695e-02, ## -2.1126e-01, 5.8143e-01, 1.3599e-01, 6.2759e-01, -6.2810e-01, ## 5.9966e-01, 3.5836e-01, -3.0707e-02, 1.5563e-01, -1.4016e-01, ## -2.0155e-01, -1.3755e+00, -9.1877e-02, -6.9892e-01, 7.9438e-02, ## -4.2926e-01, 3.7988e-01, 7.6741e-01, 5.3094e-01, 8.5981e-01, ## 4.4184e-02, -6.3507e-01, 3.9587e-01, -3.6635e-01, -7.0770e-01, ## 8.3683e-04, -3.0055e-01, 2.1360e-01, -4.1649e-01, 6.9457e-01, ## -6.2715e-01, -5.1101e-01, 
3.0331e-01, -2.3804e+00, -1.0566e-02, ## -9.4488e-01, 4.3318e-02, 2.4188e-01, 1.9204e-02, 1.5712e-03, ## -3.0374e-01, 3.1933e-01, -7.4432e-01, 1.4599e-01, -5.2102e-01, ## -5.2269e-01, 1.3274e-01, -2.8936e-01, 4.1706e-02, 2.6143e-01, ## -4.4796e-01, 7.3136e-01, 6.3893e-02, 4.7398e-01, -5.1062e-01, ## -1.3705e-01, 2.0763e-01, -3.9115e-01, 2.8822e-01, -3.5283e-01, ## 3.4881e-02, -3.3602e-01, 1.7210e-01, 1.3537e-02, -5.3036e-01, ## 1.2847e-01, -4.5576e-01, -3.7251e-01, -3.2254e+00, -3.1650e-01, ## -2.6144e-01, -9.4983e-02, 2.7651e-02, -2.3750e-01, 3.1001e-01, ## 1.1428e-01, -1.2870e-01, -4.7496e-01, 4.4594e-01, -3.6138e-01, ## -3.1009e-01, -9.9612e-02, 5.3967e-01, 1.2840e-02, 1.4507e-01, ## -2.5181e-01, 1.9310e-01, 4.1073e-01, 5.9776e-01, -2.5585e-01, ## 5.7184e-02, -5.1505e-01, -6.8709e-02, 4.7767e-01, -1.2079e-01, ## -5.0894e-01, -9.2884e-01, 7.8471e-01, 2.0216e-01, 4.3242e-01, ## 3.2803e-01, -1.0122e-01, 3.3530e-01, -1.2183e-01, -5.5060e-01, ## 3.5427e-01, 7.4558e-02, -3.1411e-01, -1.7512e-01, 2.2485e-01, ## 4.2295e-01, 7.7110e-02, 1.8063e+00, 7.6636e-03, -1.1082e-02, ## -2.8604e-02, 7.7143e-02, 8.2344e-02, 8.0271e-02, -1.1858e+00, ## 2.0523e-01, 3.4053e-01, 2.0424e-01, -2.0574e-02, 3.0466e-01, ## -2.1858e-01, 6.3737e-01, -5.6264e-01, 1.4153e-01, 2.4319e-01, ## -5.6688e-01, 7.2374e-02, -2.9329e-01, 4.6562e-02, 1.8977e-01, ## 2.4977e-01, 9.1892e-01, 1.1346e-01, 3.8588e-01, -3.5543e-01, ## -1.3380e+00, -8.5644e-01, -5.5443e-01, -7.2317e-01, -2.9225e-01, ## -1.4389e-01, 6.9714e-01, -5.9852e-01, -6.8932e-01, -6.0952e-01, ## 1.8234e-01, -7.5841e-02, 3.6445e-01, -3.8286e-01, 2.6545e-01, ## -2.6569e-01, -4.9999e-01, -3.8354e-01, -2.2809e-01, 8.8314e-01, ## 2.9041e-01, 5.4803e-01, -1.0668e+00, 4.7406e-01, 7.8804e-02, ## -1.1559e+00, -3.0649e-01, 6.0479e-02, -7.1279e-01, -4.3336e-01, ## -8.2402e-04, -1.0236e-01, 3.5497e-01, 1.8665e-01, 1.2045e-01, ## 1.2071e-01, 6.2911e-01, 3.1421e-01, -2.1635e-01, -8.9416e-01, ## 6.6361e-01, -9.2981e-01, 6.9193e-01, -2.5403e-01, 
-2.5835e-02, ## 1.2342e+00, -6.5908e-01, 7.5741e-01, 2.9014e-01, 3.0760e-01, ## -1.0249e+00, -2.7089e-01, 4.6132e-01, 6.1510e-02, 2.5385e-01, ## -5.2075e-01, -3.5107e-01, 3.3694e-01, -2.5047e-01, -2.7855e-01, ## 2.0280e-01, -1.5703e-01, 4.1619e-02, 1.4451e-01, -1.6666e-01, ## -3.0519e-01, -9.4271e-02, -1.7083e-01, 5.2454e-01, 2.4524e-01, ## 2.0732e-01, 3.7948e-01, 9.7359e-02, -3.2452e-02, 5.5792e-01, ## -2.4703e-01, 5.2864e-01, 5.6343e-01, -1.9198e-01, -8.3370e-02, ## -6.5377e-01, -5.4104e-01, 1.8289e-01, -4.9146e-01, 6.6422e-01, ## -5.2808e-01, -1.4797e-01, -4.5527e-02, -3.9593e-01, 1.2841e-01, ## -7.8591e-01, -3.7564e-02, 6.1912e-01, 3.2458e-01, 3.7858e-01, ## 1.8744e-01, -5.0738e-01, 8.0222e-02, -3.1468e-02, -1.5145e-01, ## 1.6657e-01, -5.2251e-01, -2.5940e-01, -3.8505e-01, -7.4941e-02, ## 3.9530e-01, -2.1742e-01, -1.7113e-01, -5.2492e-01, -7.7781e-02, ## -6.9759e-01, 2.2570e-01, -1.2935e-01, 3.0750e-01, -1.3554e-01, ## 6.0182e-02, -1.1479e-01, 4.7263e-01, 3.7957e-01, 8.9523e-01, ## -3.6411e-01, -6.6355e-01, -7.6647e-01, -1.4479e+00, -5.2238e-01, ## 2.3337e-02, -4.5736e-01, 5.9981e-01, 6.8700e-01, 4.2190e-02, ## 1.5894e-01, 2.0744e-02, 9.2334e-02, -7.2747e-01, 1.2388e-01, ## -4.7257e-01, -2.9889e-01, 4.8955e-01, -9.1618e-01, -1.9497e-01, ## -1.4157e-01, -1.7472e-01, 4.9251e-02, -2.2263e-01, 6.1700e-01, ## -2.4691e-01, 6.0937e-01, 3.6134e-01, 4.3398e-01, -2.7615e-01, ## -2.6582e-01, -1.3132e-01, -4.4156e-02, 5.3686e-01, 1.2956e-01, ## -6.4218e-01, -1.5820e-01, -1.0249e+00, -9.3593e-03, -3.5060e-01, ## 3.6650e-01, 4.9503e-01, 7.4325e-01, 9.6526e-02, 4.3141e-01, ## 3.9512e-02, -7.0726e-02, 6.2696e-01, 1.3066e-01, 1.0243e-01, ## 3.3839e-01, 1.9224e-01, 4.8800e-01, -2.1052e-01, 3.9523e-02, ## 7.7567e-01, -1.2005e-01, -1.1262e-01, 8.7001e-02, 2.7273e-01, ## -4.6831e-02, -2.4966e-01, -3.2083e-01, -2.6389e-01, 1.6225e-01, ## 2.8800e-01, -1.0799e-01, -1.0841e-01, 6.6873e-01, 3.4369e-01, ## 5.8675e-01, 9.2084e-01, -1.8131e-01, 5.6372e-02, -5.7125e-01, ## 3.1048e-01, 
3.1630e-02, 1.2097e+00, 4.4492e-01, -2.3792e-01, ## -9.9342e-02, -5.0657e-01, -3.1333e-02, 1.5045e-01, 3.1493e-01, ## -4.1287e-01, -1.8618e-01, -4.2640e-02, 1.8266e+00, 4.8565e-01, ## 6.3892e-01, -2.9107e-01, -3.2557e-01, 1.1088e-01, -1.3212e+00, ## 7.1113e-01, 2.3618e-01, 2.1473e-01, 1.6360e-01, -5.2535e-01, ## 3.4322e-01, 9.0777e-01, 1.8697e-01, -3.0532e-01, 2.7574e-01, ## 5.1451e-01, -2.6733e-01, 2.4207e-01, -3.3234e-01, 6.3520e-01, ## 2.5884e-01, -5.7923e-01, 3.0204e-01, 4.1745e-02, 4.7539e-02, ## -6.7038e-01, 4.6699e-01, -1.6951e-01, -1.5161e-01, -1.2805e-01, ## -4.3990e-01, 1.0177e+00, -3.8138e-01, 4.3114e-01, -7.5447e-03, ## 2.7385e-01, 4.6314e-01, -8.6565e-02, -7.9458e-01, 1.4369e-02, ## 2.6016e-01, 9.2568e-03, 9.3968e-01, 7.9679e-01, 3.3144e-03, ## -5.6733e-01, 2.9052e-01, -9.5894e-02, 1.8630e-01, 1.4475e-01, ## 1.8935e-01, 5.1735e-01, -1.2187e+00, -1.3298e-01, -4.3538e-01, ## -6.5398e-01, -2.9286e-01, 1.3199e-01, 3.9075e-01, 9.0172e-01, ## 9.9439e-01, 6.2783e-01, -1.6103e-01, 1.4155e-03, -9.1476e-01, ## 7.7760e-01, 1.2264e+00, 8.1482e-02, 6.6732e-01, -7.4576e-01, ## -1.0470e-01, -6.7781e-01, 8.0405e-01, 3.6676e-02, 3.6362e-01, ## 4.4962e-01, 8.9600e-01, -1.8276e+00, 6.7828e-01, -9.4121e-03, ## 3.8665e-01, -2.2149e-02, 7.4756e-02, 3.7438e-01, -1.2696e-01, ## -5.3397e-01, -3.5782e-01, 3.0400e-01, 7.7663e-01, -1.9122e-01, ## -1.3041e-01, -2.1522e-01, 1.1086e+00, 1.0237e+00, -4.7554e-02, ## -3.9538e-01, 1.1568e+00, -4.2549e-01, -2.5641e-02, 2.1993e-01, ## -4.7488e-01, -7.7624e-02, -5.5211e-01, -5.3169e-01, -5.3790e-02, ## -6.0536e-01, 4.2789e-01, -3.8606e-01, 9.8630e-01, 4.3331e-01, ## 4.8414e-01, -1.3519e-01, -6.5505e-01, -2.2913e-01, -3.1254e-01, ## 1.2920e-01, -7.7762e-02, -3.1123e-01, 8.2576e-01, 8.6486e-01, ## -3.4766e-01, -3.8491e-01, 3.5731e-02, 3.7518e-01, -3.7511e-01, ## 5.2371e-01, -7.9721e-01, 3.3401e-01, 8.3976e-01, -3.2525e-01, ## -3.0268e-01, -1.3558e-01, 2.2812e-01, 1.5632e-01, 3.1584e-01, ## 9.3902e-02, -3.8647e-01, -1.0177e-01, -2.8833e-01, 
3.6028e-01, ## 2.2565e-01, -1.5595e-01, -4.4974e-01, -5.0904e-01, 4.5058e-01, ## 7.9031e-01, 2.7041e-01, -3.6712e-01, -3.9090e-01, 2.3358e-01, ## 1.2162e+00, -1.1371e+00, -8.2702e-01, -9.2748e-02, 5.8958e-01, ## 4.4429e-02, -2.3344e-01, -5.6492e-01, 4.9406e-01, -4.0302e-01, ## 5.0951e-01, -1.6740e-01, -4.0176e+00, -8.2092e-01, -3.9132e-01, ## -2.9754e-01, -2.6798e-01, -2.5174e-01, 6.6282e-01, -5.7532e-02, ## 7.7360e-01, 2.5238e-01, 2.5733e-02, 1.7694e-01, 9.4648e-02, ## 2.6886e-01, 9.3711e-01, -8.3929e-02])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"models","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Models","title":"WordEmbeddings Supported in Flair NLP","text":"Please look awesome HuggingFace supported pre-trained models! ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"classic-word-embeddings","dir":"Articles","previous_headings":"","what":"Classic Word Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Classic word embeddings static word-level, meaning distinct word gets exactly one pre-computed embedding. embeddings fall class, including popular GloVe Komninos embeddings. Simply instantiate WordEmbeddings class pass string identifier embedding wish load. , want use GloVe embeddings, pass string ‘glove’ constructor: Now, create example sentence call embedding’s embed() method. can also pass list sentences method since embedding types make use batching increase speed. prints tokens embeddings. GloVe embeddings Pytorch vectors dimensionality 100. choose pre-trained embeddings load passing appropriate id string constructor WordEmbeddings class. Typically, use two-letter language code init embedding, ‘en’ English ‘de’ German . default, initialize FastText embeddings trained Wikipedia. can also always use FastText embeddings Web crawls, instantiating ‘-crawl’. ‘de-crawl’ use embeddings trained German web crawls. 
English, provide options, can choose instantiating ‘en-glove’, ‘en-extvec’ .","code":"library(flaiR) # initiate embedding with glove WordEmbeddings <- flair_embeddings()$WordEmbeddings glove_embedding <- WordEmbeddings('glove') library(flaiR) # initiate a sentence object Sentence <- flair_data()$Sentence # create sentence object. sentence = Sentence('The grass is green .') # embed a sentence using glove. glove_embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" # view embedded tokens. for (token in seq_along(sentence)-1) { print(sentence[token]) print(sentence[token]$embedding$numpy()) } ## Token[0]: \"The\" ## [1] -0.038194 -0.244870 0.728120 -0.399610 0.083172 0.043953 ## [7] -0.391410 0.334400 -0.575450 0.087459 0.287870 -0.067310 ## [13] 0.309060 -0.263840 -0.132310 -0.207570 0.333950 -0.338480 ## [19] -0.317430 -0.483360 0.146400 -0.373040 0.345770 0.052041 ## [25] 0.449460 -0.469710 0.026280 -0.541550 -0.155180 -0.141070 ## [31] -0.039722 0.282770 0.143930 0.234640 -0.310210 0.086173 ## [37] 0.203970 0.526240 0.171640 -0.082378 -0.717870 -0.415310 ## [43] 0.203350 -0.127630 0.413670 0.551870 0.579080 -0.334770 ## [49] -0.365590 -0.548570 -0.062892 0.265840 0.302050 0.997750 ## [55] -0.804810 -3.024300 0.012540 -0.369420 2.216700 0.722010 ## [61] -0.249780 0.921360 0.034514 0.467450 1.107900 -0.193580 ## [67] -0.074575 0.233530 -0.052062 -0.220440 0.057162 -0.158060 ## [73] -0.307980 -0.416250 0.379720 0.150060 -0.532120 -0.205500 ## [79] -1.252600 0.071624 0.705650 0.497440 -0.420630 0.261480 ## [85] -1.538000 -0.302230 -0.073438 -0.283120 0.371040 -0.252170 ## [91] 0.016215 -0.017099 -0.389840 0.874240 -0.725690 -0.510580 ## [97] -0.520280 -0.145900 0.827800 0.270620 ## Token[1]: \"grass\" ## [1] -0.8135300 0.9404200 -0.2404800 -0.1350100 0.0556780 0.3362500 ## [7] 0.0802090 -0.1014800 -0.5477600 -0.3536500 0.0733820 0.2586800 ## [13] 0.1986600 -0.1432800 0.2507000 0.4281400 0.1949800 0.5345600 ## [19] 0.7424100 0.0578160 -0.3178100 
0.9435900 0.8145000 -0.0823750 ## [25] 0.6165800 0.7284400 -0.3262300 -1.3641000 0.1232000 0.5372800 ## [31] -0.5122800 0.0245900 1.0822001 -0.2295900 0.6038500 0.5541500 ## [37] -0.9609900 0.4803300 0.0022260 0.5591300 -0.1636500 -0.8468100 ## [43] 0.0740790 -0.6215700 0.0259670 -0.5162100 -0.0524620 -0.1417700 ## [49] -0.0161230 -0.4971900 -0.5534500 -0.4037100 0.5095600 1.0276000 ## [55] -0.0840000 -1.1179000 0.3225700 0.4928100 0.9487600 0.2040300 ## [61] 0.5388300 0.8397200 -0.0688830 0.3136100 1.0450000 -0.2266900 ## [67] -0.0896010 -0.6427100 0.6442900 -1.1001000 -0.0095814 0.2668200 ## [73] -0.3230200 -0.6065200 0.0479150 -0.1663700 0.8571200 0.2335500 ## [79] 0.2539500 1.2546000 0.5471600 -0.1979600 -0.7186300 0.2076000 ## [85] -0.2587500 -0.3649900 0.0834360 0.6931700 0.1573700 1.0931000 ## [91] 0.0912950 -1.3773000 -0.2717000 0.7070800 0.1872000 -0.3307200 ## [97] -0.2835900 0.1029600 1.2228000 0.8374100 ## Token[2]: \"is\" ## [1] -0.5426400 0.4147600 1.0322000 -0.4024400 0.4669100 0.2181600 ## [7] -0.0748640 0.4733200 0.0809960 -0.2207900 -0.1280800 -0.1144000 ## [13] 0.5089100 0.1156800 0.0282110 -0.3628000 0.4382300 0.0475110 ## [19] 0.2028200 0.4985700 -0.1006800 0.1326900 0.1697200 0.1165300 ## [25] 0.3135500 0.2571300 0.0927830 -0.5682600 -0.5297500 -0.0514560 ## [31] -0.6732600 0.9253300 0.2693000 0.2273400 0.6636500 0.2622100 ## [37] 0.1971900 0.2609000 0.1877400 -0.3454000 -0.4263500 0.1397500 ## [43] 0.5633800 -0.5690700 0.1239800 -0.1289400 0.7248400 -0.2610500 ## [49] -0.2631400 -0.4360500 0.0789080 -0.8414600 0.5159500 1.3997000 ## [55] -0.7646000 -3.1452999 -0.2920200 -0.3124700 1.5129000 0.5243500 ## [61] 0.2145600 0.4245200 -0.0884110 -0.1780500 1.1876000 0.1057900 ## [67] 0.7657100 0.2191400 0.3582400 -0.1163600 0.0932610 -0.6248300 ## [73] -0.2189800 0.2179600 0.7405600 -0.4373500 0.1434300 0.1471900 ## [79] -1.1605000 -0.0505080 0.1267700 -0.0143950 -0.9867600 -0.0912970 ## [85] -1.2054000 -0.1197400 0.0478470 -0.5400100 0.5245700 
-0.7096300 ## [91] -0.3252800 -0.1346000 -0.4131400 0.3343500 -0.0072412 0.3225300 ## [97] -0.0442190 -1.2969000 0.7621700 0.4634900 ## Token[3]: \"green\" ## [1] -0.67907000 0.34908000 -0.23984000 -0.99651998 0.73782003 ## [6] -0.00065911 0.28009999 0.01728700 -0.36063001 0.03695500 ## [11] -0.40395001 0.02409200 0.28957999 0.40496999 0.69992000 ## [16] 0.25268999 0.80350000 0.04937000 0.15561999 -0.00632860 ## [21] -0.29414001 0.14727999 0.18977000 -0.51791000 0.36985999 ## [26] 0.74581999 0.08268900 -0.72601002 -0.40939000 -0.09782200 ## [31] -0.14095999 0.71121001 0.61932999 -0.25014001 0.42250001 ## [36] 0.48458001 -0.51915002 0.77125001 0.36684999 0.49652001 ## [41] -0.04129800 -1.46829998 0.20038000 0.18591000 0.04986000 ## [46] -0.17523000 -0.35528001 0.94152999 -0.11898000 -0.51902997 ## [51] -0.01188700 -0.39186001 -0.17478999 0.93450999 -0.58930999 ## [56] -2.77010012 0.34522000 0.86532998 1.08080006 -0.10291000 ## [61] -0.09122000 0.55092001 -0.39473000 0.53675997 1.03830004 ## [66] -0.40658000 0.24590001 -0.26797000 -0.26036000 -0.14150999 ## [71] -0.12022000 0.16234000 -0.74320000 -0.64727998 0.04713300 ## [76] 0.51642001 0.19898000 0.23919000 0.12549999 0.22471000 ## [81] 0.82612997 0.07832800 -0.57020003 0.02393400 -0.15410000 ## [86] -0.25738999 0.41262001 -0.46967000 0.87914002 0.72628999 ## [91] 0.05386200 -1.15750003 -0.47835001 0.20139000 -1.00510001 ## [96] 0.11515000 -0.96609002 0.12960000 0.18388000 -0.03038300 ## Token[4]: \".\" ## [1] -0.3397900 0.2094100 0.4634800 -0.6479200 -0.3837700 0.0380340 ## [7] 0.1712700 0.1597800 0.4661900 -0.0191690 0.4147900 -0.3434900 ## [13] 0.2687200 0.0446400 0.4213100 -0.4103200 0.1545900 0.0222390 ## [19] -0.6465300 0.2525600 0.0431360 -0.1944500 0.4651600 0.4565100 ## [25] 0.6858800 0.0912950 0.2187500 -0.7035100 0.1678500 -0.3507900 ## [31] -0.1263400 0.6638400 -0.2582000 0.0365420 -0.1360500 0.4025300 ## [37] 0.1428900 0.3813200 -0.1228300 -0.4588600 -0.2528200 -0.3043200 ## [43] -0.1121500 -0.2618200 
-0.2248200 -0.4455400 0.2991000 -0.8561200 ## [49] -0.1450300 -0.4908600 0.0082973 -0.1749100 0.2752400 1.4401000 ## [55] -0.2123900 -2.8434999 -0.2795800 -0.4572200 1.6386000 0.7880800 ## [61] -0.5526200 0.6500000 0.0864260 0.3901200 1.0632000 -0.3537900 ## [67] 0.4832800 0.3460000 0.8417400 0.0987070 -0.2421300 -0.2705300 ## [73] 0.0452870 -0.4014700 0.1139500 0.0062226 0.0366730 0.0185180 ## [79] -1.0213000 -0.2080600 0.6407200 -0.0687630 -0.5863500 0.3347600 ## [85] -1.1432000 -0.1148000 -0.2509100 -0.4590700 -0.0968190 -0.1794600 ## [91] -0.0633510 -0.6741200 -0.0688950 0.5360400 -0.8777300 0.3180200 ## [97] -0.3924200 -0.2339400 0.4729800 -0.0288030"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"suppored-models","dir":"Articles","previous_headings":"Classic Word Embeddings","what":"Suppored Models:","title":"WordEmbeddings Supported in Flair NLP","text":"following embeddings currently supported: , want load German FastText embeddings, instantiate follows: Alternatively, want load German FastText embeddings trained crawls, instantiate follows:","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Tutorials","text":"Flair NLP open-source library Natural Language Processing (NLP) developed Zalando Research. Known state---art solutions NLP tasks like Named Entity Recognition (NER), Part--Speech tagging (POS), , garnered attention NLP community ease use powerful functionalities. Developed Python, built PyTorch framework, offers flexible dynamic approach deal textual data. hand, {flaiR} R aims continue framework established Flair Python creating framework R, thereby extending Flair’s capabilities R programming environment. One hallmark features Flair contextual string embeddings, crucial discerning meaning words different contextual usages. 
Traditional embeddings assign fixed vector word, without considering context, can limitation trying understand nuances word’s usage across different sentences. contrary, Flair’s contextual embeddings generate word vectors considering surrounding text, thus capturing word’s context semantics accurately. particularly impactful scenarios word can different meanings based usage. Flair offers pre-trained models various languages tasks, providing solid foundation various NLP applications text classification, sentiment analysis, entity recognition, etc. instance, ’re involved project requires identifying persons, organizations, locations text, Flair pre-trained NER models can simplify task.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"oop-in-r-when-introducing-python-module","dir":"Articles","previous_headings":"Introduction","what":"OOP in R when Introducing Python Module","title":"Tutorials","text":"Object-Oriented Programming (OOP) programming paradigm uses objects, contain data (attributes) functions (methods), design applications software. idea bind data methods operate data one single unit, object. advent R6, OOP common early stages R. knowledge, R6 relatively rare; aside {mlr3}, written R6, packages accomplished S4 S3 (personal experience), , course, may greatly related habits tasks R users. However, purpose {flaiR} standardize wrapping ‘{flair NLP}’ Python functionality R provide convenient access R users utilize flair NLP features. usage Flair NLP within {flaiR} employs concepts objects classes, similar R6 . However, features packaged {reticulate} Python. words, functionalities imported R essentially belong Python classes modules. {flairR} architecture, use simplest S3 method wrap modules methods within modules, allowing R users conveniently access use Python functionalities. addition, tensors serve fundamental building block creating training neural networks conducting various numerical computations Python. 
Flair’s NLP tasks Python PyTorch, numerous extensive functionalities tensor operations, including element-wise operations, matrix multiplications, reshaping. tutorial, also cover work tensors R convert tensors matrices R environment. particularly important using Flair word embeddings R environment. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"the-overview","dir":"Articles","previous_headings":"Introduction","what":"The Overview","title":"Tutorials","text":"following tutorial mainly based Tadej Magajna’s ‘Natural Language Processing Flair: Practical Guide Understanding Solving NLP Problems’, well official Flair NLP Python tutorial blog. written Python. utilize examples {flaiR} R , welcome cite R repository, also cite works. Except necessary, everything accomplished within R environment, utilizing several important R packages, {quanteda}, {udpipe}, {mlr3}, complete following topics: Sentence Token Object Sequence Taggings Embedding flaiR Training Binary Classifier flaiR Training RNN FlaiR Finetune BERT FlaiR (progress) ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"sentence-and-token","dir":"Articles","previous_headings":"","what":"Sentence and Token","title":"Tutorials","text":"Sentence Token fundamental classes.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"sentence","dir":"Articles","previous_headings":"Sentence and Token","what":"Sentence","title":"Tutorials","text":"Sentence Flair object contains sequence Token objects, can annotated labels, named entities, part--speech tags, . also can store embeddings sentence whole different kinds linguistic annotations. 
’s simple example create Sentence: Sentence[26] means total 26 tokens sentence.","code":"# Creating a Sentence object library(flaiR) string <- \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\" Sentence <- flair_data()$Sentence sentence <- Sentence(string) print(sentence) #> Sentence[26]: \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"token","dir":"Articles","previous_headings":"Sentence and Token","what":"Token","title":"Tutorials","text":"use Flair handle text data,1 Sentence Token objects often play central roles many use cases. create Sentence object, usually automatically decomposes internal raw text multiple Token objects. words, Sentence object automatically handles text tokenization work, usually don’t need create Token objects manually. Unlike R, indexes 1, Python indexes 0. Therefore, use loop, use seq_along(sentence) - 1. output something like: can directly use $tokens method print tokens. Retrieve Token comprehend string representation format Sentence object, tagging least one token adequate. get_token(n) method, Python method, allows us retrieve Token object particular token. Additionally, can use [] index specific token. noteworthy Python indexes 0, whereas R starts indexing 1. word (punctuation) sentence treated individual Token object. Token objects store text information possible linguistic information (part--speech tags named entity tags) embeddings (used model generate ). Even though cases need create Token objects manually, understanding manage objects manually still useful situations, want fine-grained control tokenization process. example, can control exactness tokenization adding manually created Token objects Sentence object. design pattern Flair allows users handle text data flexible way. 
Users can use automatic tokenization feature rapid development, also perform finer-grained control accommodate use cases. Annotate POS tag NER tag add_label(label_type, value) method can employed assign label token. manually add tag preliminary tutorial, usually, Universal POS tags, sentence[10] ‘see’, ‘seen’ might tagged VERB, indicating past participle form verb. can also add NER (Named Entity Recognition) tag sentence[4], “UCD”, identifying university Dublin. print sentence object, Sentence[50] provides information 50 tokens → [‘’/ORG, ‘seen’/VERB], thus displaying two tagging pieces information.","code":"# The Sentence object has automatically created and contains multiple Token objects # We can iterate through the Sentence object to view each Token. for (i in seq_along(sentence)-1) { print(sentence[[i]]) } #> Token[0]: \"What\" #> Token[1]: \"I\" #> Token[2]: \"see\" #> Token[3]: \"in\" #> Token[4]: \"UCD\" #> Token[5]: \"today\" #> Token[6]: \",\" #> Token[7]: \"what\" #> Token[8]: \"I\" #> Token[9]: \"have\" #> Token[10]: \"seen\" #> Token[11]: \"of\" #> Token[12]: \"UCD\" #> Token[13]: \"in\" #> Token[14]: \"its\" #> Token[15]: \"impact\" #> Token[16]: \"on\" #> Token[17]: \"my\" #> Token[18]: \"own\" #> Token[19]: \"life\" #> Token[20]: \"and\" #> Token[21]: \"the\" #> Token[22]: \"life\" #> Token[23]: \"of\" #> Token[24]: \"Ireland\" #> Token[25]: \".\" print(sentence$tokens) #> [[1]] #> Token[0]: \"What\" #> #> [[2]] #> Token[1]: \"I\" #> #> [[3]] #> Token[2]: \"see\" #> #> [[4]] #> Token[3]: \"in\" #> #> [[5]] #> Token[4]: \"UCD\" #> #> [[6]] #> Token[5]: \"today\" #> #> [[7]] #> Token[6]: \",\" #> #> [[8]] #> Token[7]: \"what\" #> #> [[9]] #> Token[8]: \"I\" #> #> [[10]] #> Token[9]: \"have\" #> #> [[11]] #> Token[10]: \"seen\" #> #> [[12]] #> Token[11]: \"of\" #> #> [[13]] #> Token[12]: \"UCD\" #> #> [[14]] #> Token[13]: \"in\" #> #> [[15]] #> Token[14]: \"its\" #> #> [[16]] #> Token[15]: \"impact\" #> #> [[17]] #> Token[16]: \"on\" #> #> [[18]] #> 
Token[17]: \"my\" #> #> [[19]] #> Token[18]: \"own\" #> #> [[20]] #> Token[19]: \"life\" #> #> [[21]] #> Token[20]: \"and\" #> #> [[22]] #> Token[21]: \"the\" #> #> [[23]] #> Token[22]: \"life\" #> #> [[24]] #> Token[23]: \"of\" #> #> [[25]] #> Token[24]: \"Ireland\" #> #> [[26]] #> Token[25]: \".\" # method in Python sentence$get_token(5) #> Token[4]: \"UCD\" # indexing in R sentence[4] #> Token[4]: \"UCD\" sentence[10]$add_label('manual-pos', 'VERB') print(sentence[10]) #> Token[10]: \"seen\" → VERB (1.0) sentence[4]$add_label('ner', 'ORG') print(sentence[4]) #> Token[4]: \"UCD\" → ORG (1.0) print(sentence) #> Sentence[26]: \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\" → [\"UCD\"/ORG, \"seen\"/VERB]"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"corpus","dir":"Articles","previous_headings":"Sentence and Token","what":"Corpus","title":"Tutorials","text":"Corpus object Flair fundamental data structure represents dataset containing text samples, usually comprising training set, development set (validation set), test set. ’s designed work smoothly Flair’s models tasks like named entity recognition, text classification, . Attributes: train: list sentences (List[Sentence]) form training dataset. dev (development): list sentences (List[Sentence]) form development (validation) dataset. test: list sentences (List[Sentence]) form test dataset. Important Methods: downsample: method allows downsample (reduce) number sentences train, dev, test splits. obtain_statistics: method gives quick overview statistics corpus, including number sentences distribution labels. make_vocab_dictionary: Used create vocabulary dictionary corpus. $obtain_statistics() method Corpus object Flair library provides overview dataset statistics. method returns Python’s dictionary details training, validation (development), test datasets make corpus. R, can use jsonlite package format JSON. 
R , use data article Temporal Focus Campaign Communication Stefan Muller, published Journal Politics 2020, example. First, vectorize cc_muller$text using Sentence function transform list object. , reformat cc_muller$class_pro_retro factor. ’s essential note R handles numerical values differently Python. R, numerical values represented floating point, ’s advisable convert factors strings. Lastly, employ map function purrr package assign labels sentence corpus using $add_label method. perform train-test split using base R, can follow steps: don’t provide dev set, Flair won’t force carve portion test set serve dev set. However, cases train test sets provided without dev set, Flair might automatically take fraction train set (e.g., 10%) use dev set (#2259). offer mechanism model selection early stopping prevent model overfitting train set. “Corpus” function, random selection “dev.” ensure reproducibility, need set seed Flair framework. can accomplish calling top-level module “flair” via {flaiR} using $set_seed(1964L) set seed. later sections, similar processing using Corpus. Following , focus advanced NLP applications. 
","code":"library(flaiR) Corpus <- flair_data()$Corpus Sentence <- flair_data()$Sentence # Create some example sentences train <- list(Sentence('This is a training example.')) dev <- list(Sentence('This is a validation example.')) test <- list(Sentence('This is a test example.')) # Create a corpus using the custom data splits corpus <- Corpus(train = train, dev = dev, test = test) library(jsonlite) data <- fromJSON(corpus$obtain_statistics()) formatted_str <- toJSON(data, pretty=TRUE) print(formatted_str) #> { #> \"TRAIN\": { #> \"dataset\": [\"TRAIN\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> }, #> \"TEST\": { #> \"dataset\": [\"TEST\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> }, #> \"DEV\": { #> \"dataset\": [\"DEV\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> } #> } library(purrr) #> #> Attaching package: 'purrr' #> The following object is masked from 'package:jsonlite': #> #> flatten data(cc_muller) # The `Sentence` object tokenizes text text <- lapply( cc_muller$text, Sentence) # split sentence object to train and test. labels <- as.factor(cc_muller$class_pro_retro) # `$add_label` method assigns the corresponding coded type to each Sentence corpus. 
text <- map2(text, labels, ~ .x$add_label(\"classification\", .y), .progress = TRUE) set.seed(2046) sample <- sample(c(TRUE, FALSE), length(text), replace=TRUE, prob=c(0.8, 0.2)) train <- text[sample] test <- text[!sample] sprintf(\"Corpus object sizes - Train: %d | Test: %d\", length(train), length(test)) #> [1] \"Corpus object sizes - Train: 4710 | Test: 1148\" flair <- import_flair() flair$set_seed(1964L) corpus <- Corpus(train=train, # dev=test, test=test) #> 2023-11-29 09:45:11,080 No dev split found. Using 0% (i.e. 471 samples) of the train split as dev data sprintf(\"Corpus object sizes - Train: %d | Test: %d | Dev: %d\", length(corpus$train), length(corpus$test), length(corpus$dev)) #> [1] \"Corpus object sizes - Train: 4239 | Test: 1148 | Dev: 471\""},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"tag-entities-in-text","dir":"Articles","previous_headings":"Sequence Taggings","what":"Tag Entities in Text","title":"Tutorials","text":"Let’s run named entity recognition following example sentence: “love Berlin New York”. , need make Sentence text, load pre-trained model use predict tags sentence object. print: Use loop print POS tag. ’s important note Python indexed 0. Therefore, R environment, must use seq_along(sentence$get_labels()) - 1. 
","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('ner') #> 2023-11-29 09:45:12,595 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"Berlin\"/LOC, \"New York\"/LOC] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Span[2:3]: \"Berlin\"'/'LOC' (0.9812) #> 'Span[4:6]: \"New York\"'/'LOC' (0.9957)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"tag-part-of-speech-in-text","dir":"Articles","previous_headings":"Sequence Taggings","what":"Tag Part-of-Speech in Text","title":"Tutorials","text":"use flair/pos-english POS tagging standard models Hugging Face. print: Use loop print pos tag. ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('pos') #> 2023-11-29 09:45:13,472 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"I\"/PRP, \"love\"/VBP, \"Berlin\"/NNP, \"and\"/CC, \"New\"/NNP, \"York\"/NNP, \".\"/.] 
for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Token[0]: \"I\"'/'PRP' (1.0) #> 'Token[1]: \"love\"'/'VBP' (1.0) #> 'Token[2]: \"Berlin\"'/'NNP' (0.9999) #> 'Token[3]: \"and\"'/'CC' (1.0) #> 'Token[4]: \"New\"'/'NNP' (1.0) #> 'Token[5]: \"York\"'/'NNP' (1.0) #> 'Token[6]: \".\"'/'.' (1.0)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"detect-sentiment","dir":"Articles","previous_headings":"Sequence Taggings","what":"Detect Sentiment","title":"Tutorials","text":"Let’s run sentiment analysis sentence determine whether POSITIVE NEGATIVE. can essentially code . Just instead loading ‘ner’ model, now load ‘sentiment’ model: ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the Classifier tagger from flair.nn module Classifier <- flair_nn()$Classifier tagger <- Classifier$load('sentiment') # run sentiment analysis over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → POSITIVE (0.9982)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"flair-embedding","dir":"Articles","previous_headings":"","what":"Flair Embedding","title":"Tutorials","text":"Flair popular natural language processing library, providing variety embedding methods text representation Flair. Flair Embeddings word embedding framowork Natural Language Processing, developed Zalando. Flair focuses word-level representation can capture contextual information words, meaning word can different embeddings different contexts. Unlike traditional word embeddings (Word2Vec GloVe), Flair can dynamically generate word embeddings based context achieved excellent results various NLP tasks. key points Flair Embeddings: Context-Aware Flair can understand context word sentence dynamically generate word embeddings based context. 
different static embeddings, embedding word consider context sentence. Flair dynamic word embedding technique can understand meaning words based context. contrast, static word embeddings, Word2Vec GloVe, provide fixed embedding word without considering context sentence. Therefore, context-sensitive embedding techniques, Flair, can capture meaning words specific sentences accurately, thus enhancing performance language models various tasks. Example: Consider following two English sentences: “interested bank river.” “need go bank withdraw money.” , word “bank” two different meanings. first sentence, refers edge shore river. second sentence, refers financial institution. static embeddings, word “bank” might embedding lies somewhere two meanings doesn’t consider context. dynamic embeddings like Flair, “bank” first sentence embedding related rivers, second sentence, embedding related finance. word, similar vector representation, essentially different. way, can see dynamic embeddings “bank” two sentences differ based context. Although printed embeddings , reality, high-dimensional vectors, might see lot numbers. want intuitive view differences, compute cosine similarity metrics two embeddings. just simple demonstration. practice, can also combine multiple embedding techniques, WordEmbeddings FlairEmbeddings, get richer word vectors. Character-Based Flair uses character-level language model, meaning can generate embeddings rare words even misspelled words. important feature allows model understand process words never appeared training data. Flair uses bidirectional LSTM (Long Short-Term Memory) network operates character level. means feeds individual characters LSTM instead words. Multilingual Support Flair provides various pre-trained character-level language models, supporting contextual word embeddings multiple languages. Flair allows easily combine different word embeddings (e.g., Flair Embeddings, Word2Vec, GloVe, etc.) 
create powerful stacked embeddings.","code":"FlairEmbeddings <- flair_embeddings()$FlairEmbeddings Sentence <- flair_data()$Sentence # Initialize Flair embeddings flair_embedding_forward <- FlairEmbeddings('news-forward') # Define the two sentences sentence1 <- Sentence(\"I am interested in the bank of the river.\") sentence2 <- Sentence(\"I need to go to the bank to withdraw money.\") # Get the embeddings flair_embedding_forward$embed(sentence1) #> [[1]] #> Sentence[10]: \"I am interested in the bank of the river.\" flair_embedding_forward$embed(sentence2) #> [[1]] #> Sentence[11]: \"I need to go to the bank to withdraw money.\" # Extract the embedding for \"bank\" from the sentences bank_embedding_sentence1 = sentence1[5]$embedding # \"bank\" is the seventh word bank_embedding_sentence2 = sentence2[6]$embedding # \"bank\" is the sixth word library(lsa) #> Loading required package: SnowballC cosine(as.numeric( bank_embedding_sentence1$numpy()), as.numeric( bank_embedding_sentence2$numpy())) #> [,1] #> [1,] 0.7329552"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"classic-wordembeddings","dir":"Articles","previous_headings":"Flair Embedding","what":"Classic Wordembeddings","title":"Tutorials","text":"Flair, simplest form embedding still contains semantic information word called classic word embeddings. embeddings pre-trained non-contextual. Let’s retrieve word embeddings. , can utilize FastText embeddings following code. use , simply instantiate WordEmbeddings class passing ID embedding choice. , simply wrap text Sentence object, call embed(sentence) method WordEmbeddings class. Flair supports range classic word embeddings, offering unique features application scopes. overview, detailing ID required load embedding corresponding language. 
","code":"embedding = flair_embeddings.WordEmbeddings('crawl') sentence = flair_data.Sentence(\"one two three one\") embedding$embed(sentence) #> [[1]] #> Sentence[4]: \"one two three one\" for (i in seq_along(sentence$tokens)) { print(head(sentence$tokens[[i]]$embedding), n =5) } #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383]) #> tensor([ 0.0282, -0.0786, -0.1236, 0.1756, -0.1199, 0.0964]) #> tensor([-0.0920, -0.0690, -0.1475, 0.2313, -0.0872, 0.0799]) #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383])"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"contexual-embeddings","dir":"Articles","previous_headings":"Flair Embedding","what":"Contexual Embeddings","title":"Tutorials","text":"Understanding contextuality Flair embeddings idea behind contextual string embeddings word embedding defined syntactic-semantic meaning also context appears . means word different embedding every context appears . pre-trained Flair model offers forward version backward version. Let’s assume processing language , just like book, uses left--right script. forward version takes account context happens word – left-hand side. backward version works opposite direction. takes account context word – right-hand side word. true, two words appear beginning two different sentences identical forward embeddings, context null. Let’s test : using forward model, takes account context occurs word. Additionally, since word context left-hand side position sentence, two embeddings identical, code assumes identical, indeed output True. test whether sum two 2048 embeddings ‘nice’ equal 2048. true, indicates embedding results consistent, theoretically case. Now separately add words, pretty, two sentence objects. two sets embeddings identical words different, returns False. measure similarity two vectors inner product space known cosine similarity. 
formula calculating cosine similarity two vectors, vectors B, follows: \\(Cosine Similarity = \\frac{\\sum_{} (A_i \\cdot B_i)}{\\sqrt{\\sum_{} (A_i^2)} \\cdot \\sqrt{\\sum_{} (B_i^2)}}\\) can observe similarity two words 0.55. ","code":"embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"nice shirt\") s2 <- flair_data.Sentence(\"nice pants\") embedding$embed(s1) #> [[1]] #> Sentence[2]: \"nice shirt\" embedding$embed(s2) #> [[1]] #> Sentence[2]: \"nice pants\" cat(\" s1 sentence:\", paste(s1[0], sep = \"\"), \"\\n\", \"s2 sentence:\", paste(s2[0], sep = \"\")) #> s1 sentence: Token[0]: \"nice\" #> s2 sentence: Token[0]: \"nice\" length(s1[0]$embedding$numpy()) == sum(s1[0]$embedding$numpy() == s2[0]$embedding$numpy()) #> [1] TRUE embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"nice shirt\") s2 <- flair_data.Sentence(\"nice pants\") embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"very nice shirt\") s2 <- flair_data.Sentence(\"pretty nice pants\") embedding$embed(s1) #> [[1]] #> Sentence[3]: \"very nice shirt\" embedding$embed(s2) #> [[1]] #> Sentence[3]: \"pretty nice pants\" length(s1[0]$embedding$numpy()) == sum(s1[0]$embedding$numpy() == s2[0]$embedding$numpy()) #> [1] FALSE library(lsa) vector1 <- as.numeric(s1[0]$embedding$numpy()) vector2 <- as.numeric(s2[0]$embedding$numpy()) cosine_similarity <- cosine(vector1, vector2) print(cosine_similarity) #> [,1] #> [1,] 0.5571664"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"extracting-embeddings-from-bert","dir":"Articles","previous_headings":"Flair Embedding","what":"Extracting Embeddings from BERT","title":"Tutorials","text":"First, utilize flair.embeddings.TransformerWordEmbeddings function download BERT, transformer models can also found Flair NLP’s Hugging Face. Traverse token sentence print . 
view token, ’s necessary usereticulate::py_str(token) since sentence Python object. ","code":"TransformerWordEmbeddings <- flair_embeddings.TransformerWordEmbeddings(\"bert-base-uncased\") embedding <- TransformerWordEmbeddings$embed(sentence) # Iterate through each token in the sentence, printing them. # Utilize reticulate::py_str(token) to view each token, given that the sentence is a Python object. for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 10 elements of the vector. token_embedding <- sentence$tokens[[i]]$embedding print(head(token_embedding, 10)) } #> Token: Token[0]: \"one\" #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383, -0.1200, 0.2620, #> -0.0575, 0.0228]) #> Token: Token[1]: \"two\" #> tensor([ 0.0282, -0.0786, -0.1236, 0.1756, -0.1199, 0.0964, -0.1327, 0.4449, #> -0.0264, -0.1168]) #> Token: Token[2]: \"three\" #> tensor([-0.0920, -0.0690, -0.1475, 0.2313, -0.0872, 0.0799, -0.0901, 0.4403, #> -0.0103, -0.1494]) #> Token: Token[3]: \"one\" #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383, -0.1200, 0.2620, #> -0.0575, 0.0228])"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-a-binary-classifier-in-flair","dir":"Articles","previous_headings":"","what":"Training a Binary Classifier in flaiR","title":"Tutorials","text":"section, ’ll train sentiment analysis model can categorize text either positive negative. case study adapted pages 116 130 Tadej Magajna’s book, ‘Natural Language Processing Flair’. process training text classifiers Flair mirrors sequence followed sequence labeling models. Specifically, steps train text classifiers : Load tagged corpus compute label dictionary map. Prepare document embeddings. Initialize TextClassifier class. 
Train model.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-a-tagged-corpus","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading a Tagged Corpus","title":"Tutorials","text":"Training text classification models requires set text documents (typically, sentences paragraphs) document associated one classification labels. train sentiment analysis text classification model, using famous Internet Movie Database (IMDb) dataset, contains 50,000 movie reviews IMDB, review labeled either positive negative. References dataset already baked Flair, loading dataset couldn’t easier: Print sizes corpus object follows - test: %d | train: %d | dev: %d”","code":"library(flaiR) # load IMDB from flair_datasets module Corpus <- flair_data()$Corpus IMDB <- flair_datasets()$IMDB # downsize to 0.05 corpus = IMDB() #> 2023-11-29 09:45:22,978 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced #> 2023-11-29 09:45:22,978 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt #> 2023-11-29 09:45:22,978 Dev: None #> 2023-11-29 09:45:22,978 Test: None #> 2023-11-29 09:45:23,578 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data #> 2023-11-29 09:45:23,593 No dev split found. Using 0% (i.e. 4500 samples) of the train split as dev data #> 2023-11-29 09:45:23,593 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment') corpus$downsample(0.05) #> test_size <- length(corpus$test) train_size <- length(corpus$train) dev_size <- length(corpus$dev) output <- sprintf(\"Corpus object sizes - Test: %d | Train: %d | Dev: %d\", test_size, train_size, dev_size) print(output) #> [1] \"Corpus object sizes - Test: 250 | Train: 2025 | Dev: 225\" lbl_type = 'sentiment' label_dict = corpus$make_label_dictionary(label_type=lbl_type) #> 2023-11-29 09:45:23,699 Computing label dictionary. 
Progress: #> 2023-11-29 09:45:27,449 Dictionary created for label 'sentiment' with 2 values: POSITIVE (seen 1014 times), NEGATIVE (seen 1011 times)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-the-embeddings","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading the Embeddings","title":"Tutorials","text":"flair, iit covers different types document embeddings can use. , simply use DocumentPoolEmbeddings. require training prior training classification model :","code":"DocumentPoolEmbeddings <- flair_embeddings()$DocumentPoolEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings glove = WordEmbeddings('glove') document_embeddings = DocumentPoolEmbeddings(glove)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"initializing-the-textclassifier-class","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Initializing the TextClassifier Class","title":"Tutorials","text":"$allows set device use CPU, GPU, specific MPS devices Mac (mps:0, mps:1, mps:2).","code":"# initiate TextClassifier TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier(document_embeddings, label_dictionary = label_dict, label_type = lbl_type) classifier$to(flair_device(\"mps\")) TextClassifier( (embeddings): DocumentPoolEmbeddings( fine_tune_mode=none, pooling=mean (embeddings): StackedEmbeddings( (list_embedding_0): WordEmbeddings( 'glove' (embedding): Embedding(400001, 100) ) ) ) (decoder): Linear(in_features=100, out_features=3, bias=True) (dropout): Dropout(p=0.0, inplace=False) (locked_dropout): LockedDropout(p=0.0) (word_dropout): WordDropout(p=0.0) (loss_function): CrossEntropyLoss() )"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-the-model","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Training the Model","title":"Tutorials","text":"Training text 
classifier model involves two simple steps: - Defining model trainer class passing classifier model corpus - Setting training process passing required training hyperparameters. worth noting ‘L’ numbers like 32L 5L used R denote number integer. Without ‘L’ suffix, numbers R treated numeric, default double-precision floating-point numbers. contrast, Python determines type based value number . Whole numbers (e.g., 5 32) type int, numbers decimal points (e.g., 5.0) type float. Floating-point numbers languages representations real numbers can approximation due way stored memory.","code":"# initiate ModelTrainer ModelTrainer <- flair_trainers()$ModelTrainer # fit the model trainer <- ModelTrainer(classifier, corpus) # start to train # note: the 'L' in 32L is used in R to denote that the number is an integer. trainer$train('classifier', learning_rate=0.1, mini_batch_size=32L, # specifies how embeddings are stored in RAM, ie.\"cpu\", \"cuda\", \"gpu\", \"mps\". # embeddings_storage_mode = \"mps\", max_epochs=10L) #> 2023-11-29 09:45:29,229 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,229 Model: \"TextClassifier( #> (embeddings): DocumentPoolEmbeddings( #> fine_tune_mode=none, pooling=mean #> (embeddings): StackedEmbeddings( #> (list_embedding_0): WordEmbeddings( #> 'glove' #> (embedding): Embedding(400001, 100) #> ) #> ) #> ) #> (decoder): Linear(in_features=100, out_features=2, bias=True) #> (dropout): Dropout(p=0.0, inplace=False) #> (locked_dropout): LockedDropout(p=0.0) #> (word_dropout): WordDropout(p=0.0) #> (loss_function): CrossEntropyLoss() #> (weights): None #> (weight_tensor) None #> )\" #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Corpus: 2025 train + 225 dev + 250 test sentences #> 2023-11-29 09:45:29,230 
---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Train: 2025 sentences #> 2023-11-29 09:45:29,230 (train_with_dev=False, train_with_test=False) #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Training Params: #> 2023-11-29 09:45:29,230 - learning_rate: \"0.1\" #> 2023-11-29 09:45:29,230 - mini_batch_size: \"32\" #> 2023-11-29 09:45:29,230 - max_epochs: \"10\" #> 2023-11-29 09:45:29,230 - shuffle: \"True\" #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Plugins: #> 2023-11-29 09:45:29,230 - AnnealOnPlateau | patience: '3', anneal_factor: '0.5', min_learning_rate: '0.0001' #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Final evaluation on model from best epoch (best-model.pt) #> 2023-11-29 09:45:29,230 - metric: \"('micro avg', 'f1-score')\" #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Computation: #> 2023-11-29 09:45:29,230 - compute on device: cpu #> 2023-11-29 09:45:29,230 - embedding storage: cpu #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 Model training base path: \"classifier\" #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,230 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:29,964 epoch 1 - iter 6/64 - loss 0.88400855 - time (sec): 0.73 - samples/sec: 261.81 - lr: 0.100000 - 
momentum: 0.000000 #> 2023-11-29 09:45:30,639 epoch 1 - iter 12/64 - loss 0.88396302 - time (sec): 1.41 - samples/sec: 272.61 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:31,578 epoch 1 - iter 18/64 - loss 0.90135088 - time (sec): 2.35 - samples/sec: 245.34 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:32,297 epoch 1 - iter 24/64 - loss 0.91910574 - time (sec): 3.07 - samples/sec: 250.46 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:32,975 epoch 1 - iter 30/64 - loss 0.92947888 - time (sec): 3.74 - samples/sec: 256.36 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:33,859 epoch 1 - iter 36/64 - loss 0.91739042 - time (sec): 4.63 - samples/sec: 248.92 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:34,564 epoch 1 - iter 42/64 - loss 0.91689622 - time (sec): 5.33 - samples/sec: 252.01 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:35,265 epoch 1 - iter 48/64 - loss 0.91121411 - time (sec): 6.03 - samples/sec: 254.52 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:35,999 epoch 1 - iter 54/64 - loss 0.90482956 - time (sec): 6.77 - samples/sec: 255.31 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:36,902 epoch 1 - iter 60/64 - loss 0.90788143 - time (sec): 7.67 - samples/sec: 250.28 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:37,218 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:37,218 EPOCH 1 done: loss 0.9077 - lr: 0.100000 #> 2023-11-29 09:45:38,373 DEV : loss 0.8828572034835815 - f1-score (micro avg) 0.4533 #> 2023-11-29 09:45:38,979 - 0 epochs without improvement #> 2023-11-29 09:45:38,980 saving best model #> 2023-11-29 09:45:39,398 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:40,192 epoch 2 - iter 6/64 - loss 0.88782200 - time (sec): 0.79 - samples/sec: 241.98 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:40,879 epoch 2 - 
iter 12/64 - loss 0.89557258 - time (sec): 1.48 - samples/sec: 259.40 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:41,616 epoch 2 - iter 18/64 - loss 0.88284143 - time (sec): 2.22 - samples/sec: 259.75 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:42,521 epoch 2 - iter 24/64 - loss 0.86361855 - time (sec): 3.12 - samples/sec: 245.94 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:43,244 epoch 2 - iter 30/64 - loss 0.86423665 - time (sec): 3.85 - samples/sec: 249.66 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:43,970 epoch 2 - iter 36/64 - loss 0.85561348 - time (sec): 4.57 - samples/sec: 251.96 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:44,687 epoch 2 - iter 42/64 - loss 0.85215194 - time (sec): 5.29 - samples/sec: 254.13 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:45,569 epoch 2 - iter 48/64 - loss 0.85517522 - time (sec): 6.17 - samples/sec: 248.93 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:46,358 epoch 2 - iter 54/64 - loss 0.84677640 - time (sec): 6.96 - samples/sec: 248.28 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:47,097 epoch 2 - iter 60/64 - loss 0.85021446 - time (sec): 7.70 - samples/sec: 249.39 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:47,563 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:47,563 EPOCH 2 done: loss 0.8530 - lr: 0.100000 #> 2023-11-29 09:45:48,557 DEV : loss 0.8783490061759949 - f1-score (micro avg) 0.4533 #> 2023-11-29 09:45:49,193 - 0 epochs without improvement #> 2023-11-29 09:45:49,194 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:49,916 epoch 3 - iter 6/64 - loss 0.87799916 - time (sec): 0.72 - samples/sec: 265.93 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:50,843 epoch 3 - iter 12/64 - loss 0.89608843 - time (sec): 1.65 - samples/sec: 232.80 - lr: 0.100000 - momentum: 
0.000000 #> 2023-11-29 09:45:51,572 epoch 3 - iter 18/64 - loss 0.90057748 - time (sec): 2.38 - samples/sec: 242.17 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:52,306 epoch 3 - iter 24/64 - loss 0.89696234 - time (sec): 3.11 - samples/sec: 246.75 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:53,218 epoch 3 - iter 30/64 - loss 0.89147007 - time (sec): 4.02 - samples/sec: 238.59 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:53,965 epoch 3 - iter 36/64 - loss 0.89204659 - time (sec): 4.77 - samples/sec: 241.47 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:54,667 epoch 3 - iter 42/64 - loss 0.87829229 - time (sec): 5.47 - samples/sec: 245.57 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:55,379 epoch 3 - iter 48/64 - loss 0.87632222 - time (sec): 6.19 - samples/sec: 248.34 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:56,098 epoch 3 - iter 54/64 - loss 0.87269292 - time (sec): 6.90 - samples/sec: 250.30 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:56,985 epoch 3 - iter 60/64 - loss 0.87970024 - time (sec): 7.79 - samples/sec: 246.44 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:45:57,307 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:57,307 EPOCH 3 done: loss 0.8762 - lr: 0.100000 #> 2023-11-29 09:45:58,455 DEV : loss 0.8697565793991089 - f1-score (micro avg) 0.4533 #> 2023-11-29 09:45:59,065 - 0 epochs without improvement #> 2023-11-29 09:45:59,065 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:45:59,875 epoch 4 - iter 6/64 - loss 0.88036998 - time (sec): 0.81 - samples/sec: 237.15 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:00,627 epoch 4 - iter 12/64 - loss 0.84520512 - time (sec): 1.56 - samples/sec: 245.90 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:01,523 epoch 4 - iter 18/64 - loss 0.84692961 - time (sec): 2.46 - 
samples/sec: 234.38 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:02,233 epoch 4 - iter 24/64 - loss 0.84134499 - time (sec): 3.17 - samples/sec: 242.50 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:02,944 epoch 4 - iter 30/64 - loss 0.85828587 - time (sec): 3.88 - samples/sec: 247.51 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:03,661 epoch 4 - iter 36/64 - loss 0.85654225 - time (sec): 4.60 - samples/sec: 250.68 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:04,413 epoch 4 - iter 42/64 - loss 0.84629689 - time (sec): 5.35 - samples/sec: 251.32 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:05,317 epoch 4 - iter 48/64 - loss 0.84268748 - time (sec): 6.25 - samples/sec: 245.71 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:06,018 epoch 4 - iter 54/64 - loss 0.84196810 - time (sec): 6.95 - samples/sec: 248.57 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:06,762 epoch 4 - iter 60/64 - loss 0.85795100 - time (sec): 7.70 - samples/sec: 249.46 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:07,236 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:07,236 EPOCH 4 done: loss 0.8562 - lr: 0.100000 #> 2023-11-29 09:46:08,223 DEV : loss 0.8513666391372681 - f1-score (micro avg) 0.4533 #> 2023-11-29 09:46:08,860 - 0 epochs without improvement #> 2023-11-29 09:46:08,860 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:09,622 epoch 5 - iter 6/64 - loss 0.87054925 - time (sec): 0.76 - samples/sec: 252.11 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:10,315 epoch 5 - iter 12/64 - loss 0.88675048 - time (sec): 1.45 - samples/sec: 264.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:11,253 epoch 5 - iter 18/64 - loss 0.85735505 - time (sec): 2.39 - samples/sec: 240.79 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:11,975 epoch 5 - iter 
24/64 - loss 0.86416615 - time (sec): 3.11 - samples/sec: 246.61 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:12,717 epoch 5 - iter 30/64 - loss 0.85780198 - time (sec): 3.86 - samples/sec: 248.90 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:13,465 epoch 5 - iter 36/64 - loss 0.85106002 - time (sec): 4.60 - samples/sec: 250.17 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:14,353 epoch 5 - iter 42/64 - loss 0.84119070 - time (sec): 5.49 - samples/sec: 244.69 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:15,145 epoch 5 - iter 48/64 - loss 0.84184100 - time (sec): 6.28 - samples/sec: 244.40 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:15,861 epoch 5 - iter 54/64 - loss 0.83640844 - time (sec): 7.00 - samples/sec: 246.82 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:16,600 epoch 5 - iter 60/64 - loss 0.83129187 - time (sec): 7.74 - samples/sec: 248.08 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:17,078 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:17,079 EPOCH 5 done: loss 0.8262 - lr: 0.100000 #> 2023-11-29 09:46:18,236 DEV : loss 0.8393897414207458 - f1-score (micro avg) 0.4578 #> 2023-11-29 09:46:18,652 - 0 epochs without improvement #> 2023-11-29 09:46:18,653 saving best model #> 2023-11-29 09:46:19,006 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:19,813 epoch 6 - iter 6/64 - loss 0.90684560 - time (sec): 0.81 - samples/sec: 238.02 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:20,737 epoch 6 - iter 12/64 - loss 0.86297455 - time (sec): 1.73 - samples/sec: 221.92 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:21,506 epoch 6 - iter 18/64 - loss 0.84712324 - time (sec): 2.50 - samples/sec: 230.38 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:22,224 epoch 6 - iter 24/64 - loss 0.84480650 - time (sec): 3.22 - 
samples/sec: 238.68 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:23,091 epoch 6 - iter 30/64 - loss 0.84118373 - time (sec): 4.08 - samples/sec: 235.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:23,861 epoch 6 - iter 36/64 - loss 0.84887512 - time (sec): 4.86 - samples/sec: 237.27 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:24,546 epoch 6 - iter 42/64 - loss 0.85247641 - time (sec): 5.54 - samples/sec: 242.61 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:25,317 epoch 6 - iter 48/64 - loss 0.83503058 - time (sec): 6.31 - samples/sec: 243.38 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:26,260 epoch 6 - iter 54/64 - loss 0.83527769 - time (sec): 7.25 - samples/sec: 238.23 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:26,972 epoch 6 - iter 60/64 - loss 0.82645011 - time (sec): 7.97 - samples/sec: 241.03 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:27,261 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:27,261 EPOCH 6 done: loss 0.8187 - lr: 0.100000 #> 2023-11-29 09:46:28,414 DEV : loss 0.697002649307251 - f1-score (micro avg) 0.5289 #> 2023-11-29 09:46:29,017 - 0 epochs without improvement #> 2023-11-29 09:46:29,018 saving best model #> 2023-11-29 09:46:29,346 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:30,144 epoch 7 - iter 6/64 - loss 0.92862150 - time (sec): 0.80 - samples/sec: 240.83 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:30,835 epoch 7 - iter 12/64 - loss 0.88754721 - time (sec): 1.49 - samples/sec: 258.07 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:31,606 epoch 7 - iter 18/64 - loss 0.87636076 - time (sec): 2.26 - samples/sec: 254.97 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:32,542 epoch 7 - iter 24/64 - loss 0.86338259 - time (sec): 3.19 - samples/sec: 240.38 - lr: 0.100000 - momentum: 0.000000 
#> 2023-11-29 09:46:33,309 epoch 7 - iter 30/64 - loss 0.86797423 - time (sec): 3.96 - samples/sec: 242.26 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:34,053 epoch 7 - iter 36/64 - loss 0.85439281 - time (sec): 4.71 - samples/sec: 244.76 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:34,788 epoch 7 - iter 42/64 - loss 0.85024860 - time (sec): 5.44 - samples/sec: 247.01 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:35,647 epoch 7 - iter 48/64 - loss 0.83954957 - time (sec): 6.30 - samples/sec: 243.78 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:36,397 epoch 7 - iter 54/64 - loss 0.83340724 - time (sec): 7.05 - samples/sec: 245.08 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:37,109 epoch 7 - iter 60/64 - loss 0.83293155 - time (sec): 7.76 - samples/sec: 247.36 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:37,561 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:37,561 EPOCH 7 done: loss 0.8286 - lr: 0.100000 #> 2023-11-29 09:46:38,547 DEV : loss 0.7402542233467102 - f1-score (micro avg) 0.5022 #> 2023-11-29 09:46:39,182 - 1 epochs without improvement #> 2023-11-29 09:46:39,183 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:39,967 epoch 8 - iter 6/64 - loss 0.72758570 - time (sec): 0.78 - samples/sec: 245.06 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:40,713 epoch 8 - iter 12/64 - loss 0.77498165 - time (sec): 1.53 - samples/sec: 251.09 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:41,576 epoch 8 - iter 18/64 - loss 0.78188934 - time (sec): 2.39 - samples/sec: 240.68 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:42,334 epoch 8 - iter 24/64 - loss 0.79232205 - time (sec): 3.15 - samples/sec: 243.76 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:43,067 epoch 8 - iter 30/64 - loss 0.78611903 - time (sec): 3.88 - samples/sec: 
247.20 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:43,805 epoch 8 - iter 36/64 - loss 0.77288217 - time (sec): 4.62 - samples/sec: 249.25 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:44,705 epoch 8 - iter 42/64 - loss 0.77235644 - time (sec): 5.52 - samples/sec: 243.42 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:45,391 epoch 8 - iter 48/64 - loss 0.78255222 - time (sec): 6.21 - samples/sec: 247.43 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:46,135 epoch 8 - iter 54/64 - loss 0.78456129 - time (sec): 6.95 - samples/sec: 248.55 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:46,905 epoch 8 - iter 60/64 - loss 0.77978850 - time (sec): 7.72 - samples/sec: 248.65 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:47,357 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:47,357 EPOCH 8 done: loss 0.7780 - lr: 0.100000 #> 2023-11-29 09:46:48,348 DEV : loss 0.7568300366401672 - f1-score (micro avg) 0.4978 #> 2023-11-29 09:46:48,984 - 2 epochs without improvement #> 2023-11-29 09:46:48,986 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:49,726 epoch 9 - iter 6/64 - loss 0.83446981 - time (sec): 0.74 - samples/sec: 259.52 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:50,655 epoch 9 - iter 12/64 - loss 0.83481617 - time (sec): 1.67 - samples/sec: 230.11 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:51,339 epoch 9 - iter 18/64 - loss 0.84183356 - time (sec): 2.35 - samples/sec: 244.80 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:52,052 epoch 9 - iter 24/64 - loss 0.82389081 - time (sec): 3.07 - samples/sec: 250.54 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:52,837 epoch 9 - iter 30/64 - loss 0.81802387 - time (sec): 3.85 - samples/sec: 249.32 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:53,727 epoch 9 - iter 36/64 - loss 
0.81533981 - time (sec): 4.74 - samples/sec: 243.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:54,433 epoch 9 - iter 42/64 - loss 0.80514485 - time (sec): 5.45 - samples/sec: 246.75 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:55,196 epoch 9 - iter 48/64 - loss 0.81017195 - time (sec): 6.21 - samples/sec: 247.36 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:55,917 epoch 9 - iter 54/64 - loss 0.80359553 - time (sec): 6.93 - samples/sec: 249.35 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:56,798 epoch 9 - iter 60/64 - loss 0.79928778 - time (sec): 7.81 - samples/sec: 245.80 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:46:57,136 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:57,136 EPOCH 9 done: loss 0.7969 - lr: 0.100000 #> 2023-11-29 09:46:58,275 DEV : loss 0.7745500206947327 - f1-score (micro avg) 0.4844 #> 2023-11-29 09:46:58,881 - 3 epochs without improvement #> 2023-11-29 09:46:58,882 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:46:59,709 epoch 10 - iter 6/64 - loss 0.74788894 - time (sec): 0.83 - samples/sec: 232.13 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:00,395 epoch 10 - iter 12/64 - loss 0.76720373 - time (sec): 1.51 - samples/sec: 253.81 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:01,152 epoch 10 - iter 18/64 - loss 0.79161676 - time (sec): 2.27 - samples/sec: 253.72 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:01,913 epoch 10 - iter 24/64 - loss 0.78399789 - time (sec): 3.03 - samples/sec: 253.41 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:02,789 epoch 10 - iter 30/64 - loss 0.78045449 - time (sec): 3.91 - samples/sec: 245.74 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:03,491 epoch 10 - iter 36/64 - loss 0.78069342 - time (sec): 4.61 - samples/sec: 249.96 - lr: 0.100000 - momentum: 0.000000 #> 
2023-11-29 09:47:04,246 epoch 10 - iter 42/64 - loss 0.76349049 - time (sec): 5.36 - samples/sec: 250.56 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:05,005 epoch 10 - iter 48/64 - loss 0.76165402 - time (sec): 6.12 - samples/sec: 250.88 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:05,888 epoch 10 - iter 54/64 - loss 0.77187297 - time (sec): 7.01 - samples/sec: 246.65 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:06,612 epoch 10 - iter 60/64 - loss 0.77289468 - time (sec): 7.73 - samples/sec: 248.38 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 09:47:07,105 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:47:07,105 EPOCH 10 done: loss 0.7810 - lr: 0.100000 #> 2023-11-29 09:47:08,087 DEV : loss 0.7909061908721924 - f1-score (micro avg) 0.4889 #> 2023-11-29 09:47:08,719 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.05] #> 2023-11-29 09:47:09,052 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 09:47:09,053 Loading model from best epoch ... 
#> 2023-11-29 09:47:10,226 #> Results: #> - F-score (micro) 0.592 #> - F-score (macro) 0.5066 #> - Accuracy 0.592 #> #> By class: #> precision recall f1-score support #> #> POSITIVE 0.5600 0.9767 0.7119 129 #> NEGATIVE 0.8800 0.1818 0.3014 121 #> #> accuracy 0.5920 250 #> macro avg 0.7200 0.5793 0.5066 250 #> weighted avg 0.7149 0.5920 0.5132 250 #> #> 2023-11-29 09:47:10,227 ---------------------------------------------------------------------------------------------------- #> $test_score #> [1] 0.592"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-and-using-the-classifiers","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading and Using the Classifiers","title":"Tutorials","text":"training text classification model, resulting classifier already stored memory part classifier variable. possible, however, Python session exited training. , ’ll need load model memory following: import Sentence object. Now, can generate predictions example text inputs. ","code":"TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier$load('classifier/best-model.pt') Sentence <- flair_data()$Sentence sentence <- Sentence(\"great\") classifier$predict(sentence) print(sentence$labels) #> [[1]] #> 'Sentence[1]: \"great\"'/'POSITIVE' (0.9999) sentence <- Sentence(\"sad\") classifier$predict(sentence) print(sentence$labels) #> [[1]] #> 'Sentence[1]: \"sad\"'/'NEGATIVE' (0.9021)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-a-rnn-with-flair","dir":"Articles","previous_headings":"","what":"Training a RNN with FlaiR","title":"Tutorials","text":", train sentiment analysis model categorize text. case, also include pipeline implements use Recurrent Neural Networks (RNN). makes particularly effective tasks involving sequential data. section also show implent one powerful feature featrue, stacked Embeddings. 
can stack multiple embeddings different layers let classifier learn different types features. Flair NLP, {flaiR} package, ’s easy accomplish task.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"import-necessary-modules-from-flair-via-flair-in-r","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Import Necessary Modules from Flair via {flaiR} in R","title":"Tutorials","text":"","code":"library(flaiR) WordEmbeddings <- flair_embeddings()$WordEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings DocumentRNNEmbeddings <- flair_embeddings()$DocumentRNNEmbeddings TextClassifier <- flair_models()$TextClassifier ModelTrainer <- flair_trainers()$ModelTrainer"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"get-the-imdb-corpus","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Get the IMDB Corpus","title":"Tutorials","text":"IMDB movie review dataset used , commonly utilized dataset sentiment analysis. $downsample(0.1) method means 10% dataset used, allowing faster demonstration","code":"# load the IMDB file and downsize it to 0.1 IMDB <- flair_datasets()$IMDB corpus <- IMDB()$downsample(0.1) #> 2023-11-29 09:47:10,640 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced #> 2023-11-29 09:47:10,640 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt #> 2023-11-29 09:47:10,640 Dev: None #> 2023-11-29 09:47:10,640 Test: None #> 2023-11-29 09:47:11,239 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data #> 2023-11-29 09:47:11,254 No dev split found. Using 0% (i.e. 
4500 samples) of the train split as dev data #> 2023-11-29 09:47:11,254 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment') # create the label dictionary lbl_type <- 'sentiment' label_dict <- corpus$make_label_dictionary(label_type=lbl_type) #> 2023-11-29 09:47:11,272 Computing label dictionary. Progress: #> 2023-11-29 09:47:19,010 Dictionary created for label 'sentiment' with 2 values: POSITIVE (seen 2056 times), NEGATIVE (seen 1994 times)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"stacked-embeddings-in-flair","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Stacked Embeddings in flaiR","title":"Tutorials","text":"one Flair’s powerful features: allows integration embeddings enable model learn sparse features. Three types embeddings utilized : GloVe embeddings, two types Flair embeddings (forward backward). Word embeddings used convert words vectors.","code":"# make a list of word embeddings word_embeddings <- list(WordEmbeddings('glove'), FlairEmbeddings('news-forward-fast'), FlairEmbeddings('news-backward-fast')) # initialize the document embeddings document_embeddings <- DocumentRNNEmbeddings(word_embeddings, hidden_size = 512L, reproject_words = TRUE, reproject_words_dimension = 256L) # create a Text Classifier with the embeddings and label dictionary classifier <- TextClassifier(document_embeddings, label_dictionary=label_dict, label_type='class') # initialize the text classifier trainer with our corpus trainer <- ModelTrainer(classifier, corpus)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"start-the-training","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Start the Training","title":"Tutorials","text":"sake example, setting max_epochs 5. might want increase better performance. worth noting thelearning rate parameter determines step size iteration moving towards minimum loss function. 
smaller learning rate slow learning process, lead precise convergence. mini_batch_size determines number samples used compute gradient step. ‘L’ 32L used R denote number integer. patience (aka early stop) hyperparameter used conjunction early stopping avoid overfitting. determines number epochs training process tolerate without improvements stopping training. Setting max_epochs 5 means algorithm make five passes dataset.","code":"# note: the 'L' in 32L is used in R to denote that the number is an integer. trainer$train('models/sentiment', learning_rate=0.1, mini_batch_size=32L, patience=5L, max_epochs=5L)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"to-apply-the-trained-model-for-prediction","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"To Apply the Trained Model for Prediction","title":"Tutorials","text":"","code":"sentence <- \"This movie was really exciting!\" classifier$predict(sentence) print(sentence.labels)"},{"path":"https://davidycliao.github.io/flaiR/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"David Liao. Maintainer, author. Akbik Alan. Author, contributor. Blythe Duncan. Author, contributor. Vollgraf Roland. Author, contributor.","code":""},{"path":"https://davidycliao.github.io/flaiR/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Liao D, Alan , Duncan B, Roland V (2023). flaiR: R Wrapper Accessing FLAIR Python. 
R package version 0.0.6.","code":"@Manual{, title = {flaiR: An R Wrapper for Accessing FLAIR in Python}, author = {David Liao and Akbik Alan and Blythe Duncan and Vollgraf Roland}, year = {2023}, note = {R package version 0.0.6}, }"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"flairr-an-r-wrapper-for-accessing-flair-nlp-library-","dir":"","previous_headings":"","what":"flairR: An R Wrapper for Accessing Flair NLP Library","title":"An R Wrapper for Accessing Flair NLP Library","text":"{flaiR} R wrapper {flairNLP/flair} library Python, designed specifically R users, especially social sciences. provides easy access main functionalities {flairNLP}. Developed Developed Zalando Research Berlin, Flair NLP offers intuitive interfaces exceptional multilingual support, particularly various embedding frameworks, transformers state---art natural language processing tasks analyze text, named entity recognition, sentiment analysis, part--speech tagging, biomedical data, sense disambiguation, classification, support rapidly growing number languages community. comprehensive understanding {flairNLP/flair} architecture, can refer research article ‘Contextual String Embeddings Sequence Labeling’ official manual written Python implementation.","code":""},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"installation-via-github","dir":"","previous_headings":"","what":"Installation via GitHub","title":"An R Wrapper for Accessing Flair NLP Library","text":"installation consists two parts: First, install Python 3.8 higher (avoid developmental versions latest release compatibility reasons). Secondly, install R 4.2.0 higher. System Requirement: Python (>= 3.10.x) R (>= 4.2.0) RStudio (GUI interface allows users adjust manage Python environment R) Anaconda miniconda (highly recommended) tested flaiR using CI/CD GitHub Actions, conducting integration tests across various operating syste tests include intergration R versions 4.2.1, 4.3.2, 4.2.0 Python 3.10.x. 
testing also covers environments flair NLP PyTorch (given Flair NLP built Torch). stable usage, strongly recommend installing specific versions. first installed, {flaiR} automatically detects whether Python 3.8 higher. , skip automatic installation Python flair NLP. case, need manually install reload {flaiR} . correct Python installed, {flaiR} automatically install flair Python NLP global environment. using {reticulate}, {flaiR} typically assume r-reticulate environment default. time, can use py_config() check location environment. Please note flaiR directly install flair NLP Python environment R using. environment can adjusted RStudio navigating Tools -> Global Options -> Python. issues installation, feel free ask Discussion . First, understanding Python environment RStudio using important. advise confirm Python environment RStudio using. can checking reticulate::py_config() manually via Tools -> Global Options -> Python. stage, ’ll observe RStudio defaulted using ‘flair_env’ environment (personal environment) set . , Python Flair package installed within environment. wish modify setting, option either adjust within RStudio’s settings use {reticulate} package manage Python environment R Now, can confidently install flaiR R environment. notice following message, indicating successful installation. 
means RStudio successfully detected correct Python installed Flair Python environment","code":"install.packages(\"reticulate\") reticulate::py_config() #> python: /Users/*********/.virtualenvs/flair_env/bin/python #> libpython: /Users/*********/.pyenv/versions/3.10.13/lib/libpython3.10.dylib #> pythonhome: /Users/*********/.virtualenvs/flair_env:/Users/*********/.virtualenvs/flair_env #> version: 3.10.13 (main, Oct 27 2023, 04:44:16) [Clang 15.0.0 (clang-1500.0.40.1)] #> numpy: /Users/*********/.virtualenvs/flair_env/lib/python3.10/site-packages/numpy #> numpy_version: 1.26.2 #> flair: /Users/*********/.virtualenvs/flair_env/lib/python3.10/site-packages/flair #> NOTE: Python version was forced by use_python() function install.packages(\"remotes\") remotes::install_github(\"davidycliao/flaiR\", force = TRUE) library(flaiR) #> flaiR: An R Wrapper for Accessing Flair NLP 0.13.0"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"An R Wrapper for Accessing Flair NLP Library","text":"R users, {flairR} primarily consists two main components. first wrapper functions {flaiR} built top {reticulate}, enables interact directly Python modules R provides seamless support documents tutorial (progress) R community. {flaiR} package enables R users leverage Flair’s capabilities train models using Flair framework state---art NLP models without need interact directly Python. Flair offers simpler intuitive approach training custom NLP models compared using Transformer-based models directly. Flair, data loading preprocessing streamlined, facilitating easy integration various pre-trained embeddings, including traditional Transformer-based types like BERT. training process Flair condensed just lines code, automatic handling fundamental preprocessing steps. Evaluation optimization also made user-friendly accessible tools. 
addition, Flair NLP provides easy framework training language models compatible HuggingFace.","code":""},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"training-models-with-huggingface-via-flair","dir":"","previous_headings":"Introduction","what":"Training Models with HuggingFace via flaiR","title":"An R Wrapper for Accessing Flair NLP Library","text":"following example offers straightforward introduction fully train model using Flair framework import BERT model HuggingFace 🤗. example utilizes grandstanding score training data Julia Park’s paper (Politicians Grandstand? Measuring Message Politics Committee Hearings) trains model using Transformer-based models via flair NLP {flaiR}. Step 1 Split Data Train Test Sets flair Sentence Object Step 2 Preprocess Data Corpus Object Step 3 Create Classifier Using Transformer First, $make_label_dictionary function used automatically create label dictionary classification task. label dictionary mapping label index, used map labels tensor label indices. expcept classifcation task, flair also supports label types training custom model, ner, pos sentiment. Alternatively, can also create label dictionary manually. following code creates label dictionary two labels, 0 1, maps indices 0 1 respectively. , can use $item2idx method check mapping label index. important make sure labels mapped correctly indices tensors. TextClassifier used create text classifier. classifier takes document embeddings (importing 'distilbert-base-uncased' HugginFace) label dictionary input. label type also specified classification. Step 4 Start Training specific computation devices local machine. GPU, can use flair_gpu specify GPU device. don’t GPU, can use flaiR::flair_device specify CPU device. ModelTrainer used train model, learns data based grandstanding score. Step 5 Evaluate Model model training process, evaluating performance trained model development set straightforward easy. 
overall performance model test set also straightforward easy evaluate. can find performance metrics model/training.log file. Step 6 Apply Trained Model Unseen Data Prediction use statement dataset example. lassifier$predict function used predict label sentence. function returns sentence object predicted label. sentence$labels list labels, value score. value label , score probability label. label highest score predicted label. Step 7 Reload Model Best Performance train model save_final_model=TRUE, model best performance development set saved output directory. can reload model best performance using load function. can create function classify text using specified Flair classifier. performing classification task, let’s quickly check exmaple dataset. Let’s apply function dataset. Secondly, facilitate efficient use social science research, {flairR} expands {flairNLP/flair}’s core functionality working three major functions extract features tidy fast format– data.table R.","code":"# load training data: grandstanding score from Julia Park's paper library(flaiR) data(gs_score) # load flair functions via flaiR Sentence <- flair_data()$Sentence Corpus <- flair_data()$Corpus TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings TextClassifier <- flair_models()$TextClassifier ModelTrainer <- flair_trainers()$ModelTrainer # split the data text <- lapply(gs_score$speech, Sentence) labels <- as.character(gs_score$rescaled_gs) for (i in 1:length(text)) { text[[i]]$add_label(\"classification\", labels[[i]]) } set.seed(2046) sample <- sample(c(TRUE, FALSE), length(text), replace=TRUE, prob=c(0.8, 0.2)) train <- text[sample] test <- text[!sample] corpus <- Corpus(train=train, test=test) #> 2023-11-29 09:32:41,983 No dev split found. Using 0% (i.e. 
282 samples) of the train split as dev data document_embeddings <- TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=TRUE) label_dict <- corpus$make_label_dictionary(label_type=\"classification\") #> 2023-11-29 09:32:43,454 Computing label dictionary. Progress: #> 2023-11-29 09:32:43,504 Dictionary created for label 'classification' with 2 values: 0 (seen 1321 times), 1 (seen 1213 times) # load Dictionary object from flair_data Dictionary <- flair_data()$Dictionary # manually create label_dict with two labels, 0 and 1 label_dict <- Dictionary(add_unk=FALSE) # you can specify the order of labels. Please note the label should be a list and character (string) type. specific_order_labels <- list('0', '1') for (label in seq_along(specific_order_labels)) { label_dict$add_item(as.character(specific_order_labels [[label]])) } print(label_dict$idx2item) #> [[1]] #> b'0' #> #> [[2]] #> b'1' print(label_dict$item2idx) #> $`b'0'` #> [1] 0 #> #> $`b'1'` #> [1] 1 classifier <- TextClassifier(document_embeddings, label_dictionary=label_dict, label_type='classification') classifier$to(flair_device(\"cpu\")) #> TextClassifier( #> (embeddings): TransformerDocumentEmbeddings( #> (model): DistilBertModel( #> (embeddings): Embeddings( #> (word_embeddings): Embedding(30523, 768) #> (position_embeddings): Embedding(512, 768) #> (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) #> (dropout): Dropout(p=0.1, inplace=False) #> ) #> (transformer): Transformer( #> (layer): ModuleList( #> (0-5): 6 x TransformerBlock( #> (attention): MultiHeadSelfAttention( #> (dropout): Dropout(p=0.1, inplace=False) #> (q_lin): Linear(in_features=768, out_features=768, bias=True) #> (k_lin): Linear(in_features=768, out_features=768, bias=True) #> (v_lin): Linear(in_features=768, out_features=768, bias=True) #> (out_lin): Linear(in_features=768, out_features=768, bias=True) #> ) #> (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) #> (ffn): FFN( #> 
(dropout): Dropout(p=0.1, inplace=False) #> (lin1): Linear(in_features=768, out_features=3072, bias=True) #> (lin2): Linear(in_features=3072, out_features=768, bias=True) #> (activation): GELUActivation() #> ) #> (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) #> ) #> ) #> ) #> ) #> ) #> (decoder): Linear(in_features=768, out_features=2, bias=True) #> (dropout): Dropout(p=0.0, inplace=False) #> (locked_dropout): LockedDropout(p=0.0) #> (word_dropout): WordDropout(p=0.0) #> (loss_function): CrossEntropyLoss() #> ) trainer <- ModelTrainer(classifier, corpus) trainer$train('grand_standing_model', # output directory learning_rate=0.02, # learning rate: if batch_growth_annealing activates,lr should starts a bit higher. mini_batch_size=8L, # batch size anneal_with_restarts = TRUE, save_final_model=TRUE, max_epochs=10L) # Maximum number of epochs # import the performance metrics generated during the training process performance_df <- read.table(file = \"grand_standing/loss.tsv\", header = TRUE, sep = \"\\t\") head(performance_df) #> EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL #> 1 1 13:07:11 0.02 0.6071 0.6314 0.7250 0.7250 #> 2 2 13:11:19 0.02 0.4509 0.6139 0.7393 0.7393 #> 3 3 13:21:47 0.02 0.3294 0.6228 0.7464 0.7464 #> 4 4 13:25:03 0.02 0.2513 0.6628 0.7393 0.7393 #> 5 5 13:28:10 0.02 0.1109 0.6920 0.7429 0.7429 #> 6 6 13:31:16 0.02 0.0553 0.7023 0.7429 0.7429 #> DEV_F1 DEV_ACCURACY #> 1 0.7250 0.7250 #> 2 0.7393 0.7393 #> 3 0.7464 0.7464 #> 4 0.7393 0.7393 #> 5 0.7429 0.7429 #> 6 0.7429 0.7429 library(ggplot2) ggplot(performance_df, aes(x = EPOCH)) + geom_line(aes(y = TRAIN_LOSS, color = \"Training Loss\")) + geom_line(aes(y = DEV_LOSS, color = \"Development Loss\")) + geom_line(aes(y = DEV_RECALL, color = \"Development Recall\")) + geom_line(aes(y = DEV_F1, color = \"Development F1\")) + labs(title = \"Training and Development Loss per Epoch\", x = \"Epochs / Grandstanding Classifier\", y = \"\") + 
scale_color_manual(\"\", values = c(\"Training Loss\" = \"blue\", \"Development Loss\" = \"red\", \"Development F1\" = \"green\"))+ theme_minimal() Results: - F-score (micro) 0.7443 - F-score (macro) 0.7438 - Accuracy 0.7443 By class: precision recall f1-score support 1 0.6781 0.8519 0.7551 324 0 0.8362 0.6516 0.7324 376 accuracy 0.7443 700 macro avg 0.7572 0.7517 0.7438 700 weighted avg 0.7630 0.7443 0.7429 700 # load the trained model data(statements) Sentence <- flair_data()$Sentence text <- statements[1, \"Statement\"] sentence <- Sentence(text) classifier$predict(sentence) print(sentence) #> Sentence[55]: \"Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes!\" → 1 (0.6431) sentence$labels[[1]]$value #> [1] \"1\" sentence$labels[[1]]$score #> [1] 0.6430542 Sentence <- flair_data()$Sentence TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier$load('grand_standing/best-model.pt') classify_text <- function(text, classifier) { # Classifies the given text using the specified Flair classifier. # # Args: # text (str): The text to be classified. # classifier (TextClassifier): The Flair classifier to use for prediction. # # Returns: # list: A list containing the predicted class label and score as strings. sentence <- Sentence(text) classifier$predict(sentence) return(list (labels = sentence$labels[[1]]$value, score = as.character(sentence$labels[[1]]$score))) } data(statements) print(statements) #> Type #> 1 Dramatic Appeal to Emotion #> 2 Exaggerated Praise for a Local Issue #> 3 Over-Simplified Solution to Complex Issue #> 4 Personal Anecdote Over Policy #> 5 Blaming Political Opponents #> Statement #> 1 Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! 
We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes! #> 2 I want to bring attention to the extraordinary achievement of the Smallville High School baseball team. Their victory is not just a win for Smallville, but a symbol of hope for our nation! This is what true American spirit looks like! #> 3 The solution to our nation's economic struggles is simple: cut taxes. That's it. Cut them. The economy will skyrocket like never before. Why complicate things when the answer is right there in front of us? #> 4 I remember, back in my hometown, old Mr. Jenkins used to say, 'If it ain't broke, don't fix it.' That's exactly how I feel about our current healthcare system. We don't need reform; we just need good, old-fashioned common sense. #> 5 Every problem we face today can be traced back to the disastrous policies of the other party. They are the reason we are in this mess, and until we recognize that, we cannot move forward as a nation. for (i in seq_along(statements$Statement) ) { out_come <- classify_text(statements$Statement[[i]], classifier) statements[i, 'predicted_labels'] <- out_come[[1]] statements[i, 'prop_score'] <- out_come[[2]] } statements[c(\"Type\", \"predicted_labels\", \"prop_score\")] #> Type predicted_labels prop_score #> 1 Dramatic Appeal to Emotion 1 0.998062312602997 #> 2 Exaggerated Praise for a Local Issue 1 0.985962450504303 #> 3 Over-Simplified Solution to Complex Issue 1 0.967254757881165 #> 4 Personal Anecdote Over Policy 1 0.998513281345367 #> 5 Blaming Political Opponents 1 0.999097466468811"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"performing-nlp-tasks-in-r","dir":"","previous_headings":"Introduction","what":"Performing NLP Tasks in R","title":"An R Wrapper for Accessing Flair NLP Library","text":"Flair NLP also provides set functions perform NLP tasks, named entity recognition, sentiment analysis, part--speech tagging. 
First, load data model perform NER task text . Yesterday, Dr. Jane Smith spoke United Nations New York. discussed climate change impact global economies. event attended representatives various countries including France Japan. Dr. Smith mentioned 2050, world see rise sea level approximately 2 feet. World Health Organization () pledged $50 million combat health effects global warming. interview New York Times, Dr. Smith emphasized urgent need action. Later day, flew back London, arriving 10:00 PM GMT. Alternatively, expanded features flaiR can used perform extract features sentence object tidy format. named entity recognition transformer-based sentiment analysis part--speech tagging example, can use get_entities function load_tagger_ner(\"ner\")flaiR extract named entities sentence object tidy format. cases, need extract named entities large corpus. example, can use Stefan’s data Temporal Focus Campaign Communication (JOP 2022) example. addition, handle load RAM dealing larger corpus, {flairR} supports batch processing handle texts batches, especially useful dealing large datasets, optimize memory usage performance. implementation batch processing can also utilize GPU acceleration faster computations.","code":"Classifier <- flair_nn()$Classifier Sentence <- flair_data()$Sentence # load the model flair NLP already trained for us tagger <- Classifier$load('ner') #> 2023-11-29 09:32:46,364 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # make a sentence object text <- \"Yesterday, Dr. Jane Smith spoke at the United Nations in New York. She discussed climate change and its impact on global economies. The event was attended by representatives from various countries including France and Japan. Dr. Smith mentioned that by 2050, the world could see a rise in sea level by approximately 2 feet. 
The World Health Organization (WHO) has pledged $50 million to combat the health effects of global warming. In an interview with The New York Times, Dr. Smith emphasized the urgent need for action. Later that day, she flew back to London, arriving at 10:00 PM GMT.\" sentence <- Sentence(text) # predict NER tags tagger$predict(sentence) # print sentence with predicted tags print(sentence) #> Sentence[115]: \"Yesterday, Dr. Jane Smith spoke at the United Nations in New York. She discussed climate change and its impact on global economies. The event was attended by representatives from various countries including France and Japan. Dr. Smith mentioned that by 2050, the world could see a rise in sea level by approximately 2 feet. The World Health Organization (WHO) has pledged $50 million to combat the health effects of global warming. In an interview with The New York Times, Dr. Smith emphasized the urgent need for action. Later that day, she flew back to London, arriving at 10:00 PM GMT.\" → [\"Jane Smith\"/PER, \"United Nations\"/ORG, \"New York\"/LOC, \"France\"/LOC, \"Japan\"/LOC, \"Smith\"/PER, \"World Health Organization\"/ORG, \"WHO\"/ORG, \"The New York Times\"/ORG, \"Smith\"/PER, \"London\"/LOC, \"GMT\"/MISC] tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 09:32:49,075 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(text = text, doc_ids = \"example text\", tagger_ner) print(results) #> doc_id entity tag #> 1: example text Jane Smith PER #> 2: example text United Nations ORG #> 3: example text New York LOC #> 4: example text France LOC #> 5: example text Japan LOC #> 6: example text Smith PER #> 7: example text World Health Organization ORG #> 8: example text WHO ORG #> 9: example text The New York Times ORG #> 10: example text Smith PER #> 11: example text London LOC #> 12: example text GMT MISC 
library(flaiR) data(cc_muller) examples <- head(cc_muller, 10) examples[c(\"text\", \"countryname\")] #> # A tibble: 10 × 2 #> text countryname #> #> 1 And to boost the housing we need, we will start to build a new g… United Kin… #> 2 In many cases, their value to society in economic, social and en… Ireland #> 3 However, requests for Standing Order 31 adjournments of Dáil bus… Ireland #> 4 We will work with the Pig Industry Stakeholder group to enhance … Ireland #> 5 The legacy of the Celtic Tiger includes 'ghost' housing estates,… Ireland #> 6 We must not allow ISIS to hold a safe haven from which it can pu… Canada #> 7 The declaration of the G20 as the premier forum for internationa… Australia #> 8 This funding represents the next instalment (Round Five, Phase O… Australia #> 9 We'll provide free after-school care and holiday programmes for … New Zealand #> 10 This will properly manage the adverse environmental effects of a… New Zealand tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 09:32:51,701 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(text = examples$text, doc_ids = examples$countryname, tagger_ner) print(results) #> doc_id entity tag #> 1: United Kingdom #> 2: Ireland #> 3: Ireland Dáil ORG #> 4: Ireland Order of Business ORG #> 5: Ireland Standing Orders MISC #> 6: Ireland Pig Industry Stakeholder ORG #> 7: Ireland Celtic Tiger ORG #> 8: Canada ISIS ORG #> 9: Australia G20 ORG #> 10: Australia Round Five MISC #> 11: Australia Phase One MISC #> 12: Australia Rudd Labor Government ORG #> 13: New Zealand OSCAR ORG #> 14: New Zealand Exclusive Economic Zone MISC"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"contribution-and-open-source","dir":"","previous_headings":"","what":"Contribution and Open Source","title":"An R Wrapper for Accessing Flair NLP Library","text":"{flaiR} maintained 
developed David Liao friends. R developers want contribute {flaiR} welcome – {flaiR} open source project. warmly invite R users share similar interests join contributing package. Please feel free shoot email collaborate task. Contributions – whether comments, code suggestions, tutorial examples, forking repository – greatly appreciated. Please note flaiR released Contributor Code Conduct. contributing project, agree abide terms. primary communication channel R users can found . Please feel free share insights Discussion page report issues related R interface Issue section. issue pertains actual implementation Flair Python, please submit pull request offical flair NLP.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"replication data sourced \"Temporal Focus Campaign Communication,\" authored Stefan Müller, published Journal Politics 2022. study primarily delves temporal emphasis party manifestos. dataset encompasses 5,858 annotated data entries countries United Kingdom, Ireland, Canada, Australia, New Zealand, United States. central objective compute percentage sentences quasi-sentences referring past, present, future. 
differentiation made based two categories: \"Prospective\" \"Retrospective\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"","code":"data(\"cc_muller\")"},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"data frame 7 variables: text Content text. sentence_id Unique identifier sentence. countryname Country's name. party Associated political party text. date Date record. class Type classification. class_pro_retro Classification either 'Prospective' 'Retrospective'.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"Data provided author https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/7NP2XH","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"","code":"if (FALSE) { data(cc_muller) head(cc_muller) }"},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform Garbage Collection Based on Condition — check_and_gc","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"function checks value gc.active determine whether perform garbage collection. 
gc.active TRUE, function perform garbage collection send message indicating completion process.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"","code":"check_and_gc(gc.active)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"gc.active logical value indicating whether activate garbage collection.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"message indicating garbage collection performed gc.active TRUE. Otherwise, action taken message displayed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Specified Batch Size — check_batch_size","title":"Check the Specified Batch Size — check_batch_size","text":"Validates given batch size positive integer.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Specified Batch Size — check_batch_size","text":"","code":"check_batch_size(batch_size)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Specified Batch Size — check_batch_size","text":"batch_size Integer. 
batch size checked.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Device for Accelerating PyTorch — check_device","title":"Check the Device for Accelerating PyTorch — check_device","text":"function verifies specified device available PyTorch. CUDA available, message shown. Additionally, system running Mac M1, MPS used instead CUDA. Checks specified device compatible current system's hardware operating system configuration, particularly Mac systems Apple M1/M2 silicon using Metal Performance Shaders (MPS).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Device for Accelerating PyTorch — check_device","text":"","code":"check_device(device)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Device for Accelerating PyTorch — check_device","text":"device character string specifying device type.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the Device for Accelerating PyTorch — check_device","text":"PyTorch device object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Check the Device for Accelerating PyTorch — check_device","text":"MPS available system meets requirements, device type MPS returned. Otherwise, CPU device used. 
requirements using MPS follows:\\cr Mac computers Apple silicon AMD GPUs\\cr macOS 12.3 later\\cr Python 3.7 later\\cr Xcode command-line tools installed (xcode-select --install)\\cr information : https://developer.apple.com/metal/pytorch/.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":null,"dir":"Reference","previous_headings":"","what":"Check Flair — check_flair_installed","title":"Check Flair — check_flair_installed","text":"Determines Flair Python module available current Python environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check Flair — check_flair_installed","text":"","code":"check_flair_installed(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check Flair — check_flair_installed","text":"Logical. TRUE Flair installed, otherwise FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Given Language Models against Supported Languages Models — check_language_supported","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"function checks whether provided language supported. 
, stops execution returns message indicating supported languages.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"","code":"check_language_supported(language, supported_lan_models)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"language language check. supported_lan_models vector supported languages.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"function return anything, stops execution check fails.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"","code":"# Assuming 'en' is a supported language and 'abc' is not: check_language_supported(\"en\", c(\"en\", \"de\", \"fr\")) # check_language_supported(\"abc\", c(\"en\", \"de\", \"fr\")) # will stop execution"},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":null,"dir":"Reference","previous_headings":"","what":"Check Environment Pre-requisites — check_prerequisites","title":"Check Environment Pre-requisites — check_prerequisites","text":"function checks Python installed, flair module available Python, active internet 
connection.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check Environment Pre-requisites — check_prerequisites","text":"","code":"check_prerequisites(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check Environment Pre-requisites — check_prerequisites","text":"... passing additional arguments.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check Environment Pre-requisites — check_prerequisites","text":"message detailing missing pre-requisites.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":null,"dir":"Reference","previous_headings":"","what":"Check for Available Python Installation — check_python_installed","title":"Check for Available Python Installation — check_python_installed","text":"function checks environment installed R system.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check for Available Python Installation — check_python_installed","text":"","code":"check_python_installed(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check for Available Python Installation — check_python_installed","text":"... param run.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check for Available Python Installation — check_python_installed","text":"Logical. 
TRUE Python installed, FALSE otherwise. Additionally, installed, path Python installation printed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the show.text_id Parameter — check_show.text_id","title":"Check the show.text_id Parameter — check_show.text_id","text":"Validates given show.text_id logical value.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the show.text_id Parameter — check_show.text_id","text":"","code":"check_show.text_id(show.text_id)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the show.text_id Parameter — check_show.text_id","text":"show.text_id Logical. parameter checked.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the texts and document IDs — check_texts_and_ids","title":"Check the texts and document IDs — check_texts_and_ids","text":"Validates given texts document IDs NULL empty.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the texts and document IDs — check_texts_and_ids","text":"","code":"check_texts_and_ids(texts, doc_ids)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the texts and document IDs — check_texts_and_ids","text":"texts List. list texts. doc_ids List. 
list document IDs.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":null,"dir":"Reference","previous_headings":"","what":"Clear Flair Cache — clear_flair_cache","title":"Clear Flair Cache — clear_flair_cache","text":"function clears cache associated Flair Python library. cache directory typically located \"~/.flair\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clear Flair Cache — clear_flair_cache","text":"","code":"clear_flair_cache(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clear Flair Cache — clear_flair_cache","text":"... argument passed next.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clear Flair Cache — clear_flair_cache","text":"Returns NULL invisibly. 
Messages printed indicating whether cache found cleared.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Clear Flair Cache — clear_flair_cache","text":"","code":"if (FALSE) { clear_flair_cache() }"},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":null,"dir":"Reference","previous_headings":"","what":"German Bundestag Immigration Debate Data — de_immigration","title":"German Bundestag Immigration Debate Data — de_immigration","text":"dataset containing speeches debates German Bundestag topic immigration.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"German Bundestag Immigration Debate Data — de_immigration","text":"","code":"data(\"de_immigration\")"},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"German Bundestag Immigration Debate Data — de_immigration","text":"data frame 16 variables: date Date speech, Date type agenda Agenda subject speech, character speechnumber Unique identifier speech, numeric speaker Name person giving speech, character party Political party speaker, character party.facts.id ID party, usually numeric character chair Person chairing session, character terms Terms tags associated speech, character list text Actual text speech, character parliament Bundestag session, character numeric iso3country ISO3 country code Germany, character year Year speech made, numeric agenda_ID Unique identifier agenda, usually numeric character migration_dummy Dummy variable related migration topic, usually numeric (0 1) comment_agenda Additional comments agenda, 
character","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"German Bundestag Immigration Debate Data — de_immigration","text":"Data collected ParSpeechV2 House Commons year 2010. dataset publicly available https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"German Bundestag Immigration Debate Data — de_immigration","text":"","code":"if (FALSE) { data(de_immigration) head(de_immigration) }"},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":null,"dir":"Reference","previous_headings":"","what":"Install Python Dependencies and Load the flaiRnlp — .onAttach","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":".onAttach sets virtual environment, checks Python availability, ensures 'flair' module installed flair_env Python.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"","code":".onAttach(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"... character string specifying name virtual environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"function performs following steps: Checks virtual environment specified venv exists. 
, creates environment. Activates virtual environment. Checks availability Python. Python available, displays error message. Checks 'flair' Python module available virtual environment. , attempts install 'flair'. installation fails, prompts user install 'flair' manually.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert Embeddings to Matrix — embeddings_to_matrix","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"function takes three-dimensional array embeddings converts two-dimensional matrix based specified strategy.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"","code":"embeddings_to_matrix(embeddings, strategy = \"average\")"},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"embeddings three-dimensional array shape (number_of_texts, number_of_words, embedding_dimension). strategy character string specifying strategy use. 
Options \"average\", \"concatenate\", \"max_pooling\", \"min_pooling\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"two-dimensional matrix transformed embeddings.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"","code":"if (FALSE) { embeddings <- array(runif(10 * 5 * 3), c(10, 5, 3)) result <- embeddings_to_matrix(embeddings, strategy = \"average\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a Flair Sentence — flair_data.Sentence","title":"Create a Flair Sentence — flair_data.Sentence","text":"Flair powerful NLP framework leverages state---art embeddings various natural language processing tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a Flair Sentence — flair_data.Sentence","text":"","code":"flair_data.Sentence(sentence_text)"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a Flair Sentence — flair_data.Sentence","text":"sentence_text character string converted Flair Sentence object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a Flair Sentence — flair_data.Sentence","text":"Flair Sentence 
object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"note","dir":"Reference","previous_headings":"","what":"Note","title":"Create a Flair Sentence — flair_data.Sentence","text":"Ensure input string language compatible intended Flair model. R, processing multiple text, can use purrr basic R functions lapply sapply.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Create a Flair Sentence — flair_data.Sentence","text":"Python equivalent:","code":"from flair.data import Sentence sentence = Sentence(\"The quick brown fox jumps over the lazy dog.\")"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a Flair Sentence — flair_data.Sentence","text":"","code":"if (FALSE) { flair_data.Sentence(\"The quick brown fox jumps over the lazy dog.\")}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.data Module — flair_data","title":"Import flair.data Module — flair_data","text":"flair.data module provides essential utilities text data processing representation Flair library. function gives access various classes utilities flair.data module, notably: BoundingBox(left, top, right, bottom): Bases: tuple (Python); list (R) left - str. Alias field number 0. top - int Alias field number 1 right - int Alias field number 2 bottom - int Alias field number 3 Sentence(text, use_tokenizer=True, language_code=None, start_position=0):Sentence list tokens used represent sentence text fragment. Sentence can imported flair_data()$Sentence via flaiR. text Union[str, List[str], List[Token]] - original string (sentence), pre-tokenized list tokens. 
use_tokenizer Union[bool, Tokenizer] - Specify custom tokenizer split text tokens. default flair.tokenization.SegTokTokenizer. use_tokenizer set False, flair.tokenization.SpaceTokenizer used instead. tokenizer ignored text refers pre-tokenized tokens. language_code Optional[str] - Language sentence. provided, langdetect called language_code accessed first time. start_position int - Start character offset sentence superordinate document. Span(tokens, tag=None, score=1.0): Bases: _PartOfSentence. Span slice Sentence, consisting list Tokens. Span can imported flair_data()$Span. Token(text, head_id=None, whitespace_after=1, start_position=0, sentence=None): class represents one word tokenized sentence. token may number tags. may also point head dependency tree. Token can imported flair_data()$Token via flaiR. Corpus(train=None, dev=None, test=None, name='corpus', sample_missing_splits=True): Represents collection sentences, facilitating operations like splitting train/test/development sets applying transformations. particularly useful training evaluating models custom datasets. Corpus can imported flair_data()$Corpus via flaiR. Dictionary: Represents mapping items indices. useful converting text machine-readable formats.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.data Module — flair_data","text":"","code":"flair_data()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.data Module — flair_data","text":"Python module (flair.data). 
access classes utilities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.data Module — flair_data","text":"Python reference:","code":"from flair.data import Sentence"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.data Module — flair_data","text":"","code":"if (FALSE) { Sentence <- flair_data()$Sentence Token <- flair_data()$Token Corpus <- flair_data()$Corpus }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":null,"dir":"Reference","previous_headings":"","what":"Access the flair_datasets Module from Flair — flair_datasets","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Utilizes reticulate package import flair.datasets dataset Flair's datasets Python, enabling use dataset R environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"","code":"flair_datasets()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Python Module(flair.datasets) Flair, can utilized NLP tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Python equivalent:","code":"from flair.datasets import UD_ENGLISH corpus = 
UD_ENGLISH().downsample(0.1)"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"","code":"if (FALSE) { UD_ENGLISH <- flair_datasets()$UD_ENGLISH corpus <- UD_ENGLISH()$downsample(0.1)} # print all the datasets from flair names(flair_datasets()) #> [1] \"AMAZON_REVIEWS\" #> [2] \"ANAT_EM\" #> [3] \"AZDZ\" #> [4] \"BC2GM\" #> [5] \"BIOBERT_CHEMICAL_BC4CHEMD\" #> [6] \"BIOBERT_CHEMICAL_BC5CDR\" #> [7] \"BIOBERT_DISEASE_BC5CDR\" #> [8] \"BIOBERT_DISEASE_NCBI\" #> [9] \"BIOBERT_GENE_BC2GM\" #> [10] \"BIOBERT_GENE_JNLPBA\" #> [11] \"BIOBERT_SPECIES_LINNAEUS\" #> [12] \"BIOBERT_SPECIES_S800\" #> [13] \"BIONLP2013_CG\" #> [14] \"BIONLP2013_PC\" #> [15] \"BIOSCOPE\" #> [16] \"BIOSEMANTICS\" #> [17] \"BIO_INFER\" #> [18] \"CDR\" #> [19] \"CELL_FINDER\" #> [20] \"CEMP\" #> [21] \"CHEMDNER\" #> [22] \"CLL\" #> [23] \"COMMUNICATIVE_FUNCTIONS\" #> [24] \"CONLL_03\" #> [25] \"CONLL_03_DUTCH\" #> [26] \"CONLL_03_GERMAN\" #> [27] \"CONLL_03_SPANISH\" #> [28] \"CONLL_2000\" #> [29] \"CRAFT\" #> [30] \"CRAFT_V4\" #> [31] \"CSVClassificationCorpus\" #> [32] \"CSVClassificationDataset\" #> [33] \"ClassificationCorpus\" #> [34] \"ClassificationDataset\" #> [35] \"ColumnCorpus\" #> [36] \"ColumnDataset\" #> [37] \"DECA\" #> [38] \"DataLoader\" #> [39] \"DataPairCorpus\" #> [40] \"DataPairDataset\" #> [41] \"FEWNERD\" #> [42] \"FSU\" #> [43] \"FeideggerCorpus\" #> [44] \"FeideggerDataset\" #> [45] \"FlairDatapointDataset\" #> [46] \"GELLUS\" #> [47] \"GERMEVAL_2018_OFFENSIVE_LANGUAGE\" #> [48] \"GLUE_COLA\" #> [49] \"GLUE_MNLI\" #> [50] \"GLUE_MRPC\" #> [51] \"GLUE_QNLI\" #> [52] \"GLUE_QQP\" #> [53] \"GLUE_RTE\" #> [54] \"GLUE_SST2\" #> [55] \"GLUE_STSB\" #> [56] \"GLUE_WNLI\" #> [57] \"GO_EMOTIONS\" #> [58] \"GPRO\" #> [59] \"HUNER_CELL_LINE\" #> [60] \"HUNER_CELL_LINE_CELL_FINDER\" #> [61] 
\"HUNER_CELL_LINE_CLL\" #> [62] \"HUNER_CELL_LINE_GELLUS\" #> [63] \"HUNER_CELL_LINE_JNLPBA\" #> [64] \"HUNER_CHEMICAL\" #> [65] \"HUNER_CHEMICAL_CDR\" #> [66] \"HUNER_CHEMICAL_CEMP\" #> [67] \"HUNER_CHEMICAL_CHEBI\" #> [68] \"HUNER_CHEMICAL_CHEMDNER\" #> [69] \"HUNER_CHEMICAL_CRAFT_V4\" #> [70] \"HUNER_CHEMICAL_SCAI\" #> [71] \"HUNER_DISEASE\" #> [72] \"HUNER_DISEASE_CDR\" #> [73] \"HUNER_DISEASE_MIRNA\" #> [74] \"HUNER_DISEASE_NCBI\" #> [75] \"HUNER_DISEASE_PDR\" #> [76] \"HUNER_DISEASE_SCAI\" #> [77] \"HUNER_DISEASE_VARIOME\" #> [78] \"HUNER_GENE\" #> [79] \"HUNER_GENE_BC2GM\" #> [80] \"HUNER_GENE_BIO_INFER\" #> [81] \"HUNER_GENE_CELL_FINDER\" #> [82] \"HUNER_GENE_CHEBI\" #> [83] \"HUNER_GENE_CRAFT_V4\" #> [84] \"HUNER_GENE_DECA\" #> [85] \"HUNER_GENE_FSU\" #> [86] \"HUNER_GENE_GPRO\" #> [87] \"HUNER_GENE_IEPA\" #> [88] \"HUNER_GENE_JNLPBA\" #> [89] \"HUNER_GENE_LOCTEXT\" #> [90] \"HUNER_GENE_MIRNA\" #> [91] \"HUNER_GENE_OSIRIS\" #> [92] \"HUNER_GENE_VARIOME\" #> [93] \"HUNER_SPECIES\" #> [94] \"HUNER_SPECIES_CELL_FINDER\" #> [95] \"HUNER_SPECIES_CHEBI\" #> [96] \"HUNER_SPECIES_CRAFT_V4\" #> [97] \"HUNER_SPECIES_LINNEAUS\" #> [98] \"HUNER_SPECIES_LOCTEXT\" #> [99] \"HUNER_SPECIES_MIRNA\" #> [100] \"HUNER_SPECIES_S800\" #> [101] \"HUNER_SPECIES_VARIOME\" #> [102] \"IEPA\" #> [103] \"IMDB\" #> [104] \"JNLPBA\" #> [105] \"KEYPHRASE_INSPEC\" #> [106] \"KEYPHRASE_SEMEVAL2010\" #> [107] \"KEYPHRASE_SEMEVAL2017\" #> [108] \"LINNEAUS\" #> [109] \"LOCTEXT\" #> [110] \"MASAKHA_POS\" #> [111] \"MIRNA\" #> [112] \"MongoDataset\" #> [113] \"NCBI_DISEASE\" #> [114] \"NEL_ENGLISH_AIDA\" #> [115] \"NEL_ENGLISH_AQUAINT\" #> [116] \"NEL_ENGLISH_IITB\" #> [117] \"NEL_ENGLISH_REDDIT\" #> [118] \"NEL_ENGLISH_TWEEKI\" #> [119] \"NEL_GERMAN_HIPE\" #> [120] \"NER_ARABIC_ANER\" #> [121] \"NER_ARABIC_AQMAR\" #> [122] \"NER_BASQUE\" #> [123] \"NER_CHINESE_WEIBO\" #> [124] \"NER_DANISH_DANE\" #> [125] \"NER_ENGLISH_MOVIE_COMPLEX\" #> [126] \"NER_ENGLISH_MOVIE_SIMPLE\" #> [127] 
\"NER_ENGLISH_PERSON\" #> [128] \"NER_ENGLISH_RESTAURANT\" #> [129] \"NER_ENGLISH_SEC_FILLINGS\" #> [130] \"NER_ENGLISH_STACKOVERFLOW\" #> [131] \"NER_ENGLISH_TWITTER\" #> [132] \"NER_ENGLISH_WEBPAGES\" #> [133] \"NER_ENGLISH_WIKIGOLD\" #> [134] \"NER_ENGLISH_WNUT_2020\" #> [135] \"NER_FINNISH\" #> [136] \"NER_GERMAN_BIOFID\" #> [137] \"NER_GERMAN_EUROPARL\" #> [138] \"NER_GERMAN_GERMEVAL\" #> [139] \"NER_GERMAN_LEGAL\" #> [140] \"NER_GERMAN_MOBIE\" #> [141] \"NER_GERMAN_POLITICS\" #> [142] \"NER_HIPE_2022\" #> [143] \"NER_HUNGARIAN\" #> [144] \"NER_ICDAR_EUROPEANA\" #> [145] \"NER_ICELANDIC\" #> [146] \"NER_JAPANESE\" #> [147] \"NER_MASAKHANE\" #> [148] \"NER_MULTI_CONER\" #> [149] \"NER_MULTI_CONER_V2\" #> [150] \"NER_MULTI_WIKIANN\" #> [151] \"NER_MULTI_WIKINER\" #> [152] \"NER_MULTI_XTREME\" #> [153] \"NER_NERMUD\" #> [154] \"NER_SWEDISH\" #> [155] \"NER_TURKU\" #> [156] \"NER_UKRAINIAN\" #> [157] \"NEWSGROUPS\" #> [158] \"ONTONOTES\" #> [159] \"OSIRIS\" #> [160] \"OcrJsonDataset\" #> [161] \"OpusParallelCorpus\" #> [162] \"PDR\" #> [163] \"ParallelTextCorpus\" #> [164] \"ParallelTextDataset\" #> [165] \"RE_ENGLISH_CONLL04\" #> [166] \"RE_ENGLISH_DRUGPROT\" #> [167] \"RE_ENGLISH_SEMEVAL2010\" #> [168] \"RE_ENGLISH_TACRED\" #> [169] \"S800\" #> [170] \"SCAI_CHEMICALS\" #> [171] \"SCAI_DISEASE\" #> [172] \"SENTEVAL_CR\" #> [173] \"SENTEVAL_MPQA\" #> [174] \"SENTEVAL_MR\" #> [175] \"SENTEVAL_SST_BINARY\" #> [176] \"SENTEVAL_SST_GRANULAR\" #> [177] \"SENTEVAL_SUBJ\" #> [178] \"SENTIMENT_140\" #> [179] \"SROIE\" #> [180] \"STACKOVERFLOW\" #> [181] \"SUPERGLUE_RTE\" #> [182] \"SentenceDataset\" #> [183] \"StringDataset\" #> [184] \"TREC_50\" #> [185] \"TREC_6\" #> [186] \"UD_AFRIKAANS\" #> [187] \"UD_ANCIENT_GREEK\" #> [188] \"UD_ARABIC\" #> [189] \"UD_ARMENIAN\" #> [190] \"UD_BASQUE\" #> [191] \"UD_BELARUSIAN\" #> [192] \"UD_BULGARIAN\" #> [193] \"UD_CATALAN\" #> [194] \"UD_CHINESE\" #> [195] \"UD_COPTIC\" #> [196] \"UD_CROATIAN\" #> [197] \"UD_CZECH\" #> [198] 
\"UD_DANISH\" #> [199] \"UD_DUTCH\" #> [200] \"UD_ENGLISH\" #> [201] \"UD_ESTONIAN\" #> [202] \"UD_FAROESE\" #> [203] \"UD_FINNISH\" #> [204] \"UD_FRENCH\" #> [205] \"UD_GALICIAN\" #> [206] \"UD_GERMAN\" #> [207] \"UD_GERMAN_HDT\" #> [208] \"UD_GOTHIC\" #> [209] \"UD_GREEK\" #> [210] \"UD_HEBREW\" #> [211] \"UD_HINDI\" #> [212] \"UD_INDONESIAN\" #> [213] \"UD_IRISH\" #> [214] \"UD_ITALIAN\" #> [215] \"UD_JAPANESE\" #> [216] \"UD_KAZAKH\" #> [217] \"UD_KOREAN\" #> [218] \"UD_LATIN\" #> [219] \"UD_LATVIAN\" #> [220] \"UD_LITHUANIAN\" #> [221] \"UD_LIVVI\" #> [222] \"UD_MALTESE\" #> [223] \"UD_MARATHI\" #> [224] \"UD_NORTH_SAMI\" #> [225] \"UD_NORWEGIAN\" #> [226] \"UD_OLD_CHURCH_SLAVONIC\" #> [227] \"UD_OLD_FRENCH\" #> [228] \"UD_PERSIAN\" #> [229] \"UD_POLISH\" #> [230] \"UD_PORTUGUESE\" #> [231] \"UD_ROMANIAN\" #> [232] \"UD_RUSSIAN\" #> [233] \"UD_SERBIAN\" #> [234] \"UD_SLOVAK\" #> [235] \"UD_SLOVENIAN\" #> [236] \"UD_SPANISH\" #> [237] \"UD_SWEDISH\" #> [238] \"UD_TURKISH\" #> [239] \"UD_UKRAINIAN\" #> [240] \"UD_WOLOF\" #> [241] \"UP_CHINESE\" #> [242] \"UP_ENGLISH\" #> [243] \"UP_FINNISH\" #> [244] \"UP_FRENCH\" #> [245] \"UP_GERMAN\" #> [246] \"UP_ITALIAN\" #> [247] \"UP_SPANISH\" #> [248] \"UP_SPANISH_ANCORA\" #> [249] \"UniversalDependenciesCorpus\" #> [250] \"UniversalDependenciesDataset\" #> [251] \"VARIOME\" #> [252] \"WASSA_ANGER\" #> [253] \"WASSA_FEAR\" #> [254] \"WASSA_JOY\" #> [255] \"WASSA_SADNESS\" #> [256] \"WNUT_17\" #> [257] \"WSD_MASC\" #> [258] \"WSD_OMSTI\" #> [259] \"WSD_RAGANATO_ALL\" #> [260] \"WSD_SEMCOR\" #> [261] \"WSD_TRAINOMATIC\" #> [262] \"WSD_UFSAC\" #> [263] \"WSD_WORDNET_GLOSS_TAGGED\" #> [264] \"YAHOO_ANSWERS\" #> [265] \"ZELDA\" #> [266] \"base\" #> [267] \"biomedical\" #> [268] \"document_classification\" #> [269] \"entity_linking\" #> [270] \"ocr\" #> [271] \"relation_extraction\" #> [272] \"sequence_labeling\" #> [273] \"text_image\" #> [274] \"text_text\" #> [275] 
\"treebanks\""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":null,"dir":"Reference","previous_headings":"","what":"Set Flair Device — flair_device","title":"Set Flair Device — flair_device","text":"function sets device Flair Python library. allows set device use CPU, GPU (coda:0, coda:1, coda:3), specific MPS devices Mac (mps:0, mps:1, mps:2).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Set Flair Device — flair_device","text":"","code":"flair_device(device = \"cpu\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Set Flair Device — flair_device","text":"device character string specifying device. Valid options include: \"cpu\", \"cuda\", \"mps:0\", \"mps:1\", \"mps:2\", etc.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Set Flair Device — flair_device","text":"set device Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Set Flair Device — flair_device","text":"","code":"if (FALSE) { flair_device(\"cpu\") # Set device to CPU flair_device(\"cuda\") # Set device to GPU (if available) flair_device(\"mps:0\") # Set device to MPS device 0 (if available on Mac) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"function initializes Flair 
embeddings flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"","code":"flair_embeddings.FlairEmbeddings(embeddings_type = \"news-forward\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"embeddings_type character string specifying type embeddings initialize. Options include: \"news-forward\", \"news-backward\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"Flair embeddings class flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"Multi-Language Embeddings: multi-X: Supports 300+ languages, sourced JW300 corpus. JW300 corpus, proposed Agić Vulić (2019). corpus licensed CC--NC-SA. multi-X-fast: CPU-friendly version, trained mix corpora languages like English, German, French, Italian, Dutch, Polish. English Embeddings: 'news-X': Trained 1 billion word corpus 'news-X-fast': Trained 1 billion word corpus, CPU-friendly. 
'mix-X': Trained mixed corpus (Web, Wikipedia, Subtitles) 'pubmed-X': Added @jessepeng: Trained 5% PubMed abstracts 2015 (1150 hidden states, 3 layers) Specific Langauge Embeddings: 'de-X': German. Trained mixed corpus (Web, Wikipedia, Subtitles) de-historic-ha-X: German (historical). Added @stefan-: Historical German trained Hamburger Anzeiger. de-historic-wz-X: German (historical). Added @stefan-: Historical German trained Wiener Zeitung. de-historic-rw-X: German (historical). Added @redewiedergabe: Historical German trained 100 million tokens de-impresso-hipe-v1-X: -domain data CLEF HIPE Shared task. -domain data (Swiss Luxembourgish newspapers) CLEF HIPE Shared task. information shared task can found paper. '-X': Norwegian. Added @stefan-: Trained Wikipedia/OPUS. 'nl-X': Dutch. Added @stefan-: Trained Wikipedia/OPUS 'nl-v0-X': Dutch.Added @stefan-: LM embeddings (earlier version) 'ja-X': Japanese. Added @frtacoa: Trained 439M words Japanese Web crawls (2048 hidden states, 2 layers) 'ja-X': Japanese. Added @frtacoa: Trained 439M words Japanese Web crawls (2048 hidden states, 2 layers) 'fi-X': Finnish. Added @stefan-: Trained Wikipedia/OPUS. 'fr-X': French. Added @mhham: Trained French Wikipedia Japanese Web crawls (2048 hidden states, 2 layers) Domain-Specific Embeddings: 'es-clinical-': Spanish (clinical). Added @matirojasg: Trained Wikipedia 'pubmed-X':English. Added @jessepeng: Trained 5% PubMed abstracts 2015 (1150 hidden states, 3 layers) examples. Ensure reference correct embedding name details application. Replace 'X' either 'forward' 'backward'. 
comprehensive list embeddings, please refer : Flair Embeddings Documentation.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"FlairEmbeddings Flair Python library. Python example usage:","code":"from flair.embeddings import FlairEmbeddings flair_embedding_forward = FlairEmbeddings('news-forward') flair_embedding_backward = FlairEmbeddings('news-backward')"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"","code":"if (FALSE) { flair_embedding_forward <- flair_embeddings.FlairEmbeddings(\"news-forward\") flair_embedding_backward <- flair_embeddings.FlairEmbeddings(\"news-backward\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"Creates stacked embedding instance using multiple Flair embeddings.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for StackedEmbeddings — 
flair_embeddings.StackedEmbeddings","text":"","code":"flair_embeddings.StackedEmbeddings(embeddings_list)"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"embeddings_list list containing Flair embedding instances.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"instance StackedEmbeddings flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"function ensures embedding provided list recognized Flair embedding. 
embeddings list recognized, function throw error.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"","code":"if (FALSE) { glove_embedding <- flair_embeddings.WordEmbeddings(\"glove\") fasttext_embedding <- flair_embeddings.WordEmbeddings(\"fasttext\") stacked_embedding <- flair_embeddings.StackedEmbeddings(list(glove_embedding, fasttext_embedding)) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"function interfaces Python via reticulate create flair_embeddings.TransformerDocumentEmbeddings object flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"","code":"flair_embeddings.TransformerDocumentEmbeddings( model = \"bert-base-uncased\", layers = \"all\", subtoken_pooling = \"mean\", fine_tune = FALSE, allow_long_sentences = TRUE, memory_efficient = NULL, use_context = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"model character string 
specifying pre-trained model use. Defaults 'bert-base-uncased'. name transformer model, e.g., \"bert-base-uncased\", \"gpt2-medium\", etc. can also path pre-trained model. layers (Optional) Layers transformer model use. string specifies layers transformer model use. BERT, can specify multiple like \"1,2,3\" single layers 1. layers argument controls transformer layers used embedding. set value '-1,-2,-3,-4', top 4 layers used make embedding. set '-1', last layer used. set \"\", layers used. subtoken_pooling (Optional) Method pooling handle subtokens. determines subtokens (word pieces) pooled one embedding original token. Options 'first' (use first subtoken), 'last' (use last subtoken), 'first_last' (concatenate first last subtokens), 'mean' (average subtokens). fine_tune Logical. Indicates fine-tuning done. Defaults FALSE. allow_long_sentences Logical. Allows longer sentences processed. Defaults TRUE. certain transformer models (like BERT), maximum sequence length. default, Flair cuts sentences \\ long. option set True, Flair split long sentences smaller parts later average embeddings. memory_efficient (Optional) Enables memory efficient mode transformers. set TRUE, uses less memory, might slower. use_context Logical. Whether consider surrounding context processing step. 
Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"Flair TransformerWordEmbeddings Python class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"function provides interface R users easily access utilize power Flair's TransformerDocumentEmbeddings. bridges gap Python's Flair library R, enabling R users leverage state---art NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"Python's Flair library:","code":"from flair.embeddings import TransformerDocumentEmbeddings embedding = TransformerDocumentEmbeddings('bert-base-uncased')"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.TransformerDocumentEmbeddings(\"bert-base-uncased\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for 
TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"function interfaces Python via reticulate create TransformerWordEmbeddings object object flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"","code":"flair_embeddings.TransformerWordEmbeddings( model = \"bert-base-uncased\", layers = \"all\", subtoken_pooling = \"mean\", fine_tune = FALSE, allow_long_sentences = TRUE, memory_efficient = NULL, use_context = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"model character string specifying pre-trained model use. Defaults 'bert-base-uncased'. name transformer model, e.g., \"bert-base-uncased\", \"gpt2-medium\", etc. can also path pre-trained model. layers (Optional) Layers transformer model use. string specifies layers transformer model use. BERT, can specify multiple like \"1,2,3\" single layers 1. layers argument controls transformer layers used embedding. set value '-1,-2,-3,-4', top 4 layers used make embedding. set '-1', last layer used. set \"\", layers used. subtoken_pooling (Optional) Method pooling handle subtokens. determines subtokens (word pieces) pooled one embedding original token. Options 'first' (use first subtoken), 'last' (use last subtoken), 'first_last' (concatenate first last subtokens), 'mean' (average subtokens). fine_tune Logical. Indicates fine-tuning done. Defaults FALSE. 
allow_long_sentences Logical. Allows longer sentences processed. Defaults TRUE. certain transformer models (like BERT), maximum sequence length. default, Flair cuts sentences long. option set True, Flair split long sentences smaller parts later average embeddings. memory_efficient (Optional) Enables memory efficient mode transformers. set TRUE, uses less memory, might slower. use_context Logical. Whether consider surrounding context processing step. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"Flair TransformerWordEmbeddings Python class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"function provides interface R users easily access utilize power Flair's TransformerWordEmbeddings. 
bridges gap Python's Flair library R, enabling R users leverage state---art NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"Python equivalent:","code":"from flair.embeddings import TransformerWordEmbeddings embedding = TransformerWordEmbeddings('bert-base-uncased')"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.TransformerWordEmbeddings(\"bert-base-uncased\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"function interfaces Python via reticulate create WordEmbeddings object using Flair library. Users select pre-trained embeddings load providing appropriate ID string. Typically, two-letter language code initializes embedding (e.g., 'en' English, 'de' German). default, loads FastText embeddings trained Wikipedia. web crawl embeddings, use '-crawl' suffix (e.g., 'de-crawl' German). English offers options like 'en-glove', 'en-extvec', etc. 
Supported embeddings include: 'en-glove' 'glove': English GloVe embeddings 'en-extvec' 'extvec': English Komninos embeddings 'en-crawl' 'crawl': English FastText web crawl embeddings 'en-twitter' 'twitter': English Twitter embeddings 'en-turian' 'turian': English Turian embeddings (small) 'en', 'en-news', 'news': English FastText news Wikipedia embeddings 'de': German FastText embeddings 'nl': Dutch FastText embeddings 'fr': French FastText embeddings '': Italian FastText embeddings 'es': Spanish FastText embeddings 'pt': Portuguese FastText embeddings 'ro': Romanian FastText embeddings 'ca': Catalan FastText embeddings 'sv': Swedish FastText embeddings 'da': Danish FastText embeddings '': Norwegian FastText embeddings 'fi': Finnish FastText embeddings 'pl': Polish FastText embeddings 'cz': Czech FastText embeddings 'sk': Slovak FastText embeddings 'sl': Slovenian FastText embeddings 'sr': Serbian FastText embeddings 'hr': Croatian FastText embeddings 'bg': Bulgarian FastText embeddings 'ru': Russian FastText embeddings 'ar': Arabic FastText embeddings '': Hebrew FastText embeddings 'tr': Turkish FastText embeddings 'fa': Persian FastText embeddings 'ja': Japanese FastText embeddings 'ko': Korean FastText embeddings 'zh': Chinese FastText embeddings 'hi': Hindi FastText embeddings 'id': Indonesian FastText embeddings 'eu': Basque FastText embeddings example, load German FastText embeddings, use 'de' embeddings parameter.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"","code":"flair_embeddings.WordEmbeddings(embeddings = \"glove\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for 
Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"embeddings type pre-trained embeddings use. Defaults \"glove\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"Flair WordEmbeddings class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"Python equivalent:","code":"from flair.embeddings import WordEmbeddings embedding = WordEmbeddings('glove')"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.WordEmbeddings(\"glove\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initialization of Flair Embeddings Modules — flair_embeddings","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"function provides interface R users access utilize flair.embeddings module Flair NLP library. Flair's embedding functionalities offer various state---art embeddings crucial natural language processing tasks. using function, R users can seamlessly incorporate advanced embeddings NLP workflows without delving deep Python. 
Essentially, function acts bridge R's ecosystem Flair's rich embedding capabilities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"","code":"flair_embeddings()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"flair.embeddings module Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"function allows R users access following Flair embeddings modules: FlairEmbeddings Contextual string embeddings capturing latent syntactic-semantic information beyond standard word embeddings. WordEmbeddings Classic word embeddings like GloVe FastText. TransformerWordEmbeddings Word embeddings transformer models BERT, RoBERTa, etc. TransformerDocumentEmbeddings Transformer-based embeddings entire documents sentences. StackedEmbeddings Combines multiple embeddings richer representation. DocumentPoolEmbeddings Provides single embedding vector entire document based chosen operation mode (mean, max, etc.). BytePairEmbeddings Embeddings based Byte-Pair Encoding (BPE) mechanism used subword tokenization. ELMoEmbeddings Deep contextual embeddings derived internal state pretrained bidirectional LSTM. embedding type offers unique features suitable various NLP tasks. 
understanding differences capabilities, R users can select appropriate embeddings enhance NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"Python's Flair library:","code":"from flair.embeddings import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"","code":"if (FALSE) { library(flaiR) # Initialize FlairEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings embedding <- FlairEmbeddings('news-forward') } if (FALSE) { # Initialize WordEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings embedding <- WordEmbeddings('glove') } if (FALSE) { # Initialize TransformerWordEmbeddings TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') } if (FALSE) { # Initialize TransformerDocumentEmbeddings TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings embedding <- TransformerDocumentEmbeddings('bert-base-uncased') } if (FALSE) { # Initialize StackedEmbeddings StackedEmbeddings <- flair_embeddings()$StackedEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings stacked_embeddings <- StackedEmbeddings( list(WordEmbeddings('glove'), FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ) ) } if (FALSE) { # Initialize DocumentPoolEmbeddings DocumentPoolEmbeddings <- flair_embeddings()$DocumentPoolEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings doc_embeddings <- DocumentPoolEmbeddings(list(WordEmbeddings('glove'))) 
}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":null,"dir":"Reference","previous_headings":"","what":"Access Flair's SequenceTagger — flair_models.Sequencetagger","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"function utilizes reticulate package import SequenceTaggers Flair's models Python, enabling interaction Flair's sequence tagging models R environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"","code":"flair_models.Sequencetagger()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"Python module (SequenceTagger) Flair, can utilized load use sequence tagging models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"function take parameters directly returns SequenceTagger called, can used sequence tagging tasks using pre-trained models Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"Python equivalent:","code":"from flair.models import SequenceTagger"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Access Flair's SequenceTagger — 
flair_models.Sequencetagger","text":"","code":"if (FALSE) { sequence_tagger <- flair_models.sequencetagger() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"function utilizes reticulate package directly import TextClassifier flair.models Flair NLP Python library. Ensure Python environment properly set Flair package installed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"","code":"flair_models.TextClassifier()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"Python class representing flair.models.TextClassifier.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"Python equivalent:","code":"from flair.models import TextClassifier"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"","code":"# Load the TextClassifier TextClassifier <- flair_models.TextClassifier() # Load a pre-trained sentiment model classifier <- TextClassifier$load('sentiment') # Create a sentence object 
Sentence <- flair_data()$Sentence sentence <- Sentence(\"Flair is pretty neat!\") # Predict the sentiment classifier$predict(sentence) # Display the sentiment print(sentence$get_labels()) #> [[1]] #> 'Sentence[5]: \"Flair is pretty neat!\"'/'POSITIVE' (0.9997) #>"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":null,"dir":"Reference","previous_headings":"","what":"Import the flair.models Python module — flair_models","title":"Import the flair.models Python module — flair_models","text":"function imports flair.models module Flair NLP Python library, providing access several powerful models tailored NLP tasks. three primary methods available module: TextClassifier: method represents neural network model designed text classification tasks. Given piece text, predicts class label category. instance, can used classify movie reviews positive negative. SequenceTagger: Tailored tasks like Named Entity Recognition (NER) Part--Speech (POS) tagging, method annotates sequences words. NER, tag entities sentence locations, persons, organizations. POS tagging, can label word sentence grammatical role like noun, verb, adjective, etc. LanguageModel: method represents model trained predict next word sequence, making powerful tasks like text generation completion. 
learns statistical properties structure language, can base transfer learning NLP tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import the flair.models Python module — flair_models","text":"","code":"flair_models()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import the flair.models Python module — flair_models","text":"Python module object representing flair.models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import the flair.models Python module — flair_models","text":"Python equivalent:","code":"from flair.models import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair Classifier — flair_nn.Classifier","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"function interfaces Python via reticulate package create Classifier object Flair library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"","code":"flair_nn.Classifier()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"Flair Classifier class 
instance.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"Python equivalent:","code":"from flair.nn import Classifier"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"","code":"if (FALSE) { classifier <- flair_nn.Classifier() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":null,"dir":"Reference","previous_headings":"","what":"Import Flair's Neural Network Module — flair_nn","title":"Import Flair's Neural Network Module — flair_nn","text":"function provides interface flair.nn module Flair library. flair.nn module encompasses various sub-modules : decoder distance dropout loss model multitask recurrent Model Classifier PrototypicalDecoder LockedDropout WordDropout","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import Flair's Neural Network Module — flair_nn","text":"","code":"flair_nn()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import Flair's Neural Network Module — flair_nn","text":"reference Flair's neural network module (flair.nn).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import Flair's Neural Network Module — flair_nn","text":"","code":"if (FALSE) { flair_nn_module <- flair_nn() Classifier <- flair_nn_module$Classifier 
}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":null,"dir":"Reference","previous_headings":"","what":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"Interface Python flair.splitter module utilize SegtokSentenceSplitter class/method.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"","code":"flair_splitter.SegtokSentenceSplitter()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"instance Python class SegtokSentenceSplitter flair.splitter module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"Python equivalent:","code":"from flair.splitter import SegtokSentenceSplitter"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"","code":"if (FALSE) { splitter <- flair_splitter.SegtokSentenceSplitter() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.splitter Module in R — flair_splitter","title":"Import flair.splitter Module in R — flair_splitter","text":"function 
interface Python flair.splitter module. function provides access various sentence splitting strategies implemented Flair library: NoSentenceSplitter: Treats entire text single sentence without splitting . SegtokSentenceSplitter: Uses segtok library split text sentences. SpacySentenceSplitter: Uses spaCy library sentence splitting. TagSentenceSplitter: Assumes specific tags text indicate sentence boundaries.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.splitter Module in R — flair_splitter","text":"","code":"flair_splitter()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.splitter Module in R — flair_splitter","text":"Python module (flair.splitter).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.splitter Module in R — flair_splitter","text":"Python reference SegtokSentenceSplitter: Additional references classes can found within Flair library documentation. 
Flair GitHub","code":"from flair.splitter import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.splitter Module in R — flair_splitter","text":"","code":"if (FALSE) { SegtokSentenceSplitter <- flair_splitter$SegtokSentenceSplitter() text <- \"I am Taiwanese and come from Taiwan\" sentences <- splitter$split(text) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.trainers Module in R — flair_trainers","title":"Import flair.trainers Module in R — flair_trainers","text":"flair_trainers() provides R users access Flair's ModelTrainer Python class using reticulate package. ModelTrainer class offers following main methods: train: Trains given model. Parameters include corpus (data split training, development, test sets), output directory save model logs, various parameters control training process (e.g., learning rate, mini-batch size, maximum epochs). find_learning_rate: Uses \"learning rate finder\" method find optimal learning rate training. Parameters typically include corpus, batch size, range learning rates explore. final_test: training model, method evaluates model test set prints results. save_checkpoint: Saves current training state (including model parameters training configurations) resume later interrupted. load_checkpoint: Loads previously saved checkpoint resume training. log_line: Utility method logging. Writes line console log file. log_section: Utility method logging. 
Writes section break console log file.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.trainers Module in R — flair_trainers","text":"","code":"flair_trainers()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.trainers Module in R — flair_trainers","text":"Python Module(flair.trainers) object allowing access Flair's trainers R.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.trainers Module in R — flair_trainers","text":"Flair GitHub Python equivalent:","code":"from flair.trainers import ModelTrainer"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.trainers Module in R — flair_trainers","text":"","code":"if (FALSE) { trainers <- flair_trainers() model_trainer <- trainers$ModelTrainer }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Named Entities with Flair Models — get_entities","title":"Tagging Named Entities with Flair Models — get_entities","text":"function takes texts corresponding document IDs inputs, uses Flair NLP library extract named entities, returns dataframe identified entities along tags. entities detected text, function returns data table NA values. might clutter results. 
Depending use case, might decide either keep behavior skip rows detected entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Named Entities with Flair Models — get_entities","text":"","code":"get_entities( texts, doc_ids = NULL, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Named Entities with Flair Models — get_entities","text":"texts character vector containing texts process. doc_ids character numeric vector containing document IDs corresponding text. tagger optional tagger object. NULL (default), function load Flair tagger based specified language. language character string indicating language model load. Default \"en\". show.text_id logical value. TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Named Entities with Flair Models — get_entities","text":"data table columns: doc_id ID document entity extracted. text_id TRUE, actual text entity extracted. entity named entity extracted text. tag tag category named entity. 
Common tags include: PERSON (names individuals), ORG (organizations, institutions), GPE (countries, cities, states), LOCATION (mountain ranges, bodies water), DATE (dates periods), TIME (times day), MONEY (monetary values), PERCENT (percentage values), FACILITY (buildings, airports), PRODUCT (objects, vehicles), EVENT (named events like wars sports events), ART (titles books)","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Named Entities with Flair Models — get_entities","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load NER (\"ner\") model tagger_ner <- load_tagger_ner('ner') results <- get_entities(texts, doc_ids, tagger_ner) print(results)}"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract Named Entities from a Batch of Texts — get_entities_batch","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"function processes batches texts extracts named entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"","code":"get_entities_batch( texts, doc_ids, tagger = NULL, language = \"en\", show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = 
TRUE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"texts character vector texts process. doc_ids vector document IDs corresponding text. tagger pre-loaded Flair NER tagger. Default NULL, tagger loaded based provided language. language character string specifying language texts. Default \"en\" (English). show.text_id Logical, whether include text ID output. Default FALSE. gc.active Logical, whether activate garbage collection processing batch. Default FALSE. batch_size integer specifying size batch. Default 5. device character string specifying computation device. can either \"cpu\" string representation GPU device number. instance, \"0\" corresponds first GPU. GPU device number provided, attempt use GPU. default \"cpu\". \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. \"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. 
Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"data.table containing extracted entities, corresponding tags, document IDs.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load NER (\"ner\") model tagger_ner <- load_tagger_ner('ner') results <- get_entities_batch(texts, doc_ids, tagger_ner) print(results)}"},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve Flair Version — get_flair_version","title":"Retrieve Flair Version — get_flair_version","text":"Gets version installed Flair module current Python environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve Flair Version — get_flair_version","text":"","code":"get_flair_version(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve Flair Version — get_flair_version","text":"Character 
string representing version Flair. Flair installed, may return NULL cause error.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"function returns data table POS tags related data given texts.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"","code":"get_pos( texts, doc_ids = NULL, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"texts character vector containing texts processed. doc_ids character vector containing document ids. tagger tagger object (default NULL). language language texts (default NULL). show.text_id logical value. TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"data.table containing following columns: doc_id document identifier corresponding text. token_id token number original text, indicating position token. text_id actual text input passed function. 
token individual word token text POS tagged. tag part--speech tag assigned token Flair library. precision confidence score (numeric) assigned POS tag.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) tagger_pos_fast <- load_tagger_pos('pos-fast') texts <- c(\"UCD is one of the best universities in Ireland.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\") get_pos(texts, doc_ids, tagger_pos_fast) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Batch Process of Part-of-Speech Tagging — get_pos_batch","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"function returns data table POS tags related data given texts using batch processing.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"","code":"get_pos_batch( texts, doc_ids, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = TRUE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"texts character vector containing texts processed. doc_ids character vector containing document ids. tagger tagger object (default NULL). language language texts (default NULL). show.text_id logical value. 
TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE. batch_size integer specifying size batch. Default 5. device character string specifying computation device. \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. \"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"data.table containing following columns: doc_id document identifier corresponding text. token_id token number original text, indicating position token. text_id actual text input passed function (show.text_id TRUE). token individual word token text POS tagged. tag part--speech tag assigned token Flair library. 
precision confidence score (numeric) assigned POS tag.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"","code":"if (FALSE) { library(reticulate) library(flaiR) tagger_pos_fast <- load_tagger_pos('pos-fast') texts <- c(\"UCD is one of the best universities in Ireland.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\") # Using the batch_size parameter get_pos_batch(texts, doc_ids, tagger_pos_fast, batch_size = 2) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Sentiment with Flair Standard Models — get_sentiments","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"function takes texts associated document IDs predict sentiments using flair Python library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"","code":"get_sentiments( texts, doc_ids, tagger = NULL, ..., language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"texts list vector texts sentiment prediction made. doc_ids list vector document IDs corresponding texts. tagger optional flair sentiment model. NULL (default), function loads default model based language. ... Additional arguments passed next. 
language character string indicating language texts. Currently supports \"sentiment\" (English), \"sentiment-fast\" (English), \"de-offensive-language\" (German) show.text_id logical value. TRUE, includes actual text sentiment predicted. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"data.table containing three columns: doc_id: document ID input. sentiment: Predicted sentiment text. score: Score sentiment prediction.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"","code":"if (FALSE) { library(flaiR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load re-trained sentiment (\"sentiment\") model tagger_sent <- load_tagger_sentiments('sentiment') results <- get_sentiments(texts, doc_ids, tagger_sent) print(results) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","title":"Batch Process of Tagging Sentiment with Flair Models — 
get_sentiments_batch","text":"function takes texts associated document IDs predict sentiments using flair Python library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"","code":"get_sentiments_batch( texts, doc_ids, tagger = NULL, ..., language = NULL, show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"texts list vector texts sentiment prediction made. doc_ids list vector document IDs corresponding texts. tagger optional flair sentiment model. NULL (default), function loads default model based language. ... Additional arguments passed next. language character string indicating language texts. Currently supports \"sentiment\" (English), \"sentiment-fast\" (English), \"de-offensive-language\" (German) show.text_id logical value. TRUE, includes actual text sentiment predicted. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE. batch_size integer specifying number texts processed . can help optimize performance leveraging parallel processing. Default 5. device character string specifying computation device. can either \"cpu\" string representation GPU device number. instance, \"0\" corresponds first GPU. GPU device number provided, attempt use GPU. default \"cpu\". \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. 
\"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"data.table containing three columns: doc_id: document ID input. sentiment: Predicted sentiment text. score: Score sentiment prediction.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"","code":"if (FALSE) { library(flaiR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load re-trained sentiment (\"sentiment\") model tagger_sent <- load_tagger_sentiments('sentiment') results <- get_sentiments_batch(texts, doc_ids, tagger_sent, batch_size = 3) print(results) }"},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","title":"Training Data from : When Do Politicians Grandstand? 
Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"dataset Ju Yeon Park's paper published Journal Politics 2021, titled \"Politicians Grandstand? Measuring Message Politics Committee Hearings\". contains \"Congressional Hearing Dataset: 105th 114th Congresses\", replication dataset paper. manuscript accepted publication June 2019. Please cite paper using data.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"","code":"data(\"gs_score\")"},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"data frame 3 variables: speech Hearing speeches sentimentit_score grandstanding score. rescaled_gs Label indicating whether text grandstanding speech: '1' grandstanding speech '0' non-grandstanding speech. rescaled version sentimentit_score (grandstanding scores) original released data.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"Data provided authors Ju Yeon Park JOP's Dataverse https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/GSMBFX/JIHIGH&version=1.0.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : When Do Politicians Grandstand? 
Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"","code":"if (FALSE) { data(gs_score) head(gs_score) }"},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"dataset derived sample development set \"Political Hate Speech Detection Lexicon Building: Study Taiwan.\" contains 1,000 annotated data entries, 926 labeled '0' (hate speech) 74 '1' (hate speech). paper can accessed https://ieeexplore.ieee.org/document/9738642.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"","code":"data(\"hatespeech_zh_tw\")"},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"data frame 2 variables: text Content text. label Label indicating whether text hate speech: '1' hate speech '0' non-hate speech.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"Data provided authors Chih-Chien Wang, Min-Yuh Day, Chun-Lian Wu. 
Available https://ieeexplore.ieee.org/document/9738642.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"","code":"if (FALSE) { data(hatespeech_zh_tw) head(hatespeech_zh_tw) }"},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":null,"dir":"Reference","previous_headings":"","what":"Highlight Entities with Specified Colors and Tag — highlight_text","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"function highlights specified entities text string specified background colors, font colors, optional labels. Additionally, allows setting specific font type highlighted text.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"","code":"highlight_text(text, entities_mapping, font_family = \"Arial\")"},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"text character string containing text highlight. entities_mapping named list lists, sub-list containing: words: character vector words highlight. background_color: character string specifying CSS color highlight background. font_color: character string specifying CSS color highlighted text. label: character string specifying label append highlighted word. label_color: character string specifying CSS color label text. font_family character string specifying CSS font family highlighted text label. 
Default \"Arial\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"HTML object containing text highlighted entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 1) tagger_ner <- load_tagger_ner(\"ner\") results <- get_entities(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE) highlighted_text <- highlight_text(uk_immigration$text, map_entities(results)) print(highlighted_text) #> I thank Mr. Speaker for giving me permission to hold this debate today. I welcome the Minister-I very much appreciate the contact from his office prior to today-and the Conservative<\/span> (ORG)<\/span> and Liberal Democrat Front Benchers<\/span> (ORG)<\/span> to the debate. I also welcome my hon. Friends on the Back Benches<\/span> (MISC)<\/span>. Immigration is the most important issue for my constituents. I get more complaints, comments and suggestions about immigration than about anything else. In the Kettering<\/span> (LOC)<\/span> constituency, the number of immigrants is actually very low. There is a well-settled Sikh<\/span> (MISC)<\/span> community in the middle of Kettering<\/span> (LOC)<\/span> town itself, which has been in Kettering<\/span> (LOC)<\/span> for some 40 or 50 years and is very much part of the local community and of the fabric of local life. There are other very small migrant groups in my constituency, but it is predominantly made up of indigenous British<\/span> (MISC)<\/span> people. 
However, there is huge concern among my constituents about the level of immigration into our country. I believe that I am right in saying that, in recent years, net immigration into the United Kingdom<\/span> (LOC)<\/span> is the largest wave of immigration that our country has ever known and, proportionately, is probably the biggest wave of immigration since the Norman<\/span> (MISC)<\/span> conquest. My contention is that our country simply cannot cope with immigration on that scale-to coin a phrase, we simply cannot go on like this. It is about time that mainstream politicians started airing the views of their constituents, because for too long people have muttered under their breath that they are concerned about immigration. They have been frightened to speak out about it because they are frightened of being accused of being racist. My contention is that immigration is not a racist issue; it is a question of numbers. I personally could not care tuppence about the ethnicity of the immigrants concerned, the colour of their skin or the language that they speak. What I am concerned about is the very large numbers of new arrivals to our country. My contention is that the United Kingdom<\/span> (LOC)<\/span> simply cannot cope with them.<\/div>"},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper for the Flair Python Library — import_flair","title":"Wrapper for the Flair Python Library — import_flair","text":"flair: wrapper access Flair library Python. Returns: list: list method python module. Environment Configuration: os: Pertains operating system related functions, path handling, file operations, . Path: pathlib, used convenient file path operations. set_seed: Functions set random seed. hf_set_seed: Functions set random seed. set_proxies: Used configure network proxies. Data Data Loading: data: Functions related data handling operations. 
datasets: Modules methods load handle specific datasets. file_utils: Utilities file operations. Embeddings Model Layers: embeddings: embeddings, including word embeddings, contextual embeddings, etc. nn: Related neural network layers operations. models: Different model architectures structures. Training Optimization: trainers: Related training models. training_utils: Utility functions training process. optim: Optimization algorithms, like SGD, Adam. Tokenization Text Processing: tokenization: break text tokens. splitter: splitting datasets texts. Visualizations Miscellaneous: visual: Related visualization. torch: main PyTorch library. cache_root: Related caching data models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper for the Flair Python Library — import_flair","text":"","code":"import_flair()"},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper for the Flair Python Library — import_flair","text":"object represents Flair module Python.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Wrapper for the Flair Python Library — import_flair","text":"function relies reticulate package import use Flair module Python. 
Ensure Flair Python library installed Python environment used.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wrapper for the Flair Python Library — import_flair","text":"","code":"if (FALSE) { flair <- import_flair() }"},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":null,"dir":"Reference","previous_headings":"","what":"Install a Specific Python Package and Return Its Version — install_python_package","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"function checks Python interpreter's location (either specified user automatically located), compares current R session's Python setting, installs specified Python package using identified Python interpreter, returns package version installation environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"","code":"install_python_package( package_name, package_version = NULL, python_path = Sys.which(\"python3\") )"},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"package_name name Python package install. package_version version Python package install. NULL, latest version installed. python_path path Python interpreter used installation. 
provided, defaults result Sys.which(\"python3\").","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"list containing package name, installed version, path Python interpreter used installation.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"","code":"if (FALSE) { install_python_package(package_name =\"flair\", package_version =\"0.12\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":null,"dir":"Reference","previous_headings":"","what":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"helper function load appropriate tagger based provided language. function supports variety languages/models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"","code":"load_tagger_ner(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"language character string indicating desired language NER tagger. NULL, function default 'ner' model. 
Supported languages models include: \"en\" - English NER tagging (ner) \"de\" - German NER tagging (de-ner) \"fr\" - French NER tagging (fr-ner) \"nl\" - Dutch NER tagging (nl-ner) \"da\" - Danish NER tagging (da-ner) \"ar\" - Arabic NER tagging (ar-ner) \"ner-fast\" - English NER fast model (ner-fast) \"ner-large\" - English NER large mode (ner-large) \"de-ner-legal\" - NER (legal text) (de-ner-legal) \"nl\" - Dutch NER tagging (nl-ner) \"da\" - Danish NER tagging (da-ner) \"ar\" - Arabic NER tagging (ar-ner)","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"instance Flair SequenceTagger specified language.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"","code":"# Load the English NER tagger tagger_en <- load_tagger_ner(\"en\")"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":null,"dir":"Reference","previous_headings":"","what":"Load Flair POS Tagger — load_tagger_pos","title":"Load Flair POS Tagger — load_tagger_pos","text":"function loads POS (part--speech) tagger model specified language using Flair library. 
language specified, defaults 'pos-fast'.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load Flair POS Tagger — load_tagger_pos","text":"","code":"load_tagger_pos(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load Flair POS Tagger — load_tagger_pos","text":"language character string indicating desired language model. NULL, function default 'pos-fast' model. Supported language models include: \"pos\" - General POS tagging \"pos-fast\" - Faster POS tagging \"upos\" - Universal POS tagging \"upos-fast\" - Faster Universal POS tagging \"pos-multi\" - Multi-language POS tagging \"pos-multi-fast\" - Faster Multi-language POS tagging \"ar-pos\" - Arabic POS tagging \"de-pos\" - German POS tagging \"de-pos-tweets\" - German POS tagging tweets \"da-pos\" - Danish POS tagging \"ml-pos\" - Malayalam POS tagging \"ml-upos\" - Malayalam Universal POS tagging \"pt-pos-clinical\" - Clinical Portuguese POS tagging \"pos-ukrainian\" - Ukrainian POS tagging","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load Flair POS Tagger — load_tagger_pos","text":"Flair POS tagger model corresponding specified (default) language.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load Flair POS Tagger — load_tagger_pos","text":"","code":"if (FALSE) { tagger <- load_tagger_pos(\"pos-fast\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":null,"dir":"Reference","previous_headings":"","what":"Load a Sentiment or Language Tagger Model from Flair — 
load_tagger_sentiments","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"function loads pre-trained sentiment language tagger Flair library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"","code":"load_tagger_sentiments(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"language character string specifying language model load. Supported models include: \"sentiment\" - Sentiment analysis model \"sentiment-fast\" - Faster sentiment analysis model \"de-offensive-language\" - German offensive language detection model provided, function default \"sentiment\" model.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"object loaded Flair model.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"","code":"if (FALSE) { tagger <- load_tagger_sentiments(\"sentiment\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":null,"dir":"Reference","previous_headings":"","what":"Create Mapping for NER Highlighting — map_entities","title":"Create Mapping for NER Highlighting — map_entities","text":"function generates mapping list Named Entity 
Recognition (NER) highlighting. mapping list defines different entity types highlighted text displays, defining background color, font color, label, label color entity type.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create Mapping for NER Highlighting — map_entities","text":"","code":"map_entities(df, entity = \"entity\", tag = \"tag\")"},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create Mapping for NER Highlighting — map_entities","text":"df data frame containing least two columns: entity: character vector words/entities highlighted. tag: character vector indicating entity type word/entity. entity character vector entities annotated model. tag character vector tags corresponding annotated entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create Mapping for NER Highlighting — map_entities","text":"list mapping settings entity type, entity type represented list containing: words: character vector words highlighted. background_color: character string representing background color highlighting words. font_color: character string representing font color words. label: character string label entity type. 
label_color: character string representing font color label.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create Mapping for NER Highlighting — map_entities","text":"","code":"if (FALSE) { sample_df <- data.frame( entity = c(\"Microsoft\", \"USA\", \"dollar\", \"Bill Gates\"), tag = c(\"ORG\", \"LOC\", \"MISC\", \"PER\"), stringsAsFactors = FALSE ) mapping <- map_entities(sample_df) }"},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":null,"dir":"Reference","previous_headings":"","what":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"function lists contents flair cache directory returns data frame.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"","code":"show_flair_cache()"},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"data frame containing file paths contents flair cache directory. 
directory exist empty, NULL returned.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Show Flair Cache Preloaed flair's Directory — show_flair_cache","text":"","code":"if (FALSE) { show_flair_cache() }"},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":null,"dir":"Reference","previous_headings":"","what":"Sampled Grandstanding Text — statements","title":"Sampled Grandstanding Text — statements","text":"Sampled Grandstanding Text","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sampled Grandstanding Text — statements","text":"","code":"data(\"statements\")"},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Sampled Grandstanding Text — statements","text":"data frame 3 variables: Type Grandstanding types Statement Grandstanding texts","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sampled Grandstanding Text — statements","text":"","code":"if (FALSE) { data(statements) head(statements) }"},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":null,"dir":"Reference","previous_headings":"","what":"UK House of Commons Immigration Debate Data — uk_immigration","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"dataset containing speeches debates UK House Commons topic immigration 2010.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"UK House of Commons Immigration Debate Data — 
uk_immigration","text":"","code":"data(\"uk_immigration\")"},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"data frame 12 variables: date Date speech, Date type agenda Agenda subject speech, character speechnumber Unique identifier speech, numeric speaker Name person giving speech, character party Political party speaker, character party.facts.id ID party, usually numeric character chair Person chairing session, character terms Terms tags associated speech, character list text Actual text speech, character parliament parliament session, character numeric iso3country ISO3 country code parliament located, character year Year speech made, numeric","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"Data collected ParSpeechV2 House Commons year 2010. dataset publicly available https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"","code":"if (FALSE) { data(uk_immigration) head(uk_immigration) }"},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":null,"dir":"Reference","previous_headings":"","what":"Uninstall a Python Package — uninstall_python_package","title":"Uninstall a Python Package — uninstall_python_package","text":"uninstall_python_package function uninstalls specified Python package using system's Python installation. checks Python installed accessible, proceeds uninstall package. 
Finally, uninstall_python_package verifies package successfully uninstalled.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uninstall a Python Package — uninstall_python_package","text":"","code":"uninstall_python_package(package_name, python_path = Sys.which(\"python3\"))"},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uninstall a Python Package — uninstall_python_package","text":"package_name name Python package uninstall. python_path path Python executable. provided, uses system's default Python path.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Uninstall a Python Package — uninstall_python_package","text":"Invisibly returns TRUE package successfully uninstalled, otherwise stops error message.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uninstall a Python Package — uninstall_python_package","text":"","code":"if (FALSE) { uninstall_python_package(\"numpy\") }"},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-006-2023-10-29","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.6 (2023-10-29)","title":"flaiR 0.0.6 (2023-10-29)","text":"flair {flaiR} renamed flair() import_flair() avoid overlapping conventional practice import flair Python. install_python_package() uninstall_python_package() new functions install uninstall Python packages using pip environment used flaiR package. Add new training data grandstanding training data Ju Yeon Park’s paper. zzz.R revised code proceeds three steps. 
First, installing loading package, {flaiR} utilizes system’s environment tool undergoes three evaluation stages. Initially, {flaiR} requires least Python 3 installed device. Python 3 available, unable install {flaiR} successfully. requirement met, system checks appropriate versions PyTorch Flair. primary focus Flair. already installed, see message indicating ‘Flair installed Python’. process represents new format loading Python environment used flaiR package. Add example datasets (cc_muller hatespeech_zh_tw) tutorials documentation.","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-005-2023-10-01","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.5 (2023-10-01)","title":"flaiR 0.0.5 (2023-10-01)","text":"Added tests monitor function operation. Added wrapped functions integrating Python code. Created function coloring entities. Provided tutorials interacting R Python using Flair. Notice Python 3.x flair may fail install Python dependencies windows-latest due potential compatibility issues latest Python versions Windows. fix , modified Python version actions/setup-python@v2 step use Python 3.9 lower version. Added two new example datasets tutorials documentation. ","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-003-2023-09-10","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.3 (2023-09-10)","title":"flaiR 0.0.3 (2023-09-10)","text":"Modifications Overview Added show.text_id gc.active parameters get_entities(), get_pos(), get_sentiment(). Enhanced batch processing introduction batch_size functions get_entities_batch(), get_pos_batch(), get_sentiment_batch(). Introduced device parameter specify computation device. Introduction New Parameters: show.text_id: activated (TRUE), actual text (labeled ‘text_id’) entity derived appended resulting dataset. Although enriching output validation traceability, users cautious, might inflate output size. 
default, option remains deactivated (FALSE). context, previously, ‘text_id’ intrinsically generated, potentially elevating R’s memory consumption. gc.active: Activating (TRUE) trigger garbage collector post-text processing. action aids memory optimization relinquishing unallocated memory spaces, crucial step, particularly processing extensive text dataset. default set FALSE, users managing larger texts consider setting gc.active TRUE. Though action doesn’t bolster computational efficiency, circumvent potential RStudio crashes. Batch Processing Enhancement: inception batch_size parameter (defaulted 5) get_entities_batch(), get_pos_batch(), get_sentiment_batch() augments batch processing capabilities. addition led creation internal function named process_batch proficiently manage text batch linked doc_ids. core functionality adapted segregate texts doc_ids specific batches, subsequently processed via process_batch function, final results amalgamated seamlessly. device: descriptive character string pinpointing computation device. Users can opt “cpu” GPU device number string format. instance, representing primary GPU 0. GPU device number furnished, system endeavor harness specific GPU, “cpu” default setting. batch_size: integer specifying size batch. Default 5. ","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-001-development-version","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.1 (development version)","title":"flaiR 0.0.1 (development version)","text":"features flaiR currently include part--speech tagging, sentiment tagging, named entity recognition tagging. flaiR requires Python version 3.7 higher operate concurrently. create_flair_env(): function install Flair Python library using reticulate R package, automatically generated.","code":""}]
+[{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"ner-models","dir":"Articles","previous_headings":"","what":"NER Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/tagging-entities ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"pos-models","dir":"Articles","previous_headings":"","what":"POS Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/part--speech-tagging ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/flair_models.html","id":"sentiment-models","dir":"Articles","previous_headings":"","what":"Sentiment Models","title":"Flair Models","text":"Source: https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/get_entities.html","id":"generic-approach-using-pre-trained-ner-english-model","dir":"Articles","previous_headings":"","what":"Generic Approach Using Pre-trained NER English Model","title":"Tagging Named Entities with Flair Standard Models","text":"Use load_tagger_ner call NER pretrained model. model downloaded Flair’s Hugging Face repo. Thus, ensure internet connection. downloaded, model stored .flair cache device. , ’ve downloaded hasn’t manually removed, executing command trigger download. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. 
CUDA, please visit: https://developer.nvidia.com/cuda-zone want computation run faster, recommended keep show.text_id set FALSE default.","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 10) tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 12:37:10,539 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , tagger_pos$to(flair_device(\"mps\")) SequenceTagger( (embeddings): StackedEmbeddings( (list_embedding_0): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) (list_embedding_1): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) ) (word_dropout): WordDropout(p=0.05) (locked_dropout): LockedDropout(p=0.5) (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True) (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True) (linear): Linear(in_features=512, out_features=53, bias=True) (loss_function): ViterbiLoss() (crf): CRF() ) results <- get_entities(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE ) print(results) #> doc_id entity tag #> 1: Philip Hollobone Conservative ORG #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG #> 3: Philip Hollobone Back Benches MISC #> 4: Philip Hollobone Kettering LOC #> 5: Philip Hollobone Sikh MISC #> 6: Philip Hollobone Kettering LOC #> 7: Philip Hollobone Kettering LOC #> 8: Philip Hollobone British MISC #> 9: Philip Hollobone United Kingdom LOC #> 10: Philip Hollobone Norman MISC #> 11: Philip Hollobone United Kingdom LOC #> 12: Stewart Jackson Friend PER #> 13: Stewart Jackson Archbishop of Canterbury ORG #> 14: Stewart 
Jackson Carey PER #> 15: Philip Hollobone Friend PER #> 16: Philip Hollobone United Kingdom LOC #> 17: Philip Hollobone UK LOC #> 18: Philip Hollobone Europe LOC #> 19: Philip Hollobone Malta LOC #> 20: Stewart Jackson Barking LOC #> 21: Stewart Jackson Dagenham LOC #> 22: Stewart Jackson British National ORG #> 23: Stewart Jackson Conservative ORG #> 24: Stewart Jackson Friend PER #> 25: Stewart Jackson Folkestone LOC #> 26: Stewart Jackson Hythe LOC #> 27: Stewart Jackson Howard PER #> 28: Philip Hollobone Friend PER #> 29: Philip Hollobone Shipley PER #> 30: Philip Hollobone Philip Davies PER #> 31: Philip Hollobone Solihull LOC #> 32: Philip Hollobone Lorely Burt ORG #> 33: Philip Hollobone Peterborough LOC #> 34: Philip Hollobone Jackson PER #> 35: Philip Hollobone Friend PER #> 36: Philip Davies Friend PER #> 37: Philip Davies Government ORG #> 38: Philip Hollobone Kettering LOC #> 39: Philip Hollobone Government ORG #> 40: Philip Hollobone Kettering LOC #> 41: Philip Hollobone Kettering LOC #> 42: Philip Hollobone Migrationwatch UK ORG #> 43: Philip Hollobone Carshalton LOC #> 44: Philip Hollobone Wallington LOC #> 45: Philip Hollobone Tom Brake PER #> 46: Philip Hollobone #> 47: Phil Woolas Gentleman PER #> 48: Phil Woolas Carshalton LOC #> 49: Phil Woolas Wallington LOC #> 50: Phil Woolas Tom Brake PER #> doc_id entity tag print(results) #> doc_id entity tag #> 1: Philip Hollobone Conservative ORG #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG #> 3: Philip Hollobone Back Benches MISC #> 4: Philip Hollobone Kettering LOC #> 5: Philip Hollobone Sikh MISC #> 6: Philip Hollobone Kettering LOC #> 7: Philip Hollobone Kettering LOC #> 8: Philip Hollobone British MISC #> 9: Philip Hollobone United Kingdom LOC #> 10: Philip Hollobone Norman MISC #> 11: Philip Hollobone United Kingdom LOC #> 12: Stewart Jackson Friend PER #> 13: Stewart Jackson Archbishop of Canterbury ORG #> 14: Stewart Jackson Carey PER #> 15: Philip Hollobone Friend PER #> 16: Philip 
Hollobone United Kingdom LOC #> 17: Philip Hollobone UK LOC #> 18: Philip Hollobone Europe LOC #> 19: Philip Hollobone Malta LOC #> 20: Stewart Jackson Barking LOC #> 21: Stewart Jackson Dagenham LOC #> 22: Stewart Jackson British National ORG #> 23: Stewart Jackson Conservative ORG #> 24: Stewart Jackson Friend PER #> 25: Stewart Jackson Folkestone LOC #> 26: Stewart Jackson Hythe LOC #> 27: Stewart Jackson Howard PER #> 28: Philip Hollobone Friend PER #> 29: Philip Hollobone Shipley PER #> 30: Philip Hollobone Philip Davies PER #> 31: Philip Hollobone Solihull LOC #> 32: Philip Hollobone Lorely Burt ORG #> 33: Philip Hollobone Peterborough LOC #> 34: Philip Hollobone Jackson PER #> 35: Philip Hollobone Friend PER #> 36: Philip Davies Friend PER #> 37: Philip Davies Government ORG #> 38: Philip Hollobone Kettering LOC #> 39: Philip Hollobone Government ORG #> 40: Philip Hollobone Kettering LOC #> 41: Philip Hollobone Kettering LOC #> 42: Philip Hollobone Migrationwatch UK ORG #> 43: Philip Hollobone Carshalton LOC #> 44: Philip Hollobone Wallington LOC #> 45: Philip Hollobone Tom Brake PER #> 46: Philip Hollobone #> 47: Phil Woolas Gentleman PER #> 48: Phil Woolas Carshalton LOC #> 49: Phil Woolas Wallington LOC #> 50: Phil Woolas Tom Brake PER #> doc_id entity tag"},{"path":"https://davidycliao.github.io/flaiR/articles/get_entities.html","id":"batch-processing","dir":"Articles","previous_headings":"","what":"Batch Processing","title":"Tagging Named Entities with Flair Standard Models","text":"Processing texts individually can inefficient memory-intensive. hand, processing texts simultaneously surpass memory constraints, especially document dataset sizable. Parsing documents smaller batches may provide optimal compromise two scenarios. Batch processing can enhance efficiency aid memory management. default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. 
can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_time <- system.time({ batch_process_results <- get_entities_batch(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE, batch_size = 5) gc() }) #> CPU is used. #> Processing batch 1 out of 2... #> Processing batch 2 out of 2... print(batch_process_time) #> user system elapsed #> 23.833 0.219 23.966 print(batch_process_results) #> doc_id entity tag text_id #> 1: Philip Hollobone Conservative ORG NA #> 2: Philip Hollobone Liberal Democrat Front Benchers ORG NA #> 3: Philip Hollobone Back Benches MISC NA #> 4: Philip Hollobone Kettering LOC NA #> 5: Philip Hollobone Sikh MISC NA #> 6: Philip Hollobone Kettering LOC NA #> 7: Philip Hollobone Kettering LOC NA #> 8: Philip Hollobone British MISC NA #> 9: Philip Hollobone United Kingdom LOC NA #> 10: Philip Hollobone Norman MISC NA #> 11: Philip Hollobone United Kingdom LOC NA #> 12: Stewart Jackson Friend PER NA #> 13: Stewart Jackson Archbishop of Canterbury ORG NA #> 14: Stewart Jackson Carey PER NA #> 15: Philip Hollobone Friend PER NA #> 16: Philip Hollobone United Kingdom LOC NA #> 17: Philip Hollobone UK LOC NA #> 18: Philip Hollobone Europe LOC NA #> 19: Philip Hollobone Malta LOC NA #> 20: Stewart Jackson Barking LOC NA #> 21: Stewart Jackson Dagenham LOC NA #> 22: Stewart Jackson British National ORG NA #> 23: Stewart Jackson Conservative ORG NA #> 24: Stewart Jackson Friend PER NA #> 25: Stewart Jackson Folkestone LOC NA #> 26: Stewart Jackson Hythe LOC NA #> 27: Stewart Jackson Howard PER NA #> 28: Philip Hollobone Friend PER NA #> 29: Philip Hollobone Shipley PER NA #> 30: Philip Hollobone Philip Davies PER NA #> 31: Philip 
Hollobone Solihull LOC NA #> 32: Philip Hollobone Lorely Burt ORG NA #> 33: Philip Hollobone Peterborough LOC NA #> 34: Philip Hollobone Jackson PER NA #> 35: Philip Hollobone Friend PER NA #> 36: Philip Davies Friend PER NA #> 37: Philip Davies Government ORG NA #> 38: Philip Hollobone Kettering LOC NA #> 39: Philip Hollobone Government ORG NA #> 40: Philip Hollobone Kettering LOC NA #> 41: Philip Hollobone Kettering LOC NA #> 42: Philip Hollobone Migrationwatch UK ORG NA #> 43: Philip Hollobone Carshalton LOC NA #> 44: Philip Hollobone Wallington LOC NA #> 45: Philip Hollobone Tom Brake PER NA #> 46: Philip Hollobone NA #> 47: Phil Woolas Gentleman PER NA #> 48: Phil Woolas Carshalton LOC NA #> 49: Phil Woolas Wallington LOC NA #> 50: Phil Woolas Tom Brake PER NA #> doc_id entity tag text_id"},{"path":"https://davidycliao.github.io/flaiR/articles/get_pos.html","id":"generic-approach-using-part-of-speech-tagging","dir":"Articles","previous_headings":"","what":"Generic Approach Using Part-of-Speech Tagging","title":"Tagging Part-of-Speech Tagging with Flair Standard Models","text":"Download de-pos part--speech tagging model FlairNLP Hugging Face. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. 
CUDA, please visit: https://developer.nvidia.com/cuda-zone","code":"library(flaiR) data(\"de_immigration\") uk_immigration <- head(uk_immigration, 2) tagger_pos <- load_tagger_pos(\"pos\") #> 2023-11-29 12:38:08,493 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD tagger_pos$to(flair_device(\"mps\")) SequenceTagger( (embeddings): StackedEmbeddings( (list_embedding_0): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) (list_embedding_1): FlairEmbeddings( (lm): LanguageModel( (drop): Dropout(p=0.05, inplace=False) (encoder): Embedding(300, 100) (rnn): LSTM(100, 2048) (decoder): Linear(in_features=2048, out_features=300, bias=True) ) ) ) (word_dropout): WordDropout(p=0.05) (locked_dropout): LockedDropout(p=0.5) (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True) (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True) (linear): Linear(in_features=512, out_features=53, bias=True) (loss_function): ViterbiLoss() (crf): CRF() ) results <- get_pos(uk_immigration$text, uk_immigration$speaker, tagger_pos, show.text_id = FALSE, gc.active = FALSE) print(results) #> doc_id token_id text_id token tag precision #> 1: Philip Hollobone 0 NA I PRP 1.0000 #> 2: Philip Hollobone 1 NA thank VBP 0.9996 #> 3: Philip Hollobone 2 NA Mr. NNP 1.0000 #> 4: Philip Hollobone 3 NA Speaker NNP 1.0000 #> 5: Philip Hollobone 4 NA for IN 1.0000 #> --- #> 440: Stewart Jackson 66 NA parties NNS 1.0000 #> 441: Stewart Jackson 67 NA in IN 1.0000 #> 442: Stewart Jackson 68 NA this DT 1.0000 #> 443: Stewart Jackson 69 NA country NN 1.0000 #> 444: Stewart Jackson 70 NA ? . 
0.9949"},{"path":"https://davidycliao.github.io/flaiR/articles/get_pos.html","id":"batch-processing","dir":"Articles","previous_headings":"","what":"Batch Processing","title":"Tagging Part-of-Speech Tagging with Flair Standard Models","text":"default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_results <- get_pos_batch(uk_immigration$text, uk_immigration$speaker, tagger_pos, show.text_id = FALSE, batch_size = 10, verbose = TRUE) #> CPU is used. #> Processing batch starting at index: 1 print(batch_process_results) #> doc_id token_id text_id token tag precision #> 1: Philip Hollobone 0 NA I PRP 1.0000 #> 2: Philip Hollobone 1 NA thank VBP 0.9996 #> 3: Philip Hollobone 2 NA Mr. NNP 1.0000 #> 4: Philip Hollobone 3 NA Speaker NNP 1.0000 #> 5: Philip Hollobone 4 NA for IN 1.0000 #> --- #> 448: 0 NA NA NNP 0.8859 #> 449: 0 NA NA NNP 0.8859 #> 450: 0 NA NA NNP 0.8859 #> 451: 0 NA NA NNP 0.8859 #> 452: 0 NA NA NNP 0.8859"},{"path":"https://davidycliao.github.io/flaiR/articles/get_sentiments.html","id":"an-example-using-sentiment-model-pre-trained-english-model","dir":"Articles","previous_headings":"","what":"An Example Using sentiment Model (Pre-trained English Model)","title":"Tagging Sentiment with Flair Standard Models","text":"Download English sentiment model FlairNLP Hugging Face. Currently, also supports large English sentiment model German pre-trained model. Flair NLP operates PyTorch framework. , can use $method set device Flair Python library. 
flair_device(“cpu”) allows select whether use CPU, CUDA devices (like cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2). information Accelerated PyTorch training Mac, please refer https://developer.apple.com/metal/pytorch/. CUDA, please visit: https://developer.nvidia.com/cuda-zone","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 5) tagger_sent <- load_tagger_sentiments(\"sentiment\") tagger_sent$to(flair_device(\"mps\")) TextClassifier( (embeddings): TransformerDocumentEmbeddings( (model): DistilBertModel( (embeddings): Embeddings( (word_embeddings): Embedding(30522, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (transformer): Transformer( (layer): ModuleList( (0-5): 6 x TransformerBlock( (attention): MultiHeadSelfAttention( (dropout): Dropout(p=0.1, inplace=False) (q_lin): Linear(in_features=768, out_features=768, bias=True) (k_lin): Linear(in_features=768, out_features=768, bias=True) (v_lin): Linear(in_features=768, out_features=768, bias=True) (out_lin): Linear(in_features=768, out_features=768, bias=True) ) (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (ffn): FFN( (dropout): Dropout(p=0.1, inplace=False) (lin1): Linear(in_features=768, out_features=3072, bias=True) (lin2): Linear(in_features=3072, out_features=768, bias=True) (activation): GELUActivation() ) (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) ) ) ) ) ) (decoder): Linear(in_features=768, out_features=2, bias=True) (dropout): Dropout(p=0.0, inplace=False) (locked_dropout): LockedDropout(p=0.0) (word_dropout): WordDropout(p=0.0) (loss_function): CrossEntropyLoss() ) results <- get_sentiments(uk_immigration$text, seq_len(nrow(uk_immigration)), tagger_sent) print(results) #> doc_id sentiment score #> 1: 1 POSITIVE 0.8097584 #> 2: 2 POSITIVE 0.9990165 #> 3: 3 
POSITIVE 0.8827485 #> 4: 4 NEGATIVE 0.9997155 #> 5: 5 POSITIVE 0.8604343"},{"path":"https://davidycliao.github.io/flaiR/articles/get_sentiments.html","id":"batch-processing-in-english-sentiment-model","dir":"Articles","previous_headings":"","what":"Batch Processing in English Sentiment Model","title":"Tagging Sentiment with Flair Standard Models","text":"Processing texts individually can inefficient memory-intensive. hand, processing texts simultaneously surpass memory constraints, especially document dataset sizable. Parsing documents smaller batches may provide optimal compromise two scenarios. Batch processing can enhance efficiency aid memory management. default, batch_size parameter set 5. can consider starting default value experimenting different batch sizes find one works best specific use case. can monitor memory usage processing time help make decision. access GPU, might also try larger batch sizes take advantage GPU parallelism. However, cautious set batch size large, can lead --memory errors. Ultimately, choice batch size based balance memory constraints, processing efficiency, specific requirements entity extraction task.","code":"batch_process_results <- get_sentiments_batch(uk_immigration$text, uk_immigration$speaker, tagger_sent, show.text_id = FALSE, batch_size = 2, verbose = TRUE) #> CPU is used. #> Processing batch 1 out of 3... #> Processing batch 2 out of 3... #> Processing batch 3 out of 3... 
print(batch_process_results) #> doc_id sentiment score #> 1: Philip Hollobone POSITIVE 0.8097585 #> 2: Stewart Jackson POSITIVE 0.9990165 #> 3: Philip Hollobone POSITIVE 0.8827485 #> 4: Stewart Jackson NEGATIVE 0.9997155 #> 5: Philip Hollobone POSITIVE 0.8604343"},{"path":"https://davidycliao.github.io/flaiR/articles/highlight_text.html","id":"create-text-with-named-entities","dir":"Articles","previous_headings":"","what":"Create Text with Named Entities","title":"Highlight Entities with Colors","text":" ","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- uk_immigration[30,] tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 12:38:51,404 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , result <- get_entities(uk_immigration$text, tagger = tagger_ner, show.text_id = FALSE ) #> Warning in check_texts_and_ids(texts, doc_ids): doc_ids is NULL. #> Auto-assigning doc_ids."},{"path":"https://davidycliao.github.io/flaiR/articles/highlight_text.html","id":"highlight-text-with-entities","dir":"Articles","previous_headings":"","what":"Highlight Text with Entities","title":"Highlight Entities with Colors","text":"","code":"highlighted_text <- highlight_text(text = uk_immigration$text, entities_mapping = map_entities(result)) highlighted_text"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"why-write-flair-to-access-fair-nlp-in-python","dir":"Articles","previous_headings":"","what":"Why Write flaiR to Access fair NLP in Python?","title":"Quick Start","text":"Python, Flair ( developed Zalando Research) stands notably feature-rich user-friendly NLP framework. Flair NLP provides intuitive interfaces exceptional multilingual support, especially various embedding frameworks like Glove, transformer-based models BERT. Flair also comes equipped pre-trained models context-aware capabilities. 
addition, Flair also establishes dependencies primary NLP Python libraries (gensim, torch, transformer, ); installing {flaiR} subsequently install related NLP packages Python. installing {flaiR} package R, users can seamlessly access Python-based Flair library within R. integration allows use basic Python libraries, like NumPy, well modern NLP deep learning frameworks PyTorch, reticulate interface R environment. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"install-flair-with-using-remotes","dir":"Articles","previous_headings":"","what":"Install flaiR with Using remotes","title":"Quick Start","text":"flaiR built top reticulate package incorporates key functions access core features FlairNLP, returning data tidy clean data.table. installation consists two parts: firstly, install Python 3.8 (available ; opt stable version, currently goes 3.11, avoid pre-release versions). Secondly, install R (version 3.6.3 higher), along RStudio. Additionally, ’ll also need Anaconda assist pip safely stably collecting dependencies setting Python environment. System Requirement: Python (>= 3.10.x) R (>= 4.2.0) RStudio (GUI interface allows users adjust manage Python environment R) Anaconda (highly recommended) first installing loading {flaiR} package, utilizes system environment tool undergoes three evaluation stages automatic installation Flair. Initially, flaiR requires least Python 3 installed device. Python 3 present, able successfully install flaiR R. point, essential check correct version Python installed. recommend installing Python 3.8 slightly higher version, avoid installing pre-release versions. installation consists two parts: First, install Python 3.8 higher, R 3.6.3 higher. Although tested Github Action R 3.6.2, strongly recommend installing R 4.0.0 ensure compatibility R environment Python. first installed, {flaiR} automatically detects whether Python 3.8 higher. , skip automatic installation Python flair NLP. 
case, need mannually install reload {flaiR} . Python 3.8 higher alreadt installed, installer {flaiR} automatically install flair Python NLP global environment. using {reticulate}, {flaiR} typically assume r-reticulate environment default. time, can use py_config() check location environment. Please note flaiR directly install flair NLP Python environment R using. environment can adjusted RStudio navigating Tools -> Global Options -> Python. issues installation, feel free ask Discussion process, observe numerous messages related installation Python environment Python flair module. Notably, flair numerous dependencies, including libraries related transformers (like torch, tokeniser, transformers, gensim, flair, etc). Thus, installation might take time complete. ’s also another scenario consider. {flaiR} unable automatically install Flair PyTorch, attempt force installation . However, attempt fails, ’ll encounter message: “Failed install Flair. {flaiR} requires Flair NLP. Please ensure Flair NLP installed Python manually.” ’re using Apple operating environment, ’s essential point check compatibility M1/M2 chip Python Torch. issues installation, feel free ask Discussion. copy command , generally asked upgrade package. package operates {reticulate}, packages R outdated, RStudio likely display “packages recent versions available.” prompt update. recommend update. ","code":"install.packages(\"remotes\") remotes::install_github(\"davidycliao/flaiR\", force = TRUE) library(flaiR)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"class-and-object-in-r-via-flair","dir":"Articles","previous_headings":"","what":"Class and Object in R via {flaiR}","title":"Quick Start","text":"R users, {flairR} built top {reticulate}, enabling interact directly Python modules R providing seamless support documents R community. 
{flairR} architecture, use simplest S3 method wrap modules methods within modules, allowing R users conveniently access use Python functionalities. main modules methods (functions) wrapped {flairR}: Please note following basic examples derived official Flair NLP Python documentation tutorial. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tag-entities-in-text","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Tag Entities in Text","title":"Quick Start","text":"Let’s run named entity recognition following example sentence: “love Berlin New York. , need make Sentence text, load pre-trained model use predict tags sentence object. print: Use loop print POS tag. ’s important note Python indexed 0. Therefore, R environment, must use seq_along(sentence$get_labels()) - 1. ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('ner') #> 2023-11-29 12:39:09,314 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"Berlin\"/LOC, \"New York\"/LOC] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Span[2:3]: \"Berlin\"'/'LOC' (0.9812) #> 'Span[4:6]: \"New York\"'/'LOC' (0.9957)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tag-part-of-speech-in-text","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Tag Part-of-Speech in Text","title":"Quick Start","text":"use flair/pos-english POS tagging standard models Hugging Face. print: Use loop print pos tag. 
","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('pos') #> 2023-11-29 12:39:09,875 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"I\"/PRP, \"love\"/VBP, \"Berlin\"/NNP, \"and\"/CC, \"New\"/NNP, \"York\"/NNP, \".\"/.] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Token[0]: \"I\"'/'PRP' (1.0) #> 'Token[1]: \"love\"'/'VBP' (1.0) #> 'Token[2]: \"Berlin\"'/'NNP' (0.9999) #> 'Token[3]: \"and\"'/'CC' (1.0) #> 'Token[4]: \"New\"'/'NNP' (1.0) #> 'Token[5]: \"York\"'/'NNP' (1.0) #> 'Token[6]: \".\"'/'.' (1.0)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"detect-sentiment","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Detect Sentiment","title":"Quick Start","text":"Let’s run sentiment analysis sentence determine whether POSITIVE NEGATIVE. can essentially code . 
Just instead loading ‘ner’ model, now load ‘sentiment’ model: ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the flair_nn.classifier_load tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('sentiment') # run sentiment analysis over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → POSITIVE (0.9982)"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"embeddings","dir":"Articles","previous_headings":"Class and Object in R via {flaiR}","what":"Embeddings","title":"Quick Start","text":"Embeddings Words Transformers Let’s use standard BERT model (bert-base-uncased) embed sentence “grass green”. Simply instantate flair_embeddings.TransformerWordEmbeddings()use $embed() sentence object: cause word sentence embedded. can iterate words get embedding like : Embeddings Documents Transformers Sometimes want embedding whole document, individual words. case, use one DocumentEmbeddings classes Flair. Let’s use standard BERT model get embedding entire sentence: Use $embedding method extract entire embedding sentence print embedding follows: Stack Embeddings Flair allows combine embeddings “embedding stacks”. fine-tuning, using combinations embeddings often gives best results! Use StackedEmbeddings class instantiate passing list embeddings wish combine. instance, lets combine classic GloVe embeddings forward backward Flair embeddings. First, instantiate two embeddings wish combine: Now, instantiate StackedEmbeddings class pass list containing two embeddings. R Python list functionality. Let’s create StackedEmbedding object combines GloVe forward/backward Flair embeddings. Next, use $embed() method transform text vectors sentences. Words now embedded using concatenation three different embeddings. 
means resulting embedding vector still single PyTorch vector. ","code":"# attach flaiR in R library(flaiR) # initiate TransformerWordEmbeddings TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') # create a sentence sentence <- flair_data.Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) #> [[1]] #> Sentence[5]: \"The grass is green .\" for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 15 elements of the vector. token_embedding <- sentence$tokens[[1]]$embedding print(head(token_embedding, 15)) } #> Token: Token[0]: \"The\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[1]: \"grass\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[2]: \"is\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[3]: \"green\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) #> Token: Token[4]: \".\" #> tensor([-0.3904, -1.1946, 0.1296, 0.5806, -0.0847, -0.4520, 1.3699, 0.3850, #> -0.6132, -0.3246, -0.9899, -0.6897, 0.2754, -0.5867, 0.2399]) # initiate TransformerWordEmbeddings embedding <- flair_embeddings.TransformerDocumentEmbeddings('bert-base-uncased') #> 2023-11-29 12:39:14,535 Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max # create a sentence sentence <- flair_data.Sentence('The grass is green .') # embed words in sentence 
embedding$embed(sentence) #> [[1]] #> Sentence[5]: \"The grass is green .\" print(head(sentence$embedding, n = 20)) #> tensor([-0.2858, -0.3261, -0.1122, 0.0343, -0.2689, -0.0302, -0.0390, 0.0157, #> -0.2828, 0.1436, 0.0426, -0.2203, -0.0023, -0.5525, 0.1092, -0.0211, #> -0.0151, 0.0724, -0.3034, -0.2250]) # init standard GloVe embedding glove_embedding <- flair_embeddings.WordEmbeddings('glove') # init Flair forward and backwards embeddings flair_embedding_forward <- flair_embeddings.FlairEmbeddings('news-forward') flair_embedding_backward <- flair_embeddings.FlairEmbeddings('news-backward') stacked_embeddings <- flair_embeddings()$StackedEmbeddings( list(glove_embedding, flair_embedding_forward, flair_embedding_backward)) # make a sentence sentence <- flair_data.Sentence('I love Berlin and New York.') # just embed a sentence using the StackedEmbedding as you would with any single embedding. stacked_embeddings$embed(sentence) for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 15 elements of the vector. 
token_embedding <- sentence$tokens[[1]]$embedding print(head(token_embedding, 15)) } #> Token: Token[0]: \"I\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[1]: \"love\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[2]: \"Berlin\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[3]: \"and\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[4]: \"New\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[5]: \"York\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100]) #> Token: Token[6]: \".\" #> tensor([ 0.6197, 0.5665, -0.4658, -1.1890, 0.4460, 0.0660, 0.3191, 0.1468, #> -0.2212, 0.7924, 0.2991, 0.1607, 0.0253, 0.1868, -0.3100])"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"expanded-feats-in-flair","dir":"Articles","previous_headings":"","what":"Expanded Feats in flaiR","title":"Quick Start","text":"enhance efficient utilization social science research, {flairR} extends FlairNLP three principal functions extract features neat format data.table. featured functions, don’t write loops format parsed output ; {flairR} automatically neat format. main features include part--speech tagging, transformer-based sentiment analysis, named entity recognition. 
addition, handle load RAM dealing larger corpus, {flairR} supports batch processing handle texts batches, especially useful dealing large datasets, optimize memory usage performance. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-parts-of-speech-with-flair-models","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Parts-of-Speech with Flair Models","title":"Quick Start","text":"can load pre-trained model \"pos-fast\". pre-trained models, see https://flairnlp.github.io/docs/tutorial-basics/part--speech-tagging#--english. ","code":"texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research and in 1994 Group.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") library(flaiR) tagger_pos <- load_tagger_pos(\"pos-fast\") #> 2023-11-29 12:39:17,586 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD results <- get_pos(texts, doc_ids, tagger_pos) head(results, n = 10) #> doc_id token_id text_id token tag precision #> 1: doc1 0 NA UCD NNP 0.9967 #> 2: doc1 1 NA is VBZ 1.0000 #> 3: doc1 2 NA one CD 0.9993 #> 4: doc1 3 NA of IN 1.0000 #> 5: doc1 4 NA the DT 1.0000 #> 6: doc1 5 NA best JJS 0.9988 #> 7: doc1 6 NA universities NNS 0.9997 #> 8: doc1 7 NA in IN 1.0000 #> 9: doc1 8 NA Ireland NNP 1.0000 #> 10: doc1 9 NA . . 
0.9998"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-entities-with-flair-models","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Entities with Flair Models","title":"Quick Start","text":"Load pretrained model ner. pretrained models, see https://flairnlp.github.io/docs/tutorial-basics/tagging-entities. ","code":"library(flaiR) tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 12:39:18,790 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(texts, doc_ids, tagger_ner) head(results, n = 10) #> doc_id entity tag #> 1: doc1 UCD ORG #> 2: doc1 Ireland LOC #> 3: doc2 UCD ORG #> 4: doc2 Dublin LOC #> 5: doc3 Essex ORG #> 6: doc4 Essex ORG #> 7: doc4 Russell Group ORG #> 8: doc5 TCD ORG #> 9: doc5 Ireland LOC #> 10: doc6 TCD ORG"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"tagging-sentiment","dir":"Articles","previous_headings":"Expanded Feats in flaiR","what":"Tagging Sentiment","title":"Quick Start","text":"Load pretrained model “sentiment”. pre-trained models “sentiment”, “sentiment-fast”, “de-offensive-language” currently available. pre-trained models, see https://flairnlp.github.io/docs/tutorial-basics/tagging-sentiment. 
","code":"library(flaiR) tagger_sent <- load_tagger_sentiments(\"sentiment\") results <- get_sentiments(texts, doc_ids, tagger_sent) head(results, n = 10) #> doc_id sentiment score #> 1: doc1 POSITIVE 0.9970598 #> 2: doc2 NEGATIVE 0.8472329 #> 3: doc3 POSITIVE 0.9928006 #> 4: doc4 POSITIVE 0.9901404 #> 5: doc5 POSITIVE 0.9952670 #> 6: doc6 POSITIVE 0.9291795"},{"path":"https://davidycliao.github.io/flaiR/articles/quickstart.html","id":"how-to-contribute","dir":"Articles","previous_headings":"","what":"How to Contribute","title":"Quick Start","text":"R developers want contribute {flaiR} welcome – {flaiR} open source project. warmly invite R users share similar interests join contributing package. Please feel free shoot email collaborate task. Contributions – whether comments, code suggestions, tutorial examples, forking repository – greatly appreciated. Please note flaiR released Contributor Code Conduct. contributing project, agree abide terms. primary communication channel R users can found . Please feel free share insights Discussion page report issues related R interface Issue section.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"the-overview-of-embedding-in-flair-nlp","dir":"Articles","previous_headings":"","what":"The Overview of Embedding in Flair NLP","title":"WordEmbeddings Supported in Flair NLP","text":"word embedding classes inherit TokenEmbeddings class call embed() method embed text. cases using Flair, various complex embedding processes hidden behind interface. Users simply need instantiate necessary embedding class call embed() embed text. 
types embeddings currently supported FlairNLP: ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"byte-pair-embeddings","dir":"Articles","previous_headings":"","what":"Byte Pair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Please note ihis document R conversion Flair NLP document implemented Python. BytePairEmbeddings word embeddings precomputed subword-level. means able embed word splitting words subwords looking embeddings. BytePairEmbeddings proposed computed Heinzerling Strube (2018) found offer nearly accuracy word embeddings, fraction model size. great choice want train small models. initialize language code (275 languages supported), number ‘syllables’ (one ) number dimensions (one 50, 100, 200 300). following initializes uses byte pair embeddings English: information can found byte pair embeddings web page. BytePairEmbeddings also multilingual model capable embedding word language. can instantiate : can also load custom BytePairEmbeddings specifying path model_file_path embedding_file_path arguments. correspond respectively SentencePiece model file embedding file (Word2Vec plain text GenSim binary). example:","code":"library(flaiR) ## flaiR: An R Wrapper for Accessing Flair NLP 0.13.0 # initialize embedding BytePairEmbeddings <- flair_embeddings()$BytePairEmbeddings embedding <- BytePairEmbeddings('en') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" embedding <- BytePairEmbeddings('multi')"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"flair-embeddings","dir":"Articles","previous_headings":"","what":"Flair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"following example manual translated R Flair NLP Zalando Research. Flair, use embedding quite straightforward. 
’s example code snippet use Flair’s contextual string embeddings: Source: https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md#flair-embeddings , want load embeddings German forward LM model, instantiate method follows: want load embeddings Bulgarian backward LM model, instantiate method follows: ","code":"library(flaiR) FlairEmbeddings <- flair_embeddings()$FlairEmbeddings # init embedding flair_embedding_forward <- FlairEmbeddings('news-forward') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence flair_embedding_forward$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" flair_de_forward <- FlairEmbeddings('de-forward') flair_bg_backward <- FlairEmbeddings('bg-backward')"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"recommended-flair-usage-in-flair-in-r","dir":"Articles","previous_headings":"","what":"Recommended Flair Usage in {flaiR} in R","title":"WordEmbeddings Supported in Flair NLP","text":"recommend combining forward backward Flair embeddings. Depending task, also recommend adding standard word embeddings mix. , recommended StackedEmbedding English tasks : ’s ! Now just use embedding like embeddings, .e. call embed() method sentences. Words now embedded using concatenation three different embeddings. combination often gives state---art accuracy. 
","code":"FlairEmbeddings <- flair_embeddings()$FlairEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings StackedEmbeddings <- flair_embeddings()$StackedEmbeddings # create a StackedEmbedding object that combines glove and forward/backward flair embeddings stacked_embeddings <- StackedEmbeddings(list(WordEmbeddings(\"glove\"), FlairEmbeddings(\"news-forward\"), FlairEmbeddings(\"news-backward\"))) # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # just embed a sentence using the StackedEmbedding as you would with any single embedding. stacked_embeddings$embed(sentence) # now check out the embedded tokens. # Note that Python is indexing from 0. In an R for loop, using seq_along(sentence) - 1 achieves the same effect. for (i in seq_along(sentence)-1) { print(sentence[i]) print(sentence[i]$embedding) } ## Token[0]: \"The\" ## tensor([-0.0382, -0.2449, 0.7281, ..., -0.0065, -0.0053, 0.0090]) ## Token[1]: \"grass\" ## tensor([-0.8135, 0.9404, -0.2405, ..., 0.0354, -0.0255, -0.0143]) ## Token[2]: \"is\" ## tensor([-5.4264e-01, 4.1476e-01, 1.0322e+00, ..., -5.3691e-04, ## -9.6750e-03, -2.7541e-02]) ## Token[3]: \"green\" ## tensor([-0.6791, 0.3491, -0.2398, ..., -0.0007, -0.1333, 0.0161]) ## Token[4]: \".\" ## tensor([-0.3398, 0.2094, 0.4635, ..., 0.0005, -0.0177, 0.0032])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"pooled-flair-embeddings","dir":"Articles","previous_headings":"","what":"Pooled Flair Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"also developed pooled variant FlairEmbeddings. embeddings differ constantly evolve time, even prediction time (.e. training complete). means words sentence two different points time may different embeddings. PooledFlairEmbeddings manage ‘global’ representation distinct word using pooling operation past occurences. details works may found Akbik et al. (2019). 
can instantiate use PooledFlairEmbeddings like embedding: Note get best results PooledFlairEmbeddings ineffective memory-wise since keep past embeddings words memory. many cases, regular FlairEmbeddings nearly good much lower memory requirements. ","code":"# initiate embedding from Flair NLP PooledFlairEmbeddings <- flair_embeddings()$PooledFlairEmbeddings flair_embedding_forward <- PooledFlairEmbeddings('news-forward') # create a sentence object sentence <- Sentence('The grass is green .') # embed words in sentence flair_embedding_forward$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"transformer-embeddings","dir":"Articles","previous_headings":"","what":"Transformer Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Please note content examples section extensively revised TransformerWordEmbeddings official documentation. Flair supports various Transformer-based architectures like BERT XLNet HuggingFace, two classes TransformerWordEmbeddings (embed words tokens) TransformerDocumentEmbeddings (embed documents). 
","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"embeddings-words-with-transformers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Embeddings Words with Transformers","title":"WordEmbeddings Supported in Flair NLP","text":"instance, load standard BERT transformer model, : instead want use RoBERTa, : {flaiR} interacts Flair NLP (Zalando Research), allowing use pre-trained models HuggingFace , can search models use.","code":"library(flaiR) # initiate embedding and load BERT model from HugginFaces TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') # create a sentence Sentence <- flair_data()$Sentence sentence = Sentence('The grass is green .') # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('roberta-base') sentence <- Sentence('The grass is green .') embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"embedding-documents-with-transformers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Embedding Documents with Transformers","title":"WordEmbeddings Supported in Flair NLP","text":"embed whole sentence one (instead word sentence), simply use TransformerDocumentEmbeddings instead:","code":"TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings embedding <- TransformerDocumentEmbeddings('roberta-base') sentence <- Sentence('The grass is green .') embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"arguments","dir":"Articles","previous_headings":"Transformer 
Embeddings","what":"Arguments","title":"WordEmbeddings Supported in Flair NLP","text":"several options can set init TransformerWordEmbeddings TransformerDocumentEmbeddings classes:","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"layers","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Layers","title":"WordEmbeddings Supported in Flair NLP","text":"layers argument controls transformer layers used embedding. set value ‘-1,-2,-3,-4’, top 4 layers used make embedding. set ‘-1’, last layer used. set “”, layers used. affects length embedding, since layers just concatenated. ’s example might done: can directly import torch reticulate since already installed flair dependency installed flair Python. Notice L numbers list? ensures R treats numbers integers. ’re generating numbers dynamically (e.g., computation), might want ensure integers attempting create tensor. .e. size embedding increases mode layers use (layer_mean set False, otherwise length always ).","code":"Sentence <- flair_data()$Sentence TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings sentence = Sentence('The grass is green.') # use only last layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers='-1', layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([768]) sentence$clear_embeddings() sentence <- Sentence('The grass is green.') # use only last layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers = \"-1\", layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([768]) sentence$clear_embeddings() # use last two layers embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers='-1,-2', layer_mean = FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" 
print(sentence[0]$embedding$size()) ## torch.Size([1536]) sentence$clear_embeddings() # use ALL layers embeddings = TransformerWordEmbeddings('bert-base-uncased', layers='all', layer_mean=FALSE) embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([9984]) # You can directly import torch from reticulate since it has already been installed through the flair dependency when you installed flair in Python. torch <- reticulate::import('torch') # Attempting to create a tensor with integer dimensions torch$Size(list(768L)) ## torch.Size([768]) torch$Size(list(1536L)) ## torch.Size([1536]) torch$Size(list(9984L)) ## torch.Size([9984])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"pooling-operation","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Pooling Operation","title":"WordEmbeddings Supported in Flair NLP","text":"Transformer-based models use subword tokenization. E.g. following token puppeteer tokenized subwords: pupp, ##ete ##er. implement different pooling operations subwords generate final token representation: first: embedding first subword used last: embedding last subword used first_last: embeddings first last subwords concatenated used mean: torch.mean subword embeddings calculated used can choose one use passing constructor:","code":"# use first and last subtoken for each word embeddings = TransformerWordEmbeddings('bert-base-uncased', subtoken_pooling='first_last') embeddings$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green.\" print(sentence[0]$embedding$size()) ## torch.Size([9984])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"layer-mean","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Layer Mean","title":"WordEmbeddings Supported in Flair NLP","text":"Transformer-based models certain number layers. 
default, layers select concatenated explained . Alternatively, can set layer_mean=True mean selected layers. resulting vector always dimensionality single layer:","code":"# initiate embedding from transformer. This model will be downloaded from Flair NLP huggingface. embeddings <- TransformerWordEmbeddings('bert-base-uncased', layers=\"all\", layer_mean=TRUE) # create a sentence object sentence = Sentence(\"The Oktoberfest is the world's largest Volksfest .\") # embed words in sentence embedding$embed(sentence) ## [[1]] ## Sentence[9]: \"The Oktoberfest is the world's largest Volksfest .\""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"fine-tuneable-or-not","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Fine-tuneable or Not","title":"WordEmbeddings Supported in Flair NLP","text":"’s example might done: setups, may wish fine-tune transformer embeddings. case, set fine_tune=True init method. fine-tuning, also use topmost layer, best set layers='-1'. 
print tensor now gradient function can fine-tuned use training routine.","code":"# use first and last subtoken for each word TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embeddings <- TransformerWordEmbeddings('bert-base-uncased', fine_tune=TRUE, layers='-1') embeddings$embed(sentence) ## [[1]] ## Sentence[9]: \"The Oktoberfest is the world's largest Volksfest .\" print(sentence[0]$embedding) ## tensor([-6.5871e-01, 1.0410e-01, 3.4632e-01, -3.3775e-01, -2.1013e-01, ## -1.3037e-02, 5.1998e-01, 1.6574e+00, -5.2520e-02, -4.8633e-02, ## -7.8968e-01, -9.5547e-01, -1.9723e-01, 9.4999e-01, -1.0336e+00, ## 8.6669e-02, 9.8103e-02, 5.6511e-02, 3.1075e-02, 2.4157e-01, ## -1.1427e-01, -2.3692e-01, -2.0700e-01, 7.7985e-01, 2.5459e-01, ## -5.0824e-03, -2.4110e-01, 2.2436e-01, -7.3249e-02, -8.1094e-01, ## -1.8778e-01, 2.1219e-01, -5.9514e-01, 6.3129e-02, -4.8880e-01, ## -3.2300e-02, -1.9123e-02, -1.0991e-01, -1.5603e-02, 4.3068e-01, ## -1.7968e-01, -5.4499e-01, 7.0608e-01, -4.0512e-01, 1.7761e-01, ## -8.5820e-01, 2.3438e-02, -1.4981e-01, -9.0368e-01, -2.1097e-01, ## -3.3535e-01, 1.4919e-01, -7.4522e-03, 1.0239e+00, -6.1776e-02, ## 3.3913e-01, 8.5811e-02, 6.9401e-01, -7.7483e-02, 3.1484e-01, ## -4.3921e-01, 1.2933e+00, 5.7990e-03, -7.0992e-01, 2.7525e-01, ## 8.8792e-01, 2.6309e-03, 1.3640e+00, 5.6885e-01, -2.4904e-01, ## -4.5157e-02, -1.7575e-01, -3.4729e-01, 5.8363e-02, -2.0346e-01, ## -1.2505e+00, -3.0592e-01, -3.6104e-02, -2.4066e-01, -5.1250e-01, ## 2.6930e-01, 1.4068e-01, 3.4056e-01, 7.3297e-01, 2.6848e-01, ## 2.4304e-01, -9.4885e-01, -9.0367e-01, -1.3184e-01, 6.7348e-01, ## -3.2995e-02, 4.7660e-01, -7.1623e-03, -3.4141e-01, 6.8473e-01, ## -4.4869e-01, -4.9831e-01, -8.0143e-01, 1.4073e+00, 5.3251e-01, ## 2.4643e-01, -4.2528e-01, 9.1615e-02, 6.4495e-01, 1.7931e-01, ## -2.1473e-01, 1.5447e-01, -3.2978e-01, 1.0799e-01, -1.9402e+00, ## -5.0380e-01, -2.7636e-01, -1.1227e-01, 1.1576e-01, 2.5885e-01, ## -1.7916e-01, 6.6166e-01, -9.6098e-01, -5.1242e-01, 
-3.5424e-01, ## 2.1383e-01, 6.6456e-01, 2.5498e-01, 3.7250e-01, -1.1821e+00, ## -4.9551e-01, -2.0858e-01, 1.1511e+00, -1.0366e-02, -1.0682e+00, ## 3.7277e-01, 6.4048e-01, 2.3308e-01, -9.3824e-01, 9.5014e-02, ## 5.7904e-01, 6.3969e-01, 8.2359e-02, -1.4075e-01, 3.0107e-01, ## 3.5823e-03, -4.4684e-01, -2.6913e+00, -3.3933e-01, 2.8733e-03, ## -1.3639e-01, -7.1054e-01, -1.1048e+00, 2.2374e-01, 1.1830e-01, ## 4.8416e-01, -2.9110e-01, -6.7650e-01, 2.3202e-01, -1.0123e-01, ## -1.9174e-01, 4.9960e-02, 5.2067e-01, 1.3272e+00, 6.8250e-01, ## 5.5332e-01, -1.0886e+00, 4.5160e-01, -1.5010e-01, -9.8074e-01, ## 8.5111e-02, 1.6498e-01, 6.6032e-01, 1.0815e-02, 1.8952e-01, ## -5.6608e-01, -1.3743e-02, 9.1171e-01, 2.7812e-01, 2.9551e-01, ## -3.5637e-01, 3.2030e-01, 5.6738e-01, -1.5707e-01, 3.5326e-01, ## -4.7747e-01, 7.8646e-01, 1.3765e-01, 2.2440e-01, 4.2422e-01, ## -2.6504e-01, 2.2014e-02, -6.7154e-01, -8.7999e-02, 1.4284e-01, ## 4.0983e-01, 1.0933e-02, -1.0704e+00, -1.9350e-01, 6.0051e-01, ## 5.0545e-02, 1.1434e-02, -8.0243e-01, -6.6871e-01, 5.3953e-01, ## -5.9856e-01, -1.6915e-01, -3.5307e-01, 4.4568e-01, -7.2761e-01, ## 1.1629e+00, -3.1553e-01, -7.9747e-01, -2.0582e-01, 3.7320e-01, ## 5.9379e-01, -3.1898e-01, -1.6932e-01, -6.2492e-01, 5.7047e-01, ## -2.9779e-01, -5.9106e-01, 8.5436e-02, -2.1839e-01, -2.2214e-01, ## 7.9233e-01, 8.0537e-01, -5.9785e-01, 4.0474e-01, 3.9265e-01, ## 5.8169e-01, -5.2506e-01, 6.9786e-01, 1.1163e-01, 8.7434e-02, ## 1.7549e-01, 9.1438e-02, 5.8816e-01, 6.4338e-01, -2.7138e-01, ## -5.3449e-01, -1.0168e+00, -5.1337e-02, 3.0099e-01, -7.6695e-02, ## -2.1126e-01, 5.8143e-01, 1.3599e-01, 6.2759e-01, -6.2810e-01, ## 5.9966e-01, 3.5836e-01, -3.0707e-02, 1.5563e-01, -1.4016e-01, ## -2.0155e-01, -1.3755e+00, -9.1877e-02, -6.9892e-01, 7.9438e-02, ## -4.2926e-01, 3.7988e-01, 7.6741e-01, 5.3094e-01, 8.5981e-01, ## 4.4184e-02, -6.3507e-01, 3.9587e-01, -3.6635e-01, -7.0770e-01, ## 8.3683e-04, -3.0055e-01, 2.1360e-01, -4.1649e-01, 6.9457e-01, ## -6.2715e-01, -5.1101e-01, 
3.0331e-01, -2.3804e+00, -1.0566e-02, ## -9.4488e-01, 4.3318e-02, 2.4188e-01, 1.9204e-02, 1.5712e-03, ## -3.0374e-01, 3.1933e-01, -7.4432e-01, 1.4599e-01, -5.2102e-01, ## -5.2269e-01, 1.3274e-01, -2.8936e-01, 4.1706e-02, 2.6143e-01, ## -4.4796e-01, 7.3136e-01, 6.3893e-02, 4.7398e-01, -5.1062e-01, ## -1.3705e-01, 2.0763e-01, -3.9115e-01, 2.8822e-01, -3.5283e-01, ## 3.4881e-02, -3.3602e-01, 1.7210e-01, 1.3537e-02, -5.3036e-01, ## 1.2847e-01, -4.5576e-01, -3.7251e-01, -3.2254e+00, -3.1650e-01, ## -2.6144e-01, -9.4983e-02, 2.7651e-02, -2.3750e-01, 3.1001e-01, ## 1.1428e-01, -1.2870e-01, -4.7496e-01, 4.4594e-01, -3.6138e-01, ## -3.1009e-01, -9.9612e-02, 5.3967e-01, 1.2840e-02, 1.4507e-01, ## -2.5181e-01, 1.9310e-01, 4.1073e-01, 5.9776e-01, -2.5585e-01, ## 5.7184e-02, -5.1505e-01, -6.8709e-02, 4.7767e-01, -1.2079e-01, ## -5.0894e-01, -9.2884e-01, 7.8471e-01, 2.0216e-01, 4.3242e-01, ## 3.2803e-01, -1.0122e-01, 3.3530e-01, -1.2183e-01, -5.5060e-01, ## 3.5427e-01, 7.4558e-02, -3.1411e-01, -1.7512e-01, 2.2485e-01, ## 4.2295e-01, 7.7110e-02, 1.8063e+00, 7.6636e-03, -1.1082e-02, ## -2.8604e-02, 7.7143e-02, 8.2344e-02, 8.0271e-02, -1.1858e+00, ## 2.0523e-01, 3.4053e-01, 2.0424e-01, -2.0574e-02, 3.0466e-01, ## -2.1858e-01, 6.3737e-01, -5.6264e-01, 1.4153e-01, 2.4319e-01, ## -5.6688e-01, 7.2374e-02, -2.9329e-01, 4.6562e-02, 1.8977e-01, ## 2.4977e-01, 9.1892e-01, 1.1346e-01, 3.8588e-01, -3.5543e-01, ## -1.3380e+00, -8.5644e-01, -5.5443e-01, -7.2317e-01, -2.9225e-01, ## -1.4389e-01, 6.9714e-01, -5.9852e-01, -6.8932e-01, -6.0952e-01, ## 1.8234e-01, -7.5841e-02, 3.6445e-01, -3.8286e-01, 2.6545e-01, ## -2.6569e-01, -4.9999e-01, -3.8354e-01, -2.2809e-01, 8.8314e-01, ## 2.9041e-01, 5.4803e-01, -1.0668e+00, 4.7406e-01, 7.8804e-02, ## -1.1559e+00, -3.0649e-01, 6.0479e-02, -7.1279e-01, -4.3336e-01, ## -8.2402e-04, -1.0236e-01, 3.5497e-01, 1.8665e-01, 1.2045e-01, ## 1.2071e-01, 6.2911e-01, 3.1421e-01, -2.1635e-01, -8.9416e-01, ## 6.6361e-01, -9.2981e-01, 6.9193e-01, -2.5403e-01, 
-2.5835e-02, ## 1.2342e+00, -6.5908e-01, 7.5741e-01, 2.9014e-01, 3.0760e-01, ## -1.0249e+00, -2.7089e-01, 4.6132e-01, 6.1510e-02, 2.5385e-01, ## -5.2075e-01, -3.5107e-01, 3.3694e-01, -2.5047e-01, -2.7855e-01, ## 2.0280e-01, -1.5703e-01, 4.1619e-02, 1.4451e-01, -1.6666e-01, ## -3.0519e-01, -9.4271e-02, -1.7083e-01, 5.2454e-01, 2.4524e-01, ## 2.0732e-01, 3.7948e-01, 9.7359e-02, -3.2452e-02, 5.5792e-01, ## -2.4703e-01, 5.2864e-01, 5.6343e-01, -1.9198e-01, -8.3370e-02, ## -6.5377e-01, -5.4104e-01, 1.8289e-01, -4.9146e-01, 6.6422e-01, ## -5.2808e-01, -1.4797e-01, -4.5527e-02, -3.9593e-01, 1.2841e-01, ## -7.8591e-01, -3.7564e-02, 6.1912e-01, 3.2458e-01, 3.7858e-01, ## 1.8744e-01, -5.0738e-01, 8.0222e-02, -3.1468e-02, -1.5145e-01, ## 1.6657e-01, -5.2251e-01, -2.5940e-01, -3.8505e-01, -7.4941e-02, ## 3.9530e-01, -2.1742e-01, -1.7113e-01, -5.2492e-01, -7.7781e-02, ## -6.9759e-01, 2.2570e-01, -1.2935e-01, 3.0750e-01, -1.3554e-01, ## 6.0182e-02, -1.1479e-01, 4.7263e-01, 3.7957e-01, 8.9523e-01, ## -3.6411e-01, -6.6355e-01, -7.6647e-01, -1.4479e+00, -5.2238e-01, ## 2.3337e-02, -4.5736e-01, 5.9981e-01, 6.8700e-01, 4.2190e-02, ## 1.5894e-01, 2.0744e-02, 9.2334e-02, -7.2747e-01, 1.2388e-01, ## -4.7257e-01, -2.9889e-01, 4.8955e-01, -9.1618e-01, -1.9497e-01, ## -1.4157e-01, -1.7472e-01, 4.9251e-02, -2.2263e-01, 6.1700e-01, ## -2.4691e-01, 6.0937e-01, 3.6134e-01, 4.3398e-01, -2.7615e-01, ## -2.6582e-01, -1.3132e-01, -4.4156e-02, 5.3686e-01, 1.2956e-01, ## -6.4218e-01, -1.5820e-01, -1.0249e+00, -9.3593e-03, -3.5060e-01, ## 3.6650e-01, 4.9503e-01, 7.4325e-01, 9.6526e-02, 4.3141e-01, ## 3.9512e-02, -7.0726e-02, 6.2696e-01, 1.3066e-01, 1.0243e-01, ## 3.3839e-01, 1.9224e-01, 4.8800e-01, -2.1052e-01, 3.9523e-02, ## 7.7567e-01, -1.2005e-01, -1.1262e-01, 8.7001e-02, 2.7273e-01, ## -4.6831e-02, -2.4966e-01, -3.2083e-01, -2.6389e-01, 1.6225e-01, ## 2.8800e-01, -1.0799e-01, -1.0841e-01, 6.6873e-01, 3.4369e-01, ## 5.8675e-01, 9.2084e-01, -1.8131e-01, 5.6372e-02, -5.7125e-01, ## 3.1048e-01, 
3.1630e-02, 1.2097e+00, 4.4492e-01, -2.3792e-01, ## -9.9342e-02, -5.0657e-01, -3.1333e-02, 1.5045e-01, 3.1493e-01, ## -4.1287e-01, -1.8618e-01, -4.2640e-02, 1.8266e+00, 4.8565e-01, ## 6.3892e-01, -2.9107e-01, -3.2557e-01, 1.1088e-01, -1.3212e+00, ## 7.1113e-01, 2.3618e-01, 2.1473e-01, 1.6360e-01, -5.2535e-01, ## 3.4322e-01, 9.0777e-01, 1.8697e-01, -3.0532e-01, 2.7574e-01, ## 5.1451e-01, -2.6733e-01, 2.4207e-01, -3.3234e-01, 6.3520e-01, ## 2.5884e-01, -5.7923e-01, 3.0204e-01, 4.1745e-02, 4.7539e-02, ## -6.7038e-01, 4.6699e-01, -1.6951e-01, -1.5161e-01, -1.2805e-01, ## -4.3990e-01, 1.0177e+00, -3.8138e-01, 4.3114e-01, -7.5447e-03, ## 2.7385e-01, 4.6314e-01, -8.6565e-02, -7.9458e-01, 1.4369e-02, ## 2.6016e-01, 9.2568e-03, 9.3968e-01, 7.9679e-01, 3.3144e-03, ## -5.6733e-01, 2.9052e-01, -9.5894e-02, 1.8630e-01, 1.4475e-01, ## 1.8935e-01, 5.1735e-01, -1.2187e+00, -1.3298e-01, -4.3538e-01, ## -6.5398e-01, -2.9286e-01, 1.3199e-01, 3.9075e-01, 9.0172e-01, ## 9.9439e-01, 6.2783e-01, -1.6103e-01, 1.4155e-03, -9.1476e-01, ## 7.7760e-01, 1.2264e+00, 8.1482e-02, 6.6732e-01, -7.4576e-01, ## -1.0470e-01, -6.7781e-01, 8.0405e-01, 3.6676e-02, 3.6362e-01, ## 4.4962e-01, 8.9600e-01, -1.8276e+00, 6.7828e-01, -9.4121e-03, ## 3.8665e-01, -2.2149e-02, 7.4756e-02, 3.7438e-01, -1.2696e-01, ## -5.3397e-01, -3.5782e-01, 3.0400e-01, 7.7663e-01, -1.9122e-01, ## -1.3041e-01, -2.1522e-01, 1.1086e+00, 1.0237e+00, -4.7554e-02, ## -3.9538e-01, 1.1568e+00, -4.2549e-01, -2.5641e-02, 2.1993e-01, ## -4.7488e-01, -7.7624e-02, -5.5211e-01, -5.3169e-01, -5.3790e-02, ## -6.0536e-01, 4.2789e-01, -3.8606e-01, 9.8630e-01, 4.3331e-01, ## 4.8414e-01, -1.3519e-01, -6.5505e-01, -2.2913e-01, -3.1254e-01, ## 1.2920e-01, -7.7762e-02, -3.1123e-01, 8.2576e-01, 8.6486e-01, ## -3.4766e-01, -3.8491e-01, 3.5731e-02, 3.7518e-01, -3.7511e-01, ## 5.2371e-01, -7.9721e-01, 3.3401e-01, 8.3976e-01, -3.2525e-01, ## -3.0268e-01, -1.3558e-01, 2.2812e-01, 1.5632e-01, 3.1584e-01, ## 9.3902e-02, -3.8647e-01, -1.0177e-01, -2.8833e-01, 
3.6028e-01, ## 2.2565e-01, -1.5595e-01, -4.4974e-01, -5.0904e-01, 4.5058e-01, ## 7.9031e-01, 2.7041e-01, -3.6712e-01, -3.9090e-01, 2.3358e-01, ## 1.2162e+00, -1.1371e+00, -8.2702e-01, -9.2748e-02, 5.8958e-01, ## 4.4429e-02, -2.3344e-01, -5.6492e-01, 4.9406e-01, -4.0302e-01, ## 5.0951e-01, -1.6740e-01, -4.0176e+00, -8.2092e-01, -3.9132e-01, ## -2.9754e-01, -2.6798e-01, -2.5174e-01, 6.6282e-01, -5.7532e-02, ## 7.7360e-01, 2.5238e-01, 2.5733e-02, 1.7694e-01, 9.4648e-02, ## 2.6886e-01, 9.3711e-01, -8.3929e-02])"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"models","dir":"Articles","previous_headings":"Transformer Embeddings","what":"Models","title":"WordEmbeddings Supported in Flair NLP","text":"Please look awesome HuggingFace supported pre-trained models! ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"classic-word-embeddings","dir":"Articles","previous_headings":"","what":"Classic Word Embeddings","title":"WordEmbeddings Supported in Flair NLP","text":"Classic word embeddings static word-level, meaning distinct word gets exactly one pre-computed embedding. embeddings fall class, including popular GloVe Komninos embeddings. Simply instantiate WordEmbeddings class pass string identifier embedding wish load. , want use GloVe embeddings, pass string ‘glove’ constructor: Now, create example sentence call embedding’s embed() method. can also pass list sentences method since embedding types make use batching increase speed. prints tokens embeddings. GloVe embeddings Pytorch vectors dimensionality 100. choose pre-trained embeddings load passing appropriate id string constructor WordEmbeddings class. Typically, use two-letter language code init embedding, ‘en’ English ‘de’ German . default, initialize FastText embeddings trained Wikipedia. can also always use FastText embeddings Web crawls, instantiating ‘-crawl’. ‘de-crawl’ use embeddings trained German web crawls. 
English, provide options, can choose instantiating ‘en-glove’, ‘en-extvec’ .","code":"library(flaiR) # initiate embedding with glove WordEmbeddings <- flair_embeddings()$WordEmbeddings glove_embedding <- WordEmbeddings('glove') library(flaiR) # initiate a sentence object Sentence <- flair_data()$Sentence # create sentence object. sentence = Sentence('The grass is green .') # embed a sentence using glove. glove_embedding$embed(sentence) ## [[1]] ## Sentence[5]: \"The grass is green .\" # view embedded tokens. for (token in seq_along(sentence)-1) { print(sentence[token]) print(sentence[token]$embedding$numpy()) } ## Token[0]: \"The\" ## [1] -0.038194 -0.244870 0.728120 -0.399610 0.083172 0.043953 ## [7] -0.391410 0.334400 -0.575450 0.087459 0.287870 -0.067310 ## [13] 0.309060 -0.263840 -0.132310 -0.207570 0.333950 -0.338480 ## [19] -0.317430 -0.483360 0.146400 -0.373040 0.345770 0.052041 ## [25] 0.449460 -0.469710 0.026280 -0.541550 -0.155180 -0.141070 ## [31] -0.039722 0.282770 0.143930 0.234640 -0.310210 0.086173 ## [37] 0.203970 0.526240 0.171640 -0.082378 -0.717870 -0.415310 ## [43] 0.203350 -0.127630 0.413670 0.551870 0.579080 -0.334770 ## [49] -0.365590 -0.548570 -0.062892 0.265840 0.302050 0.997750 ## [55] -0.804810 -3.024300 0.012540 -0.369420 2.216700 0.722010 ## [61] -0.249780 0.921360 0.034514 0.467450 1.107900 -0.193580 ## [67] -0.074575 0.233530 -0.052062 -0.220440 0.057162 -0.158060 ## [73] -0.307980 -0.416250 0.379720 0.150060 -0.532120 -0.205500 ## [79] -1.252600 0.071624 0.705650 0.497440 -0.420630 0.261480 ## [85] -1.538000 -0.302230 -0.073438 -0.283120 0.371040 -0.252170 ## [91] 0.016215 -0.017099 -0.389840 0.874240 -0.725690 -0.510580 ## [97] -0.520280 -0.145900 0.827800 0.270620 ## Token[1]: \"grass\" ## [1] -0.8135300 0.9404200 -0.2404800 -0.1350100 0.0556780 0.3362500 ## [7] 0.0802090 -0.1014800 -0.5477600 -0.3536500 0.0733820 0.2586800 ## [13] 0.1986600 -0.1432800 0.2507000 0.4281400 0.1949800 0.5345600 ## [19] 0.7424100 0.0578160 -0.3178100 
0.9435900 0.8145000 -0.0823750 ## [25] 0.6165800 0.7284400 -0.3262300 -1.3641000 0.1232000 0.5372800 ## [31] -0.5122800 0.0245900 1.0822001 -0.2295900 0.6038500 0.5541500 ## [37] -0.9609900 0.4803300 0.0022260 0.5591300 -0.1636500 -0.8468100 ## [43] 0.0740790 -0.6215700 0.0259670 -0.5162100 -0.0524620 -0.1417700 ## [49] -0.0161230 -0.4971900 -0.5534500 -0.4037100 0.5095600 1.0276000 ## [55] -0.0840000 -1.1179000 0.3225700 0.4928100 0.9487600 0.2040300 ## [61] 0.5388300 0.8397200 -0.0688830 0.3136100 1.0450000 -0.2266900 ## [67] -0.0896010 -0.6427100 0.6442900 -1.1001000 -0.0095814 0.2668200 ## [73] -0.3230200 -0.6065200 0.0479150 -0.1663700 0.8571200 0.2335500 ## [79] 0.2539500 1.2546000 0.5471600 -0.1979600 -0.7186300 0.2076000 ## [85] -0.2587500 -0.3649900 0.0834360 0.6931700 0.1573700 1.0931000 ## [91] 0.0912950 -1.3773000 -0.2717000 0.7070800 0.1872000 -0.3307200 ## [97] -0.2835900 0.1029600 1.2228000 0.8374100 ## Token[2]: \"is\" ## [1] -0.5426400 0.4147600 1.0322000 -0.4024400 0.4669100 0.2181600 ## [7] -0.0748640 0.4733200 0.0809960 -0.2207900 -0.1280800 -0.1144000 ## [13] 0.5089100 0.1156800 0.0282110 -0.3628000 0.4382300 0.0475110 ## [19] 0.2028200 0.4985700 -0.1006800 0.1326900 0.1697200 0.1165300 ## [25] 0.3135500 0.2571300 0.0927830 -0.5682600 -0.5297500 -0.0514560 ## [31] -0.6732600 0.9253300 0.2693000 0.2273400 0.6636500 0.2622100 ## [37] 0.1971900 0.2609000 0.1877400 -0.3454000 -0.4263500 0.1397500 ## [43] 0.5633800 -0.5690700 0.1239800 -0.1289400 0.7248400 -0.2610500 ## [49] -0.2631400 -0.4360500 0.0789080 -0.8414600 0.5159500 1.3997000 ## [55] -0.7646000 -3.1452999 -0.2920200 -0.3124700 1.5129000 0.5243500 ## [61] 0.2145600 0.4245200 -0.0884110 -0.1780500 1.1876000 0.1057900 ## [67] 0.7657100 0.2191400 0.3582400 -0.1163600 0.0932610 -0.6248300 ## [73] -0.2189800 0.2179600 0.7405600 -0.4373500 0.1434300 0.1471900 ## [79] -1.1605000 -0.0505080 0.1267700 -0.0143950 -0.9867600 -0.0912970 ## [85] -1.2054000 -0.1197400 0.0478470 -0.5400100 0.5245700 
-0.7096300 ## [91] -0.3252800 -0.1346000 -0.4131400 0.3343500 -0.0072412 0.3225300 ## [97] -0.0442190 -1.2969000 0.7621700 0.4634900 ## Token[3]: \"green\" ## [1] -0.67907000 0.34908000 -0.23984000 -0.99651998 0.73782003 ## [6] -0.00065911 0.28009999 0.01728700 -0.36063001 0.03695500 ## [11] -0.40395001 0.02409200 0.28957999 0.40496999 0.69992000 ## [16] 0.25268999 0.80350000 0.04937000 0.15561999 -0.00632860 ## [21] -0.29414001 0.14727999 0.18977000 -0.51791000 0.36985999 ## [26] 0.74581999 0.08268900 -0.72601002 -0.40939000 -0.09782200 ## [31] -0.14095999 0.71121001 0.61932999 -0.25014001 0.42250001 ## [36] 0.48458001 -0.51915002 0.77125001 0.36684999 0.49652001 ## [41] -0.04129800 -1.46829998 0.20038000 0.18591000 0.04986000 ## [46] -0.17523000 -0.35528001 0.94152999 -0.11898000 -0.51902997 ## [51] -0.01188700 -0.39186001 -0.17478999 0.93450999 -0.58930999 ## [56] -2.77010012 0.34522000 0.86532998 1.08080006 -0.10291000 ## [61] -0.09122000 0.55092001 -0.39473000 0.53675997 1.03830004 ## [66] -0.40658000 0.24590001 -0.26797000 -0.26036000 -0.14150999 ## [71] -0.12022000 0.16234000 -0.74320000 -0.64727998 0.04713300 ## [76] 0.51642001 0.19898000 0.23919000 0.12549999 0.22471000 ## [81] 0.82612997 0.07832800 -0.57020003 0.02393400 -0.15410000 ## [86] -0.25738999 0.41262001 -0.46967000 0.87914002 0.72628999 ## [91] 0.05386200 -1.15750003 -0.47835001 0.20139000 -1.00510001 ## [96] 0.11515000 -0.96609002 0.12960000 0.18388000 -0.03038300 ## Token[4]: \".\" ## [1] -0.3397900 0.2094100 0.4634800 -0.6479200 -0.3837700 0.0380340 ## [7] 0.1712700 0.1597800 0.4661900 -0.0191690 0.4147900 -0.3434900 ## [13] 0.2687200 0.0446400 0.4213100 -0.4103200 0.1545900 0.0222390 ## [19] -0.6465300 0.2525600 0.0431360 -0.1944500 0.4651600 0.4565100 ## [25] 0.6858800 0.0912950 0.2187500 -0.7035100 0.1678500 -0.3507900 ## [31] -0.1263400 0.6638400 -0.2582000 0.0365420 -0.1360500 0.4025300 ## [37] 0.1428900 0.3813200 -0.1228300 -0.4588600 -0.2528200 -0.3043200 ## [43] -0.1121500 -0.2618200 
-0.2248200 -0.4455400 0.2991000 -0.8561200 ## [49] -0.1450300 -0.4908600 0.0082973 -0.1749100 0.2752400 1.4401000 ## [55] -0.2123900 -2.8434999 -0.2795800 -0.4572200 1.6386000 0.7880800 ## [61] -0.5526200 0.6500000 0.0864260 0.3901200 1.0632000 -0.3537900 ## [67] 0.4832800 0.3460000 0.8417400 0.0987070 -0.2421300 -0.2705300 ## [73] 0.0452870 -0.4014700 0.1139500 0.0062226 0.0366730 0.0185180 ## [79] -1.0213000 -0.2080600 0.6407200 -0.0687630 -0.5863500 0.3347600 ## [85] -1.1432000 -0.1148000 -0.2509100 -0.4590700 -0.0968190 -0.1794600 ## [91] -0.0633510 -0.6741200 -0.0688950 0.5360400 -0.8777300 0.3180200 ## [97] -0.3924200 -0.2339400 0.4729800 -0.0288030"},{"path":"https://davidycliao.github.io/flaiR/articles/transformer_wordembeddings.html","id":"suppored-models","dir":"Articles","previous_headings":"Classic Word Embeddings","what":"Suppored Models:","title":"WordEmbeddings Supported in Flair NLP","text":"following embeddings currently supported: , want load German FastText embeddings, instantiate follows: Alternatively, want load German FastText embeddings trained crawls, instantiate follows:","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Tutorials","text":"Flair NLP open-source library Natural Language Processing (NLP) developed Zalando Research. Known state---art solutions NLP tasks like Named Entity Recognition (NER), Part--Speech tagging (POS), , garnered attention NLP community ease use powerful functionalities. Developed Python, built PyTorch framework, offers flexible dynamic approach deal textual data. hand, {flaiR} R aims continue framework established Flair Python creating framework R, thereby extending Flair’s capabilities R programming environment. One hallmark features Flair contextual string embeddings, crucial discerning meaning words different contextual usages. 
Traditional embeddings assign fixed vector word, without considering context, can limitation trying understand nuances word’s usage across different sentences. contrary, Flair’s contextual embeddings generate word vectors considering surrounding text, thus capturing word’s context semantics accurately. particularly impactful scenarios word can different meanings based usage. Flair offers pre-trained models various languages tasks, providing solid foundation various NLP applications text classification, sentiment analysis, entity recognition, etc. instance, ’re involved project requires identifying persons, organizations, locations text, Flair pre-trained NER models can simplify task.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"oop-in-r-when-introducing-python-module","dir":"Articles","previous_headings":"Introduction","what":"OOP in R when Introducing Python Module","title":"Tutorials","text":"Object-Oriented Programming (OOP) programming paradigm uses objects, contain data (attributes) functions (methods), design applications software. idea bind data methods operate data one single unit, object. advent R6, OOP common early stages R. knowledge, R6 relatively rare; aside {mlr3}, written R6, packages accomplished S4 S3 (personal experience), , course, may greatly related habits tasks R users. However, purpose {flaiR} standardize wrapping ‘{flair NLP}’ Python functionality R provide convenient access R users utilize flair NLP features. usage Flair NLP within {flaiR} employs concepts objects classes, similar R6 . However, features packaged {reticulate} Python. words, functionalities imported R essentially belong Python classes modules. {flairR} architecture, use simplest S3 method wrap modules methods within modules, allowing R users conveniently access use Python functionalities. addition, tensors serve fundamental building block creating training neural networks conducting various numerical computations Python. 
Flair’s NLP tasks Python PyTorch, numerous extensive functionalities tensor operations, including element-wise operations, matrix multiplications, reshaping. tutorial, also cover work tensors R convert tensors matrices R environment. particularly important using Flair word embeddings R environment. ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"the-overview","dir":"Articles","previous_headings":"Introduction","what":"The Overview","title":"Tutorials","text":"following tutorial mainly based Tadej Magajna’s ‘Natural Language Processing Flair: Practical Guide Understanding Solving NLP Problems’, well official Flair NLP Python tutorial blog. written Python. utilize examples {flaiR} R , welcome cite R repository, also cite works. Except necessary, everything accomplished within R environment, utilizing several important R packages, {quanteda}, {udpipe}, {mlr3}, complete following topics: Sentence Token Object Sequence Taggings Embedding flaiR Training Binary Classifier flaiR Training RNN FlaiR Finetune BERT FlaiR (progress) ","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"sentence-and-token","dir":"Articles","previous_headings":"","what":"Sentence and Token","title":"Tutorials","text":"Sentence Token fundamental classes.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"sentence","dir":"Articles","previous_headings":"Sentence and Token","what":"Sentence","title":"Tutorials","text":"Sentence Flair object contains sequence Token objects, can annotated labels, named entities, part--speech tags, . also can store embeddings sentence whole different kinds linguistic annotations. 
’s simple example create Sentence: Sentence[26] means total 26 tokens sentence.","code":"# Creating a Sentence object library(flaiR) string <- \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\" Sentence <- flair_data()$Sentence sentence <- Sentence(string) print(sentence) #> Sentence[26]: \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"token","dir":"Articles","previous_headings":"Sentence and Token","what":"Token","title":"Tutorials","text":"use Flair handle text data,1 Sentence Token objects often play central roles many use cases. create Sentence object, usually automatically decomposes internal raw text multiple Token objects. words, Sentence object automatically handles text tokenization work, usually don’t need create Token objects manually. Unlike R, indexes 1, Python indexes 0. Therefore, use loop, use seq_along(sentence) - 1. output something like: can directly use $tokens method print tokens. Retrieve Token comprehend string representation format Sentence object, tagging least one token adequate. get_token(n) method, Python method, allows us retrieve Token object particular token. Additionally, can use [] index specific token. noteworthy Python indexes 0, whereas R starts indexing 1. word (punctuation) sentence treated individual Token object. Token objects store text information possible linguistic information (part--speech tags named entity tags) embeddings (used model generate ). Even though cases need create Token objects manually, understanding manage objects manually still useful situations, want fine-grained control tokenization process. example, can control exactness tokenization adding manually created Token objects Sentence object. design pattern Flair allows users handle text data flexible way. 
Users can use automatic tokenization feature rapid development, also perform finer-grained control accommodate use cases. Annotate POS tag NER tag add_label(label_type, value) method can employed assign label token. manually add tag preliminary tutorial, usually, Universal POS tags, sentence[10] ‘see’, ‘seen’ might tagged VERB, indicating past participle form verb. can also add NER (Named Entity Recognition) tag sentence[4], “UCD”, identifying university Dublin. print sentence object, Sentence[50] provides information 50 tokens → [‘’/ORG, ‘seen’/VERB], thus displaying two tagging pieces information.","code":"# The Sentence object has automatically created and contains multiple Token objects # We can iterate through the Sentence object to view each Token. for (i in seq_along(sentence)-1) { print(sentence[[i]]) } #> Token[0]: \"What\" #> Token[1]: \"I\" #> Token[2]: \"see\" #> Token[3]: \"in\" #> Token[4]: \"UCD\" #> Token[5]: \"today\" #> Token[6]: \",\" #> Token[7]: \"what\" #> Token[8]: \"I\" #> Token[9]: \"have\" #> Token[10]: \"seen\" #> Token[11]: \"of\" #> Token[12]: \"UCD\" #> Token[13]: \"in\" #> Token[14]: \"its\" #> Token[15]: \"impact\" #> Token[16]: \"on\" #> Token[17]: \"my\" #> Token[18]: \"own\" #> Token[19]: \"life\" #> Token[20]: \"and\" #> Token[21]: \"the\" #> Token[22]: \"life\" #> Token[23]: \"of\" #> Token[24]: \"Ireland\" #> Token[25]: \".\" print(sentence$tokens) #> [[1]] #> Token[0]: \"What\" #> #> [[2]] #> Token[1]: \"I\" #> #> [[3]] #> Token[2]: \"see\" #> #> [[4]] #> Token[3]: \"in\" #> #> [[5]] #> Token[4]: \"UCD\" #> #> [[6]] #> Token[5]: \"today\" #> #> [[7]] #> Token[6]: \",\" #> #> [[8]] #> Token[7]: \"what\" #> #> [[9]] #> Token[8]: \"I\" #> #> [[10]] #> Token[9]: \"have\" #> #> [[11]] #> Token[10]: \"seen\" #> #> [[12]] #> Token[11]: \"of\" #> #> [[13]] #> Token[12]: \"UCD\" #> #> [[14]] #> Token[13]: \"in\" #> #> [[15]] #> Token[14]: \"its\" #> #> [[16]] #> Token[15]: \"impact\" #> #> [[17]] #> Token[16]: \"on\" #> #> [[18]] #> 
Token[17]: \"my\" #> #> [[19]] #> Token[18]: \"own\" #> #> [[20]] #> Token[19]: \"life\" #> #> [[21]] #> Token[20]: \"and\" #> #> [[22]] #> Token[21]: \"the\" #> #> [[23]] #> Token[22]: \"life\" #> #> [[24]] #> Token[23]: \"of\" #> #> [[25]] #> Token[24]: \"Ireland\" #> #> [[26]] #> Token[25]: \".\" # method in Python sentence$get_token(5) #> Token[4]: \"UCD\" # indexing in R sentence[4] #> Token[4]: \"UCD\" sentence[10]$add_label('manual-pos', 'VERB') print(sentence[10]) #> Token[10]: \"seen\" → VERB (1.0) sentence[4]$add_label('ner', 'ORG') print(sentence[4]) #> Token[4]: \"UCD\" → ORG (1.0) print(sentence) #> Sentence[26]: \"What I see in UCD today, what I have seen of UCD in its impact on my own life and the life of Ireland.\" → [\"UCD\"/ORG, \"seen\"/VERB]"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"corpus","dir":"Articles","previous_headings":"Sentence and Token","what":"Corpus","title":"Tutorials","text":"Corpus object Flair fundamental data structure represents dataset containing text samples, usually comprising training set, development set (validation set), test set. ’s designed work smoothly Flair’s models tasks like named entity recognition, text classification, . Attributes: train: list sentences (List[Sentence]) form training dataset. dev (development): list sentences (List[Sentence]) form development (validation) dataset. test: list sentences (List[Sentence]) form test dataset. Important Methods: downsample: method allows downsample (reduce) number sentences train, dev, test splits. obtain_statistics: method gives quick overview statistics corpus, including number sentences distribution labels. make_vocab_dictionary: Used create vocabulary dictionary corpus. $obtain_statistics() method Corpus object Flair library provides overview dataset statistics. method returns Python’s dictionary details training, validation (development), test datasets make corpus. R, can use jsonlite package format JSON. 
R , use data article Temporal Focus Campaign Communication Stefan Muller, published Journal Politics 2020, example. First, vectorize cc_muller$text using Sentence function transform list object. , reformat cc_muller$class_pro_retro factor. ’s essential note R handles numerical values differently Python. R, numerical values represented floating point, ’s advisable convert factors strings. Lastly, employ map function purrr package assign labels sentence corpus using $add_label method. perform train-test split using base R, can follow steps: don’t provide dev set, Flair won’t force carve portion test set serve dev set. However, cases train test sets provided without dev set, Flair might automatically take fraction train set (e.g., 10%) use dev set (#2259). offer mechanism model selection early stopping prevent model overfitting train set. “Corpus” function, random selection “dev.” ensure reproducibility, need set seed Flair framework. can accomplish calling top-level module “flair” via {flaiR} using $set_seed(1964L) set seed. later sections, similar processing using Corpus. Following , focus advanced NLP applications. 
","code":"library(flaiR) Corpus <- flair_data()$Corpus Sentence <- flair_data()$Sentence # Create some example sentences train <- list(Sentence('This is a training example.')) dev <- list(Sentence('This is a validation example.')) test <- list(Sentence('This is a test example.')) # Create a corpus using the custom data splits corpus <- Corpus(train = train, dev = dev, test = test) library(jsonlite) data <- fromJSON(corpus$obtain_statistics()) formatted_str <- toJSON(data, pretty=TRUE) print(formatted_str) #> { #> \"TRAIN\": { #> \"dataset\": [\"TRAIN\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> }, #> \"TEST\": { #> \"dataset\": [\"TEST\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> }, #> \"DEV\": { #> \"dataset\": [\"DEV\"], #> \"total_number_of_documents\": [1], #> \"number_of_documents_per_class\": {}, #> \"number_of_tokens_per_tag\": {}, #> \"number_of_tokens\": { #> \"total\": [6], #> \"min\": [6], #> \"max\": [6], #> \"avg\": [6] #> } #> } #> } library(purrr) #> #> Attaching package: 'purrr' #> The following object is masked from 'package:jsonlite': #> #> flatten data(cc_muller) # The `Sentence` object tokenizes text text <- lapply( cc_muller$text, Sentence) # split sentence object to train and test. labels <- as.factor(cc_muller$class_pro_retro) # `$add_label` method assigns the corresponding coded type to each Sentence corpus. 
text <- map2(text, labels, ~ .x$add_label(\"classification\", .y), .progress = TRUE) set.seed(2046) sample <- sample(c(TRUE, FALSE), length(text), replace=TRUE, prob=c(0.8, 0.2)) train <- text[sample] test <- text[!sample] sprintf(\"Corpus object sizes - Train: %d | Test: %d\", length(train), length(test)) #> [1] \"Corpus object sizes - Train: 4710 | Test: 1148\" flair <- import_flair() flair$set_seed(1964L) corpus <- Corpus(train=train, # dev=test, test=test) #> 2023-11-29 12:40:29,096 No dev split found. Using 0% (i.e. 471 samples) of the train split as dev data sprintf(\"Corpus object sizes - Train: %d | Test: %d | Dev: %d\", length(corpus$train), length(corpus$test), length(corpus$dev)) #> [1] \"Corpus object sizes - Train: 4239 | Test: 1148 | Dev: 471\""},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"tag-entities-in-text","dir":"Articles","previous_headings":"Sequence Taggings","what":"Tag Entities in Text","title":"Tutorials","text":"Let’s run named entity recognition following example sentence: “love Berlin New York”. , need make Sentence text, load pre-trained model use predict tags sentence object. print: Use loop print POS tag. ’s important note Python indexed 0. Therefore, R environment, must use seq_along(sentence$get_labels()) - 1. 
","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('ner') #> 2023-11-29 12:40:30,777 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"Berlin\"/LOC, \"New York\"/LOC] for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Span[2:3]: \"Berlin\"'/'LOC' (0.9812) #> 'Span[4:6]: \"New York\"'/'LOC' (0.9957)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"tag-part-of-speech-in-text","dir":"Articles","previous_headings":"Sequence Taggings","what":"Tag Part-of-Speech in Text","title":"Tutorials","text":"use flair/pos-english POS tagging standard models Hugging Face. print: Use loop print pos tag. ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the NER tagger Classifier <- flair_nn()$Classifier tagger <- Classifier$load('pos') #> 2023-11-29 12:40:31,664 SequenceTagger predicts: Dictionary with 53 tags: , O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD # run NER over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → [\"I\"/PRP, \"love\"/VBP, \"Berlin\"/NNP, \"and\"/CC, \"New\"/NNP, \"York\"/NNP, \".\"/.] 
for (i in seq_along(sentence$get_labels())) { print(sentence$get_labels()[[i]]) } #> 'Token[0]: \"I\"'/'PRP' (1.0) #> 'Token[1]: \"love\"'/'VBP' (1.0) #> 'Token[2]: \"Berlin\"'/'NNP' (0.9999) #> 'Token[3]: \"and\"'/'CC' (1.0) #> 'Token[4]: \"New\"'/'NNP' (1.0) #> 'Token[5]: \"York\"'/'NNP' (1.0) #> 'Token[6]: \".\"'/'.' (1.0)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"detect-sentiment","dir":"Articles","previous_headings":"Sequence Taggings","what":"Detect Sentiment","title":"Tutorials","text":"Let’s run sentiment analysis sentence determine whether POSITIVE NEGATIVE. can essentially code . Just instead loading ‘ner’ model, now load ‘sentiment’ model: ","code":"# attach flaiR in R library(flaiR) # make a sentence Sentence <- flair_data()$Sentence sentence <- Sentence('I love Berlin and New York.') # load the Classifier tagger from flair.nn module Classifier <- flair_nn()$Classifier tagger <- Classifier$load('sentiment') # run sentiment analysis over sentence tagger$predict(sentence) # print the sentence with all annotations print(sentence) #> Sentence[7]: \"I love Berlin and New York.\" → POSITIVE (0.9982)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"flair-embedding","dir":"Articles","previous_headings":"","what":"Flair Embedding","title":"Tutorials","text":"Flair popular natural language processing library, providing variety embedding methods text representation Flair. Flair Embeddings word embedding framowork Natural Language Processing, developed Zalando. Flair focuses word-level representation can capture contextual information words, meaning word can different embeddings different contexts. Unlike traditional word embeddings (Word2Vec GloVe), Flair can dynamically generate word embeddings based context achieved excellent results various NLP tasks. key points Flair Embeddings: Context-Aware Flair can understand context word sentence dynamically generate word embeddings based context. 
different static embeddings, embedding word consider context sentence. Flair dynamic word embedding technique can understand meaning words based context. contrast, static word embeddings, Word2Vec GloVe, provide fixed embedding word without considering context sentence. Therefore, context-sensitive embedding techniques, Flair, can capture meaning words specific sentences accurately, thus enhancing performance language models various tasks. Example: Consider following two English sentences: “interested bank river.” “need go bank withdraw money.” , word “bank” two different meanings. first sentence, refers edge shore river. second sentence, refers financial institution. static embeddings, word “bank” might embedding lies somewhere two meanings doesn’t consider context. dynamic embeddings like Flair, “bank” first sentence embedding related rivers, second sentence, embedding related finance. word, similar vector representation, essentially different. way, can see dynamic embeddings “bank” two sentences differ based context. Although printed embeddings , reality, high-dimensional vectors, might see lot numbers. want intuitive view differences, compute cosine similarity metrics two embeddings. just simple demonstration. practice, can also combine multiple embedding techniques, WordEmbeddings FlairEmbeddings, get richer word vectors. Character-Based Flair uses character-level language model, meaning can generate embeddings rare words even misspelled words. important feature allows model understand process words never appeared training data. Flair uses bidirectional LSTM (Long Short-Term Memory) network operates character level. means feeds individual characters LSTM instead words. Multilingual Support Flair provides various pre-trained character-level language models, supporting contextual word embeddings multiple languages. Flair allows easily combine different word embeddings (e.g., Flair Embeddings, Word2Vec, GloVe, etc.) 
create powerful stacked embeddings.","code":"FlairEmbeddings <- flair_embeddings()$FlairEmbeddings Sentence <- flair_data()$Sentence # Initialize Flair embeddings flair_embedding_forward <- FlairEmbeddings('news-forward') # Define the two sentences sentence1 <- Sentence(\"I am interested in the bank of the river.\") sentence2 <- Sentence(\"I need to go to the bank to withdraw money.\") # Get the embeddings flair_embedding_forward$embed(sentence1) #> [[1]] #> Sentence[10]: \"I am interested in the bank of the river.\" flair_embedding_forward$embed(sentence2) #> [[1]] #> Sentence[11]: \"I need to go to the bank to withdraw money.\" # Extract the embedding for \"bank\" from the sentences bank_embedding_sentence1 = sentence1[5]$embedding # \"bank\" is the seventh word bank_embedding_sentence2 = sentence2[6]$embedding # \"bank\" is the sixth word library(lsa) #> Loading required package: SnowballC cosine(as.numeric( bank_embedding_sentence1$numpy()), as.numeric( bank_embedding_sentence2$numpy())) #> [,1] #> [1,] 0.7329552"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"classic-wordembeddings","dir":"Articles","previous_headings":"Flair Embedding","what":"Classic Wordembeddings","title":"Tutorials","text":"Flair, simplest form embedding still contains semantic information word called classic word embeddings. embeddings pre-trained non-contextual. Let’s retrieve word embeddings. , can utilize FastText embeddings following code. use , simply instantiate WordEmbeddings class passing ID embedding choice. , simply wrap text Sentence object, call embed(sentence) method WordEmbeddings class. Flair supports range classic word embeddings, offering unique features application scopes. overview, detailing ID required load embedding corresponding language. 
","code":"embedding = flair_embeddings.WordEmbeddings('crawl') sentence = flair_data.Sentence(\"one two three one\") embedding$embed(sentence) #> [[1]] #> Sentence[4]: \"one two three one\" for (i in seq_along(sentence$tokens)) { print(head(sentence$tokens[[i]]$embedding), n =5) } #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383]) #> tensor([ 0.0282, -0.0786, -0.1236, 0.1756, -0.1199, 0.0964]) #> tensor([-0.0920, -0.0690, -0.1475, 0.2313, -0.0872, 0.0799]) #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383])"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"contexual-embeddings","dir":"Articles","previous_headings":"Flair Embedding","what":"Contexual Embeddings","title":"Tutorials","text":"Understanding contextuality Flair embeddings idea behind contextual string embeddings word embedding defined syntactic-semantic meaning also context appears . means word different embedding every context appears . pre-trained Flair model offers forward version backward version. Let’s assume processing language , just like book, uses left--right script. forward version takes account context happens word – left-hand side. backward version works opposite direction. takes account context word – right-hand side word. true, two words appear beginning two different sentences identical forward embeddings, context null. Let’s test : using forward model, takes account context occurs word. Additionally, since word context left-hand side position sentence, two embeddings identical, code assumes identical, indeed output True. test whether sum two 2048 embeddings ‘nice’ equal 2048. true, indicates embedding results consistent, theoretically case. Now separately add words, pretty, two sentence objects. two sets embeddings identical words different, returns False. measure similarity two vectors inner product space known cosine similarity. 
formula calculating cosine similarity two vectors, vectors B, follows: \\(Cosine Similarity = \\frac{\\sum_{} (A_i \\cdot B_i)}{\\sqrt{\\sum_{} (A_i^2)} \\cdot \\sqrt{\\sum_{} (B_i^2)}}\\) can observe similarity two words 0.55. ","code":"embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"nice shirt\") s2 <- flair_data.Sentence(\"nice pants\") embedding$embed(s1) #> [[1]] #> Sentence[2]: \"nice shirt\" embedding$embed(s2) #> [[1]] #> Sentence[2]: \"nice pants\" cat(\" s1 sentence:\", paste(s1[0], sep = \"\"), \"\\n\", \"s2 sentence:\", paste(s2[0], sep = \"\")) #> s1 sentence: Token[0]: \"nice\" #> s2 sentence: Token[0]: \"nice\" length(s1[0]$embedding$numpy()) == sum(s1[0]$embedding$numpy() == s2[0]$embedding$numpy()) #> [1] TRUE embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"nice shirt\") s2 <- flair_data.Sentence(\"nice pants\") embedding <- flair_embeddings.FlairEmbeddings('news-forward') s1 <- flair_data.Sentence(\"very nice shirt\") s2 <- flair_data.Sentence(\"pretty nice pants\") embedding$embed(s1) #> [[1]] #> Sentence[3]: \"very nice shirt\" embedding$embed(s2) #> [[1]] #> Sentence[3]: \"pretty nice pants\" length(s1[0]$embedding$numpy()) == sum(s1[0]$embedding$numpy() == s2[0]$embedding$numpy()) #> [1] FALSE library(lsa) vector1 <- as.numeric(s1[0]$embedding$numpy()) vector2 <- as.numeric(s2[0]$embedding$numpy()) cosine_similarity <- cosine(vector1, vector2) print(cosine_similarity) #> [,1] #> [1,] 0.5571664"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"extracting-embeddings-from-bert","dir":"Articles","previous_headings":"Flair Embedding","what":"Extracting Embeddings from BERT","title":"Tutorials","text":"First, utilize flair.embeddings.TransformerWordEmbeddings function download BERT, transformer models can also found Flair NLP’s Hugging Face. Traverse token sentence print . 
view token, ’s necessary usereticulate::py_str(token) since sentence Python object. ","code":"TransformerWordEmbeddings <- flair_embeddings.TransformerWordEmbeddings(\"bert-base-uncased\") embedding <- TransformerWordEmbeddings$embed(sentence) # Iterate through each token in the sentence, printing them. # Utilize reticulate::py_str(token) to view each token, given that the sentence is a Python object. for (i in seq_along(sentence$tokens)) { cat(\"Token: \", reticulate::py_str(sentence$tokens[[i]]), \"\\n\") # Access the embedding of the token, converting it to an R object, # and print the first 10 elements of the vector. token_embedding <- sentence$tokens[[i]]$embedding print(head(token_embedding, 10)) } #> Token: Token[0]: \"one\" #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383, -0.1200, 0.2620, #> -0.0575, 0.0228]) #> Token: Token[1]: \"two\" #> tensor([ 0.0282, -0.0786, -0.1236, 0.1756, -0.1199, 0.0964, -0.1327, 0.4449, #> -0.0264, -0.1168]) #> Token: Token[2]: \"three\" #> tensor([-0.0920, -0.0690, -0.1475, 0.2313, -0.0872, 0.0799, -0.0901, 0.4403, #> -0.0103, -0.1494]) #> Token: Token[3]: \"one\" #> tensor([-0.0535, -0.0368, -0.2851, -0.0381, -0.0486, 0.2383, -0.1200, 0.2620, #> -0.0575, 0.0228])"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-a-binary-classifier-in-flair","dir":"Articles","previous_headings":"","what":"Training a Binary Classifier in flaiR","title":"Tutorials","text":"section, ’ll train sentiment analysis model can categorize text either positive negative. case study adapted pages 116 130 Tadej Magajna’s book, ‘Natural Language Processing Flair’. process training text classifiers Flair mirrors sequence followed sequence labeling models. Specifically, steps train text classifiers : Load tagged corpus compute label dictionary map. Prepare document embeddings. Initialize TextClassifier class. 
Train model.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-a-tagged-corpus","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading a Tagged Corpus","title":"Tutorials","text":"Training text classification models requires set text documents (typically, sentences paragraphs) document associated one classification labels. train sentiment analysis text classification model, using famous Internet Movie Database (IMDb) dataset, contains 50,000 movie reviews IMDB, review labeled either positive negative. References dataset already baked Flair, loading dataset couldn’t easier: Print sizes corpus object follows - test: %d | train: %d | dev: %d”","code":"library(flaiR) # load IMDB from flair_datasets module Corpus <- flair_data()$Corpus IMDB <- flair_datasets()$IMDB # downsize to 0.05 corpus = IMDB() #> 2023-11-29 12:40:41,193 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced #> 2023-11-29 12:40:41,193 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt #> 2023-11-29 12:40:41,193 Dev: None #> 2023-11-29 12:40:41,193 Test: None #> 2023-11-29 12:40:41,794 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data #> 2023-11-29 12:40:41,809 No dev split found. Using 0% (i.e. 4500 samples) of the train split as dev data #> 2023-11-29 12:40:41,809 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment') corpus$downsample(0.05) #> test_size <- length(corpus$test) train_size <- length(corpus$train) dev_size <- length(corpus$dev) output <- sprintf(\"Corpus object sizes - Test: %d | Train: %d | Dev: %d\", test_size, train_size, dev_size) print(output) #> [1] \"Corpus object sizes - Test: 250 | Train: 2025 | Dev: 225\" lbl_type = 'sentiment' label_dict = corpus$make_label_dictionary(label_type=lbl_type) #> 2023-11-29 12:40:41,915 Computing label dictionary. 
Progress: #> 2023-11-29 12:40:45,705 Dictionary created for label 'sentiment' with 2 values: POSITIVE (seen 1014 times), NEGATIVE (seen 1011 times)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-the-embeddings","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading the Embeddings","title":"Tutorials","text":"flair, iit covers different types document embeddings can use. , simply use DocumentPoolEmbeddings. require training prior training classification model :","code":"DocumentPoolEmbeddings <- flair_embeddings()$DocumentPoolEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings glove = WordEmbeddings('glove') document_embeddings = DocumentPoolEmbeddings(glove)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"initializing-the-textclassifier-class","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Initializing the TextClassifier Class","title":"Tutorials","text":"$allows set device use CPU, GPU, specific MPS devices Mac (mps:0, mps:1, mps:2).","code":"# initiate TextClassifier TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier(document_embeddings, label_dictionary = label_dict, label_type = lbl_type) classifier$to(flair_device(\"mps\")) TextClassifier( (embeddings): DocumentPoolEmbeddings( fine_tune_mode=none, pooling=mean (embeddings): StackedEmbeddings( (list_embedding_0): WordEmbeddings( 'glove' (embedding): Embedding(400001, 100) ) ) ) (decoder): Linear(in_features=100, out_features=3, bias=True) (dropout): Dropout(p=0.0, inplace=False) (locked_dropout): LockedDropout(p=0.0) (word_dropout): WordDropout(p=0.0) (loss_function): CrossEntropyLoss() )"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-the-model","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Training the Model","title":"Tutorials","text":"Training text 
classifier model involves two simple steps: - Defining model trainer class passing classifier model corpus - Setting training process passing required training hyperparameters. worth noting ‘L’ numbers like 32L 5L used R denote number integer. Without ‘L’ suffix, numbers R treated numeric, default double-precision floating-point numbers. contrast, Python determines type based value number . Whole numbers (e.g., 5 32) type int, numbers decimal points (e.g., 5.0) type float. Floating-point numbers languages representations real numbers can approximation due way stored memory.","code":"# initiate ModelTrainer ModelTrainer <- flair_trainers()$ModelTrainer # fit the model trainer <- ModelTrainer(classifier, corpus) # start to train # note: the 'L' in 32L is used in R to denote that the number is an integer. trainer$train('classifier', learning_rate=0.1, mini_batch_size=32L, # specifies how embeddings are stored in RAM, ie.\"cpu\", \"cuda\", \"gpu\", \"mps\". # embeddings_storage_mode = \"mps\", max_epochs=10L) #> 2023-11-29 12:40:47,456 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Model: \"TextClassifier( #> (embeddings): DocumentPoolEmbeddings( #> fine_tune_mode=none, pooling=mean #> (embeddings): StackedEmbeddings( #> (list_embedding_0): WordEmbeddings( #> 'glove' #> (embedding): Embedding(400001, 100) #> ) #> ) #> ) #> (decoder): Linear(in_features=100, out_features=2, bias=True) #> (dropout): Dropout(p=0.0, inplace=False) #> (locked_dropout): LockedDropout(p=0.0) #> (word_dropout): WordDropout(p=0.0) #> (loss_function): CrossEntropyLoss() #> (weights): None #> (weight_tensor) None #> )\" #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Corpus: 2025 train + 225 dev + 250 test sentences #> 2023-11-29 12:40:47,457 
---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Train: 2025 sentences #> 2023-11-29 12:40:47,457 (train_with_dev=False, train_with_test=False) #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Training Params: #> 2023-11-29 12:40:47,457 - learning_rate: \"0.1\" #> 2023-11-29 12:40:47,457 - mini_batch_size: \"32\" #> 2023-11-29 12:40:47,457 - max_epochs: \"10\" #> 2023-11-29 12:40:47,457 - shuffle: \"True\" #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Plugins: #> 2023-11-29 12:40:47,457 - AnnealOnPlateau | patience: '3', anneal_factor: '0.5', min_learning_rate: '0.0001' #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Final evaluation on model from best epoch (best-model.pt) #> 2023-11-29 12:40:47,457 - metric: \"('micro avg', 'f1-score')\" #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,457 Computation: #> 2023-11-29 12:40:47,457 - compute on device: cpu #> 2023-11-29 12:40:47,457 - embedding storage: cpu #> 2023-11-29 12:40:47,457 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,458 Model training base path: \"classifier\" #> 2023-11-29 12:40:47,458 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:47,458 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:48,196 epoch 1 - iter 6/64 - loss 0.88400855 - time (sec): 0.74 - samples/sec: 260.03 - lr: 0.100000 - 
momentum: 0.000000 #> 2023-11-29 12:40:48,865 epoch 1 - iter 12/64 - loss 0.88396302 - time (sec): 1.41 - samples/sec: 272.95 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:49,794 epoch 1 - iter 18/64 - loss 0.90135088 - time (sec): 2.34 - samples/sec: 246.57 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:50,499 epoch 1 - iter 24/64 - loss 0.91910574 - time (sec): 3.04 - samples/sec: 252.53 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:51,178 epoch 1 - iter 30/64 - loss 0.92947888 - time (sec): 3.72 - samples/sec: 258.02 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:51,891 epoch 1 - iter 36/64 - loss 0.91739042 - time (sec): 4.43 - samples/sec: 259.88 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:52,736 epoch 1 - iter 42/64 - loss 0.91689622 - time (sec): 5.28 - samples/sec: 254.62 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:53,443 epoch 1 - iter 48/64 - loss 0.91121411 - time (sec): 5.98 - samples/sec: 256.65 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:54,158 epoch 1 - iter 54/64 - loss 0.90482956 - time (sec): 6.70 - samples/sec: 257.91 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:54,883 epoch 1 - iter 60/64 - loss 0.90788143 - time (sec): 7.43 - samples/sec: 258.58 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:55,338 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:55,338 EPOCH 1 done: loss 0.9077 - lr: 0.100000 #> 2023-11-29 12:40:56,499 DEV : loss 0.8828572034835815 - f1-score (micro avg) 0.4533 #> 2023-11-29 12:40:56,909 - 0 epochs without improvement #> 2023-11-29 12:40:56,909 saving best model #> 2023-11-29 12:40:57,319 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:40:58,255 epoch 2 - iter 6/64 - loss 0.88782200 - time (sec): 0.94 - samples/sec: 205.29 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:58,925 epoch 2 - 
iter 12/64 - loss 0.89557258 - time (sec): 1.61 - samples/sec: 239.21 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:40:59,655 epoch 2 - iter 18/64 - loss 0.88284143 - time (sec): 2.33 - samples/sec: 246.70 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:00,378 epoch 2 - iter 24/64 - loss 0.86361855 - time (sec): 3.06 - samples/sec: 251.16 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:01,102 epoch 2 - iter 30/64 - loss 0.86423665 - time (sec): 3.78 - samples/sec: 253.79 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:01,959 epoch 2 - iter 36/64 - loss 0.85561348 - time (sec): 4.64 - samples/sec: 248.29 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:02,665 epoch 2 - iter 42/64 - loss 0.85215194 - time (sec): 5.34 - samples/sec: 251.45 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:03,376 epoch 2 - iter 48/64 - loss 0.85517522 - time (sec): 6.06 - samples/sec: 253.61 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:04,313 epoch 2 - iter 54/64 - loss 0.84677640 - time (sec): 6.99 - samples/sec: 247.10 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:05,043 epoch 2 - iter 60/64 - loss 0.85021446 - time (sec): 7.72 - samples/sec: 248.61 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:05,336 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:05,336 EPOCH 2 done: loss 0.8530 - lr: 0.100000 #> 2023-11-29 12:41:06,493 DEV : loss 0.8783490061759949 - f1-score (micro avg) 0.4533 #> 2023-11-29 12:41:07,078 - 0 epochs without improvement #> 2023-11-29 12:41:07,079 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:07,840 epoch 3 - iter 6/64 - loss 0.87799916 - time (sec): 0.76 - samples/sec: 252.28 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:08,584 epoch 3 - iter 12/64 - loss 0.89608843 - time (sec): 1.51 - samples/sec: 255.13 - lr: 0.100000 - momentum: 
0.000000 #> 2023-11-29 12:41:09,301 epoch 3 - iter 18/64 - loss 0.90057748 - time (sec): 2.22 - samples/sec: 259.30 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:10,179 epoch 3 - iter 24/64 - loss 0.89696234 - time (sec): 3.10 - samples/sec: 247.76 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:10,899 epoch 3 - iter 30/64 - loss 0.89147007 - time (sec): 3.82 - samples/sec: 251.29 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:11,631 epoch 3 - iter 36/64 - loss 0.89204659 - time (sec): 4.55 - samples/sec: 253.07 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:12,333 epoch 3 - iter 42/64 - loss 0.87829229 - time (sec): 5.25 - samples/sec: 255.83 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:13,193 epoch 3 - iter 48/64 - loss 0.87632222 - time (sec): 6.11 - samples/sec: 251.24 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:13,903 epoch 3 - iter 54/64 - loss 0.87269292 - time (sec): 6.82 - samples/sec: 253.23 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:14,606 epoch 3 - iter 60/64 - loss 0.87970024 - time (sec): 7.53 - samples/sec: 255.08 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:15,079 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:15,079 EPOCH 3 done: loss 0.8762 - lr: 0.100000 #> 2023-11-29 12:41:16,056 DEV : loss 0.8697565793991089 - f1-score (micro avg) 0.4533 #> 2023-11-29 12:41:16,672 - 0 epochs without improvement #> 2023-11-29 12:41:16,673 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:17,430 epoch 4 - iter 6/64 - loss 0.88036998 - time (sec): 0.76 - samples/sec: 253.57 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:18,340 epoch 4 - iter 12/64 - loss 0.84520512 - time (sec): 1.67 - samples/sec: 230.34 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:19,052 epoch 4 - iter 18/64 - loss 0.84692961 - time (sec): 2.38 - 
samples/sec: 242.16 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:19,754 epoch 4 - iter 24/64 - loss 0.84134499 - time (sec): 3.08 - samples/sec: 249.26 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:20,453 epoch 4 - iter 30/64 - loss 0.85828587 - time (sec): 3.78 - samples/sec: 254.00 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:21,155 epoch 4 - iter 36/64 - loss 0.85654225 - time (sec): 4.48 - samples/sec: 257.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:22,052 epoch 4 - iter 42/64 - loss 0.84629689 - time (sec): 5.38 - samples/sec: 249.84 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:22,781 epoch 4 - iter 48/64 - loss 0.84268748 - time (sec): 6.11 - samples/sec: 251.49 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:23,468 epoch 4 - iter 54/64 - loss 0.84196810 - time (sec): 6.80 - samples/sec: 254.30 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:24,363 epoch 4 - iter 60/64 - loss 0.85795100 - time (sec): 7.69 - samples/sec: 249.69 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:24,660 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:24,660 EPOCH 4 done: loss 0.8562 - lr: 0.100000 #> 2023-11-29 12:41:25,814 DEV : loss 0.8513666391372681 - f1-score (micro avg) 0.4533 #> 2023-11-29 12:41:26,249 - 0 epochs without improvement #> 2023-11-29 12:41:26,249 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:27,027 epoch 5 - iter 6/64 - loss 0.87054925 - time (sec): 0.78 - samples/sec: 246.94 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:27,872 epoch 5 - iter 12/64 - loss 0.88675048 - time (sec): 1.62 - samples/sec: 236.67 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:28,628 epoch 5 - iter 18/64 - loss 0.85735505 - time (sec): 2.38 - samples/sec: 242.23 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:29,335 epoch 5 - iter 
24/64 - loss 0.86416615 - time (sec): 3.09 - samples/sec: 248.92 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:30,043 epoch 5 - iter 30/64 - loss 0.85780198 - time (sec): 3.79 - samples/sec: 253.08 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:30,931 epoch 5 - iter 36/64 - loss 0.85106002 - time (sec): 4.68 - samples/sec: 246.11 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:31,643 epoch 5 - iter 42/64 - loss 0.84119070 - time (sec): 5.39 - samples/sec: 249.19 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:32,399 epoch 5 - iter 48/64 - loss 0.84184100 - time (sec): 6.15 - samples/sec: 249.77 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:33,109 epoch 5 - iter 54/64 - loss 0.83640844 - time (sec): 6.86 - samples/sec: 251.91 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:34,002 epoch 5 - iter 60/64 - loss 0.83129187 - time (sec): 7.75 - samples/sec: 247.68 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:34,308 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:34,308 EPOCH 5 done: loss 0.8262 - lr: 0.100000 #> 2023-11-29 12:41:35,445 DEV : loss 0.8393897414207458 - f1-score (micro avg) 0.4578 #> 2023-11-29 12:41:36,037 - 0 epochs without improvement #> 2023-11-29 12:41:36,038 saving best model #> 2023-11-29 12:41:36,366 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:37,124 epoch 6 - iter 6/64 - loss 0.90684560 - time (sec): 0.76 - samples/sec: 253.32 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:37,873 epoch 6 - iter 12/64 - loss 0.86297455 - time (sec): 1.51 - samples/sec: 254.77 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:38,622 epoch 6 - iter 18/64 - loss 0.84712324 - time (sec): 2.26 - samples/sec: 255.33 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:39,331 epoch 6 - iter 24/64 - loss 0.84480650 - time (sec): 2.97 - 
samples/sec: 259.01 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:40,180 epoch 6 - iter 30/64 - loss 0.84118373 - time (sec): 3.81 - samples/sec: 251.72 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:40,935 epoch 6 - iter 36/64 - loss 0.84887512 - time (sec): 4.57 - samples/sec: 252.14 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:41,622 epoch 6 - iter 42/64 - loss 0.85247641 - time (sec): 5.26 - samples/sec: 255.69 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:42,378 epoch 6 - iter 48/64 - loss 0.83503058 - time (sec): 6.01 - samples/sec: 255.47 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:43,341 epoch 6 - iter 54/64 - loss 0.83527769 - time (sec): 6.98 - samples/sec: 247.72 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:44,042 epoch 6 - iter 60/64 - loss 0.82645011 - time (sec): 7.68 - samples/sec: 250.12 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:44,484 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:44,485 EPOCH 6 done: loss 0.8187 - lr: 0.100000 #> 2023-11-29 12:41:45,456 DEV : loss 0.697002649307251 - f1-score (micro avg) 0.5289 #> 2023-11-29 12:41:46,081 - 0 epochs without improvement #> 2023-11-29 12:41:46,081 saving best model #> 2023-11-29 12:41:46,397 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:47,185 epoch 7 - iter 6/64 - loss 0.92862150 - time (sec): 0.79 - samples/sec: 244.01 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:47,860 epoch 7 - iter 12/64 - loss 0.88754721 - time (sec): 1.46 - samples/sec: 262.63 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:48,617 epoch 7 - iter 18/64 - loss 0.87636076 - time (sec): 2.22 - samples/sec: 259.52 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:49,534 epoch 7 - iter 24/64 - loss 0.86338259 - time (sec): 3.14 - samples/sec: 244.90 - lr: 0.100000 - momentum: 0.000000 
#> 2023-11-29 12:41:50,286 epoch 7 - iter 30/64 - loss 0.86797423 - time (sec): 3.89 - samples/sec: 246.87 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:51,006 epoch 7 - iter 36/64 - loss 0.85439281 - time (sec): 4.61 - samples/sec: 249.96 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:51,730 epoch 7 - iter 42/64 - loss 0.85024860 - time (sec): 5.33 - samples/sec: 252.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:52,568 epoch 7 - iter 48/64 - loss 0.83954957 - time (sec): 6.17 - samples/sec: 248.93 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:53,309 epoch 7 - iter 54/64 - loss 0.83340724 - time (sec): 6.91 - samples/sec: 250.03 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:54,008 epoch 7 - iter 60/64 - loss 0.83293155 - time (sec): 7.61 - samples/sec: 252.30 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:54,448 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:54,448 EPOCH 7 done: loss 0.8286 - lr: 0.100000 #> 2023-11-29 12:41:55,424 DEV : loss 0.7402542233467102 - f1-score (micro avg) 0.5022 #> 2023-11-29 12:41:56,039 - 1 epochs without improvement #> 2023-11-29 12:41:56,040 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:41:56,804 epoch 8 - iter 6/64 - loss 0.72758570 - time (sec): 0.76 - samples/sec: 251.29 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:57,547 epoch 8 - iter 12/64 - loss 0.77498165 - time (sec): 1.51 - samples/sec: 254.81 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:58,391 epoch 8 - iter 18/64 - loss 0.78188934 - time (sec): 2.35 - samples/sec: 245.02 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:59,124 epoch 8 - iter 24/64 - loss 0.79232205 - time (sec): 3.08 - samples/sec: 249.02 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:41:59,847 epoch 8 - iter 30/64 - loss 0.78611903 - time (sec): 3.81 - samples/sec: 
252.17 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:00,597 epoch 8 - iter 36/64 - loss 0.77288217 - time (sec): 4.56 - samples/sec: 252.80 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:01,512 epoch 8 - iter 42/64 - loss 0.77235644 - time (sec): 5.47 - samples/sec: 245.62 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:02,221 epoch 8 - iter 48/64 - loss 0.78255222 - time (sec): 6.18 - samples/sec: 248.52 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:02,953 epoch 8 - iter 54/64 - loss 0.78456129 - time (sec): 6.91 - samples/sec: 249.97 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:03,717 epoch 8 - iter 60/64 - loss 0.77978850 - time (sec): 7.68 - samples/sec: 250.11 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:04,158 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:04,158 EPOCH 8 done: loss 0.7780 - lr: 0.100000 #> 2023-11-29 12:42:05,135 DEV : loss 0.7568300366401672 - f1-score (micro avg) 0.4978 #> 2023-11-29 12:42:05,746 - 2 epochs without improvement #> 2023-11-29 12:42:05,747 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:06,639 epoch 9 - iter 6/64 - loss 0.83446981 - time (sec): 0.89 - samples/sec: 215.31 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:07,370 epoch 9 - iter 12/64 - loss 0.83481617 - time (sec): 1.62 - samples/sec: 236.69 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:08,059 epoch 9 - iter 18/64 - loss 0.84183356 - time (sec): 2.31 - samples/sec: 249.18 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:08,766 epoch 9 - iter 24/64 - loss 0.82389081 - time (sec): 3.02 - samples/sec: 254.42 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:09,698 epoch 9 - iter 30/64 - loss 0.81802387 - time (sec): 3.95 - samples/sec: 243.04 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:10,427 epoch 9 - iter 36/64 - loss 
0.81533981 - time (sec): 4.68 - samples/sec: 246.20 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:11,116 epoch 9 - iter 42/64 - loss 0.80514485 - time (sec): 5.37 - samples/sec: 250.37 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:11,871 epoch 9 - iter 48/64 - loss 0.81017195 - time (sec): 6.12 - samples/sec: 250.85 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:12,583 epoch 9 - iter 54/64 - loss 0.80359553 - time (sec): 6.84 - samples/sec: 252.81 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:13,285 epoch 9 - iter 60/64 - loss 0.79928778 - time (sec): 7.54 - samples/sec: 254.75 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:13,782 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:13,782 EPOCH 9 done: loss 0.7969 - lr: 0.100000 #> 2023-11-29 12:42:14,910 DEV : loss 0.7745500206947327 - f1-score (micro avg) 0.4844 #> 2023-11-29 12:42:15,319 - 3 epochs without improvement #> 2023-11-29 12:42:15,320 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:16,295 epoch 10 - iter 6/64 - loss 0.74788894 - time (sec): 0.97 - samples/sec: 196.93 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:16,969 epoch 10 - iter 12/64 - loss 0.76720373 - time (sec): 1.65 - samples/sec: 232.91 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:17,693 epoch 10 - iter 18/64 - loss 0.79161676 - time (sec): 2.37 - samples/sec: 242.76 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:18,604 epoch 10 - iter 24/64 - loss 0.78399789 - time (sec): 3.28 - samples/sec: 233.86 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:19,306 epoch 10 - iter 30/64 - loss 0.78045449 - time (sec): 3.99 - samples/sec: 240.83 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:19,999 epoch 10 - iter 36/64 - loss 0.78069342 - time (sec): 4.68 - samples/sec: 246.20 - lr: 0.100000 - momentum: 0.000000 #> 
2023-11-29 12:42:20,746 epoch 10 - iter 42/64 - loss 0.76349049 - time (sec): 5.43 - samples/sec: 247.69 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:21,656 epoch 10 - iter 48/64 - loss 0.76165402 - time (sec): 6.34 - samples/sec: 242.41 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:22,368 epoch 10 - iter 54/64 - loss 0.77187297 - time (sec): 7.05 - samples/sec: 245.20 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:23,081 epoch 10 - iter 60/64 - loss 0.77289468 - time (sec): 7.76 - samples/sec: 247.40 - lr: 0.100000 - momentum: 0.000000 #> 2023-11-29 12:42:23,559 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:23,560 EPOCH 10 done: loss 0.7810 - lr: 0.100000 #> 2023-11-29 12:42:24,546 DEV : loss 0.7909061908721924 - f1-score (micro avg) 0.4889 #> 2023-11-29 12:42:25,158 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.05] #> 2023-11-29 12:42:25,477 ---------------------------------------------------------------------------------------------------- #> 2023-11-29 12:42:25,477 Loading model from best epoch ... 
#> 2023-11-29 12:42:26,782 #> Results: #> - F-score (micro) 0.592 #> - F-score (macro) 0.5066 #> - Accuracy 0.592 #> #> By class: #> precision recall f1-score support #> #> POSITIVE 0.5600 0.9767 0.7119 129 #> NEGATIVE 0.8800 0.1818 0.3014 121 #> #> accuracy 0.5920 250 #> macro avg 0.7200 0.5793 0.5066 250 #> weighted avg 0.7149 0.5920 0.5132 250 #> #> 2023-11-29 12:42:26,782 ---------------------------------------------------------------------------------------------------- #> $test_score #> [1] 0.592"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"loading-and-using-the-classifiers","dir":"Articles","previous_headings":"Training a Binary Classifier in flaiR","what":"Loading and Using the Classifiers","title":"Tutorials","text":"training text classification model, resulting classifier already stored memory part classifier variable. possible, however, Python session exited training. , ’ll need load model memory following: import Sentence object. Now, can generate predictions example text inputs. ","code":"TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier$load('classifier/best-model.pt') Sentence <- flair_data()$Sentence sentence <- Sentence(\"great\") classifier$predict(sentence) print(sentence$labels) #> [[1]] #> 'Sentence[1]: \"great\"'/'POSITIVE' (0.9999) sentence <- Sentence(\"sad\") classifier$predict(sentence) print(sentence$labels) #> [[1]] #> 'Sentence[1]: \"sad\"'/'NEGATIVE' (0.9021)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"training-a-rnn-with-flair","dir":"Articles","previous_headings":"","what":"Training a RNN with FlaiR","title":"Tutorials","text":", train sentiment analysis model categorize text. case, also include pipeline implements use Recurrent Neural Networks (RNN). makes particularly effective tasks involving sequential data. section also show implent one powerful feature featrue, stacked Embeddings. 
can stack multiple embeddings different layers let classifier learn different types features. Flair NLP, {flaiR} package, ’s easy accomplish task.","code":""},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"import-necessary-modules-from-flair-via-flair-in-r","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Import Necessary Modules from Flair via {flaiR} in R","title":"Tutorials","text":"","code":"library(flaiR) WordEmbeddings <- flair_embeddings()$WordEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings DocumentRNNEmbeddings <- flair_embeddings()$DocumentRNNEmbeddings TextClassifier <- flair_models()$TextClassifier ModelTrainer <- flair_trainers()$ModelTrainer"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"get-the-imdb-corpus","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Get the IMDB Corpus","title":"Tutorials","text":"IMDB movie review dataset used , commonly utilized dataset sentiment analysis. $downsample(0.1) method means 10% dataset used, allowing faster demonstration","code":"# load the IMDB file and downsize it to 0.1 IMDB <- flair_datasets()$IMDB corpus <- IMDB()$downsample(0.1) #> 2023-11-29 12:42:27,196 Reading data from /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced #> 2023-11-29 12:42:27,196 Train: /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced/train.txt #> 2023-11-29 12:42:27,196 Dev: None #> 2023-11-29 12:42:27,196 Test: None #> 2023-11-29 12:42:27,784 No test split found. Using 0% (i.e. 5000 samples) of the train split as test data #> 2023-11-29 12:42:27,799 No dev split found. Using 0% (i.e. 
4500 samples) of the train split as dev data #> 2023-11-29 12:42:27,799 Initialized corpus /Users/yenchiehliao/.flair/datasets/imdb_v4-rebalanced (label type name is 'sentiment') # create the label dictionary lbl_type <- 'sentiment' label_dict <- corpus$make_label_dictionary(label_type=lbl_type) #> 2023-11-29 12:42:27,816 Computing label dictionary. Progress: #> 2023-11-29 12:42:35,340 Dictionary created for label 'sentiment' with 2 values: POSITIVE (seen 2056 times), NEGATIVE (seen 1994 times)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"stacked-embeddings-in-flair","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Stacked Embeddings in flaiR","title":"Tutorials","text":"one Flair’s powerful features: allows integration embeddings enable model learn sparse features. Three types embeddings utilized : GloVe embeddings, two types Flair embeddings (forward backward). Word embeddings used convert words vectors.","code":"# make a list of word embeddings word_embeddings <- list(WordEmbeddings('glove'), FlairEmbeddings('news-forward-fast'), FlairEmbeddings('news-backward-fast')) # initialize the document embeddings document_embeddings <- DocumentRNNEmbeddings(word_embeddings, hidden_size = 512L, reproject_words = TRUE, reproject_words_dimension = 256L) # create a Text Classifier with the embeddings and label dictionary classifier <- TextClassifier(document_embeddings, label_dictionary=label_dict, label_type='class') # initialize the text classifier trainer with our corpus trainer <- ModelTrainer(classifier, corpus)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"start-the-training","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"Start the Training","title":"Tutorials","text":"sake example, setting max_epochs 5. might want increase better performance. worth noting thelearning rate parameter determines step size iteration moving towards minimum loss function. 
smaller learning rate slow learning process, lead precise convergence. mini_batch_size determines number samples used compute gradient step. ‘L’ 32L used R denote number integer. patience (aka early stop) hyperparameter used conjunction early stopping avoid overfitting. determines number epochs training process tolerate without improvements stopping training. Setting max_epochs 5 means algorithm make five passes dataset.","code":"# note: the 'L' in 32L is used in R to denote that the number is an integer. trainer$train('models/sentiment', learning_rate=0.1, mini_batch_size=32L, patience=5L, max_epochs=5L)"},{"path":"https://davidycliao.github.io/flaiR/articles/tutorial.html","id":"to-apply-the-trained-model-for-prediction","dir":"Articles","previous_headings":"Training a RNN with FlaiR","what":"To Apply the Trained Model for Prediction","title":"Tutorials","text":"","code":"sentence <- \"This movie was really exciting!\" classifier$predict(sentence) print(sentence.labels)"},{"path":"https://davidycliao.github.io/flaiR/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"David Liao. Maintainer, author. Akbik Alan. Author, contributor. Blythe Duncan. Author, contributor. Vollgraf Roland. Author, contributor.","code":""},{"path":"https://davidycliao.github.io/flaiR/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Liao D, Alan , Duncan B, Roland V (2023). flaiR: R Wrapper Accessing FLAIR Python. 
R package version 0.0.6.","code":"@Manual{, title = {flaiR: An R Wrapper for Accessing FLAIR in Python}, author = {David Liao and Akbik Alan and Blythe Duncan and Vollgraf Roland}, year = {2023}, note = {R package version 0.0.6}, }"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"flairr-an-r-wrapper-for-accessing-flair-nlp-library-","dir":"","previous_headings":"","what":"flairR: An R Wrapper for Accessing Flair NLP Library","title":"An R Wrapper for Accessing Flair NLP Library","text":"{flaiR} R wrapper {flairNLP/flair} Python library, specifically tailored R users, particularly political science social sciences. flaiR provides easy access main functionalities {Flair NLP}. Developed Developed Zalando Research Berlin, flair NLP straightforward framework state---art Natural Language Processing (NLP) compatible Hugging Face. Flair offers intuitive interfaces exceptional multilingual support, particularly various embedding models, transformers state---art NLP tasks analyze texts, named entity recognition, sentiment analysis, part--speech tagging, support rapidly growing number language models community. comprehensive understanding {flairNLP/flair} architecture NLP tagging models Zalando Research, can refer research article ‘Contextual String Embeddings Sequence Labeling’ official manual written Python implementation. community support languages rapidly expanding. unofficial platform provides R users documentation, examples, tutorials using Flair NLP. goal make easier R users access powerful NLP tools provided Flair NLP.","code":""},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"installation-via-github","dir":"","previous_headings":"","what":"Installation via GitHub","title":"An R Wrapper for Accessing Flair NLP Library","text":"installation consists two parts: First, install Python 3.8 higher (avoid developmental versions latest release compatibility reasons). Secondly, install R 4.2.0 higher. 
System Requirement: Python (>= 3.10.x) R (>= 4.2.0) RStudio (GUI interface allows users adjust manage Python environment R) Anaconda miniconda (highly recommended) tested flaiR using CI/CD GitHub Actions, conducting integration tests across various operating syste tests include intergration R versions 4.2.1, 4.3.2, 4.2.0 Python 3.10.x. testing also covers environments flair NLP PyTorch (given Flair NLP built Torch). stable usage, strongly recommend installing specific versions. first installed, {flaiR} automatically detects whether Python 3.8 higher. , skip automatic installation Python flair NLP. case, need manually install reload {flaiR} . correct Python installed, {flaiR} automatically install flair Python NLP global environment. using {reticulate}, {flaiR} typically assume r-reticulate environment default. time, can use py_config() check location environment. Please note flaiR directly install flair NLP Python environment R using. environment can adjusted RStudio navigating Tools -> Global Options -> Python. issues installation, feel free ask Discussion . First, understanding Python environment RStudio using important. advise confirm Python environment RStudio using. can checking reticulate::py_config() manually via Tools -> Global Options -> Python. stage, ’ll observe RStudio defaulted using ‘flair_env’ environment (personal environment) set . , Python Flair package installed within environment. wish modify setting, option either adjust within RStudio’s settings use {reticulate} package manage Python environment R Now, can confidently install flaiR R environment. notice following message, indicating successful installation. 
means RStudio successfully detected correct Python installed Flair Python environment","code":"install.packages(\"reticulate\") reticulate::py_config() #> python: /Users/*********/.virtualenvs/flair_env/bin/python #> libpython: /Users/*********/.pyenv/versions/3.10.13/lib/libpython3.10.dylib #> pythonhome: /Users/*********/.virtualenvs/flair_env:/Users/*********/.virtualenvs/flair_env #> version: 3.10.13 (main, Oct 27 2023, 04:44:16) [Clang 15.0.0 (clang-1500.0.40.1)] #> numpy: /Users/*********/.virtualenvs/flair_env/lib/python3.10/site-packages/numpy #> numpy_version: 1.26.2 #> flair: /Users/*********/.virtualenvs/flair_env/lib/python3.10/site-packages/flair #> NOTE: Python version was forced by use_python() function install.packages(\"remotes\") remotes::install_github(\"davidycliao/flaiR\", force = TRUE) library(flaiR) #> flaiR: An R Wrapper for Accessing Flair NLP 0.13.0"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"An R Wrapper for Accessing Flair NLP Library","text":"R users, {flairR} primarily consists two main components. first wrapper functions {flaiR} built top {reticulate}, enables interact directly Python modules R provides seamless support documents tutorial (progress) R community. {flaiR} package enables R users leverage Flair’s capabilities train models using Flair framework state---art NLP models without need interact directly Python. Flair offers simpler intuitive approach training custom NLP models compared using Transformer-based models directly. Flair, data loading preprocessing streamlined, facilitating easy integration various pre-trained embeddings, including traditional Transformer-based types like BERT. training process Flair condensed just lines code, automatic handling fundamental preprocessing steps. Evaluation optimization also made user-friendly accessible tools. 
addition, Flair NLP provides easy framework training language models compatible HuggingFace.","code":""},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"training-models-with-huggingface-via-flair","dir":"","previous_headings":"Introduction flairR: An R Wrapper for Accessing Flair NLP Library","what":"Training Models with HuggingFace via flaiR","title":"An R Wrapper for Accessing Flair NLP Library","text":"following example offers straightforward introduction fully train model using Flair framework import BERT model HuggingFace 🤗. example utilizes grandstanding score training data Julia Park’s paper (Politicians Grandstand? Measuring Message Politics Committee Hearings) trains model using Transformer-based models via flair NLP {flaiR}. Step 1 Split Data Train Test Sets flair Sentence Object Step 2 Preprocess Data Corpus Object Step 3 Create Classifier Using Transformer First, $make_label_dictionary function used automatically create label dictionary classification task. label dictionary mapping label index, used map labels tensor label indices. expcept classifcation task, flair also supports label types training custom model, ner, pos sentiment. Alternatively, can also create label dictionary manually. following code creates label dictionary two labels, 0 1, maps indices 0 1 respectively. , can use $item2idx method check mapping label index. important make sure labels mapped correctly indices tensors. TextClassifier used create text classifier. classifier takes document embeddings (importing 'distilbert-base-uncased' HugginFace) label dictionary input. label type also specified classification. Step 4 Start Training specific computation devices local machine. GPU, can use flair_gpu specify GPU device. don’t GPU, can use flaiR::flair_device specify CPU device. ModelTrainer used train model, learns data based grandstanding score. Step 5 Evaluate Model model training process, evaluating performance trained model development set straightforward easy. 
overall performance model test set also straightforward easy evaluate. can find performance metrics model/training.log file. Step 6 Apply Trained Model Unseen Data Prediction use statement dataset example. lassifier$predict function used predict label sentence. function returns sentence object predicted label. sentence$labels list labels, value score. value label , score probability label. label highest score predicted label. Step 7 Reload Model Best Performance train model save_final_model=TRUE, model best performance development set saved output directory. can reload model best performance using load function. can create function classify text using specified Flair classifier. performing classification task, let’s quickly check exmaple dataset. Let’s apply function dataset.","code":"# load training data: grandstanding score from Julia Park's paper library(flaiR) data(gs_score) # load flair functions via flaiR Sentence <- flair_data()$Sentence Corpus <- flair_data()$Corpus TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings TextClassifier <- flair_models()$TextClassifier ModelTrainer <- flair_trainers()$ModelTrainer # split the data text <- lapply(gs_score$speech, Sentence) labels <- as.character(gs_score$rescaled_gs) for (i in 1:length(text)) { text[[i]]$add_label(\"classification\", labels[[i]]) } set.seed(2046) sample <- sample(c(TRUE, FALSE), length(text), replace=TRUE, prob=c(0.8, 0.2)) train <- text[sample] test <- text[!sample] corpus <- Corpus(train=train, test=test) #> 2023-11-29 12:28:25,704 No dev split found. Using 0% (i.e. 282 samples) of the train split as dev data document_embeddings <- TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=TRUE) label_dict <- corpus$make_label_dictionary(label_type=\"classification\") #> 2023-11-29 12:28:27,310 Computing label dictionary. 
Progress: #> 2023-11-29 12:28:27,361 Dictionary created for label 'classification' with 2 values: 0 (seen 1322 times), 1 (seen 1212 times) # load Dictionary object from flair_data Dictionary <- flair_data()$Dictionary # manually create label_dict with two labels, 0 and 1 label_dict <- Dictionary(add_unk=FALSE) # you can specify the order of labels. Please note the label should be a list and character (string) type. specific_order_labels <- list('0', '1') for (label in seq_along(specific_order_labels)) { label_dict$add_item(as.character(specific_order_labels [[label]])) } print(label_dict$idx2item) #> [[1]] #> b'0' #> #> [[2]] #> b'1' print(label_dict$item2idx) #> $`b'0'` #> [1] 0 #> #> $`b'1'` #> [1] 1 classifier <- TextClassifier(document_embeddings, label_dictionary=label_dict, label_type='classification') classifier$to(flair_device(\"cpu\")) #> TextClassifier( #> (embeddings): TransformerDocumentEmbeddings( #> (model): DistilBertModel( #> (embeddings): Embeddings( #> (word_embeddings): Embedding(30523, 768) #> (position_embeddings): Embedding(512, 768) #> (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) #> (dropout): Dropout(p=0.1, inplace=False) #> ) #> (transformer): Transformer( #> (layer): ModuleList( #> (0-5): 6 x TransformerBlock( #> (attention): MultiHeadSelfAttention( #> (dropout): Dropout(p=0.1, inplace=False) #> (q_lin): Linear(in_features=768, out_features=768, bias=True) #> (k_lin): Linear(in_features=768, out_features=768, bias=True) #> (v_lin): Linear(in_features=768, out_features=768, bias=True) #> (out_lin): Linear(in_features=768, out_features=768, bias=True) #> ) #> (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) #> (ffn): FFN( #> (dropout): Dropout(p=0.1, inplace=False) #> (lin1): Linear(in_features=768, out_features=3072, bias=True) #> (lin2): Linear(in_features=3072, out_features=768, bias=True) #> (activation): GELUActivation() #> ) #> (output_layer_norm): LayerNorm((768,), eps=1e-12, 
elementwise_affine=True) #> ) #> ) #> ) #> ) #> ) #> (decoder): Linear(in_features=768, out_features=2, bias=True) #> (dropout): Dropout(p=0.0, inplace=False) #> (locked_dropout): LockedDropout(p=0.0) #> (word_dropout): WordDropout(p=0.0) #> (loss_function): CrossEntropyLoss() #> ) trainer <- ModelTrainer(classifier, corpus) trainer$train('grand_standing_model', # output directory learning_rate=0.02, # learning rate: if batch_growth_annealing activates,lr should starts a bit higher. mini_batch_size=8L, # batch size anneal_with_restarts = TRUE, save_final_model=TRUE, max_epochs=10L) # Maximum number of epochs # import the performance metrics generated during the training process performance_df <- read.table(file = \"grand_standing/loss.tsv\", header = TRUE, sep = \"\\t\") head(performance_df) #> EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL #> 1 1 13:07:11 0.02 0.6071 0.6314 0.7250 0.7250 #> 2 2 13:11:19 0.02 0.4509 0.6139 0.7393 0.7393 #> 3 3 13:21:47 0.02 0.3294 0.6228 0.7464 0.7464 #> 4 4 13:25:03 0.02 0.2513 0.6628 0.7393 0.7393 #> 5 5 13:28:10 0.02 0.1109 0.6920 0.7429 0.7429 #> 6 6 13:31:16 0.02 0.0553 0.7023 0.7429 0.7429 #> DEV_F1 DEV_ACCURACY #> 1 0.7250 0.7250 #> 2 0.7393 0.7393 #> 3 0.7464 0.7464 #> 4 0.7393 0.7393 #> 5 0.7429 0.7429 #> 6 0.7429 0.7429 library(ggplot2) ggplot(performance_df, aes(x = EPOCH)) + geom_line(aes(y = TRAIN_LOSS, color = \"Training Loss\")) + geom_line(aes(y = DEV_LOSS, color = \"Development Loss\")) + geom_line(aes(y = DEV_RECALL, color = \"Development Recall\")) + geom_line(aes(y = DEV_F1, color = \"Development F1\")) + labs(title = \"Training and Development Loss per Epoch\", x = \"Epochs / Grandstanding Classifier\", y = \"\") + scale_color_manual(\"\", values = c(\"Training Loss\" = \"blue\", \"Development Loss\" = \"red\", \"Development F1\" = \"green\"))+ theme_minimal() Results: - F-score (micro) 0.7443 - F-score (macro) 0.7438 - Accuracy 0.7443 By class: precision recall f1-score support 1 
0.6781 0.8519 0.7551 324 0 0.8362 0.6516 0.7324 376 accuracy 0.7443 700 macro avg 0.7572 0.7517 0.7438 700 weighted avg 0.7630 0.7443 0.7429 700 # load the trained model data(statements) Sentence <- flair_data()$Sentence text <- statements[1, \"Statement\"] sentence <- Sentence(text) classifier$predict(sentence) print(sentence) #> Sentence[55]: \"Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes!\" → 1 (0.5151) sentence$labels[[1]]$value #> [1] \"1\" sentence$labels[[1]]$score #> [1] 0.5150982 Sentence <- flair_data()$Sentence TextClassifier <- flair_models()$TextClassifier classifier <- TextClassifier$load('grand_standing/best-model.pt') classify_text <- function(text, classifier) { # Classifies the given text using the specified Flair classifier. # # Args: # text (str): The text to be classified. # classifier (TextClassifier): The Flair classifier to use for prediction. # # Returns: # list: A list containing the predicted class label and score as strings. sentence <- Sentence(text) classifier$predict(sentence) return(list (labels = sentence$labels[[1]]$value, score = as.character(sentence$labels[[1]]$score))) } data(statements) print(statements) #> Type #> 1 Dramatic Appeal to Emotion #> 2 Exaggerated Praise for a Local Issue #> 3 Over-Simplified Solution to Complex Issue #> 4 Personal Anecdote Over Policy #> 5 Blaming Political Opponents #> Statement #> 1 Ladies and gentlemen, I stand before you today not just as a legislator, but as a defender of our very way of life! We are facing a crisis of monumental proportions, and if we don't act now, the very fabric of our society will unravel before our eyes! #> 2 I want to bring attention to the extraordinary achievement of the Smallville High School baseball team. 
Their victory is not just a win for Smallville, but a symbol of hope for our nation! This is what true American spirit looks like! #> 3 The solution to our nation's economic struggles is simple: cut taxes. That's it. Cut them. The economy will skyrocket like never before. Why complicate things when the answer is right there in front of us? #> 4 I remember, back in my hometown, old Mr. Jenkins used to say, 'If it ain't broke, don't fix it.' That's exactly how I feel about our current healthcare system. We don't need reform; we just need good, old-fashioned common sense. #> 5 Every problem we face today can be traced back to the disastrous policies of the other party. They are the reason we are in this mess, and until we recognize that, we cannot move forward as a nation. for (i in seq_along(statements$Statement) ) { out_come <- classify_text(statements$Statement[[i]], classifier) statements[i, 'predicted_labels'] <- out_come[[1]] statements[i, 'prop_score'] <- out_come[[2]] } statements[c(\"Type\", \"predicted_labels\", \"prop_score\")] #> Type predicted_labels prop_score #> 1 Dramatic Appeal to Emotion 1 0.998062312602997 #> 2 Exaggerated Praise for a Local Issue 1 0.985962450504303 #> 3 Over-Simplified Solution to Complex Issue 1 0.967254757881165 #> 4 Personal Anecdote Over Policy 1 0.998513281345367 #> 5 Blaming Political Opponents 1 0.999097466468811"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"performing-nlp-tasks-in-r","dir":"","previous_headings":"Introduction flairR: An R Wrapper for Accessing Flair NLP Library","what":"Performing NLP Tasks in R","title":"An R Wrapper for Accessing Flair NLP Library","text":"Flair NLP also provides set functions perform NLP tasks, named entity recognition, sentiment analysis, part--speech tagging. First, load data model perform NER task text . Yesterday, Dr. Jane Smith spoke United Nations New York. discussed climate change impact global economies. 
event attended representatives various countries including France Japan. Dr. Smith mentioned 2050, world see rise sea level approximately 2 feet. World Health Organization () pledged $50 million combat health effects global warming. interview New York Times, Dr. Smith emphasized urgent need action. Later day, flew back London, arriving 10:00 PM GMT. Alternatively, facilitate efficient use social science research, {flairR} expands {flairNLP/flair}’s core functionality working three major functions extract features tidy fast format– data.table R. expanded features flaiR can used perform extract features sentence object tidy format. named entity recognition transformer-based sentiment analysis part--speech tagging example, can use get_entities function load_tagger_ner(\"ner\")flaiR extract named entities sentence object tidy format. cases, need extract named entities large corpus. example, can use Stefan’s data Temporal Focus Campaign Communication (JOP 2022) example. addition, handle load RAM dealing larger corpus, {flairR} supports batch processing handle texts batches, especially useful dealing large datasets, optimize memory usage performance. implementation batch processing can also utilize GPU acceleration faster computations.","code":"Classifier <- flair_nn()$Classifier Sentence <- flair_data()$Sentence # load the model flair NLP already trained for us tagger <- Classifier$load('ner') #> 2023-11-29 12:28:30,624 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , # make a sentence object text <- \"Yesterday, Dr. Jane Smith spoke at the United Nations in New York. She discussed climate change and its impact on global economies. The event was attended by representatives from various countries including France and Japan. Dr. Smith mentioned that by 2050, the world could see a rise in sea level by approximately 2 feet. 
The World Health Organization (WHO) has pledged $50 million to combat the health effects of global warming. In an interview with The New York Times, Dr. Smith emphasized the urgent need for action. Later that day, she flew back to London, arriving at 10:00 PM GMT.\" sentence <- Sentence(text) # predict NER tags tagger$predict(sentence) # print sentence with predicted tags print(sentence) #> Sentence[115]: \"Yesterday, Dr. Jane Smith spoke at the United Nations in New York. She discussed climate change and its impact on global economies. The event was attended by representatives from various countries including France and Japan. Dr. Smith mentioned that by 2050, the world could see a rise in sea level by approximately 2 feet. The World Health Organization (WHO) has pledged $50 million to combat the health effects of global warming. In an interview with The New York Times, Dr. Smith emphasized the urgent need for action. Later that day, she flew back to London, arriving at 10:00 PM GMT.\" → [\"Jane Smith\"/PER, \"United Nations\"/ORG, \"New York\"/LOC, \"France\"/LOC, \"Japan\"/LOC, \"Smith\"/PER, \"World Health Organization\"/ORG, \"WHO\"/ORG, \"The New York Times\"/ORG, \"Smith\"/PER, \"London\"/LOC, \"GMT\"/MISC] tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 12:28:33,472 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(text = text, doc_ids = \"example text\", tagger_ner) print(results) #> doc_id entity tag #> 1: example text Jane Smith PER #> 2: example text United Nations ORG #> 3: example text New York LOC #> 4: example text France LOC #> 5: example text Japan LOC #> 6: example text Smith PER #> 7: example text World Health Organization ORG #> 8: example text WHO ORG #> 9: example text The New York Times ORG #> 10: example text Smith PER #> 11: example text London LOC #> 12: example text GMT MISC 
library(flaiR) data(cc_muller) examples <- head(cc_muller, 10) examples[c(\"text\", \"countryname\")] #> # A tibble: 10 × 2 #> text countryname #> #> 1 And to boost the housing we need, we will start to build a new g… United Kin… #> 2 In many cases, their value to society in economic, social and en… Ireland #> 3 However, requests for Standing Order 31 adjournments of Dáil bus… Ireland #> 4 We will work with the Pig Industry Stakeholder group to enhance … Ireland #> 5 The legacy of the Celtic Tiger includes 'ghost' housing estates,… Ireland #> 6 We must not allow ISIS to hold a safe haven from which it can pu… Canada #> 7 The declaration of the G20 as the premier forum for internationa… Australia #> 8 This funding represents the next instalment (Round Five, Phase O… Australia #> 9 We'll provide free after-school care and holiday programmes for … New Zealand #> 10 This will properly manage the adverse environmental effects of a… New Zealand tagger_ner <- load_tagger_ner(\"ner\") #> 2023-11-29 12:28:36,082 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , results <- get_entities(text = examples$text, doc_ids = examples$countryname, tagger_ner) print(results) #> doc_id entity tag #> 1: United Kingdom #> 2: Ireland #> 3: Ireland Dáil ORG #> 4: Ireland Order of Business ORG #> 5: Ireland Standing Orders MISC #> 6: Ireland Pig Industry Stakeholder ORG #> 7: Ireland Celtic Tiger ORG #> 8: Canada ISIS ORG #> 9: Australia G20 ORG #> 10: Australia Round Five MISC #> 11: Australia Phase One MISC #> 12: Australia Rudd Labor Government ORG #> 13: New Zealand OSCAR ORG #> 14: New Zealand Exclusive Economic Zone MISC"},{"path":"https://davidycliao.github.io/flaiR/index.html","id":"contribution-and-open-source","dir":"","previous_headings":"","what":"Contribution and Open Source","title":"An R Wrapper for Accessing Flair NLP Library","text":"{flaiR} maintained 
developed David Liao friends. R developers want contribute {flaiR} welcome – {flaiR} open source project. warmly invite R users share similar interests join contributing package. Please feel free shoot email collaborate task. Contributions – whether comments, code suggestions, tutorial examples, forking repository – greatly appreciated. Please note flaiR released Contributor Code Conduct. contributing project, agree abide terms. primary communication channel R users can found . Please feel free share insights Discussion page report issues related R interface Issue section. issue pertains actual implementation Flair Python, please submit pull request offical flair NLP.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"replication data sourced \"Temporal Focus Campaign Communication,\" authored Stefan Müller, published Journal Politics 2022. study primarily delves temporal emphasis party manifestos. dataset encompasses 5,858 annotated data entries countries United Kingdom, Ireland, Canada, Australia, New Zealand, United States. central objective compute percentage sentences quasi-sentences referring past, present, future. 
differentiation made based two categories: \"Prospective\" \"Retrospective\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"","code":"data(\"cc_muller\")"},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"data frame 7 variables: text Content text. sentence_id Unique identifier sentence. countryname Country's name. party Associated political party text. date Date record. class Type classification. class_pro_retro Classification either 'Prospective' 'Retrospective'.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"Data provided author https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/7NP2XH","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/cc_muller.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : The Temporal Focus of Campaign Communication (2020 JOP) — cc_muller","text":"","code":"if (FALSE) { data(cc_muller) head(cc_muller) }"},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform Garbage Collection Based on Condition — check_and_gc","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"function checks value gc.active determine whether perform garbage collection. 
gc.active TRUE, function perform garbage collection send message indicating completion process.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"","code":"check_and_gc(gc.active)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"gc.active logical value indicating whether activate garbage collection.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_and_gc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Perform Garbage Collection Based on Condition — check_and_gc","text":"message indicating garbage collection performed gc.active TRUE. Otherwise, action taken message displayed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Specified Batch Size — check_batch_size","title":"Check the Specified Batch Size — check_batch_size","text":"Validates given batch size positive integer.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Specified Batch Size — check_batch_size","text":"","code":"check_batch_size(batch_size)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_batch_size.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Specified Batch Size — check_batch_size","text":"batch_size Integer. 
batch size checked.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Device for Accelerating PyTorch — check_device","title":"Check the Device for Accelerating PyTorch — check_device","text":"function verifies specified device available PyTorch. CUDA available, message shown. Additionally, system running Mac M1, MPS used instead CUDA. Checks specified device compatible current system's hardware operating system configuration, particularly Mac systems Apple M1/M2 silicon using Metal Performance Shaders (MPS).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Device for Accelerating PyTorch — check_device","text":"","code":"check_device(device)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Device for Accelerating PyTorch — check_device","text":"device character string specifying device type.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the Device for Accelerating PyTorch — check_device","text":"PyTorch device object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_device.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Check the Device for Accelerating PyTorch — check_device","text":"MPS available system meets requirements, device type MPS returned. Otherwise, CPU device used. 
requirements using MPS follows:\\cr Mac computers Apple silicon AMD GPUs\\cr macOS 12.3 later\\cr Python 3.7 later\\cr Xcode command-line tools installed (xcode-select --install)\\cr information : https://developer.apple.com/metal/pytorch/.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":null,"dir":"Reference","previous_headings":"","what":"Check Flair — check_flair_installed","title":"Check Flair — check_flair_installed","text":"Determines Flair Python module available current Python environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check Flair — check_flair_installed","text":"","code":"check_flair_installed(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_flair_installed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check Flair — check_flair_installed","text":"Logical. TRUE Flair installed, otherwise FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the Given Language Models against Supported Languages Models — check_language_supported","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"function checks whether provided language supported. 
, stops execution returns message indicating supported languages.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"","code":"check_language_supported(language, supported_lan_models)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"language language check. supported_lan_models vector supported languages.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"function return anything, stops execution check fails.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_language_supported.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Check the Given Language Models against Supported Languages Models — check_language_supported","text":"","code":"# Assuming 'en' is a supported language and 'abc' is not: check_language_supported(\"en\", c(\"en\", \"de\", \"fr\")) # check_language_supported(\"abc\", c(\"en\", \"de\", \"fr\")) # will stop execution"},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":null,"dir":"Reference","previous_headings":"","what":"Check Environment Pre-requisites — check_prerequisites","title":"Check Environment Pre-requisites — check_prerequisites","text":"function checks Python installed, flair module available Python, active internet 
connection.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check Environment Pre-requisites — check_prerequisites","text":"","code":"check_prerequisites(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check Environment Pre-requisites — check_prerequisites","text":"... passing additional arguments.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_prerequisites.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check Environment Pre-requisites — check_prerequisites","text":"message detailing missing pre-requisites.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":null,"dir":"Reference","previous_headings":"","what":"Check for Available Python Installation — check_python_installed","title":"Check for Available Python Installation — check_python_installed","text":"function checks environment installed R system.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check for Available Python Installation — check_python_installed","text":"","code":"check_python_installed(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check for Available Python Installation — check_python_installed","text":"... param run.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_python_installed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check for Available Python Installation — check_python_installed","text":"Logical. 
TRUE Python installed, FALSE otherwise. Additionally, installed, path Python installation printed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the show.text_id Parameter — check_show.text_id","title":"Check the show.text_id Parameter — check_show.text_id","text":"Validates given show.text_id logical value.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the show.text_id Parameter — check_show.text_id","text":"","code":"check_show.text_id(show.text_id)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_show.text_id.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the show.text_id Parameter — check_show.text_id","text":"show.text_id Logical. parameter checked.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":null,"dir":"Reference","previous_headings":"","what":"Check the texts and document IDs — check_texts_and_ids","title":"Check the texts and document IDs — check_texts_and_ids","text":"Validates given texts document IDs NULL empty.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check the texts and document IDs — check_texts_and_ids","text":"","code":"check_texts_and_ids(texts, doc_ids)"},{"path":"https://davidycliao.github.io/flaiR/reference/check_texts_and_ids.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check the texts and document IDs — check_texts_and_ids","text":"texts List. list texts. doc_ids List. 
list document IDs.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":null,"dir":"Reference","previous_headings":"","what":"Clear Flair Cache — clear_flair_cache","title":"Clear Flair Cache — clear_flair_cache","text":"function clears cache associated Flair Python library. cache directory typically located \"~/.flair\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clear Flair Cache — clear_flair_cache","text":"","code":"clear_flair_cache(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clear Flair Cache — clear_flair_cache","text":"... argument passed next.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clear Flair Cache — clear_flair_cache","text":"Returns NULL invisibly. 
Messages printed indicating whether cache found cleared.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/clear_flair_cache.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Clear Flair Cache — clear_flair_cache","text":"","code":"if (FALSE) { clear_flair_cache() }"},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":null,"dir":"Reference","previous_headings":"","what":"German Bundestag Immigration Debate Data — de_immigration","title":"German Bundestag Immigration Debate Data — de_immigration","text":"dataset containing speeches debates German Bundestag topic immigration.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"German Bundestag Immigration Debate Data — de_immigration","text":"","code":"data(\"de_immigration\")"},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"German Bundestag Immigration Debate Data — de_immigration","text":"data frame 16 variables: date Date speech, Date type agenda Agenda subject speech, character speechnumber Unique identifier speech, numeric speaker Name person giving speech, character party Political party speaker, character party.facts.id ID party, usually numeric character chair Person chairing session, character terms Terms tags associated speech, character list text Actual text speech, character parliament Bundestag session, character numeric iso3country ISO3 country code Germany, character year Year speech made, numeric agenda_ID Unique identifier agenda, usually numeric character migration_dummy Dummy variable related migration topic, usually numeric (0 1) comment_agenda Additional comments agenda, 
character","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"German Bundestag Immigration Debate Data — de_immigration","text":"Data collected ParSpeechV2 House Commons year 2010. dataset publicly available https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/de_immigration.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"German Bundestag Immigration Debate Data — de_immigration","text":"","code":"if (FALSE) { data(de_immigration) head(de_immigration) }"},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":null,"dir":"Reference","previous_headings":"","what":"Install Python Dependencies and Load the flaiRnlp — .onAttach","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":".onAttach sets virtual environment, checks Python availability, ensures 'flair' module installed flair_env Python.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"","code":".onAttach(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"... character string specifying name virtual environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/dot-onAttach.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Install Python Dependencies and Load the flaiRnlp — .onAttach","text":"function performs following steps: Checks virtual environment specified venv exists. 
, creates environment. Activates virtual environment. Checks availability Python. Python available, displays error message. Checks 'flair' Python module available virtual environment. , attempts install 'flair'. installation fails, prompts user install 'flair' manually.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert Embeddings to Matrix — embeddings_to_matrix","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"function takes three-dimensional array embeddings converts two-dimensional matrix based specified strategy.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"","code":"embeddings_to_matrix(embeddings, strategy = \"average\")"},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"embeddings three-dimensional array shape (number_of_texts, number_of_words, embedding_dimension). strategy character string specifying strategy use. 
Options \"average\", \"concatenate\", \"max_pooling\", \"min_pooling\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"two-dimensional matrix transformed embeddings.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/embeddings_to_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert Embeddings to Matrix — embeddings_to_matrix","text":"","code":"if (FALSE) { embeddings <- array(runif(10 * 5 * 3), c(10, 5, 3)) result <- embeddings_to_matrix(embeddings, strategy = \"average\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a Flair Sentence — flair_data.Sentence","title":"Create a Flair Sentence — flair_data.Sentence","text":"Flair powerful NLP framework leverages state---art embeddings various natural language processing tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a Flair Sentence — flair_data.Sentence","text":"","code":"flair_data.Sentence(sentence_text)"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a Flair Sentence — flair_data.Sentence","text":"sentence_text character string converted Flair Sentence object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a Flair Sentence — flair_data.Sentence","text":"Flair Sentence 
object.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"note","dir":"Reference","previous_headings":"","what":"Note","title":"Create a Flair Sentence — flair_data.Sentence","text":"Ensure input string language compatible intended Flair model. R, processing multiple text, can use purrr basic R functions lapply sapply.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Create a Flair Sentence — flair_data.Sentence","text":"Python equivalent:","code":"from flair.data import Sentence sentence = Sentence(\"The quick brown fox jumps over the lazy dog.\")"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.Sentence.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a Flair Sentence — flair_data.Sentence","text":"","code":"if (FALSE) { flair_data.Sentence(\"The quick brown fox jumps over the lazy dog.\")}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.data Module — flair_data","title":"Import flair.data Module — flair_data","text":"flair.data module provides essential utilities text data processing representation Flair library. function gives access various classes utilities flair.data module, notably: BoundingBox(left, top, right, bottom): Bases: tuple (Python); list (R) left - str. Alias field number 0. top - int Alias field number 1 right - int Alias field number 2 bottom - int Alias field number 3 Sentence(text, use_tokenizer=True, language_code=None, start_position=0):Sentence list tokens used represent sentence text fragment. Sentence can imported flair_data()$Sentence via flaiR. text Union[str, List[str], List[Token]] - original string (sentence), pre-tokenized list tokens. 
use_tokenizer Union[bool, Tokenizer] - Specify custom tokenizer split text tokens. default flair.tokenization.SegTokTokenizer. use_tokenizer set False, flair.tokenization.SpaceTokenizer used instead. tokenizer ignored text refers pre-tokenized tokens. language_code Optional[str] - Language sentence. provided, langdetect called language_code accessed first time. start_position int - Start character offset sentence superordinate document. Span(tokens, tag=None, score=1.0): Bases: _PartOfSentence. Span slice Sentence, consisting list Tokens. Span can imported flair_data()$Span. Token(text, head_id=None, whitespace_after=1, start_position=0, sentence=None): class represents one word tokenized sentence. token may number tags. may also point head dependency tree. Token can imported flair_data()$Token via flaiR. Corpus(train=None, dev=None, test=None, name='corpus', sample_missing_splits=True): Represents collection sentences, facilitating operations like splitting train/test/development sets applying transformations. particularly useful training evaluating models custom datasets. Corpus can imported flair_data()$Corpus via flaiR. Dictionary: Represents mapping items indices. useful converting text machine-readable formats.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.data Module — flair_data","text":"","code":"flair_data()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.data Module — flair_data","text":"Python module (flair.data). 
access classes utilities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.data Module — flair_data","text":"Python reference:","code":"from flair.data import Sentence"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.data Module — flair_data","text":"","code":"if (FALSE) { Sentence <- flair_data()$Sentence Token <- flair_data()$Token Corpus <- flair_data()$Corpus }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":null,"dir":"Reference","previous_headings":"","what":"Access the flair_datasets Module from Flair — flair_datasets","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Utilizes reticulate package import flair.datasets dataset Flair's datasets Python, enabling use dataset R environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"","code":"flair_datasets()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Python Module(flair.datasets) Flair, can utilized NLP tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"Python equivalent:","code":"from flair.datasets import UD_ENGLISH corpus = 
UD_ENGLISH().downsample(0.1)"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_datasets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Access the flair_datasets Module from Flair — flair_datasets","text":"","code":"if (FALSE) { UD_ENGLISH <- flair_datasets()$UD_ENGLISH corpus <- UD_ENGLISH()$downsample(0.1)} # print all the datasets from flair names(flair_datasets()) #> [1] \"AMAZON_REVIEWS\" #> [2] \"ANAT_EM\" #> [3] \"AZDZ\" #> [4] \"BC2GM\" #> [5] \"BIOBERT_CHEMICAL_BC4CHEMD\" #> [6] \"BIOBERT_CHEMICAL_BC5CDR\" #> [7] \"BIOBERT_DISEASE_BC5CDR\" #> [8] \"BIOBERT_DISEASE_NCBI\" #> [9] \"BIOBERT_GENE_BC2GM\" #> [10] \"BIOBERT_GENE_JNLPBA\" #> [11] \"BIOBERT_SPECIES_LINNAEUS\" #> [12] \"BIOBERT_SPECIES_S800\" #> [13] \"BIONLP2013_CG\" #> [14] \"BIONLP2013_PC\" #> [15] \"BIOSCOPE\" #> [16] \"BIOSEMANTICS\" #> [17] \"BIO_INFER\" #> [18] \"CDR\" #> [19] \"CELL_FINDER\" #> [20] \"CEMP\" #> [21] \"CHEMDNER\" #> [22] \"CLL\" #> [23] \"COMMUNICATIVE_FUNCTIONS\" #> [24] \"CONLL_03\" #> [25] \"CONLL_03_DUTCH\" #> [26] \"CONLL_03_GERMAN\" #> [27] \"CONLL_03_SPANISH\" #> [28] \"CONLL_2000\" #> [29] \"CRAFT\" #> [30] \"CRAFT_V4\" #> [31] \"CSVClassificationCorpus\" #> [32] \"CSVClassificationDataset\" #> [33] \"ClassificationCorpus\" #> [34] \"ClassificationDataset\" #> [35] \"ColumnCorpus\" #> [36] \"ColumnDataset\" #> [37] \"DECA\" #> [38] \"DataLoader\" #> [39] \"DataPairCorpus\" #> [40] \"DataPairDataset\" #> [41] \"FEWNERD\" #> [42] \"FSU\" #> [43] \"FeideggerCorpus\" #> [44] \"FeideggerDataset\" #> [45] \"FlairDatapointDataset\" #> [46] \"GELLUS\" #> [47] \"GERMEVAL_2018_OFFENSIVE_LANGUAGE\" #> [48] \"GLUE_COLA\" #> [49] \"GLUE_MNLI\" #> [50] \"GLUE_MRPC\" #> [51] \"GLUE_QNLI\" #> [52] \"GLUE_QQP\" #> [53] \"GLUE_RTE\" #> [54] \"GLUE_SST2\" #> [55] \"GLUE_STSB\" #> [56] \"GLUE_WNLI\" #> [57] \"GO_EMOTIONS\" #> [58] \"GPRO\" #> [59] \"HUNER_CELL_LINE\" #> [60] \"HUNER_CELL_LINE_CELL_FINDER\" #> [61] 
\"HUNER_CELL_LINE_CLL\" #> [62] \"HUNER_CELL_LINE_GELLUS\" #> [63] \"HUNER_CELL_LINE_JNLPBA\" #> [64] \"HUNER_CHEMICAL\" #> [65] \"HUNER_CHEMICAL_CDR\" #> [66] \"HUNER_CHEMICAL_CEMP\" #> [67] \"HUNER_CHEMICAL_CHEBI\" #> [68] \"HUNER_CHEMICAL_CHEMDNER\" #> [69] \"HUNER_CHEMICAL_CRAFT_V4\" #> [70] \"HUNER_CHEMICAL_SCAI\" #> [71] \"HUNER_DISEASE\" #> [72] \"HUNER_DISEASE_CDR\" #> [73] \"HUNER_DISEASE_MIRNA\" #> [74] \"HUNER_DISEASE_NCBI\" #> [75] \"HUNER_DISEASE_PDR\" #> [76] \"HUNER_DISEASE_SCAI\" #> [77] \"HUNER_DISEASE_VARIOME\" #> [78] \"HUNER_GENE\" #> [79] \"HUNER_GENE_BC2GM\" #> [80] \"HUNER_GENE_BIO_INFER\" #> [81] \"HUNER_GENE_CELL_FINDER\" #> [82] \"HUNER_GENE_CHEBI\" #> [83] \"HUNER_GENE_CRAFT_V4\" #> [84] \"HUNER_GENE_DECA\" #> [85] \"HUNER_GENE_FSU\" #> [86] \"HUNER_GENE_GPRO\" #> [87] \"HUNER_GENE_IEPA\" #> [88] \"HUNER_GENE_JNLPBA\" #> [89] \"HUNER_GENE_LOCTEXT\" #> [90] \"HUNER_GENE_MIRNA\" #> [91] \"HUNER_GENE_OSIRIS\" #> [92] \"HUNER_GENE_VARIOME\" #> [93] \"HUNER_SPECIES\" #> [94] \"HUNER_SPECIES_CELL_FINDER\" #> [95] \"HUNER_SPECIES_CHEBI\" #> [96] \"HUNER_SPECIES_CRAFT_V4\" #> [97] \"HUNER_SPECIES_LINNEAUS\" #> [98] \"HUNER_SPECIES_LOCTEXT\" #> [99] \"HUNER_SPECIES_MIRNA\" #> [100] \"HUNER_SPECIES_S800\" #> [101] \"HUNER_SPECIES_VARIOME\" #> [102] \"IEPA\" #> [103] \"IMDB\" #> [104] \"JNLPBA\" #> [105] \"KEYPHRASE_INSPEC\" #> [106] \"KEYPHRASE_SEMEVAL2010\" #> [107] \"KEYPHRASE_SEMEVAL2017\" #> [108] \"LINNEAUS\" #> [109] \"LOCTEXT\" #> [110] \"MASAKHA_POS\" #> [111] \"MIRNA\" #> [112] \"MongoDataset\" #> [113] \"NCBI_DISEASE\" #> [114] \"NEL_ENGLISH_AIDA\" #> [115] \"NEL_ENGLISH_AQUAINT\" #> [116] \"NEL_ENGLISH_IITB\" #> [117] \"NEL_ENGLISH_REDDIT\" #> [118] \"NEL_ENGLISH_TWEEKI\" #> [119] \"NEL_GERMAN_HIPE\" #> [120] \"NER_ARABIC_ANER\" #> [121] \"NER_ARABIC_AQMAR\" #> [122] \"NER_BASQUE\" #> [123] \"NER_CHINESE_WEIBO\" #> [124] \"NER_DANISH_DANE\" #> [125] \"NER_ENGLISH_MOVIE_COMPLEX\" #> [126] \"NER_ENGLISH_MOVIE_SIMPLE\" #> [127] 
\"NER_ENGLISH_PERSON\" #> [128] \"NER_ENGLISH_RESTAURANT\" #> [129] \"NER_ENGLISH_SEC_FILLINGS\" #> [130] \"NER_ENGLISH_STACKOVERFLOW\" #> [131] \"NER_ENGLISH_TWITTER\" #> [132] \"NER_ENGLISH_WEBPAGES\" #> [133] \"NER_ENGLISH_WIKIGOLD\" #> [134] \"NER_ENGLISH_WNUT_2020\" #> [135] \"NER_FINNISH\" #> [136] \"NER_GERMAN_BIOFID\" #> [137] \"NER_GERMAN_EUROPARL\" #> [138] \"NER_GERMAN_GERMEVAL\" #> [139] \"NER_GERMAN_LEGAL\" #> [140] \"NER_GERMAN_MOBIE\" #> [141] \"NER_GERMAN_POLITICS\" #> [142] \"NER_HIPE_2022\" #> [143] \"NER_HUNGARIAN\" #> [144] \"NER_ICDAR_EUROPEANA\" #> [145] \"NER_ICELANDIC\" #> [146] \"NER_JAPANESE\" #> [147] \"NER_MASAKHANE\" #> [148] \"NER_MULTI_CONER\" #> [149] \"NER_MULTI_CONER_V2\" #> [150] \"NER_MULTI_WIKIANN\" #> [151] \"NER_MULTI_WIKINER\" #> [152] \"NER_MULTI_XTREME\" #> [153] \"NER_NERMUD\" #> [154] \"NER_SWEDISH\" #> [155] \"NER_TURKU\" #> [156] \"NER_UKRAINIAN\" #> [157] \"NEWSGROUPS\" #> [158] \"ONTONOTES\" #> [159] \"OSIRIS\" #> [160] \"OcrJsonDataset\" #> [161] \"OpusParallelCorpus\" #> [162] \"PDR\" #> [163] \"ParallelTextCorpus\" #> [164] \"ParallelTextDataset\" #> [165] \"RE_ENGLISH_CONLL04\" #> [166] \"RE_ENGLISH_DRUGPROT\" #> [167] \"RE_ENGLISH_SEMEVAL2010\" #> [168] \"RE_ENGLISH_TACRED\" #> [169] \"S800\" #> [170] \"SCAI_CHEMICALS\" #> [171] \"SCAI_DISEASE\" #> [172] \"SENTEVAL_CR\" #> [173] \"SENTEVAL_MPQA\" #> [174] \"SENTEVAL_MR\" #> [175] \"SENTEVAL_SST_BINARY\" #> [176] \"SENTEVAL_SST_GRANULAR\" #> [177] \"SENTEVAL_SUBJ\" #> [178] \"SENTIMENT_140\" #> [179] \"SROIE\" #> [180] \"STACKOVERFLOW\" #> [181] \"SUPERGLUE_RTE\" #> [182] \"SentenceDataset\" #> [183] \"StringDataset\" #> [184] \"TREC_50\" #> [185] \"TREC_6\" #> [186] \"UD_AFRIKAANS\" #> [187] \"UD_ANCIENT_GREEK\" #> [188] \"UD_ARABIC\" #> [189] \"UD_ARMENIAN\" #> [190] \"UD_BASQUE\" #> [191] \"UD_BELARUSIAN\" #> [192] \"UD_BULGARIAN\" #> [193] \"UD_CATALAN\" #> [194] \"UD_CHINESE\" #> [195] \"UD_COPTIC\" #> [196] \"UD_CROATIAN\" #> [197] \"UD_CZECH\" #> [198] 
\"UD_DANISH\" #> [199] \"UD_DUTCH\" #> [200] \"UD_ENGLISH\" #> [201] \"UD_ESTONIAN\" #> [202] \"UD_FAROESE\" #> [203] \"UD_FINNISH\" #> [204] \"UD_FRENCH\" #> [205] \"UD_GALICIAN\" #> [206] \"UD_GERMAN\" #> [207] \"UD_GERMAN_HDT\" #> [208] \"UD_GOTHIC\" #> [209] \"UD_GREEK\" #> [210] \"UD_HEBREW\" #> [211] \"UD_HINDI\" #> [212] \"UD_INDONESIAN\" #> [213] \"UD_IRISH\" #> [214] \"UD_ITALIAN\" #> [215] \"UD_JAPANESE\" #> [216] \"UD_KAZAKH\" #> [217] \"UD_KOREAN\" #> [218] \"UD_LATIN\" #> [219] \"UD_LATVIAN\" #> [220] \"UD_LITHUANIAN\" #> [221] \"UD_LIVVI\" #> [222] \"UD_MALTESE\" #> [223] \"UD_MARATHI\" #> [224] \"UD_NORTH_SAMI\" #> [225] \"UD_NORWEGIAN\" #> [226] \"UD_OLD_CHURCH_SLAVONIC\" #> [227] \"UD_OLD_FRENCH\" #> [228] \"UD_PERSIAN\" #> [229] \"UD_POLISH\" #> [230] \"UD_PORTUGUESE\" #> [231] \"UD_ROMANIAN\" #> [232] \"UD_RUSSIAN\" #> [233] \"UD_SERBIAN\" #> [234] \"UD_SLOVAK\" #> [235] \"UD_SLOVENIAN\" #> [236] \"UD_SPANISH\" #> [237] \"UD_SWEDISH\" #> [238] \"UD_TURKISH\" #> [239] \"UD_UKRAINIAN\" #> [240] \"UD_WOLOF\" #> [241] \"UP_CHINESE\" #> [242] \"UP_ENGLISH\" #> [243] \"UP_FINNISH\" #> [244] \"UP_FRENCH\" #> [245] \"UP_GERMAN\" #> [246] \"UP_ITALIAN\" #> [247] \"UP_SPANISH\" #> [248] \"UP_SPANISH_ANCORA\" #> [249] \"UniversalDependenciesCorpus\" #> [250] \"UniversalDependenciesDataset\" #> [251] \"VARIOME\" #> [252] \"WASSA_ANGER\" #> [253] \"WASSA_FEAR\" #> [254] \"WASSA_JOY\" #> [255] \"WASSA_SADNESS\" #> [256] \"WNUT_17\" #> [257] \"WSD_MASC\" #> [258] \"WSD_OMSTI\" #> [259] \"WSD_RAGANATO_ALL\" #> [260] \"WSD_SEMCOR\" #> [261] \"WSD_TRAINOMATIC\" #> [262] \"WSD_UFSAC\" #> [263] \"WSD_WORDNET_GLOSS_TAGGED\" #> [264] \"YAHOO_ANSWERS\" #> [265] \"ZELDA\" #> [266] \"base\" #> [267] \"biomedical\" #> [268] \"document_classification\" #> [269] \"entity_linking\" #> [270] \"ocr\" #> [271] \"relation_extraction\" #> [272] \"sequence_labeling\" #> [273] \"text_image\" #> [274] \"text_text\" #> [275] 
\"treebanks\""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":null,"dir":"Reference","previous_headings":"","what":"Set Flair Device — flair_device","title":"Set Flair Device — flair_device","text":"function sets device Flair Python library. allows set device use CPU, GPU (cuda:0, cuda:1, cuda:2), specific MPS devices Mac (mps:0, mps:1, mps:2).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Set Flair Device — flair_device","text":"","code":"flair_device(device = \"cpu\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Set Flair Device — flair_device","text":"device character string specifying device. Valid options include: \"cpu\", \"cuda\", \"mps:0\", \"mps:1\", \"mps:2\", etc.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Set Flair Device — flair_device","text":"set device Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_device.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Set Flair Device — flair_device","text":"","code":"if (FALSE) { flair_device(\"cpu\") # Set device to CPU flair_device(\"cuda\") # Set device to GPU (if available) flair_device(\"mps:0\") # Set device to MPS device 0 (if available on Mac) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"function initializes Flair
embeddings flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"","code":"flair_embeddings.FlairEmbeddings(embeddings_type = \"news-forward\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"embeddings_type character string specifying type embeddings initialize. Options include: \"news-forward\", \"news-backward\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"Flair embeddings class flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"Multi-Language Embeddings: multi-X: Supports 300+ languages, sourced JW300 corpus. JW300 corpus, proposed Agić Vulić (2019). corpus licensed CC--NC-SA. multi-X-fast: CPU-friendly version, trained mix corpora languages like English, German, French, Italian, Dutch, Polish. English Embeddings: 'news-X': Trained 1 billion word corpus 'news-X-fast': Trained 1 billion word corpus, CPU-friendly. 
'mix-X': Trained mixed corpus (Web, Wikipedia, Subtitles) 'pubmed-X': Added @jessepeng: Trained 5% PubMed abstracts 2015 (1150 hidden states, 3 layers) Specific Language Embeddings: 'de-X': German. Trained mixed corpus (Web, Wikipedia, Subtitles) de-historic-ha-X: German (historical). Added @stefan-: Historical German trained Hamburger Anzeiger. de-historic-wz-X: German (historical). Added @stefan-: Historical German trained Wiener Zeitung. de-historic-rw-X: German (historical). Added @redewiedergabe: Historical German trained 100 million tokens de-impresso-hipe-v1-X: -domain data CLEF HIPE Shared task. -domain data (Swiss Luxembourgish newspapers) CLEF HIPE Shared task. information shared task can found paper. '-X': Norwegian. Added @stefan-: Trained Wikipedia/OPUS. 'nl-X': Dutch. Added @stefan-: Trained Wikipedia/OPUS 'nl-v0-X': Dutch. Added @stefan-: LM embeddings (earlier version) 'ja-X': Japanese. Added @frtacoa: Trained 439M words Japanese Web crawls (2048 hidden states, 2 layers) 'fi-X': Finnish. Added @stefan-: Trained Wikipedia/OPUS. 'fr-X': French. Added @mhham: Trained French Wikipedia Domain-Specific Embeddings: 'es-clinical-': Spanish (clinical). Added @matirojasg: Trained Wikipedia 'pubmed-X':English. Added @jessepeng: Trained 5% PubMed abstracts 2015 (1150 hidden states, 3 layers) examples. Ensure reference correct embedding name details application. Replace 'X' either 'forward' 'backward'.
comprehensive list embeddings, please refer : Flair Embeddings Documentation.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"FlairEmbeddings Flair Python library. Python example usage:","code":"from flair.embeddings import FlairEmbeddings flair_embedding_forward = FlairEmbeddings('news-forward') flair_embedding_backward = FlairEmbeddings('news-backward')"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.FlairEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair's Forward and Backward Embeddings — flair_embeddings.FlairEmbeddings","text":"","code":"if (FALSE) { flair_embedding_forward <- flair_embeddings.FlairEmbeddings(\"news-forward\") flair_embedding_backward <- flair_embeddings.FlairEmbeddings(\"news-backward\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"Creates stacked embedding instance using multiple Flair embeddings.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for StackedEmbeddings — 
flair_embeddings.StackedEmbeddings","text":"","code":"flair_embeddings.StackedEmbeddings(embeddings_list)"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"embeddings_list list containing Flair embedding instances.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"instance StackedEmbeddings flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"function ensures embedding provided list recognized Flair embedding. 
embeddings list recognized, function throw error.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.StackedEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for StackedEmbeddings — flair_embeddings.StackedEmbeddings","text":"","code":"if (FALSE) { glove_embedding <- flair_embeddings.WordEmbeddings(\"glove\") fasttext_embedding <- flair_embeddings.WordEmbeddings(\"fasttext\") stacked_embedding <- flair_embeddings.StackedEmbeddings(list(glove_embedding, fasttext_embedding)) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"function interfaces Python via reticulate create flair_embeddings.TransformerDocumentEmbeddings object flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"","code":"flair_embeddings.TransformerDocumentEmbeddings( model = \"bert-base-uncased\", layers = \"all\", subtoken_pooling = \"mean\", fine_tune = FALSE, allow_long_sentences = TRUE, memory_efficient = NULL, use_context = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"model character string 
specifying pre-trained model use. Defaults 'bert-base-uncased'. name transformer model, e.g., \"bert-base-uncased\", \"gpt2-medium\", etc. can also path pre-trained model. layers (Optional) Layers transformer model use. string specifies layers transformer model use. BERT, can specify multiple like \"1,2,3\" single layers 1. layers argument controls transformer layers used embedding. set value '-1,-2,-3,-4', top 4 layers used make embedding. set '-1', last layer used. set \"\", layers used. subtoken_pooling (Optional) Method pooling handle subtokens. determines subtokens (word pieces) pooled one embedding original token. Options 'first' (use first subtoken), 'last' (use last subtoken), 'first_last' (concatenate first last subtokens), 'mean' (average subtokens). fine_tune Logical. Indicates fine-tuning done. Defaults FALSE. allow_long_sentences Logical. Allows longer sentences processed. Defaults TRUE. certain transformer models (like BERT), maximum sequence length. default, Flair cuts sentences \\ long. option set True, Flair split long sentences smaller parts later average embeddings. memory_efficient (Optional) Enables memory efficient mode transformers. set TRUE, uses less memory, might slower. use_context Logical. Whether consider surrounding context processing step. 
Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"Flair TransformerDocumentEmbeddings Python class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"function provides interface R users easily access utilize power Flair's TransformerDocumentEmbeddings. bridges gap Python's Flair library R, enabling R users leverage state---art NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"Python's Flair library:","code":"from flair.embeddings import TransformerDocumentEmbeddings embedding = TransformerDocumentEmbeddings('bert-base-uncased')"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerDocumentEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for TransformerDocumentEmbeddings — flair_embeddings.TransformerDocumentEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.TransformerDocumentEmbeddings(\"bert-base-uncased\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for
TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"function interfaces Python via reticulate create TransformerWordEmbeddings object object flair.embeddings module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"","code":"flair_embeddings.TransformerWordEmbeddings( model = \"bert-base-uncased\", layers = \"all\", subtoken_pooling = \"mean\", fine_tune = FALSE, allow_long_sentences = TRUE, memory_efficient = NULL, use_context = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"model character string specifying pre-trained model use. Defaults 'bert-base-uncased'. name transformer model, e.g., \"bert-base-uncased\", \"gpt2-medium\", etc. can also path pre-trained model. layers (Optional) Layers transformer model use. string specifies layers transformer model use. BERT, can specify multiple like \"1,2,3\" single layers 1. layers argument controls transformer layers used embedding. set value '-1,-2,-3,-4', top 4 layers used make embedding. set '-1', last layer used. set \"\", layers used. subtoken_pooling (Optional) Method pooling handle subtokens. determines subtokens (word pieces) pooled one embedding original token. Options 'first' (use first subtoken), 'last' (use last subtoken), 'first_last' (concatenate first last subtokens), 'mean' (average subtokens). fine_tune Logical. Indicates fine-tuning done. Defaults FALSE. 
allow_long_sentences Logical. Allows longer sentences processed. Defaults TRUE. certain transformer models (like BERT), maximum sequence length. default, Flair cuts sentences long. option set True, Flair split long sentences smaller parts later average embeddings. memory_efficient (Optional) Enables memory efficient mode transformers. set TRUE, uses less memory, might slower. use_context Logical. Whether consider surrounding context processing step. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"Flair TransformerWordEmbeddings Python class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"function provides interface R users easily access utilize power Flair's TransformerWordEmbeddings. 
bridges gap Python's Flair library R, enabling R users leverage state---art NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"Python equivalent:","code":"from flair.embeddings import TransformerWordEmbeddings embedding = TransformerWordEmbeddings('bert-base-uncased')"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.TransformerWordEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for TransformerWordEmbeddings — flair_embeddings.TransformerWordEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.TransformerWordEmbeddings(\"bert-base-uncased\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"function interfaces Python via reticulate create WordEmbeddings object using Flair library. Users select pre-trained embeddings load providing appropriate ID string. Typically, two-letter language code initializes embedding (e.g., 'en' English, 'de' German). default, loads FastText embeddings trained Wikipedia. web crawl embeddings, use '-crawl' suffix (e.g., 'de-crawl' German). English offers options like 'en-glove', 'en-extvec', etc. 
Supported embeddings include: 'en-glove' 'glove': English GloVe embeddings 'en-extvec' 'extvec': English Komninos embeddings 'en-crawl' 'crawl': English FastText web crawl embeddings 'en-twitter' 'twitter': English Twitter embeddings 'en-turian' 'turian': English Turian embeddings (small) 'en', 'en-news', 'news': English FastText news Wikipedia embeddings 'de': German FastText embeddings 'nl': Dutch FastText embeddings 'fr': French FastText embeddings '': Italian FastText embeddings 'es': Spanish FastText embeddings 'pt': Portuguese FastText embeddings 'ro': Romanian FastText embeddings 'ca': Catalan FastText embeddings 'sv': Swedish FastText embeddings 'da': Danish FastText embeddings '': Norwegian FastText embeddings 'fi': Finnish FastText embeddings 'pl': Polish FastText embeddings 'cz': Czech FastText embeddings 'sk': Slovak FastText embeddings 'sl': Slovenian FastText embeddings 'sr': Serbian FastText embeddings 'hr': Croatian FastText embeddings 'bg': Bulgarian FastText embeddings 'ru': Russian FastText embeddings 'ar': Arabic FastText embeddings '': Hebrew FastText embeddings 'tr': Turkish FastText embeddings 'fa': Persian FastText embeddings 'ja': Japanese FastText embeddings 'ko': Korean FastText embeddings 'zh': Chinese FastText embeddings 'hi': Hindi FastText embeddings 'id': Indonesian FastText embeddings 'eu': Basque FastText embeddings example, load German FastText embeddings, use 'de' embeddings parameter.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"","code":"flair_embeddings.WordEmbeddings(embeddings = \"glove\")"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initializing a Class for 
Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"embeddings type pre-trained embeddings use. Defaults \"glove\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"Flair WordEmbeddings class.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"Python equivalent:","code":"from flair.embeddings import WordEmbeddings embedding = WordEmbeddings('glove')"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.WordEmbeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair WordEmbeddings Class — flair_embeddings.WordEmbeddings","text":"","code":"if (FALSE) { embedding <- flair_embeddings.WordEmbeddings(\"glove\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":null,"dir":"Reference","previous_headings":"","what":"Initialization of Flair Embeddings Modules — flair_embeddings","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"function provides interface R users access utilize flair.embeddings module Flair NLP library. Flair's embedding functionalities offer various state---art embeddings crucial natural language processing tasks. using function, R users can seamlessly incorporate advanced embeddings NLP workflows without delving deep Python. 
Essentially, function acts bridge R's ecosystem Flair's rich embedding capabilities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"","code":"flair_embeddings()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"flair.embeddings module Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"function allows R users access following Flair embeddings modules: FlairEmbeddings Contextual string embeddings capturing latent syntactic-semantic information beyond standard word embeddings. WordEmbeddings Classic word embeddings like GloVe FastText. TransformerWordEmbeddings Word embeddings transformer models BERT, RoBERTa, etc. TransformerDocumentEmbeddings Transformer-based embeddings entire documents sentences. StackedEmbeddings Combines multiple embeddings richer representation. DocumentPoolEmbeddings Provides single embedding vector entire document based chosen operation mode (mean, max, etc.). BytePairEmbeddings Embeddings based Byte-Pair Encoding (BPE) mechanism used subword tokenization. ELMoEmbeddings Deep contextual embeddings derived internal state pretrained bidirectional LSTM. embedding type offers unique features suitable various NLP tasks. 
understanding differences capabilities, R users can select appropriate embeddings enhance NLP models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"Python's Flair library:","code":"from flair.embeddings import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_embeddings.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initialization of Flair Embeddings Modules — flair_embeddings","text":"","code":"if (FALSE) { library(flaiR) # Initialize FlairEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings embedding <- FlairEmbeddings('news-forward') } if (FALSE) { # Initialize WordEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings embedding <- WordEmbeddings('glove') } if (FALSE) { # Initialize TransformerWordEmbeddings TransformerWordEmbeddings <- flair_embeddings()$TransformerWordEmbeddings embedding <- TransformerWordEmbeddings('bert-base-uncased') } if (FALSE) { # Initialize TransformerDocumentEmbeddings TransformerDocumentEmbeddings <- flair_embeddings()$TransformerDocumentEmbeddings embedding <- TransformerDocumentEmbeddings('bert-base-uncased') } if (FALSE) { # Initialize StackedEmbeddings StackedEmbeddings <- flair_embeddings()$StackedEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings FlairEmbeddings <- flair_embeddings()$FlairEmbeddings stacked_embeddings <- StackedEmbeddings( list(WordEmbeddings('glove'), FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ) ) } if (FALSE) { # Initialize DocumentPoolEmbeddings DocumentPoolEmbeddings <- flair_embeddings()$DocumentPoolEmbeddings WordEmbeddings <- flair_embeddings()$WordEmbeddings doc_embeddings <- DocumentPoolEmbeddings(list(WordEmbeddings('glove'))) 
}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":null,"dir":"Reference","previous_headings":"","what":"Access Flair's SequenceTagger — flair_models.Sequencetagger","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"function utilizes reticulate package import SequenceTaggers Flair's models Python, enabling interaction Flair's sequence tagging models R environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"","code":"flair_models.Sequencetagger()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"Python module (SequenceTagger) Flair, can utilized load use sequence tagging models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"function take parameters directly returns SequenceTagger called, can used sequence tagging tasks using pre-trained models Flair.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Access Flair's SequenceTagger — flair_models.Sequencetagger","text":"Python equivalent:","code":"from flair.models import SequenceTagger"},{"path":[]},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.Sequencetagger.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Access Flair's SequenceTagger — 
flair_models.Sequencetagger","text":"","code":"if (FALSE) { sequence_tagger <- flair_models.sequencetagger() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"function utilizes reticulate package directly import TextClassifier flair.models Flair NLP Python library. Ensure Python environment properly set Flair package installed.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"","code":"flair_models.TextClassifier()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"Python class representing flair.models.TextClassifier.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"Python equivalent:","code":"from flair.models import TextClassifier"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.TextClassifier.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Retrieve TextClassifier from flair.models — flair_models.TextClassifier","text":"","code":"# Load the TextClassifier TextClassifier <- flair_models.TextClassifier() # Load a pre-trained sentiment model classifier <- TextClassifier$load('sentiment') # Create a sentence object 
Sentence <- flair_data()$Sentence sentence <- Sentence(\"Flair is pretty neat!\") # Predict the sentiment classifier$predict(sentence) # Display the sentiment print(sentence$get_labels()) #> [[1]] #> 'Sentence[5]: \"Flair is pretty neat!\"'/'POSITIVE' (0.9997) #>"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":null,"dir":"Reference","previous_headings":"","what":"Import the flair.models Python module — flair_models","title":"Import the flair.models Python module — flair_models","text":"function imports flair.models module Flair NLP Python library, providing access several powerful models tailored NLP tasks. three primary methods available module: TextClassifier: method represents neural network model designed text classification tasks. Given piece text, predicts class label category. instance, can used classify movie reviews positive negative. SequenceTagger: Tailored tasks like Named Entity Recognition (NER) Part--Speech (POS) tagging, method annotates sequences words. NER, tag entities sentence locations, persons, organizations. POS tagging, can label word sentence grammatical role like noun, verb, adjective, etc. LanguageModel: method represents model trained predict next word sequence, making powerful tasks like text generation completion. 
learns statistical properties structure language, can base transfer learning NLP tasks.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import the flair.models Python module — flair_models","text":"","code":"flair_models()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import the flair.models Python module — flair_models","text":"Python module object representing flair.models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_models.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import the flair.models Python module — flair_models","text":"Python equivalent:","code":"from flair.models import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":null,"dir":"Reference","previous_headings":"","what":"Initializing a Class for Flair Classifier — flair_nn.Classifier","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"function interfaces Python via reticulate package create Classifier object Flair library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"","code":"flair_nn.Classifier()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"Flair Classifier class 
instance.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"Python equivalent:","code":"from flair.nn import Classifier"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.Classifier.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Initializing a Class for Flair Classifier — flair_nn.Classifier","text":"","code":"if (FALSE) { classifier <- flair_nn.Classifier() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":null,"dir":"Reference","previous_headings":"","what":"Import Flair's Neural Network Module — flair_nn","title":"Import Flair's Neural Network Module — flair_nn","text":"function provides interface flair.nn module Flair library. flair.nn module encompasses various sub-modules : decoder distance dropout loss model multitask recurrent Model Classifier PrototypicalDecoder LockedDropout WordDropout","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import Flair's Neural Network Module — flair_nn","text":"","code":"flair_nn()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import Flair's Neural Network Module — flair_nn","text":"reference Flair's neural network module (flair.nn).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_nn.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import Flair's Neural Network Module — flair_nn","text":"","code":"if (FALSE) { flair_nn_module <- flair_nn() Classifier <- flair_nn_module$Classifier 
}"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":null,"dir":"Reference","previous_headings":"","what":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"Interface Python flair.splitter module utilize SegtokSentenceSplitter class/method.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"","code":"flair_splitter.SegtokSentenceSplitter()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"instance Python class SegtokSentenceSplitter flair.splitter module.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"Python equivalent:","code":"from flair.splitter import SegtokSentenceSplitter"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.SegtokSentenceSplitter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Segtok Sentence Splitter — flair_splitter.SegtokSentenceSplitter","text":"","code":"if (FALSE) { splitter <- flair_splitter.SegtokSentenceSplitter() }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.splitter Module in R — flair_splitter","title":"Import flair.splitter Module in R — flair_splitter","text":"function 
interface Python flair.splitter module. function provides access various sentence splitting strategies implemented Flair library: NoSentenceSplitter: Treats entire text single sentence without splitting . SegtokSentenceSplitter: Uses segtok library split text sentences. SpacySentenceSplitter: Uses spaCy library sentence splitting. TagSentenceSplitter: Assumes specific tags text indicate sentence boundaries.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.splitter Module in R — flair_splitter","text":"","code":"flair_splitter()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.splitter Module in R — flair_splitter","text":"Python module (flair.splitter).","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.splitter Module in R — flair_splitter","text":"Python reference SegtokSentenceSplitter: Additional references classes can found within Flair library documentation. 
Flair GitHub","code":"from flair.splitter import *"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_splitter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.splitter Module in R — flair_splitter","text":"","code":"if (FALSE) { SegtokSentenceSplitter <- flair_splitter$SegtokSentenceSplitter() text <- \"I am Taiwanese and come from Taiwan\" sentences <- splitter$split(text) }"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":null,"dir":"Reference","previous_headings":"","what":"Import flair.trainers Module in R — flair_trainers","title":"Import flair.trainers Module in R — flair_trainers","text":"flair_trainers() provides R users access Flair's ModelTrainer Python class using reticulate package. ModelTrainer class offers following main methods: train: Trains given model. Parameters include corpus (data split training, development, test sets), output directory save model logs, various parameters control training process (e.g., learning rate, mini-batch size, maximum epochs). find_learning_rate: Uses \"learning rate finder\" method find optimal learning rate training. Parameters typically include corpus, batch size, range learning rates explore. final_test: training model, method evaluates model test set prints results. save_checkpoint: Saves current training state (including model parameters training configurations) resume later interrupted. load_checkpoint: Loads previously saved checkpoint resume training. log_line: Utility method logging. Writes line console log file. log_section: Utility method logging. 
Writes section break console log file.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Import flair.trainers Module in R — flair_trainers","text":"","code":"flair_trainers()"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Import flair.trainers Module in R — flair_trainers","text":"Python Module(flair.trainers) object allowing access Flair's trainers R.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Import flair.trainers Module in R — flair_trainers","text":"Flair GitHub Python equivalent:","code":"from flair.trainers import ModelTrainer"},{"path":"https://davidycliao.github.io/flaiR/reference/flair_trainers.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Import flair.trainers Module in R — flair_trainers","text":"","code":"if (FALSE) { trainers <- flair_trainers() model_trainer <- trainers$ModelTrainer }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Named Entities with Flair Models — get_entities","title":"Tagging Named Entities with Flair Models — get_entities","text":"function takes texts corresponding document IDs inputs, uses Flair NLP library extract named entities, returns dataframe identified entities along tags. entities detected text, function returns data table NA values. might clutter results. 
Depending use case, might decide either keep behavior skip rows detected entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Named Entities with Flair Models — get_entities","text":"","code":"get_entities( texts, doc_ids = NULL, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Named Entities with Flair Models — get_entities","text":"texts character vector containing texts process. doc_ids character numeric vector containing document IDs corresponding text. tagger optional tagger object. NULL (default), function load Flair tagger based specified language. language character string indicating language model load. Default \"en\". show.text_id logical value. TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Named Entities with Flair Models — get_entities","text":"data table columns: doc_id ID document entity extracted. text_id TRUE, actual text entity extracted. entity named entity extracted text. tag tag category named entity. 
Common tags include: PERSON (names individuals), ORG (organizations, institutions), GPE (countries, cities, states), LOCATION (mountain ranges, bodies water), DATE (dates periods), TIME (times day), MONEY (monetary values), PERCENT (percentage values), FACILITY (buildings, airports), PRODUCT (objects, vehicles), EVENT (named events like wars sports events), ART (titles books)","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Named Entities with Flair Models — get_entities","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load NER (\"ner\") model tagger_ner <- load_tagger_ner('ner') results <- get_entities(texts, doc_ids, tagger_ner) print(results)}"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract Named Entities from a Batch of Texts — get_entities_batch","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"function processes batches texts extracts named entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"","code":"get_entities_batch( texts, doc_ids, tagger = NULL, language = \"en\", show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = 
TRUE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"texts character vector texts process. doc_ids vector document IDs corresponding text. tagger pre-loaded Flair NER tagger. Default NULL, tagger loaded based provided language. language character string specifying language texts. Default \"en\" (English). show.text_id Logical, whether include text ID output. Default FALSE. gc.active Logical, whether activate garbage collection processing batch. Default FALSE. batch_size integer specifying size batch. Default 5. device character string specifying computation device. can either \"cpu\" string representation GPU device number. instance, \"0\" corresponds first GPU. GPU device number provided, attempt use GPU. default \"cpu\". \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. \"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. 
Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"data.table containing extracted entities, corresponding tags, document IDs.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_entities_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract Named Entities from a Batch of Texts — get_entities_batch","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load NER (\"ner\") model tagger_ner <- load_tagger_ner('ner') results <- get_entities_batch(texts, doc_ids, tagger_ner) print(results)}"},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve Flair Version — get_flair_version","title":"Retrieve Flair Version — get_flair_version","text":"Gets version installed Flair module current Python environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve Flair Version — get_flair_version","text":"","code":"get_flair_version(...)"},{"path":"https://davidycliao.github.io/flaiR/reference/get_flair_version.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve Flair Version — get_flair_version","text":"Character 
string representing version Flair. Flair installed, may return NULL cause error.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"function returns data table POS tags related data given texts.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"","code":"get_pos( texts, doc_ids = NULL, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"texts character vector containing texts processed. doc_ids character vector containing document ids. tagger tagger object (default NULL). language language texts (default NULL). show.text_id logical value. TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"data.table containing following columns: doc_id document identifier corresponding text. token_id token number original text, indicating position token. text_id actual text input passed function. 
token individual word token text POS tagged. tag part--speech tag assigned token Flair library. precision confidence score (numeric) assigned POS tag.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Part-of-Speech Tagging with Flair Models — get_pos","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) tagger_pos_fast <- load_tagger_pos('pos-fast') texts <- c(\"UCD is one of the best universities in Ireland.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\") get_pos(texts, doc_ids, tagger_pos_fast) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Batch Process of Part-of-Speech Tagging — get_pos_batch","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"function returns data table POS tags related data given texts using batch processing.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"","code":"get_pos_batch( texts, doc_ids, tagger = NULL, language = NULL, show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = TRUE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"texts character vector containing texts processed. doc_ids character vector containing document ids. tagger tagger object (default NULL). language language texts (default NULL). show.text_id logical value. 
TRUE, includes actual text entity extracted resulting data table. Useful verification traceability purposes might increase size output. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE. batch_size integer specifying size batch. Default 5. device character string specifying computation device. \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. \"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"data.table containing following columns: doc_id document identifier corresponding text. token_id token number original text, indicating position token. text_id actual text input passed function (show.text_id TRUE). token individual word token text POS tagged. tag part--speech tag assigned token Flair library. 
precision confidence score (numeric) assigned POS tag.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_pos_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Batch Process of Part-of-Speech Tagging — get_pos_batch","text":"","code":"if (FALSE) { library(reticulate) library(fliaR) tagger_pos_fast <- load_tagger_pos('pos-fast') texts <- c(\"UCD is one of the best universities in Ireland.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\") # Using the batch_size parameter get_pos_batch(texts, doc_ids, tagger_pos_fast, batch_size = 2) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":null,"dir":"Reference","previous_headings":"","what":"Tagging Sentiment with Flair Standard Models — get_sentiments","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"function takes texts associated document IDs predict sentiments using flair Python library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"","code":"get_sentiments( texts, doc_ids, tagger = NULL, ..., language = NULL, show.text_id = FALSE, gc.active = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"texts list vector texts sentiment prediction made. doc_ids list vector document IDs corresponding texts. tagger optional flair sentiment model. NULL (default), function loads default model based language. ... Additional arguments passed next. 
language character string indicating language texts. Currently supports \"sentiment\" (English), \"sentiment-fast\" (English), \"de-offensive-language\" (German) show.text_id logical value. TRUE, includes actual text sentiment predicted. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"data.table containing three columns: doc_id: document ID input. sentiment: Predicted sentiment text. score: Score sentiment prediction.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Tagging Sentiment with Flair Standard Models — get_sentiments","text":"","code":"if (FALSE) { library(flaiR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load re-trained sentiment (\"sentiment\") model tagger_sent <- load_tagger_sentiments('sentiment') results <- get_sentiments(texts, doc_ids, tagger_sent) print(results) }"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":null,"dir":"Reference","previous_headings":"","what":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","title":"Batch Process of Tagging Sentiment with Flair Models — 
get_sentiments_batch","text":"function takes texts associated document IDs predict sentiments using flair Python library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"","code":"get_sentiments_batch( texts, doc_ids, tagger = NULL, ..., language = NULL, show.text_id = FALSE, gc.active = FALSE, batch_size = 5, device = \"cpu\", verbose = FALSE )"},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"texts list vector texts sentiment prediction made. doc_ids list vector document IDs corresponding texts. tagger optional flair sentiment model. NULL (default), function loads default model based language. ... Additional arguments passed next. language character string indicating language texts. Currently supports \"sentiment\" (English), \"sentiment-fast\" (English), \"de-offensive-language\" (German) show.text_id logical value. TRUE, includes actual text sentiment predicted. Default FALSE. gc.active logical value. TRUE, runs garbage collector processing texts. can help freeing memory releasing unused memory space, especially processing large number texts. Default FALSE. batch_size integer specifying number texts processed . can help optimize performance leveraging parallel processing. Default 5. device character string specifying computation device. can either \"cpu\" string representation GPU device number. instance, \"0\" corresponds first GPU. GPU device number provided, attempt use GPU. default \"cpu\". \"cuda\" \"cuda:0\" (\"mps\" \"mps:0\" Mac M1/M2 )Refers first GPU system. one GPU, specifying \"cuda\" \"cuda:0\" allocate computations GPU. 
\"cuda:1\" (\"mps:1\")Refers second GPU system, allowing allocation specific computations GPU. \"cuda:2\" (\"mps:2)Refers third GPU system, systems GPUs. verbose logical value. TRUE, function prints batch processing progress updates. Default TRUE.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"data.table containing three columns: doc_id: document ID input. sentiment: Predicted sentiment text. score: Score sentiment prediction.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/get_sentiments_batch.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Batch Process of Tagging Sentiment with Flair Models — get_sentiments_batch","text":"","code":"if (FALSE) { library(flaiR) texts <- c(\"UCD is one of the best universities in Ireland.\", \"UCD has a good campus but is very far from my apartment in Dublin.\", \"Essex is famous for social science research.\", \"Essex is not in the Russell Group, but it is famous for political science research.\", \"TCD is the oldest university in Ireland.\", \"TCD is similar to Oxford.\") doc_ids <- c(\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\") # Load re-trained sentiment (\"sentiment\") model tagger_sent <- load_tagger_sentiments('sentiment') results <- get_sentiments_batch(texts, doc_ids, tagger_sent, batch_size = 3) print(results) }"},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","title":"Training Data from : When Do Politicians Grandstand? 
Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"dataset Ju Yeon Park's paper published Journal Politics 2021, titled \"Politicians Grandstand? Measuring Message Politics Committee Hearings\". contains \"Congressional Hearing Dataset: 105th 114th Congresses\", replication dataset paper. manuscript accepted publication June 2019. Please cite paper using data.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"","code":"data(\"gs_score\")"},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"data frame 3 variables: speech Hearing speeches sentimentit_score grandstanding score. rescaled_gs Label indicating whether text grandstanding speech: '1' grandstanding speech '0' non-grandstanding speech. rescaled version sentimentit_score (grandstanding scores) original released data.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : When Do Politicians Grandstand? Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"Data provided authors Ju Yeon Park JOP's Dataverse https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/GSMBFX/JIHIGH&version=1.0.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/gs_score.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : When Do Politicians Grandstand? 
Measuring Message Politics in Committee Hearings (2021 JOP) — gs_score","text":"","code":"if (FALSE) { data(gs_score) head(gs_score) }"},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":null,"dir":"Reference","previous_headings":"","what":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"dataset derived sample development set \"Political Hate Speech Detection Lexicon Building: Study Taiwan.\" contains 1,000 annotated data entries, 926 labeled '0' (non-hate speech) 74 '1' (hate speech). paper can accessed https://ieeexplore.ieee.org/document/9738642.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"","code":"data(\"hatespeech_zh_tw\")"},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"data frame 2 variables: text Content text. label Label indicating whether text hate speech: '1' hate speech '0' non-hate speech.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"Data provided authors Chih-Chien Wang, Min-Yuh Day, Chun-Lian Wu. 
Available https://ieeexplore.ieee.org/document/9738642.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/hatespeech_zh_tw.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Training Data from : Political Hate Speech Detection and Lexicon Building: A Study in Taiwan (IEEE Explore 2022) — hatespeech_zh_tw","text":"","code":"if (FALSE) { data(hatespeech_zh_tw) head(hatespeech_zh_tw) }"},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":null,"dir":"Reference","previous_headings":"","what":"Highlight Entities with Specified Colors and Tag — highlight_text","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"function highlights specified entities text string specified background colors, font colors, optional labels. Additionally, allows setting specific font type highlighted text.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"","code":"highlight_text(text, entities_mapping, font_family = \"Arial\")"},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"text character string containing text highlight. entities_mapping named list lists, sub-list containing: words: character vector words highlight. background_color: character string specifying CSS color highlight background. font_color: character string specifying CSS color highlighted text. label: character string specifying label append highlighted word. label_color: character string specifying CSS color label text. font_family character string specifying CSS font family highlighted text label. 
Default \"Arial\".","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"HTML object containing text highlighted entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/highlight_text.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Highlight Entities with Specified Colors and Tag — highlight_text","text":"","code":"library(flaiR) data(\"uk_immigration\") uk_immigration <- head(uk_immigration, 1) tagger_ner <- load_tagger_ner(\"ner\") results <- get_entities(uk_immigration$text, uk_immigration$speaker, tagger_ner, show.text_id = FALSE) highlighted_text <- highlight_text(uk_immigration$text, map_entities(results)) print(highlighted_text) #> I thank Mr. Speaker for giving me permission to hold this debate today. I welcome the Minister-I very much appreciate the contact from his office prior to today-and the Conservative<\/span> (ORG)<\/span> and Liberal Democrat Front Benchers<\/span> (ORG)<\/span> to the debate. I also welcome my hon. Friends on the Back Benches<\/span> (MISC)<\/span>. Immigration is the most important issue for my constituents. I get more complaints, comments and suggestions about immigration than about anything else. In the Kettering<\/span> (LOC)<\/span> constituency, the number of immigrants is actually very low. There is a well-settled Sikh<\/span> (MISC)<\/span> community in the middle of Kettering<\/span> (LOC)<\/span> town itself, which has been in Kettering<\/span> (LOC)<\/span> for some 40 or 50 years and is very much part of the local community and of the fabric of local life. There are other very small migrant groups in my constituency, but it is predominantly made up of indigenous British<\/span> (MISC)<\/span> people. 
However, there is huge concern among my constituents about the level of immigration into our country. I believe that I am right in saying that, in recent years, net immigration into the United Kingdom<\/span> (LOC)<\/span> is the largest wave of immigration that our country has ever known and, proportionately, is probably the biggest wave of immigration since the Norman<\/span> (MISC)<\/span> conquest. My contention is that our country simply cannot cope with immigration on that scale-to coin a phrase, we simply cannot go on like this. It is about time that mainstream politicians started airing the views of their constituents, because for too long people have muttered under their breath that they are concerned about immigration. They have been frightened to speak out about it because they are frightened of being accused of being racist. My contention is that immigration is not a racist issue; it is a question of numbers. I personally could not care tuppence about the ethnicity of the immigrants concerned, the colour of their skin or the language that they speak. What I am concerned about is the very large numbers of new arrivals to our country. My contention is that the United Kingdom<\/span> (LOC)<\/span> simply cannot cope with them.<\/div>"},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":null,"dir":"Reference","previous_headings":"","what":"Wrapper for the Flair Python Library — import_flair","title":"Wrapper for the Flair Python Library — import_flair","text":"flair: wrapper access Flair library Python. Returns: list: list method python module. Environment Configuration: os: Pertains operating system related functions, path handling, file operations, . Path: pathlib, used convenient file path operations. set_seed: Functions set random seed. hf_set_seed: Functions set random seed. set_proxies: Used configure network proxies. Data Data Loading: data: Functions related data handling operations. 
datasets: Modules methods load handle specific datasets. file_utils: Utilities file operations. Embeddings Model Layers: embeddings: embeddings, including word embeddings, contextual embeddings, etc. nn: Related neural network layers operations. models: Different model architectures structures. Training Optimization: trainers: Related training models. training_utils: Utility functions training process. optim: Optimization algorithms, like SGD, Adam. Tokenization Text Processing: tokenization: break text tokens. splitter: splitting datasets texts. Visualizations Miscellaneous: visual: Related visualization. torch: main PyTorch library. cache_root: Related caching data models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wrapper for the Flair Python Library — import_flair","text":"","code":"import_flair()"},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Wrapper for the Flair Python Library — import_flair","text":"object represents Flair module Python.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Wrapper for the Flair Python Library — import_flair","text":"function relies reticulate package import use Flair module Python. 
Ensure Flair Python library installed Python environment used.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/import_flair.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wrapper for the Flair Python Library — import_flair","text":"","code":"if (FALSE) { flair <- import_flair() }"},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":null,"dir":"Reference","previous_headings":"","what":"Install a Specific Python Package and Return Its Version — install_python_package","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"function checks Python interpreter's location (either specified user automatically located), compares current R session's Python setting, installs specified Python package using identified Python interpreter, returns package version installation environment.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"","code":"install_python_package( package_name, package_version = NULL, python_path = Sys.which(\"python3\") )"},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"package_name name Python package install. package_version version Python package install. NULL, latest version installed. python_path path Python interpreter used installation. 
provided, defaults result Sys.which(\"python3\").","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"list containing package name, installed version, path Python interpreter used installation.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/install_python_package.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Install a Specific Python Package and Return Its Version — install_python_package","text":"","code":"if (FALSE) { install_python_package(package_name =\"flair\", package_version =\"0.12\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":null,"dir":"Reference","previous_headings":"","what":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"helper function load appropriate tagger based provided language. function supports variety languages/models.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"","code":"load_tagger_ner(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"language character string indicating desired language NER tagger. NULL, function default 'ner' model. 
Supported languages models include: \"en\" - English NER tagging (ner) \"de\" - German NER tagging (de-ner) \"fr\" - French NER tagging (fr-ner) \"nl\" - Dutch NER tagging (nl-ner) \"da\" - Danish NER tagging (da-ner) \"ar\" - Arabic NER tagging (ar-ner) \"ner-fast\" - English NER fast model (ner-fast) \"ner-large\" - English NER large model (ner-large) \"de-ner-legal\" - NER (legal text) (de-ner-legal) \"nl\" - Dutch NER tagging (nl-ner) \"da\" - Danish NER tagging (da-ner) \"ar\" - Arabic NER tagging (ar-ner)","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"instance Flair SequenceTagger specified language.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_ner.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load the Named Entity Recognition (NER) Tagger — load_tagger_ner","text":"","code":"# Load the English NER tagger tagger_en <- load_tagger_ner(\"en\")"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":null,"dir":"Reference","previous_headings":"","what":"Load Flair POS Tagger — load_tagger_pos","title":"Load Flair POS Tagger — load_tagger_pos","text":"function loads POS (part--speech) tagger model specified language using Flair library. 
language specified, defaults 'pos-fast'.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load Flair POS Tagger — load_tagger_pos","text":"","code":"load_tagger_pos(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load Flair POS Tagger — load_tagger_pos","text":"language character string indicating desired language model. NULL, function default 'pos-fast' model. Supported language models include: \"pos\" - General POS tagging \"pos-fast\" - Faster POS tagging \"upos\" - Universal POS tagging \"upos-fast\" - Faster Universal POS tagging \"pos-multi\" - Multi-language POS tagging \"pos-multi-fast\" - Faster Multi-language POS tagging \"ar-pos\" - Arabic POS tagging \"de-pos\" - German POS tagging \"de-pos-tweets\" - German POS tagging tweets \"da-pos\" - Danish POS tagging \"ml-pos\" - Malayalam POS tagging \"ml-upos\" - Malayalam Universal POS tagging \"pt-pos-clinical\" - Clinical Portuguese POS tagging \"pos-ukrainian\" - Ukrainian POS tagging","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load Flair POS Tagger — load_tagger_pos","text":"Flair POS tagger model corresponding specified (default) language.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_pos.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load Flair POS Tagger — load_tagger_pos","text":"","code":"if (FALSE) { tagger <- load_tagger_pos(\"pos-fast\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":null,"dir":"Reference","previous_headings":"","what":"Load a Sentiment or Language Tagger Model from Flair — 
load_tagger_sentiments","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"function loads pre-trained sentiment language tagger Flair library.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"","code":"load_tagger_sentiments(language = NULL)"},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"language character string specifying language model load. Supported models include: \"sentiment\" - Sentiment analysis model \"sentiment-fast\" - Faster sentiment analysis model \"de-offensive-language\" - German offensive language detection model provided, function default \"sentiment\" model.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"object loaded Flair model.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/load_tagger_sentiments.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Load a Sentiment or Language Tagger Model from Flair — load_tagger_sentiments","text":"","code":"if (FALSE) { tagger <- load_tagger_sentiments(\"sentiment\") }"},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":null,"dir":"Reference","previous_headings":"","what":"Create Mapping for NER Highlighting — map_entities","title":"Create Mapping for NER Highlighting — map_entities","text":"function generates mapping list Named Entity 
Recognition (NER) highlighting. mapping list defines different entity types highlighted text displays, defining background color, font color, label, label color entity type.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create Mapping for NER Highlighting — map_entities","text":"","code":"map_entities(df, entity = \"entity\", tag = \"tag\")"},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create Mapping for NER Highlighting — map_entities","text":"df data frame containing least two columns: entity: character vector words/entities highlighted. tag: character vector indicating entity type word/entity. entity character vector entities annotated model. tag character vector tags corresponding annotated entities.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create Mapping for NER Highlighting — map_entities","text":"list mapping settings entity type, entity type represented list containing: words: character vector words highlighted. background_color: character string representing background color highlighting words. font_color: character string representing font color words. label: character string label entity type. 
label_color: character string representing font color label.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/map_entities.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create Mapping for NER Highlighting — map_entities","text":"","code":"if (FALSE) { sample_df <- data.frame( entity = c(\"Microsoft\", \"USA\", \"dollar\", \"Bill Gates\"), tag = c(\"ORG\", \"LOC\", \"MISC\", \"PER\"), stringsAsFactors = FALSE ) mapping <- map_entities(sample_df) }"},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":null,"dir":"Reference","previous_headings":"","what":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"function lists contents flair cache directory returns data frame.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"","code":"show_flair_cache()"},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"data frame containing file paths contents flair cache directory. 
directory exist empty, NULL returned.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/show_flair_cache.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Show Flair Cache Preloaded flair's Directory — show_flair_cache","text":"","code":"if (FALSE) { show_flair_cache() }"},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":null,"dir":"Reference","previous_headings":"","what":"Sampled Grandstanding Text — statements","title":"Sampled Grandstanding Text — statements","text":"Sampled Grandstanding Text","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sampled Grandstanding Text — statements","text":"","code":"data(\"statements\")"},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Sampled Grandstanding Text — statements","text":"data frame 3 variables: Type Grandstanding types Statement Grandstanding texts","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/statements.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sampled Grandstanding Text — statements","text":"","code":"if (FALSE) { data(statements) head(statements) }"},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":null,"dir":"Reference","previous_headings":"","what":"UK House of Commons Immigration Debate Data — uk_immigration","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"dataset containing speeches debates UK House Commons topic immigration 2010.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"UK House of Commons Immigration Debate Data — 
uk_immigration","text":"","code":"data(\"uk_immigration\")"},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"data frame 12 variables: date Date speech, Date type agenda Agenda subject speech, character speechnumber Unique identifier speech, numeric speaker Name person giving speech, character party Political party speaker, character party.facts.id ID party, usually numeric character chair Person chairing session, character terms Terms tags associated speech, character list text Actual text speech, character parliament parliament session, character numeric iso3country ISO3 country code parliament located, character year Year speech made, numeric","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"Data collected ParSpeechV2 House Commons year 2010. dataset publicly available https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uk_immigration.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"UK House of Commons Immigration Debate Data — uk_immigration","text":"","code":"if (FALSE) { data(uk_immigration) head(uk_immigration) }"},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":null,"dir":"Reference","previous_headings":"","what":"Uninstall a Python Package — uninstall_python_package","title":"Uninstall a Python Package — uninstall_python_package","text":"uninstall_python_package function uninstalls specified Python package using system's Python installation. checks Python installed accessible, proceeds uninstall package. 
Finally, uninstall_python_package verifies package successfully uninstalled.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uninstall a Python Package — uninstall_python_package","text":"","code":"uninstall_python_package(package_name, python_path = Sys.which(\"python3\"))"},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uninstall a Python Package — uninstall_python_package","text":"package_name name Python package uninstall. python_path path Python executable. provided, uses system's default Python path.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Uninstall a Python Package — uninstall_python_package","text":"Invisibly returns TRUE package successfully uninstalled, otherwise stops error message.","code":""},{"path":"https://davidycliao.github.io/flaiR/reference/uninstall_python_package.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uninstall a Python Package — uninstall_python_package","text":"","code":"if (FALSE) { uninstall_python_package(\"numpy\") }"},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-006-2023-10-29","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.6 (2023-10-29)","title":"flaiR 0.0.6 (2023-10-29)","text":"flair {flaiR} renamed flair() import_flair() avoid overlapping conventional practice import flair Python. install_python_package() uninstall_python_package() new functions install uninstall Python packages using pip environment used flaiR package. Add new training data grandstanding training data Ju Yeon Park’s paper. zzz.R revised code proceeds three steps. 
First, installing loading package, {flaiR} utilizes system’s environment tool undergoes three evaluation stages. Initially, {flaiR} requires least Python 3 installed device. Python 3 available, unable install {flaiR} successfully. requirement met, system checks appropriate versions PyTorch Flair. primary focus Flair. already installed, see message indicating ‘Flair installed Python’. process represents new format loading Python environment used flaiR package. Add example datasets (cc_muller hatespeech_zh_tw) tutorials documentation.","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-005-2023-10-01","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.5 (2023-10-01)","title":"flaiR 0.0.5 (2023-10-01)","text":"Added tests monitor function operation. Added wrapped functions integrating Python code. Created function coloring entities. Provided tutorials interacting R Python using Flair. Notice Python 3.x flair may fail install Python dependencies windows-latest due potential compatibility issues latest Python versions Windows. fix , modified Python version actions/setup-python@v2 step use Python 3.9 lower version. Added two new example datasets tutorials documentation. ","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-003-2023-09-10","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.3 (2023-09-10)","title":"flaiR 0.0.3 (2023-09-10)","text":"Modifications Overview Added show.text_id gc.active parameters get_entities(), get_pos(), get_sentiment(). Enhanced batch processing introduction batch_size functions get_entities_batch(), get_pos_batch(), get_sentiment_batch(). Introduced device parameter specify computation device. Introduction New Parameters: show.text_id: activated (TRUE), actual text (labeled ‘text_id’) entity derived appended resulting dataset. Although enriching output validation traceability, users cautious, might inflate output size. 
default, option remains deactivated (FALSE). context, previously, ‘text_id’ intrinsically generated, potentially elevating R’s memory consumption. gc.active: Activating (TRUE) trigger garbage collector post-text processing. action aids memory optimization relinquishing unallocated memory spaces, crucial step, particularly processing extensive text dataset. default set FALSE, users managing larger texts consider setting gc.active TRUE. Though action doesn’t bolster computational efficiency, circumvent potential RStudio crashes. Batch Processing Enhancement: inception batch_size parameter (defaulted 5) get_entities_batch(), get_pos_batch(), get_sentiment_batch() augments batch processing capabilities. addition led creation internal function named process_batch proficiently manage text batch linked doc_ids. core functionality adapted segregate texts doc_ids specific batches, subsequently processed via process_batch function, final results amalgamated seamlessly. device: descriptive character string pinpointing computation device. Users can opt “cpu” GPU device number string format. instance, representing primary GPU 0. GPU device number furnished, system endeavor harness specific GPU, “cpu” default setting. batch_size: integer specifying size batch. Default 5. ","code":""},{"path":"https://davidycliao.github.io/flaiR/news/index.html","id":"flair-001-development-version","dir":"Changelog","previous_headings":"","what":"flaiR 0.0.1 (development version)","title":"flaiR 0.0.1 (development version)","text":"features flaiR currently include part--speech tagging, sentiment tagging, named entity recognition tagging. flaiR requires Python version 3.7 higher operate concurrently. create_flair_env(): function install Flair Python library using reticulate R package, automatically generated.","code":""}]