diff --git a/articles/flair_embeddings.html b/articles/flair_embeddings.html index 991f7dff..c5ba0138 100644 --- a/articles/flair_embeddings.html +++ b/articles/flair_embeddings.html @@ -172,7 +172,7 @@

Create Sentence Objectlibrary(flaiR) library(reticulate)
-string <- "What I see in UCD today"
+string <- "UCD is one of the world's top universities and is ranked in the top 1% of higher education institutions worldwide."
 sentence <- flair_data.sentence(string)

 

@@ -203,24 +203,75 @@

Employing the BERT M token_embedding <- sentence$tokens[[i]]$embedding print(head(token_embedding, 10)) } -#> Token: Token[0]: "What" -#> tensor([-0.2512, -0.4922, -0.4639, 0.2517, -0.3188, 0.0957, 1.6545, 0.3004, -#> -0.8781, 0.1227]) -#> Token: Token[1]: "I" -#> tensor([-0.3337, 0.4115, -0.4157, -0.9596, 0.0055, 0.3405, 1.3206, -0.3020, -#> -0.2711, -0.1189]) -#> Token: Token[2]: "see" -#> tensor([ 0.8483, 0.6642, -0.5487, 0.3471, 0.4927, 0.3256, 0.9243, -0.6720, -#> -0.6935, 0.6259]) -#> Token: Token[3]: "in" -#> tensor([-0.2381, 0.2073, -0.5796, 0.1363, -0.5629, -0.2510, 1.3423, -0.5730, -#> -0.6775, 0.4376]) -#> Token: Token[4]: "UCD" -#> tensor([-0.5148, 1.4145, -0.8204, 0.3421, -0.5881, -0.2627, 1.3721, 0.0260, -#> 0.1095, 0.6303]) -#> Token: Token[5]: "today" -#> tensor([-0.8136, -0.0583, -0.2771, -0.6339, -0.2820, 0.0869, 0.7950, -0.6545, -#> -0.2286, 0.3327]) +#> Token: Token[0]: "UCD" +#> tensor([ 0.0833, 0.2852, -0.6398, 0.5306, -0.2550, -0.7952, 0.9191, -0.0284, +#> -0.1390, -0.0700]) +#> Token: Token[1]: "is" +#> tensor([ 0.0093, 0.3069, -0.3772, -0.5046, 0.3399, 0.3802, 1.4442, -0.0901, +#> -0.0049, -0.2420]) +#> Token: Token[2]: "one" +#> tensor([-0.1006, 0.4575, -0.0397, -0.9328, 0.2846, 0.2338, 1.3998, 0.1552, +#> 0.1651, -0.2045]) +#> Token: Token[3]: "of" +#> tensor([-0.2752, 0.2917, 0.1150, -0.5803, 0.8611, 0.3942, 0.8704, 0.1432, +#> -0.3376, -0.2798]) +#> Token: Token[4]: "the" +#> tensor([-0.2464, 0.3974, 0.4161, -0.5347, 0.0285, 0.3619, 1.1400, -0.0707, +#> 0.1255, -0.4121]) +#> Token: Token[5]: "world" +#> tensor([-0.8204, 0.7235, -0.0335, 0.1262, 0.1314, 0.5855, 1.6661, -0.2858, +#> 0.1801, -0.8496]) +#> Token: Token[6]: "'s" +#> tensor([-0.6831, 0.7184, -0.1451, -0.4499, 0.1971, 0.3204, 1.2689, -0.3038, +#> 0.0673, -0.6701]) +#> Token: Token[7]: "top" +#> tensor([ 0.2090, 0.5064, 0.0417, -0.5580, -0.5341, 0.4189, 0.7103, -0.3170, +#> 0.0792, 0.0506]) +#> Token: Token[8]: "universities" +#> tensor([ 0.3336, 0.1307, -0.1218, -0.1945, 0.5289, -0.4657, 1.3310, 0.2141, +#> 0.1781, 0.0481]) +#> Token: Token[9]: "and" +#> tensor([ 0.0842, 0.2225, -0.0061, -0.7238, 0.3044, -0.1714, 1.4067, 0.3702, +#> -0.9546, -0.3608]) +#> Token: Token[10]: "is" +#> tensor([ 0.0606, 0.7361, 0.0384, -0.7512, 0.6239, 0.3918, 1.4170, -0.0143, +#> 0.1442, 0.1245]) +#> Token: Token[11]: "ranked" +#> tensor([-0.2530, 0.3414, 0.2172, -0.7527, 0.6933, 0.3993, 0.5563, 0.5353, +#> 0.2479, 0.1477]) +#> Token: Token[12]: "in" +#> tensor([-0.4973, -0.0277, 0.1821, -0.6973, 0.4903, -0.1480, 1.0401, 0.6653, +#> 0.1306, -0.0559]) +#> Token: Token[13]: "the" +#> tensor([-0.4150, 0.1021, 0.6204, -0.3566, 0.3788, 0.1652, 0.7545, 0.1566, +#> 0.4301, -0.3805]) +#> Token: Token[14]: "top" +#> tensor([-0.0116, 0.4095, 0.4882, 0.0605, -0.1946, -0.0589, 0.9664, -0.1612, +#> 0.7455, 0.3259]) +#> Token: Token[15]: "1" +#> tensor([ 0.2684, -0.1150, 0.0121, -0.3681, -0.4538, 0.6005, 0.6733, 0.3242, +#> 0.1395, -0.4707]) +#> Token: Token[16]: "%" +#> tensor([-0.2299, 0.1644, -0.1590, -0.4592, 0.6184, 0.8257, 0.8378, 0.0844, +#> 0.0695, -0.3707]) +#> Token: Token[17]: "of" +#> tensor([ 0.4932, 0.2413, 0.5705, -0.5453, 0.4407, 0.9492, 0.5458, -0.0643, +#> -0.0599, -0.2992]) +#> Token: Token[18]: "higher" +#> tensor([ 1.0912, 0.7395, -0.2275, 0.0513, -0.7952, -0.4250, 1.0819, -0.1928, +#> 0.1182, -0.2961]) +#> Token: Token[19]: "education" +#> tensor([ 0.7011, 0.6579, 0.1685, 1.0606, -0.1816, -0.2890, 1.4887, 0.4833, +#> 0.0555, -0.3187]) +#> Token: Token[20]: "institutions" +#> tensor([ 1.1192, 0.8685, 0.0450, 0.0711, 0.0641, -0.0049, 1.4312, 0.0940, +#> 0.4002, -0.0662]) +#> Token: Token[21]: "worldwide" +#> tensor([ 0.0737, 0.6137, 0.1128, -0.3651, -0.0724, 0.6873, 1.2160, -0.1015, +#> 0.4676, -0.5741]) +#> Token: Token[22]: "." +#> tensor([ 0.0663, -0.2634, 0.6907, -0.2992, -0.3788, 0.3833, -0.0426, 0.6789, +#> 0.0010, 0.2179])