From 5109bb3f64119df31da028acffb8c027767dec45 Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Thu, 20 Jun 2024 18:36:11 -0400 Subject: [PATCH 01/10] updated folder sturcture --- test.ipynb => research/test.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test.ipynb => research/test.ipynb (100%) diff --git a/test.ipynb b/research/test.ipynb similarity index 100% rename from test.ipynb rename to research/test.ipynb From 95f97ed84b92346b83fb71aeccf97a946252ff20 Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Thu, 20 Jun 2024 19:02:09 -0400 Subject: [PATCH 02/10] created a prilimilary versoin of vocabulary --- research/test.ipynb | 2 +- research/two_byte_encoding.ipynb | 338 +++++++++++++++++++++++++++++++ 2 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 research/two_byte_encoding.ipynb diff --git a/research/test.ipynb b/research/test.ipynb index 123ce1f..9feccb8 100644 --- a/research/test.ipynb +++ b/research/test.ipynb @@ -156,7 +156,7 @@ " llama_itos = {value:key for key,value in llama_stoi.items()}\n", "\n", " #load hugging face data\n", - " dataset = load_dataset('parquet', data_files=test_dir)\n", + " dataset = load_dataset('parquet', data_files=parquet_files)\n", " vocabulary = set()\n", "\n", " for sent in dataset[\"train\"][\"txt\"]:\n", diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb new file mode 100644 index 0000000..3d58c7b --- /dev/null +++ b/research/two_byte_encoding.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os, json\n", + "from datasets import load_dataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class Vocabulary:\n", + " \"\"\"Class to map codes from huggingface dataset to tokens in Llama 3-8B token\"\"\"\n", + "\n", + " def __init__(self):\n", + " self.stoi = {}\n", + " self.itos = {}\n", + " 
\n", + " def build_vocabulary(self, parquet_files, tokenizer_file=\"tokenizer.json\"):\n", + " '''\n", + " creates the vocabulary from the Llama 3 tokenizer and hugging face dataset\n", + " Args:\n", + " tokenizer_file(str): file downloaded from Llama 3(8B) which contains the vocabulary for the model\n", + " parquet_files(list): director with the dataset from hugging face in parquet format\n", + "\n", + " '''\n", + " # Open the JSON file\n", + " with open(tokenizer_file, 'r') as file:\n", + " # Load the JSON data\n", + " data = json.load(file)\n", + " \n", + " llama_stoi = data['model']['vocab']\n", + " llama_itos = {value:key for key,value in llama_stoi.items()}\n", + "\n", + " #load hugging face data\n", + " dataset = load_dataset('parquet', data_files=parquet_files)\n", + " vocabulary = set()\n", + "\n", + " for sent in dataset[\"train\"][\"txt\"]:\n", + " for word in sent.split():\n", + " vocabulary.add(word)\n", + " \n", + " self.itos = {int(value):llama_itos[int(value)] for value in vocabulary}\n", + " self.stoi = {value:key for key,value in self.itos.items()}\n", + " \n", + " def save(self, file_path):\n", + " with open(file_path, \"w\") as file:\n", + " json.dump(self.itos, file)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir(\"..\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "train_dir = [f\"dataset/default/partial-train/000{i}.parquet\" for i in range(10)]\n", + "dataset = load_dataset('parquet', data_files=train_dir)\n", + "txt = dataset[\"train\"][\"txt\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(txt)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "{'Tokenization': 2,\n", + " 'is': 2,\n", + " 'the': 3,\n", + " 'process': 1,\n", + " 'of': 2,\n", + " 'breaking': 1,\n", + " 'down': 1,\n", + " 'a': 1,\n", + " 'sequence': 1,\n", + " 'text': 2,\n", + " 'into': 2,\n", + " 'smaller': 1,\n", + " 'units': 1,\n", + " 'called': 1,\n", + " 'tokens,': 1,\n", + " 'which': 1,\n", + " 'can': 1,\n", + " 'be': 1,\n", + " 'w or d s,': 1,\n", + " 'p h r as es ,': 1,\n", + " 'or': 1,\n", + " 'e v en': 1,\n", + " 'in d i v i d u al': 1,\n", + " 'ch ar a c te r s': 1,\n", + " 'of t en': 1,\n", + " 'f i r s t': 1,\n", + " 'step ': 1,\n", + " 'in ': 1,\n", + " 'na t ur al': 1,\n", + " 'l an g u a g e s': 1,\n", + " 'processing': 2,\n", + " 't as k s': 1,\n", + " 'such': 2,\n", + " 'as': 3,\n", + " 'c l as s i f ic ation ,': 1,\n", + " 'na m ed': 1,\n", + " 'enti ty ': 1,\n", + " 're c o g ni tion ,': 1,\n", + " 'an d': 1,\n", + " 's enti m en t': 1,\n", + " 'an al y s is': 1,\n", + " 'T h e': 1,\n", + " 'r es u l t ing': 1,\n", + " 'tokens': 2,\n", + " 'are': 2,\n", + " 'ty p ic all y ': 1,\n", + " 'us ed': 1,\n", + " 'in p u t': 1,\n", + " 'to': 2,\n", + " 'f ur th er': 1,\n", + " 'step s,': 1,\n", + " 'v e c to r ization ,': 1,\n", + " 'wh er e': 1,\n", + " 'c on v er te d': 1,\n", + " 'n u m er ic al': 1,\n", + " 're pr es en t ation s': 1,\n", + " 'f or': 1,\n", + " 'm a ch in e': 1,\n", + " 'l e ar n ing': 1,\n", + " 'm o d e l s': 1,\n", + " 'us e': 1}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# https://www.geeksforgeeks.org/byte-pair-encoding-bpe-in-nlp/\n", + "\n", + "import re\n", + "from collections import defaultdict\n", + "\n", + "def get_stats(vocab):\n", + " \"\"\"\n", + " Given a vocabulary (dictionary mapping words to frequency counts), returns a \n", + " dictionary of tuples representing the frequency count of pairs of characters \n", + " in the vocabulary.\n", + " \"\"\"\n", + " pairs = defaultdict(int)\n", + " for 
word, freq in vocab.items():\n", + " symbols = word.split()\n", + " for i in range(len(symbols)-1):\n", + " pairs[symbols[i],symbols[i+1]] += freq\n", + " return pairs\n", + "\n", + "def merge_vocab(pair, v_in):\n", + " \"\"\"\n", + " Given a pair of characters and a vocabulary, returns a new vocabulary with the \n", + " pair of characters merged together wherever they appear.\n", + " \"\"\"\n", + " v_out = {}\n", + " bigram = re.escape(' '.join(pair))\n", + " p = re.compile(r'(?'] += 1\n", + " return vocab\n", + "\n", + "def byte_pair_encoding(data, n):\n", + " \"\"\"\n", + " Given a list of strings and an integer n, returns a list of n merged pairs\n", + " of characters found in the vocabulary of the input data.\n", + " \"\"\"\n", + " vocab = get_vocab(data)\n", + " for i in range(n):\n", + " pairs = get_stats(vocab)\n", + " best = max(pairs, key=pairs.get)\n", + " vocab = merge_vocab(best, vocab)\n", + " return vocab\n", + "\n", + "# Example usage:\n", + "corpus = '''Tokenization is the process of breaking down \n", + "a sequence of text into smaller units called tokens,\n", + "which can be words, phrases, or even individual characters.\n", + "Tokenization is often the first step in natural languages processing tasks \n", + "such as text classification, named entity recognition, and sentiment analysis.\n", + "The resulting tokens are typically used as input to further processing steps,\n", + "such as vectorization, where the tokens are converted\n", + "into numerical representations for machine learning models to use.'''\n", + "data = corpus.split('.')\n", + "\n", + "n = 100\n", + "bpe_pairs = byte_pair_encoding(data, n)\n", + "bpe_pairs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "laion", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 50c02f856b408615f8d0b6e44a8dba6df493a4ad Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Sat, 22 Jun 2024 14:14:49 -0400 Subject: [PATCH 03/10] started hugging face implimentation of BPE --- research/two_byte_encoding.ipynb | 1133 ++++++++++++++++++++++++++---- 1 file changed, 986 insertions(+), 147 deletions(-) diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index 3d58c7b..a284d41 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -95,180 +95,1019 @@ "type(txt)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Applying BPE\n", + "\n", + "using the following link for implimentation: https://huggingface.co/learn/nlp-course/en/chapter6/5" + ] + }, { "cell_type": "code", - 
"execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'Tokenization': 2,\n", - " 'is': 2,\n", - " 'the': 3,\n", - " 'process': 1,\n", - " 'of': 2,\n", - " 'breaking': 1,\n", - " 'down': 1,\n", - " 'a': 1,\n", - " 'sequence': 1,\n", - " 'text': 2,\n", - " 'into': 2,\n", - " 'smaller': 1,\n", - " 'units': 1,\n", - " 'called': 1,\n", - " 'tokens,': 1,\n", - " 'which': 1,\n", - " 'can': 1,\n", - " 'be': 1,\n", - " 'w or d s,': 1,\n", - " 'p h r as es ,': 1,\n", - " 'or': 1,\n", - " 'e v en': 1,\n", - " 'in d i v i d u al': 1,\n", - " 'ch ar a c te r s': 1,\n", - " 'of t en': 1,\n", - " 'f i r s t': 1,\n", - " 'step ': 1,\n", - " 'in ': 1,\n", - " 'na t ur al': 1,\n", - " 'l an g u a g e s': 1,\n", - " 'processing': 2,\n", - " 't as k s': 1,\n", - " 'such': 2,\n", - " 'as': 3,\n", - " 'c l as s i f ic ation ,': 1,\n", - " 'na m ed': 1,\n", - " 'enti ty ': 1,\n", - " 're c o g ni tion ,': 1,\n", - " 'an d': 1,\n", - " 's enti m en t': 1,\n", - " 'an al y s is': 1,\n", - " 'T h e': 1,\n", - " 'r es u l t ing': 1,\n", - " 'tokens': 2,\n", - " 'are': 2,\n", - " 'ty p ic all y ': 1,\n", - " 'us ed': 1,\n", - " 'in p u t': 1,\n", - " 'to': 2,\n", - " 'f ur th er': 1,\n", - " 'step s,': 1,\n", - " 'v e c to r ization ,': 1,\n", - " 'wh er e': 1,\n", - " 'c on v er te d': 1,\n", - " 'n u m er ic al': 1,\n", - " 're pr es en t ation s': 1,\n", - " 'f or': 1,\n", - " 'm a ch in e': 1,\n", - " 'l e ar n ing': 1,\n", - " 'm o d e l s': 1,\n", - " 'us e': 1}" + "str" ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# https://www.geeksforgeeks.org/byte-pair-encoding-bpe-in-nlp/\n", - "\n", - "import re\n", - "from collections import defaultdict\n", - "\n", - "def get_stats(vocab):\n", - " \"\"\"\n", - " Given a vocabulary (dictionary mapping words to frequency counts), returns a \n", - " dictionary of tuples representing the frequency count of pairs of 
characters \n", - " in the vocabulary.\n", - " \"\"\"\n", - " pairs = defaultdict(int)\n", - " for word, freq in vocab.items():\n", - " symbols = word.split()\n", - " for i in range(len(symbols)-1):\n", - " pairs[symbols[i],symbols[i+1]] += freq\n", - " return pairs\n", - "\n", - "def merge_vocab(pair, v_in):\n", - " \"\"\"\n", - " Given a pair of characters and a vocabulary, returns a new vocabulary with the \n", - " pair of characters merged together wherever they appear.\n", - " \"\"\"\n", - " v_out = {}\n", - " bigram = re.escape(' '.join(pair))\n", - " p = re.compile(r'(?'] += 1\n", - " return vocab\n", - "\n", - "def byte_pair_encoding(data, n):\n", - " \"\"\"\n", - " Given a list of strings and an integer n, returns a list of n merged pairs\n", - " of characters found in the vocabulary of the input data.\n", - " \"\"\"\n", - " vocab = get_vocab(data)\n", - " for i in range(n):\n", - " pairs = get_stats(vocab)\n", - " best = max(pairs, key=pairs.get)\n", - " vocab = merge_vocab(best, vocab)\n", - " return vocab\n", - "\n", - "# Example usage:\n", - "corpus = '''Tokenization is the process of breaking down \n", - "a sequence of text into smaller units called tokens,\n", - "which can be words, phrases, or even individual characters.\n", - "Tokenization is often the first step in natural languages processing tasks \n", - "such as text classification, named entity recognition, and sentiment analysis.\n", - "The resulting tokens are typically used as input to further processing steps,\n", - "such as vectorization, where the tokens are converted\n", - "into numerical representations for machine learning models to use.'''\n", - "data = corpus.split('.')\n", - "\n", - "n = 100\n", - "bpe_pairs = byte_pair_encoding(data, n)\n", - "bpe_pairs\n" + "type(txt[0])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"▁this ▁is ▁a ▁test\n", + "▁another ▁example ▁sentence\n" + ] + } + ], + "source": [ + "def add_special_character(corpus, special_char='▁'):\n", + " modified_corpus = []\n", + " for sentence in corpus:\n", + " # Split the sentence into words\n", + " words = sentence.split()\n", + " # Add the special character to the beginning of each word\n", + " modified_words = [special_char + word for word in words]\n", + " # Join the modified words back into a sentence\n", + " modified_sentence = ' '.join(modified_words)\n", + " # Append the modified sentence to the new corpus\n", + " modified_corpus.append(modified_sentence)\n", + " return modified_corpus\n", + "\n", + "# Example usage\n", + "corpus = [\"this is a test\", \"another example sentence\"]\n", + "modified_corpus = add_special_character(corpus)\n", + "for sentence in modified_corpus:\n", + " print(sentence)\n" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "▁2029\n" + ] + } + ], + "source": [ + "from collections import defaultdict\n", + "\n", + "def add_special_character(corpus, special_char='▁'):\n", + " modified_corpus = []\n", + " for sentence in corpus:\n", + " modified_sentence = ''\n", + " words = []\n", + " previous_char_is_space = False\n", + " \n", + " for char in sentence:\n", + " if char == ' ':\n", + " previous_char_is_space = True\n", + " words.append(modified_sentence)\n", + " modified_sentence = ''\n", + " elif previous_char_is_space:\n", + " modified_sentence += special_char + char\n", + " previous_char_is_space = False\n", + " else:\n", + " modified_sentence += char\n", + " \n", + " modified_corpus.extend(words)\n", + " \n", + " return modified_corpus\n", + "\n", + "# Example usage\n", + "corpus = [txt[0], txt[1], txt[2]]\n", + 
"modified_corpus = add_special_character(corpus)\n", + "word_freqs = defaultdict(int)\n", + "for word in modified_corpus:\n", + " word_freqs[word] += 1\n" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(int,\n", + " {'896': 1,\n", + " '▁2029': 4,\n", + " '▁935': 2,\n", + " '▁679': 4,\n", + " '▁1115': 1,\n", + " '▁3601': 1,\n", + " '▁3000': 1,\n", + " '▁222': 21,\n", + " '▁3446': 2,\n", + " '▁2218': 1,\n", + " '▁3072': 1,\n", + " '▁550': 3,\n", + " '▁3652': 1,\n", + " '▁665': 8,\n", + " '▁2596': 5,\n", + " '▁2809': 2,\n", + " '▁3649': 1,\n", + " '▁251': 6,\n", + " '▁2610': 3,\n", + " '▁2536': 2,\n", + " '▁47': 1,\n", + " '▁2852': 2,\n", + " '▁2940': 2,\n", + " '▁3353': 3,\n", + " '▁3400': 1,\n", + " '▁3336': 1,\n", + " '▁325': 1,\n", + " '▁2647': 4,\n", + " '▁4076': 8,\n", + " '▁3653': 1,\n", + " '▁3253': 2,\n", + " '▁58': 1,\n", + " '▁3664': 2,\n", + " '▁1424': 3,\n", + " '▁1388': 1,\n", + " '▁278': 3,\n", + " '▁897': 1,\n", + " '▁447': 4,\n", + " '▁2355': 1,\n", + " '▁2453': 4,\n", + " '▁2531': 4,\n", + " '▁2712': 20,\n", + " '▁828': 1,\n", + " '▁2895': 2,\n", + " '▁2398': 4,\n", + " '▁2908': 1,\n", + " '▁901': 4,\n", + " '▁3686': 2,\n", + " '▁2620': 1,\n", + " '▁3254': 2,\n", + " '▁3962': 1,\n", + " '▁0': 1,\n", + " '▁1448': 1,\n", + " '▁863': 1,\n", + " '▁3593': 4,\n", + " '▁124': 3,\n", + " '▁1048': 2,\n", + " '▁1593': 3,\n", + " '▁4086': 3,\n", + " '▁3236': 2,\n", + " '▁1767': 1,\n", + " '▁2800': 1,\n", + " '▁697': 2,\n", + " '▁514': 5,\n", + " '▁3648': 1,\n", + " '▁2337': 6,\n", + " '▁1338': 1,\n", + " '▁1114': 4,\n", + " '▁340': 1,\n", + " '▁3514': 6,\n", + " '▁2658': 1,\n", + " '▁1954': 1,\n", + " '▁3867': 3,\n", + " '▁2300': 1,\n", + " '▁317': 2,\n", + " '▁7': 15,\n", + " '▁1091': 1,\n", + " '▁1768': 3,\n", + " '▁1440': 1,\n", + " '▁3167': 1,\n", + " '▁672': 1,\n", + " '▁1253': 1,\n", + " '▁188': 8,\n", + " 
'▁3544': 1,\n", + " '▁2934': 1,\n", + " '▁1368': 1,\n", + " '▁479': 3,\n", + " '▁3951': 1,\n", + " '▁3387': 2,\n", + " '▁2438': 1,\n", + " '▁1262': 1,\n", + " '▁3166': 2,\n", + " '▁462': 2,\n", + " '▁3530': 1,\n", + " '▁333': 1,\n", + " '▁3808': 4,\n", + " '▁2796': 1,\n", + " '▁1920': 2,\n", + " '▁794': 1,\n", + " '▁263': 4,\n", + " '▁2626': 2,\n", + " '▁1949': 1,\n", + " '▁57': 1,\n", + " '▁3990': 1,\n", + " '▁3785': 1,\n", + " '▁146': 2,\n", + " '▁404': 1,\n", + " '▁3731': 15,\n", + " '▁3840': 2,\n", + " '▁940': 2,\n", + " '▁2550': 1,\n", + " '▁544': 1,\n", + " '▁3465': 1,\n", + " '▁3232': 4,\n", + " '▁269': 1,\n", + " '▁79': 2,\n", + " '▁2159': 1,\n", + " '▁3879': 2,\n", + " '▁1734': 1,\n", + " '▁3900': 1,\n", + " '▁755': 3,\n", + " '▁1756': 1,\n", + " '▁818': 1,\n", + " '▁800': 1,\n", + " '▁1249': 1,\n", + " '▁171': 2,\n", + " '▁319': 1,\n", + " '▁727': 2,\n", + " '▁3698': 2,\n", + " '▁3683': 1,\n", + " '▁3969': 4,\n", + " '▁2431': 3,\n", + " '▁1838': 6,\n", + " '▁126': 1,\n", + " '▁2673': 8,\n", + " '▁4012': 1,\n", + " '▁1010': 3,\n", + " '▁2151': 3,\n", + " '▁3437': 5,\n", + " '▁417': 1,\n", + " '▁2386': 5,\n", + " '▁3705': 1,\n", + " '▁3428': 1,\n", + " '▁1168': 1,\n", + " '▁1527': 2,\n", + " '▁3885': 1,\n", + " '▁1952': 2,\n", + " '▁2443': 1,\n", + " '▁3997': 1,\n", + " '▁3562': 1,\n", + " '▁1667': 2,\n", + " '▁3651': 1,\n", + " '▁3981': 2,\n", + " '▁2426': 3,\n", + " '▁1494': 2,\n", + " '▁1532': 2,\n", + " '▁3602': 2,\n", + " '2712': 1,\n", + " '▁1604': 5,\n", + " '▁3458': 3,\n", + " '▁4031': 1,\n", + " '▁1669': 11,\n", + " '▁2008': 6,\n", + " '▁857': 1,\n", + " '▁3123': 2,\n", + " '▁3321': 2,\n", + " '▁1887': 1,\n", + " '▁846': 1,\n", + " '▁763': 2,\n", + " '▁612': 1,\n", + " '▁3846': 1,\n", + " '▁1060': 3,\n", + " '▁312': 1,\n", + " '▁859': 3,\n", + " '▁3638': 1,\n", + " '▁2238': 5,\n", + " '▁590': 1,\n", + " '▁2458': 2,\n", + " '▁3847': 1,\n", + " '▁304': 3,\n", + " '▁1888': 1,\n", + " '▁1986': 1,\n", + " '▁3412': 3,\n", + " '▁1885': 1,\n", + " '▁375': 
5,\n", + " '▁2176': 1,\n", + " '▁887': 1,\n", + " '▁3051': 4,\n", + " '▁3374': 1,\n", + " '▁3485': 1,\n", + " '▁973': 2,\n", + " '▁622': 1,\n", + " '▁2557': 1,\n", + " '▁3750': 3,\n", + " '▁240': 3,\n", + " '▁2452': 2,\n", + " '▁2913': 4,\n", + " '▁2525': 3,\n", + " '▁691': 2,\n", + " '▁1363': 1,\n", + " '▁796': 1,\n", + " '▁1232': 1,\n", + " '▁1332': 1,\n", + " '▁3282': 1,\n", + " '▁966': 4,\n", + " '▁3883': 1,\n", + " '▁1774': 3,\n", + " '▁2559': 2,\n", + " '▁748': 1,\n", + " '▁2975': 1,\n", + " '▁2608': 1,\n", + " '▁3345': 4,\n", + " '▁868': 3,\n", + " '▁731': 2,\n", + " '▁2872': 1,\n", + " '▁1336': 4,\n", + " '▁2488': 1,\n", + " '▁3706': 2,\n", + " '▁2276': 2,\n", + " '▁3739': 1,\n", + " '▁434': 1,\n", + " '▁2203': 2,\n", + " '▁2019': 3,\n", + " '▁873': 1,\n", + " '▁1273': 1,\n", + " '▁3627': 1,\n", + " '▁2912': 1,\n", + " '▁4046': 1,\n", + " '▁120': 1,\n", + " '▁2888': 1,\n", + " '▁1707': 2,\n", + " '▁1153': 2,\n", + " '▁2927': 1,\n", + " '▁1188': 3,\n", + " '▁1400': 2,\n", + " '▁397': 2,\n", + " '▁1140': 1,\n", + " '▁792': 1,\n", + " '▁20': 2,\n", + " '▁3452': 4,\n", + " '▁1247': 4,\n", + " '▁3297': 3,\n", + " '▁326': 1,\n", + " '▁2813': 4,\n", + " '▁2365': 4,\n", + " '▁3368': 2,\n", + " '▁1129': 2,\n", + " '▁3260': 1,\n", + " '▁186': 3,\n", + " '▁1814': 2,\n", + " '▁1445': 2,\n", + " '▁438': 1,\n", + " '▁3323': 2,\n", + " '▁3744': 2,\n", + " '▁2392': 2,\n", + " '▁448': 2,\n", + " '▁1953': 5,\n", + " '▁2204': 7,\n", + " '▁2430': 1,\n", + " '▁1094': 3,\n", + " '▁1702': 1,\n", + " '▁2688': 3,\n", + " '▁2380': 1,\n", + " '▁3714': 1,\n", + " '▁3391': 1,\n", + " '▁2738': 5,\n", + " '▁2312': 3,\n", + " '▁3866': 3,\n", + " '▁4069': 1,\n", + " '▁752': 3,\n", + " '▁1861': 2,\n", + " '▁73': 3,\n", + " '▁3403': 1,\n", + " '▁3825': 2,\n", + " '▁685': 5,\n", + " '▁332': 1,\n", + " '▁305': 1,\n", + " '▁4049': 1,\n", + " '▁1004': 1,\n", + " '▁3468': 2,\n", + " '▁2885': 2,\n", + " '▁740': 1,\n", + " '▁1001': 1,\n", + " '▁3367': 1,\n", + " '▁48': 2,\n", + " '▁2617': 2,\n", + 
" '▁793': 2,\n", + " '▁927': 1,\n", + " '▁2801': 2,\n", + " '▁3229': 3,\n", + " '▁3896': 1,\n", + " '▁886': 1,\n", + " '▁773': 1,\n", + " '▁3396': 4,\n", + " '▁1160': 1,\n", + " '▁1968': 1,\n", + " '▁3272': 1,\n", + " '▁381': 1,\n", + " '▁1307': 3,\n", + " '▁2240': 1,\n", + " '▁215': 1,\n", + " '▁726': 1,\n", + " '▁3195': 2,\n", + " '▁3026': 1,\n", + " '▁903': 1,\n", + " '▁2317': 2,\n", + " '▁1484': 2,\n", + " '▁2182': 4,\n", + " '▁3688': 1,\n", + " '▁234': 1,\n", + " '▁3009': 1,\n", + " '▁370': 1,\n", + " '▁3871': 1,\n", + " '▁3551': 2,\n", + " '▁499': 7,\n", + " '▁1742': 1,\n", + " '▁582': 1,\n", + " '▁862': 1,\n", + " '▁930': 2,\n", + " '▁1097': 1,\n", + " '▁688': 2,\n", + " '▁2450': 2,\n", + " '▁1658': 1,\n", + " '▁97': 1,\n", + " '▁2502': 2,\n", + " '▁308': 2,\n", + " '▁746': 2,\n", + " '▁488': 1,\n", + " '▁608': 2,\n", + " '▁2948': 1,\n", + " '▁3919': 2,\n", + " '▁3219': 1,\n", + " '▁1941': 1,\n", + " '▁845': 1,\n", + " '▁1398': 1,\n", + " '▁2440': 1,\n", + " '▁3765': 2,\n", + " '▁1644': 2,\n", + " '▁2206': 2,\n", + " '▁3795': 1,\n", + " '▁2076': 2,\n", + " '▁3089': 1,\n", + " '▁2331': 2,\n", + " '▁2807': 1,\n", + " '▁1660': 2,\n", + " '▁95': 1,\n", + " '▁3671': 4,\n", + " '▁2918': 4,\n", + " '▁3008': 2,\n", + " '▁152': 1,\n", + " '▁117': 2,\n", + " '▁1924': 2,\n", + " '▁365': 3,\n", + " '▁3893': 3,\n", + " '▁3069': 3,\n", + " '▁1725': 1,\n", + " '▁3258': 1,\n", + " '▁1794': 1,\n", + " '▁2718': 2,\n", + " '▁829': 4,\n", + " '▁575': 1,\n", + " '▁2326': 2,\n", + " '▁294': 1,\n", + " '▁4054': 1,\n", + " '▁1349': 1,\n", + " '▁3177': 2,\n", + " '▁3872': 1,\n", + " '▁3281': 1,\n", + " '▁588': 1,\n", + " '▁992': 1,\n", + " '▁3687': 2,\n", + " '▁3087': 1,\n", + " '▁3786': 1,\n", + " '▁450': 1,\n", + " '▁1788': 14,\n", + " '▁831': 1,\n", + " '▁2984': 1,\n", + " '▁3180': 2,\n", + " '▁206': 1,\n", + " '▁100': 1,\n", + " '▁2506': 1,\n", + " '▁3230': 1,\n", + " '▁399': 2,\n", + " '▁204': 4,\n", + " '▁1806': 5,\n", + " '▁3603': 1,\n", + " '▁2779': 2,\n", + " '▁289': 5,\n", 
+ " '▁572': 1,\n", + " '▁1032': 2,\n", + " '▁1932': 5,\n", + " '▁990': 2,\n", + " '▁3702': 4,\n", + " '▁1046': 3,\n", + " '▁3161': 2,\n", + " '▁2085': 1,\n", + " '▁3350': 1,\n", + " '▁702': 1,\n", + " '▁489': 1,\n", + " '▁2434': 3,\n", + " '▁3693': 1,\n", + " '▁2788': 4,\n", + " '▁1026': 1,\n", + " '▁3251': 1,\n", + " '▁1701': 1,\n", + " '▁1477': 1,\n", + " '▁318': 1,\n", + " '▁1930': 2,\n", + " '▁1325': 1,\n", + " '▁1595': 1,\n", + " '▁237': 2,\n", + " '▁1054': 1,\n", + " '▁3820': 2,\n", + " '▁931': 1,\n", + " '▁1863': 2,\n", + " '▁3218': 5,\n", + " '▁2094': 3,\n", + " '▁3937': 1,\n", + " '▁1229': 1,\n", + " '▁1408': 2,\n", + " '▁153': 1,\n", + " '▁1990': 1,\n", + " '▁1435': 1,\n", + " '▁427': 4,\n", + " '▁961': 1,\n", + " '▁3030': 4,\n", + " '▁1516': 2,\n", + " '▁3775': 1,\n", + " '▁3013': 1,\n", + " '▁267': 1,\n", + " '▁477': 1,\n", + " '▁1134': 4,\n", + " '▁2083': 2,\n", + " '▁1217': 1,\n", + " '▁243': 1,\n", + " '▁2070': 2,\n", + " '▁695': 1,\n", + " '▁3566': 2,\n", + " '▁3075': 1,\n", + " '▁2167': 1,\n", + " '▁616': 1,\n", + " '▁3574': 1,\n", + " '▁3375': 1,\n", + " '▁1655': 3,\n", + " '▁457': 3,\n", + " '▁1131': 1,\n", + " '▁3316': 1,\n", + " '▁3498': 2,\n", + " '▁1261': 1,\n", + " '▁1369': 1,\n", + " '▁2516': 1,\n", + " '▁435': 2,\n", + " '▁890': 1,\n", + " '▁951': 1,\n", + " '▁1443': 3,\n", + " '▁2003': 2,\n", + " '▁1648': 2,\n", + " '▁1299': 1,\n", + " '▁1395': 1,\n", + " '▁3324': 3,\n", + " '▁2593': 1,\n", + " '▁719': 1,\n", + " '▁2939': 4,\n", + " '▁3592': 1,\n", + " '▁3613': 1,\n", + " '▁3487': 2,\n", + " '▁2786': 1,\n", + " '▁3318': 1,\n", + " '▁231': 1,\n", + " '▁1346': 1,\n", + " '▁3828': 1,\n", + " '▁1482': 1,\n", + " '▁175': 4,\n", + " '▁1017': 2,\n", + " '▁99': 2,\n", + " '▁2469': 1,\n", + " '▁584': 2,\n", + " '▁2001': 2,\n", + " '▁2750': 5,\n", + " '▁2573': 1,\n", + " '▁784': 1,\n", + " '▁1524': 3,\n", + " '▁1580': 1,\n", + " '▁1793': 1,\n", + " '▁2874': 1,\n", + " '▁1574': 2,\n", + " '▁2160': 4,\n", + " '▁1316': 2,\n", + " '▁2655': 1,\n", + " 
'▁1675': 1,\n", + " '▁1052': 1,\n", + " '▁2147': 7,\n", + " '▁1351': 1,\n", + " '▁527': 2,\n", + " '▁26': 1,\n", + " '▁3286': 1,\n", + " '▁2228': 1,\n", + " '▁50': 5,\n", + " '▁1112': 1,\n", + " '▁2342': 2,\n", + " '▁761': 1,\n", + " '▁1093': 1,\n", + " '▁670': 1,\n", + " '▁1312': 1,\n", + " '▁3044': 1,\n", + " '▁3199': 1,\n", + " '▁295': 2,\n", + " '▁2259': 3,\n", + " '▁3384': 5,\n", + " '▁354': 3,\n", + " '▁2943': 2,\n", + " '▁2952': 1,\n", + " '▁3194': 1,\n", + " '▁1231': 1,\n", + " '▁3288': 1,\n", + " '▁3672': 1,\n", + " '▁1539': 2,\n", + " '▁1161': 3,\n", + " '▁2675': 1,\n", + " '▁2876': 1,\n", + " '▁3979': 1,\n", + " '▁2444': 2,\n", + " '▁2013': 2,\n", + " '▁2224': 3,\n", + " '▁926': 3,\n", + " '▁580': 1,\n", + " '▁1430': 1,\n", + " '▁1907': 3,\n", + " '▁1059': 2,\n", + " '▁1544': 1,\n", + " '▁2080': 1,\n", + " '▁114': 1,\n", + " '▁1130': 2,\n", + " '▁2066': 1,\n", + " '▁1848': 2,\n", + " '▁1726': 1,\n", + " '▁1567': 1,\n", + " '▁833': 3,\n", + " '▁3724': 3,\n", + " '▁3910': 2,\n", + " '▁568': 1,\n", + " '▁1423': 2,\n", + " '▁1305': 1,\n", + " '▁1631': 1,\n", + " '▁536': 2,\n", + " '▁2174': 1,\n", + " '▁979': 1,\n", + " '▁1688': 1,\n", + " '▁484': 1,\n", + " '▁3708': 1,\n", + " '▁3875': 1,\n", + " '▁1148': 1,\n", + " '▁2700': 1,\n", + " '▁3761': 1,\n", + " '▁1420': 1,\n", + " '▁1761': 1,\n", + " '▁2115': 1,\n", + " '▁2419': 1,\n", + " '▁1283': 2,\n", + " '▁2310': 1,\n", + " '▁756': 1,\n", + " '▁1164': 1,\n", + " '▁92': 1,\n", + " '▁2328': 1,\n", + " '▁1877': 1,\n", + " '▁85': 1,\n", + " '▁2662': 2,\n", + " '▁4093': 1,\n", + " '▁3415': 1,\n", + " '▁2408': 1,\n", + " '▁3269': 1,\n", + " '▁1462': 3,\n", + " '▁2435': 1,\n", + " '▁3205': 3,\n", + " '▁230': 1,\n", + " '▁1594': 1,\n", + " '▁3881': 1,\n", + " '▁1934': 1,\n", + " '▁2676': 1,\n", + " '▁2891': 1,\n", + " '▁782': 2,\n", + " '▁4071': 2,\n", + " '▁2338': 2,\n", + " '▁623': 1,\n", + " '▁1733': 2,\n", + " '▁757': 1,\n", + " '▁3467': 1,\n", + " '▁3352': 3,\n", + " '▁2467': 1,\n", + " '▁1912': 1,\n", + " 
'▁775': 2,\n", + " '▁2254': 1,\n", + " '▁216': 1,\n", + " '▁2736': 1,\n", + " '▁3106': 3,\n", + " '▁4036': 1,\n", + " '▁4080': 1,\n", + " '▁2613': 1,\n", + " '▁2870': 1,\n", + " '▁1970': 1,\n", + " '▁3183': 1,\n", + " '▁2782': 5,\n", + " '▁1851': 1,\n", + " '▁599': 1,\n", + " '▁3028': 1,\n", + " '▁105': 3,\n", + " '▁764': 1,\n", + " '▁3450': 1,\n", + " '▁1905': 1,\n", + " '▁3806': 1,\n", + " '▁654': 1,\n", + " '▁2831': 1,\n", + " '▁1343': 1,\n", + " '▁1813': 1,\n", + " '▁2415': 1,\n", + " '▁2558': 2,\n", + " '▁657': 1,\n", + " '▁3507': 2,\n", + " '▁1581': 1,\n", + " '▁366': 2,\n", + " '▁3005': 1,\n", + " '▁2124': 1,\n", + " '▁1446': 1,\n", + " '▁2817': 1,\n", + " '▁802': 1,\n", + " '▁2462': 2,\n", + " '▁2448': 1,\n", + " '▁2068': 1,\n", + " '▁928': 1,\n", + " '▁3547': 1,\n", + " '▁2992': 1,\n", + " '▁3463': 1,\n", + " '▁2100': 1,\n", + " '▁2909': 2,\n", + " '▁2689': 1,\n", + " '▁933': 2,\n", + " '▁2473': 1,\n", + " '▁3856': 1,\n", + " '▁3662': 1,\n", + " '▁2900': 1,\n", + " '▁208': 1,\n", + " '▁1106': 1,\n", + " '▁2513': 1,\n", + " '▁3849': 2,\n", + " '▁981': 1,\n", + " '▁3181': 1,\n", + " '▁1818': 3,\n", + " '▁2470': 1,\n", + " '▁680': 3,\n", + " '▁77': 1,\n", + " '▁2946': 1,\n", + " '▁3149': 1,\n", + " '▁529': 1,\n", + " '▁162': 1,\n", + " '▁1055': 1,\n", + " '▁2867': 1,\n", + " '▁1904': 2,\n", + " '▁2511': 1,\n", + " '▁2964': 1,\n", + " '▁3619': 1,\n", + " '▁1310': 1,\n", + " '▁3570': 1,\n", + " '▁1751': 2,\n", + " '▁410': 3,\n", + " '▁660': 1,\n", + " '▁2320': 2,\n", + " '▁344': 1,\n", + " '▁3740': 1,\n", + " '▁3704': 1,\n", + " '▁1519': 1,\n", + " '▁945': 1,\n", + " '▁87': 1,\n", + " '▁429': 1,\n", + " '▁247': 1,\n", + " '▁196': 3,\n", + " '▁3998': 1,\n", + " '▁449': 2,\n", + " '▁721': 1,\n", + " '▁3171': 1,\n", + " '▁1578': 1,\n", + " '▁455': 1,\n", + " '▁3855': 1,\n", + " '▁1678': 1,\n", + " '▁3173': 1,\n", + " '▁3112': 1,\n", + " '▁177': 1,\n", + " '▁4033': 1,\n", + " '▁637': 2,\n", + " '▁3461': 1,\n", + " '▁1041': 1,\n", + " '▁2089': 1,\n", + " '▁2968': 
1,\n", + " '▁991': 1,\n", + " '▁3901': 4,\n", + " '▁811': 1,\n", + " '▁1088': 1,\n", + " '▁1875': 2,\n", + " '▁3212': 1,\n", + " '▁1589': 1,\n", + " '▁497': 1,\n", + " '▁4059': 1,\n", + " '▁1269': 1,\n", + " '▁946': 1,\n", + " '▁595': 1,\n", + " '▁684': 1,\n", + " '▁2253': 1,\n", + " '▁31': 1,\n", + " '▁149': 1,\n", + " '▁3877': 1,\n", + " '▁351': 1,\n", + " '▁2991': 1,\n", + " '▁3102': 2,\n", + " '▁4087': 1,\n", + " '▁2221': 2,\n", + " '▁3227': 1,\n", + " '▁1411': 1,\n", + " '▁2158': 1,\n", + " '▁1833': 1,\n", + " '▁3154': 1,\n", + " '▁60': 1,\n", + " '▁1764': 1,\n", + " '▁2073': 3,\n", + " '▁1565': 1,\n", + " '▁902': 1,\n", + " '▁1143': 1,\n", + " '▁716': 1,\n", + " '▁3936': 1,\n", + " '▁646': 1,\n", + " '▁3799': 1,\n", + " '▁1238': 1,\n", + " '▁2616': 1,\n", + " '▁4026': 1,\n", + " '▁1617': 1,\n", + " '▁2491': 1,\n", + " '▁1659': 1,\n", + " '▁832': 1,\n", + " '▁1425': 1,\n", + " '▁586': 2,\n", + " '▁1042': 1,\n", + " '▁3187': 1,\n", + " '▁39': 1,\n", + " '▁1444': 1,\n", + " '▁2480': 1,\n", + " '▁956': 1,\n", + " '▁1485': 1,\n", + " '▁2410': 1,\n", + " '▁3001': 1,\n", + " '▁3907': 1,\n", + " '▁2634': 1,\n", + " '▁1757': 3,\n", + " '▁1966': 1,\n", + " '▁3062': 3,\n", + " '▁1747': 2,\n", + " '▁2166': 1,\n", + " '▁2097': 2,\n", + " '▁2005': 2,\n", + " '▁219': 1,\n", + " '▁3560': 1,\n", + " '▁1159': 1,\n", + " '▁645': 1,\n", + " '▁3246': 1,\n", + " '▁1246': 1,\n", + " '▁2290': 1,\n", + " '▁3497': 1,\n", + " '▁15': 1,\n", + " '▁12': 1,\n", + " '▁1390': 1,\n", + " '▁4070': 1,\n", + " '▁2485': 1,\n", + " '▁1599': 1,\n", + " '▁3477': 1,\n", + " '▁169': 1,\n", + " '▁777': 1,\n", + " '▁1489': 1,\n", + " '▁2735': 1,\n", + " '▁1882': 1,\n", + " '3446': 1,\n", + " '▁2542': 1,\n", + " '▁1488': 1,\n", + " '▁1810': 1,\n", + " '▁3480': 1,\n", + " '▁3220': 1,\n", + " '▁98': 1,\n", + " '▁1749': 1,\n", + " '▁1893': 1,\n", + " '▁781': 1,\n", + " '▁1993': 1,\n", + " '▁1991': 1,\n", + " '▁3659': 1,\n", + " '▁3047': 1,\n", + " '▁1289': 2,\n", + " '▁2945': 1,\n", + " '▁2280': 1,\n", + " 
'▁2524': 1,\n", + " '▁2917': 1,\n", + " '▁4042': 1,\n", + " '▁280': 1,\n", + " '▁1375': 1,\n", + " '▁134': 1,\n", + " '▁400': 1,\n", + " '▁1481': 1,\n", + " '▁463': 1,\n", + " '▁3442': 1,\n", + " '▁2357': 1,\n", + " '▁2947': 1,\n", + " '▁258': 1,\n", + " '▁2123': 1,\n", + " '▁571': 1,\n", + " '▁3762': 1,\n", + " '▁3978': 1,\n", + " '▁1670': 1,\n", + " '▁651': 1,\n", + " '▁2694': 2,\n", + " '▁894': 2,\n", + " '▁1061': 1,\n", + " '▁3020': 1,\n", + " '▁218': 1,\n", + " '▁921': 1,\n", + " '▁2265': 1,\n", + " '▁1560': 1,\n", + " '▁1573': 1,\n", + " '▁610': 1,\n", + " '▁2862': 1,\n", + " '▁510': 1,\n", + " '▁1739': 1,\n", + " '▁950': 1,\n", + " '▁4009': 1,\n", + " '▁2332': 1,\n", + " '▁442': 1,\n", + " '▁2887': 1,\n", + " '▁853': 1,\n", + " '▁1865': 1,\n", + " '▁2002': 1,\n", + " '▁2356': 2,\n", + " '▁25': 1,\n", + " '▁1507': 1,\n", + " '▁3362': 1,\n", + " '▁2507': 1,\n", + " '▁3983': 2,\n", + " '▁3738': 1,\n", + " '▁1284': 1,\n", + " '▁2249': 1,\n", + " '▁472': 2,\n", + " '▁3717': 1,\n", + " '▁1056': 1,\n", + " '▁3390': 2,\n", + " '▁1554': 1,\n", + " '▁3040': 1,\n", + " '▁168': 1,\n", + " '▁4040': 1,\n", + " '▁3342': 1,\n", + " '▁90': 2,\n", + " '▁2322': 1,\n", + " '▁3165': 1,\n", + " '▁606': 1,\n", + " '▁997': 1,\n", + " '▁2520': 1,\n", + " '▁3356': 2,\n", + " '▁53': 1,\n", + " '▁62': 2,\n", + " '▁1421': 1,\n", + " '▁1650': 1,\n", + " '▁2244': 1,\n", + " '▁1095': 1,\n", + " '▁3684': 1,\n", + " '▁3410': 1,\n", + " '▁2971': 1,\n", + " '▁210': 1,\n", + " '▁700': 1,\n", + " '▁955': 1,\n", + " '▁1769': 1,\n", + " '▁1919': 1,\n", + " '▁2169': 1,\n", + " '▁977': 1,\n", + " '▁2053': 1,\n", + " '▁1234': 1,\n", + " '▁1705': 1,\n", + " '▁1909': 1,\n", + " '▁2133': 1,\n", + " '▁3933': 1,\n", + " '▁2503': 1,\n", + " '▁441': 1,\n", + " '▁2266': 1,\n", + " '▁2609': 1})" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "word_freqs" + ] }, { "cell_type": "code", From 56c336d2672ff08a7c29cd68bac1cebde07dbbf7 Mon Sep 17 
00:00:00 2001 From: Nathan Gabriel Date: Sat, 29 Jun 2024 13:27:30 -0400 Subject: [PATCH 04/10] udpated readme file: --- README.md | 70 ++++++++++++++++++++++++++++++++++++++++++++--- codec-to-token.py | 0 2 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 codec-to-token.py diff --git a/README.md b/README.md index 1a49ade..0ff0140 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,76 @@ -https://laion.ai/notes/open-gpt-4-o/ +# Emo-2-SNAC to LLaMA 3 Audio Token Conversion +This repository contains scripts and documentation for converting audio samples from the [0xd4t4/Emo-2-SNAC](https://huggingface.co/0xd4t4/Emo-2-SNAC) dataset into tokens compatible with the LLaMA 3 language model. This project is part of a collaboration with LAION to develop advanced audio processing capabilities for [AI assistants](https://laion.ai/notes/open-gpt-4-o/). -## Instructions +## Table of Contents -you might want to create an enviroment for this (optioinal) and activate it +- [Introduction](#introduction) +- [Dataset](#dataset) +- [Requirements](#requirements) +- [Installation](#installation) +- [Usage](#usage) +- [Project Structure](#project-structure) +- [License](#license) +- [Acknowledgements](#acknowledgements) +## Introduction + +The goal of this project is to enhance the audio processing capabilities of the LLaMA 3 language model by converting audio samples into discrete tokens. These tokens will then be used to train the model to understand and generate audio, similar to how it processes text. + +## Dataset + +The dataset used in this project is the [0xd4t4/Emo-2-SNAC](https://huggingface.co/0xd4t4/Emo-2-SNAC) dataset, available on Hugging Face. This dataset contains audio samples labeled with emotional content, which will be converted into SNAC (Multi-Scale Neural Audio Codec) tokens. + +## Requirements + +- Python 3.8 or higher +- Hugging Face `datasets` library (see `requirements.txt`) + +## Installation + +(Optional) Create and activate your own environment. + +## Usage + +1. 
Clone the repository: + ``` bash -pip intall -r requirement.txt +git clone https://github.com/LAION-AI/snac-to-llama3.git +cd snac-to-llama3 ``` +2. Install the requirements: + +``` bash +pip install -r requirements.txt +``` + +3. Download the Hugging Face dataset: + ``` bash huggingface-cli download 0xd4t4/Emo-2-SNAC --local-dir ./dataset --revision refs/convert/parquet --repo-type dataset ``` + +4. Map the dataset's SNAC codes to LLaMA 3 tokens (writes `snac-to-llama.json`): + +```bash +python codec-to-token.py +``` + +5. Byte pair encoder + +## Project Structure + +- `codec-to-token.py`: Script that maps the SNAC codes in the dataset to LLaMA 3 vocabulary tokens and saves the mapping to `snac-to-llama.json`. +- `byte-pair-encoding.py`: Script for integrating SNAC tokens with the LLaMA 3 model. +- `requirements.txt`: List of dependencies required for the project. +- `README.md`: This file. + + +## License + +This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. + +## Acknowledgements + +This project is a collaboration with [LAION](https://laion.ai/). Special thanks to the Hugging Face team for providing the [0xd4t4/Emo-2-SNAC](https://huggingface.co/0xd4t4/Emo-2-SNAC) dataset. 
diff --git a/codec-to-token.py b/codec-to-token.py new file mode 100644 index 0000000..e69de29 From f2b1cc221164e0ddf167dfcb97aa1c4093e9d83e Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Sat, 29 Jun 2024 13:32:12 -0400 Subject: [PATCH 05/10] updated codec to token --- codec-to-token.py | 59 + main.py | 53 - snac-to-llama.json | 8004 ++++++++++++++++++++++---------------------- 3 files changed, 4061 insertions(+), 4055 deletions(-) diff --git a/codec-to-token.py b/codec-to-token.py index e69de29..749a522 100644 --- a/codec-to-token.py +++ b/codec-to-token.py @@ -0,0 +1,59 @@ +from datasets import load_dataset +import json + + +class Vocabulary: + """Class to map codes from huggingface dataset to tokens in Llama 3-8B token""" + + def __init__(self): + self.stoi = {} + self.itos = {} + + def build_vocabulary(self, parquet_files, tokenizer_file="tokenizer.json"): + ''' + creates the vocabulary from the Llama 3 tokenizer and hugging face dataset + Args: + tokenizer_file(str): file downloaded from Llama 3(8B) which contains the vocabulary for the model + parquet_files(list): director with the dataset from hugging face in parquet format + + ''' + # Open the JSON file + with open(tokenizer_file, 'r') as file: + # Load the JSON data + data = json.load(file) + + llama_stoi = data['model']['vocab'] + llama_itos = {value:key for key,value in llama_stoi.items()} + + #load hugging face data + dataset = load_dataset('parquet', data_files=parquet_files) + vocabulary = set() + + for sent in dataset["train"]["txt"]: + for word in sent.split(): + vocabulary.add(word) + + self.itos = {value:llama_itos[int(value)] for value in vocabulary} + self.stoi = {value:key for key,value in self.itos.items()} + + def save(self, file_path): + ''' + saves the the int to string(llama token) to a json file to be loaded later + Args: + file_path(str): file where dict will be saved + + ''' + with open(file_path, "w") as file: + json.dump(self.itos, file, indent=4) + + +if __name__ == "__main__": 
+ test_dir = [f"dataset/default/partial-train/000{i}.parquet" for i in range(10)] + vocab = Vocabulary() + + vocab.build_vocabulary(test_dir) + vocab.save("snac-to-llama.json") + + + + \ No newline at end of file diff --git a/main.py b/main.py index 72d8d3f..e69de29 100644 --- a/main.py +++ b/main.py @@ -1,53 +0,0 @@ -from datasets import load_dataset -import json - - -class Vocabulary: - """Class to map codes from huggingface dataset to tokens in Llama 3-8B token""" - - def __init__(self): - self.stoi = {} - self.itos = {} - - def build_vocabulary(self, parquet_files, tokenizer_file="tokenizer.json"): - ''' - creates the vocabulary from the Llama 3 tokenizer and hugging face dataset - Args: - tokenizer_file(str): file downloaded from Llama 3(8B) which contains the vocabulary for the model - parquet_files(list): director with the dataset from hugging face in parquet format - - ''' - # Open the JSON file - with open(tokenizer_file, 'r') as file: - # Load the JSON data - data = json.load(file) - - llama_stoi = data['model']['vocab'] - llama_itos = {value:key for key,value in llama_stoi.items()} - - #load hugging face data - dataset = load_dataset('parquet', data_files=parquet_files) - vocabulary = set() - - for sent in dataset["train"]["txt"]: - for word in sent.split(): - vocabulary.add(word) - - self.itos = {int(value):llama_itos[int(value)] for value in vocabulary} - self.stoi = {value:key for key,value in self.itos.items()} - - def save(self, file_path): - with open(file_path, "w") as file: - json.dump(self.itos, file, indent=4) - - -if __name__ == "__main__": - test_dir = [f"dataset/default/partial-train/000{i}.parquet" for i in range(10)] - vocab = Vocabulary() - - vocab.build_vocabulary(test_dir) - vocab.save("snac-to-llama.json") - - - - \ No newline at end of file diff --git a/snac-to-llama.json b/snac-to-llama.json index dceccdf..e9dc8ca 100644 --- a/snac-to-llama.json +++ b/snac-to-llama.json @@ -1,4098 +1,4098 @@ { - "29": ">", - "1275": "index", - 
"3891": "GET", - "3531": "agement", - "340": ")\u010a", - "3847": "itted", - "524": "", - "2663": "\u0120called", - "2758": "\u0120Add", - "811": "ations", - "646": "_s", - "334": "**", - "598": "ans", - "2613": "\u0120email", - "2207": "_IN", - "100": "\u00a7", - "3278": "\u0120women", - "3483": "annot", - "3550": "uper", - "901": "ble", - "2008": "sub", - "1235": "\u0120*\u010a", - "27": "<", - "3938": "\u0120future", - "3183": "apper", - "1115": "\u0120This", - "309": "am", - "3877": ".view", - "281": "\u0120p", - "3881": "\u0120apply", - "854": "\u0120null", - "2216": "\u0120really", - "2503": "\u0120sit", - "1296": "\u0120test", - "4045": ".Control", - "2199": "\u0120page", - "1870": "ually", - "2906": "erver", - "1345": ".M", - "1919": "ty", - "2890": "\u0120health", - "3085": "\u0120offer", - "147": "\u00d7", - "1568": "try", - "3087": "HER", - "3481": "ERT", - "35": "D", - "3792": "Back", - "1513": "clud", - "1412": "uch", + "1156": "\u0120sc", + "3520": "ney", + "3012": "ention", + "1047": "\u0120had", + "3030": "avig", + "329": "ad", + "1666": "\u0120As", + "1266": "../", + "2128": "\u0120team", + "1178": "word", + "1424": "\u0120spec", + "42": "K", + "216": "\u011c", + "3092": "\u0120My", + "52": "U", + "798": "key", + "3389": "\u0120late", + "2175": "\u0120Get", + "4078": "\u0120Type", + "2516": "++)", + "696": ")\u010a\u010a", + "1409": "ough", + "2219": "\u0120dep", + "3856": "\u0120pract", + "3001": "!!", + "544": "ff", + "370": "ab", + "70": "g", + "4074": "\u0120admin", "3607": "Method", - "1778": "\u0120such", - "1375": "\u0120rel", + "2309": "default", + "1141": "amp", "117": "\u00b9", - "1668": "Color", - "1070": "\u0120there", - "1998": "(i", - "3962": "\u0120Post", - "2693": "atur", + "125": "\u00c1", + "1103": "url", + "4084": "cket", + "2024": "\u0120ter", + "1507": "ED", + "919": "ions", + "1692": "Man", + "3605": "\u0120\"/", + "61": "^", + "2418": "\u0120Man", + "94": "\u00a1", + "921": "\u0120Ch", + "2621": "uccess", "470": "\u0120com", 
- "1535": "(),", - "1430": "eter", - "762": "ec", - "529": "\u00e2\u0122\u013b", + "2459": "_ST", + "398": "ly", + "1243": "\u0120then", + "1033": "ervice", + "2555": "\u0120something", + "1249": "heck", + "3106": "Entity", + "3976": "Per", + "3633": "ony", + "4005": ".(", + "2491": "49", "2335": "ential", - "1751": "ott", - "1502": "User", - "2856": "\u0120offic", - "991": "\u0120em", - "493": "('", - "519": "ant", - "2683": "\u0120job", - "1711": "uring", - "1741": "AC", - "267": "st", - "2790": "46", - "3410": "atform", - "1176": "\u0120first", - "2253": "_e", - "3582": "\u0120though", - "3559": "\u0120);\u010a\u010a", - "2960": "\u0120cent", - "210": "\u0116", - "3362": "IZE", - "3670": "container", - "1767": "Info", - "177": "\u00f5", - "1289": "opy", - "2107": "\u0120cap", - "1026": "ms", - "3518": "abs", - "1259": "ream", - "2607": "HT", - "3155": ");\u010d\u010a\u010d\u010a", - "4000": ">();\u010a", - "2069": "(e", - "3852": "any", - "2582": "\u0120Or", - "1888": "\u0120best", - "3668": "(\"#", - "250": "\u013e", - "1558": "\u0120inv", - "1017": "itle", - "2292": "(String", - "1866": "\u0120own", - "2033": "\u0120double", - "1878": "ename", - "2279": "UE", - "1608": "\u0120ed", - "800": "\u0120St", - "2434": "TR", - "4093": "ecess", - "3294": "olution", - "736": "ivate", - "3459": "\u0120Form", - "2154": "irt", - "937": "SE", - "593": "que", - "549": "\u0120U", - "4047": "ball", - "586": "\u0120public", - "2545": "09", - "2834": "\u0120won", - "2817": "../../", - "4001": "ional", - "3395": "cul", - "1173": "\u0120},\u010a", - "466": "ter", - "1226": "\u0120We", - "3228": "ead", - "1984": "\u0120message", - "3553": "eneric", - "2787": "ById", - "4059": "\u0120mind", - "1451": "Ind", - "2237": "\u0120level", - "2": "#", - "1606": "\u0120because", - "25": ":", - "3268": "\u0120rights", - "2339": "\u0120catch", - "1413": "ative", - "2111": "ved", - "869": "ov", - "3671": "edit", - "666": "\u0120Th", - "228": "\u0128", - "852": "urre", - "153": "\u00dd", - 
"833": "eng", - "1561": "\u0120New", - "3064": "attr", - "283": "ou", - "3079": "itional", - "2658": "mod", - "3142": "\u0120Public", - "909": "\")", - "2083": "_H", - "3337": "EX", - "2832": "_W", - "2954": "ended", - "923": "\u0120add", - "2218": "\u0120target", - "1454": "();\u010a\u010a", - "1738": "File", - "1029": "ml", - "2175": "\u0120Get", - "2536": "\u0120non", - "1264": "sum", - "1819": "\u0120((", - "1982": "ook", - "425": "age", + "928": "string", + "353": "\u0120*", + "3037": "pecial", + "1575": "sole", + "2833": "plement", + "505": "\u0120from", + "1027": "\u0120been", + "4018": "\u0120cut", + "964": "IT", "2649": "ada", - "76": "m", - "362": "\u0120A", - "2211": "\u0120ca", - "3824": "\u0120dam", - "618": "err", - "3608": "\u0120ident", - "1228": "lick", - "1679": "();\u010d\u010a", - "1272": "40", - "881": "\u010d\u010a\u010d\u010a", - "3588": "\u0120::", - "993": "\u0120sp", - "3545": "ral", - "999": "quire", - "1993": "ground", - "2888": "\u0120belie", - "3854": "\u0120pack", - "747": "li", - "920": ".D", - "718": "ich", - "2655": "\u0120intern", - "4007": "\u0120study", - "1121": "\u0120result", - "50": "S", - "2573": "Action", - "3033": "});\u010a", - "4067": "________", - "2696": "\u0120Date", - "931": "000", - "258": "in", - "1915": "Field", - "2537": "\u0120los", - "1125": "));\u010a", - "2501": "<<", - "485": "ind", - "3729": "\u0120contact", - "947": "];\u010a", - "2429": "rame", - "140": "\u00d0", - "1045": "date", + "3176": "pository", + "502": "\u0120new", + "2924": "idden", + "2945": "options", + "286": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2154": "irt", + "3382": "\";\u010a\u010a", + "675": "aw", + "3889": "__(", + "610": "\u0120str", + "1771": "ains", + "1434": "--------------------------------", + "3036": "ilt", + "3834": "\u0120interface", + "2197": "\u0120beg", + "2942": "\u0120organ", + "1314": "\u0120right", + "2653": "\u0120hard", + "3277": "\u0120When", + "3209": "\u00c3\u00a7", + "3841": "je", + "195": "\u0107", + 
"1875": "\"\u010a\u010a", + "2986": ".println", + "26": ";", + "1453": "Im", + "482": "\u0120-", + "3704": "_SE", + "3894": "[j", + "1566": "\u0120last", + "1080": "\u0120co", + "2544": "ither", + "3270": "\u0120\"\"\"\u010a", + "1887": "\u0120system", "3706": "(w", - "3965": "150", - "236": "\u0130", - "3356": "*/\u010a", - "2266": ".\"\u010a\u010a", - "1467": "#define", - "369": "\u0120for", - "2509": "ought", - "321": "il", - "2204": "\u0120different", - "3117": "\u0120far", - "3651": "\u0120AND", - "1840": "\u00d0\u00b8", - "3996": ".txt", - "3908": "ursor", - "1231": "ttp", - "3610": "\u0120million", - "891": "\">\u010a", - "2341": "//\u010a", - "1048": "\u0120bu", - "4095": "........", - "3416": "ison", - "2679": "\u0120ide", - "1694": "\u0120being", - "600": "unction", - "688": "vent", - "1418": "\u0120while", - "2146": "(&", - "1128": "Con", - "830": "ef", - "3519": "amera", - "488": "ity", - "951": "\u0120des", - "3592": ".png", - "2609": "\u0120chang", - "2504": "ework", - "2057": "\u0120To", - "3256": "arning", - "3674": "\u0120social", - "2455": "())\u010a", - "1846": "AD", - "411": "ith", - "2425": "\u0120iss", - "450": "iz", - "1294": "ION", - "1926": "ideo", + "2789": "\u0109case", + "4007": "\u0120study", + "3357": "inate", + "3126": "Handler", + "155": "\u00df", + "782": "pro", + "2863": "\u0120stat", + "1717": "\u0120\u00c3", + "3138": "_get", + "2319": "oh", + "1142": "ied", + "260": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2795": "\u0120says", + "2449": "\u0120element", + "3656": ".Collections", + "3198": "Vis", "1573": "ollow", - "975": "14", - "2278": "\u0120poss", - "3501": "\u0120CON", - "1853": "\u0120path", - "3574": "\u00e4\u00b8", - "2871": "IR", - "158": "\u00e2", - "4079": "amic", - "2317": "\u0120context", - "4088": "uel", - "1478": "ocument", - "2182": "oute", - "2396": "ves", - "282": "\u0120f", - "3943": "_array", - "3047": ":\"", - "589": "ath", - "2958": "\u0120File", - "509": "ld", - "1768": "par", + "1583": 
"\u0120/**\u010a", + "1223": "ower", + "2597": "\u0120await", + "3444": "ana", + "1074": "line", + "705": "),", + "2208": "button", + "3709": "\u0120inc", + "3120": "\u0120\\\u010a", + "274": "\u0120s", + "3691": "\u0120food", + "775": "\u0120else", + "1782": "\u0120differ", + "3985": "////////////////////////////////", + "490": "\u0120tr", + "903": "****************", + "1410": "orld", + "1502": "User", + "2063": "come", + "2442": "-t", + "1209": "((", + "2876": "\u0120Not", + "3505": "\\\\", + "688": "vent", + "563": "tring", + "341": "\u0120{\u010a", + "1855": "\u0120each", + "1757": "package", + "894": "ry", "1494": "ah", - "2698": "\u0120fri", - "1544": "27", - "1582": "uthor", - "1687": "We", - "4050": "Var", - "3901": ".spring", - "3414": "ITY", - "823": "ield", - "3109": "\u0120government", - "2363": "\u0120book", - "808": "essage", - "3003": "\u0120init", - "95": "\u00a2", - "3902": "main", - "57": "Z", - "3998": "\u0120Prop", - "3073": "\u0120exist", - "161": "\u00e5", - "1735": "object", - "3287": "\u0120didn", - "2300": "Ent", - "178": "\u00f6", - "1947": "ollection", - "944": "ern", - "2037": "message", - "832": "\u0120one", - "2122": "Result", - "2219": "\u0120dep", - "612": "\u0120&", - "1241": "\u0109public", + "2519": "Line", + "988": "ates", + "2794": "\u0120OR", + "427": "ht", + "1388": ".add", + "334": "**", + "1031": "nc", + "965": "EN", + "2673": "\u0120height", + "2903": "Number", + "306": "ent", + "2479": "\u0120len", + "2206": "\u0120Me", + "351": "ag", + "3306": "sl", + "4030": "region", + "1833": "\u0120follow", + "862": "\u0109return", + "2593": "\u0120eas", + "1953": "IG", + "600": "unction", + "604": "pp", + "2198": "CH", + "3765": "\u0120Ab", + "723": "add", + "3172": "mb", + "2163": "\u0120left", + "2291": "\u0120fore", + "3931": "IST", + "250": "\u013e", + "2273": "\u0120float", + "3927": "\u0120individual", + "3487": "56", + "1700": "{\u010d\u010a", + "2530": "ours", + "3788": "\u0120https", + "3189": "\u0120wr", + "543": "all", + 
"1407": "result", + "1999": "Class", + "1306": "\u0120after", + "1729": "by", + "257": "\u0120\u0120\u0120\u0120", + "251": "\u013f", + "231": "\u012b", + "1743": "summary", + "2342": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2152": "CE", + "2112": ".N", + "814": "\u0120they", + "635": "ite", + "3239": "env", + "2713": "\u0120update", + "694": "\u0120<", - "3169": "\u0120kind", - "1783": ".r", - "3233": "ones", - "404": "ir", - "2634": "}\u010d\u010a\u010d\u010a", - "1935": "\u0120take", - "1105": "ors", - "2985": "opt", - "1281": "\u0120jav", - "1911": "press", - "1446": "\u0120ser", - "3332": "\":\"", - "2870": "upport", - "2992": "actory", - "3495": "\u0120research", - "2343": "\u0120pay", - "3093": "ored", - "1769": "imes", - "1957": "\u0120action", - "4058": "asure", - "2353": "los", - "2989": "PL", - "2912": "icy", - "3244": "\u00c3\u00b3n", - "1258": ".st", - "1908": "ified", - "1090": "Int", - "3348": "(g", - "2388": "_V", - "3952": "\u0120took", - "1469": "\u0120ap", - "1593": "cause", - "1408": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2241": "_RE", - "1721": "01", - "3688": "tributes", - "2735": "ML", - "976": "\u0120!=", - "171": "\u00ef", - "1466": "ren", - "2017": "over", - "686": "ear", - "2794": "\u0120OR", - "1580": "html", - "2097": "Message", - "3690": "amed", - "1191": "lection", - "3521": "\u0120exc", - "3730": "':\u010a", - "1034": "\u0120%", - "239": "\u0133", - "2413": "\u0120wom", - "3458": "na", - "67": "d", - "3490": "\u00e3\u0122\u0124\u010a\u010a", - "560": "ice", - "2044": "Param", - "10": "+", - "132": "\u00c8", - "851": "_id", - "144": "\u00d4", - "3052": "{{", - "2464": "chem", - "973": "utton", - "1138": "arr", - "2491": "49", - "1762": "pre", - "2829": "\u0109\u0120\u0120\u0120", - "3813": "\u0120location", - "491": "elf", - "751": 
"set", - "71": "h", - "1449": "());\u010a", - "941": "Type", - "1303": "ting", - "537": "ost", - "1720": "\u0120}\u010d\u010a", - "1095": "\u0120let", - "2523": "\u0120fun", - "1550": "\u0120did", - "2137": "39", - "1343": "\u0120ph", - "3576": "sv", - "3479": "move", - "1141": "amp", - "2324": "\u0120life", - "3119": "\u0120NS", - "3196": "\u0120based", - "2931": "imal", - "2062": "_list", - "1315": "right", - "3685": "\u0120expected", - "146": "\u00d6", - "3735": "\u0120Pol", - "476": "ublic", - "264": "\u0120a", - "1596": "_A", - "3082": "\u0120True", - "2771": "\u0120sure", - "2695": "-w", - "3425": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2248": "tribute", - "1795": "\u0120http", - "269": "or", - "849": "Ex", - "3022": "quote", - "3101": "300", - "1238": "ices", - "1746": "\u0120($", - "122": "\u00be", - "785": "own", - "79": "p", - "1247": "ek", - "2891": "ANT", - "4065": "utions", - "1434": "--------------------------------", - "2316": "\u0120title", - "1789": "\u0120For", - "90": "{", - "3441": "\u0120Ste", - "2561": "\u0120available", - "3833": "rel", - "3262": "/m", - "184": "\u00fc", - "2821": "pen", - "1646": "\u0120model", - "3658": "\u0120Free", - "366": "\u0120<", - "805": "ser", - "249": "\u013d", - "3648": "New", - "468": "\u0120W", - "728": "ft", - "893": "\u0120man", - "3612": "style", - "2957": "une", - "2416": "atri", - "150": "\u00da", - "2195": "...\u010a\u010a", - "3041": "\u0120give", - "986": "ames", - "518": "',", - "3124": "child", - "1248": "irect", - "2479": "\u0120len", - "934": "\u0120qu", - "3357": "inate", - "1431": "\u00e2\u0122\u013bt", - "294": "\u0120d", - "1482": "\u00d0\u00be", - "3910": "GL", - "2694": "\u0120sum", - "3060": "\u0120either", - "2426": "otal", - "2462": "\u0109var", - "735": "\u0120K", - 
"3409": "now", - "3860": ".Point", - "2913": "IZ", - "1407": "result", - "616": "ell", - "1504": "\u0120{\u010a\u010a", - "3464": "_con", - "1910": "yn", - "3723": "\u0120United", - "3291": "\u0120],\u010a", - "4078": "\u0120Type", - "2581": ".create", - "3207": "\u00e2\u0122\u013bre", - "2987": "ulation", - "741": ");\u010d\u010a", - "2948": "(a", - "1598": "ature", - "225": "\u0125", - "3315": "da", - "2697": "\u0120little", - "522": ".c", - "758": "\u0120!", - "3654": "\u0120${", - "1854": "ision", - "4083": "\u0120opp", - "1938": "\u0120day", - "3494": "\u0120Return", - "4077": "`\u010a", - "2053": "els", - "1560": "\u0120es", - "2024": "\u0120ter", - "3937": "\u0120mot", - "4016": "icrosoft", - "187": "\u00ff", - "1588": "itial", - "384": "\u0120e", - "1355": "-s", - "41": "J", - "2759": "\u0120account", - "3364": "/*\u010a", - "414": "our", - "2690": "aps", - "3676": "\u0120React", - "1572": "ines", - "2449": "\u0120element", - "1160": "\u0120list", - "1628": "\u0120And", - "1358": "\u0120array", - "2243": "\u00e3\u0123", - "531": "ert", - "2515": "\u0120effect", - "2315": "ases", - "3258": "ley", - "3178": "uk", - "858": "ict", - "2901": "uments", - "1540": "orn", - "3447": "ids", - "3636": "\u0120password", - "1882": "','", - "315": "\u0120of", - "33": "B", - "1198": "\u0120--", - "3873": "FO", - "300": "as", - "1904": "true", - "1990": "\u0120between", - "2682": "\u0120protected", - "2757": "ired", - "3176": "pository", - "1485": "trans", - "1510": "\u0120current", - "2125": "link", - "2778": "\u0120search", - "3383": "\u0120Am", - "4005": ".", - "649": "\u0120can", - "1164": "\u0109\u0109\u0109\u0109\u0109", - "571": "\u0120@", - "3326": "agn", - "3695": "ites", - "2110": "tings", - "3667": "dev", - "1403": "\u0120two", - "62": "_", - "1396": "\u0120number", - "2255": "andom", - "2889": "rist", - "2995": "\u0120eng", - "4006": "ALL", - "1135": "50", - "1753": "ING", - "2711": "ension", - "3825": "pend", - "705": "),", - "2239": "book", - "2298": "ron", - 
"1028": "\u0120te", - "1862": "\u0120support", - "872": "\u0120their", - "1057": "\u0120our", - "608": "====", - "1649": "lete", - "2791": "ether", - "2256": "assert", - "1545": "ample", - "1868": "count", - "1717": "\u0120\u00c3", - "2006": "\u0120IN", - "2555": "\u0120something", - "2383": "\u0120law", - "925": "\u0120string", - "1531": "else", - "786": "abel", - "3560": "\u0120role", - "3666": "\u0120typ", - "3939": "ached", - "1686": "array", - "3281": "\u0120dest", - "2795": "\u0120says", - "3412": "\u0120hold", - "2166": "48", - "329": "ad", - "2432": ")(", - "2015": "\u0120order", - "695": "data", - "3090": "\u0120water", - "3577": "\u0120press", - "1562": "export", - "129": "\u00c5", - "3568": "\u0120deal", - "636": "\u0120get", - "229": "\u0129", - "2401": "_to", - "3065": "ART", - "1078": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2481": "eters", - "662": "\u0120.", - "1814": "ider", - "1491": "\u0120mod", - "3054": "ounc", - "3580": "\u0120See", - "286": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "750": "ally", - "3916": "ales", - "1533": "\u0120(!", - "1171": "red", - "2905": "\u0120Log", - "1274": "\u0120people", - "1896": ".in", - "440": "ect", - "1676": "\u0120Ar", - "2699": "YPE", - "290": "ion", - "3273": "\u0120po", - "1543": "man", - "1669": "IS", - "3650": "\u0120prob", - "729": "ens", - "1948": "\u0120top", - "2664": "body", - "3274": "empty", - "3223": "alt", - "2641": "ts", - "434": "ess", - "2993": "(function", - "3048": "\u0120copy", - "1426": "_F", - "581": "ib", - "2539": "\u0120full", - "1566": "\u0120last", - "3808": "ements", - "726": "self", - "2686": "\u0120address", - "1107": "_n", - "2725": ".find", - "1902": "alth", - "2305": "\u0120month", - "1262": "\u0120org", - "868": "15", - "2883": "\u0120company", - "1316": "day", - "673": ".get", - "438": "and", - "3300": "\u0120money", - "2729": "Out", - "1480": "Error", - "1657": "\u0120});\u010a", - "583": "rou", - "1929": "AG", - "110": 
"\u00b2", - "640": "aram", - "3849": "*)", - "1388": ".add", - "844": ".T", - "1456": "\u0120try", - "3112": "And", - "2214": "ration", - "2403": "\u0120against", - "3039": "room", - "1969": "mand", - "21": "6", - "1828": "\u0120next", - "1922": "\u0120cre", - "1189": "cent", - "1083": "min", - "1010": "ception", - "2702": "pring", - "2811": "Listener", - "271": "\u010a\u010a", - "2451": "\u0109\u0109\u010a", - "558": ".p", - "1352": "ateg", - "3843": "istr", - "1797": "\u0120count", - "606": "one", - "373": "ame", - "3955": "\u0120?>\u010a", - "3444": "ana", - "302": "ct", - "189": "\u0101", - "2534": "iron", - "1705": "ists", - "3823": "\u0120human", - "383": "he", - "47": "P", - "1348": "fore", - "3293": "\u0120recent", - "906": "we", - "983": "ng", - "3957": "Is", - "1604": "ale", - "2809": "(const", - "3431": "ury", - "185": "\u00fd", - "3753": "198", - "2362": "\u0120old", - "3948": "ught", - "1516": "ution", - "3727": "\u0120makes", - "950": "ical", - "1760": "\u0120prot", - "2350": "ething", - "2643": "\u0120might", - "1817": "\u0120check", - "2232": "Qu", - "564": "ok", - "3977": "\u0120variable", - "1151": "='", - "2754": "/s", - "1284": "_st", - "2299": "iving", - "2036": ".x", - "3029": "\u00c3\u00b6", - "723": "add", - "3430": "\u0120price", - "1321": "cond", - "42": "K", - "3333": "eral", - "1780": "ainer", - "1299": "ular", - "2179": "____", - "2247": "\",\"", - "1858": "Path", - "3148": "\u0120mass", - "2467": "\u0120Ad", - "1331": "mp", - "1032": "13", - "2016": "\u0120mov", - "1730": "Size", - "2982": "\u0120Array", - "1800": "mit", - "1182": "CT", - "2149": "Al", - "1483": "][", - "1515": "\u0120log", - "2640": "///", - "2929": "Query", - "502": "\u0120new", - "2983": "42", - "3094": "\u0120step", - "143": "\u00d3", - "2671": "\u0120options", - "1333": "stance", - "2042": "db", - "1554": "\u0120sm", - "2578": "printf", - "2104": "\u0120provid", - "4087": "\u0120pur", - "2023": "pany", - "2773": "\u0120prom", - "2178": "();", - "3151": "omic", - 
"4080": "pc", - "1195": "mer", - "3324": "\u0109t", - "1387": "play", - "1133": "ement", - "43": "L", - "3866": "Vector", - "3807": "nav", - "2628": ".html", - "448": "lass", - "1148": "\u0120what", - "1994": "ene", - "657": "ll", - "1660": "etail", - "1383": "By", - "642": "\u0120*/", - "1759": "urs", - "3788": "\u0120https", - "3646": "most", - "1890": "\u0120same", - "1370": "\u0120par", - "3603": "params", - "1589": "\u0120cr", - "180": "\u00f8", - "3009": "\u0120stop", - "2800": "\u0120rest", - "1955": "eta", - "3691": "\u0120food", - "316": "om", - "2850": "ari", - "251": "\u013f", - "3186": "_info", - "1007": "pec", - "3703": "\u0120bar", - "441": "ke", - "348": "\u0120v", - "1578": "\u0120again", - "14": "/", - "2398": "path", - "613": "ach", - "935": "\u0120String", - "2880": "max", - "1366": "aph", - "2875": "\u0120short", - "597": "\u0120k", - "1424": "\u0120spec", - "3230": "\u0120specific", - "3174": "41", - "2739": "ru", - "1823": ".B", - "1103": "url", - "2271": "\u0120quest", - "2084": "uffer", - "3107": "product", - "1118": "\u0120static", - "471": "\u0120return", - "3932": "\u0120users", - "2512": "\u0120care", - "2769": "_ex", - "1292": "_name", - "268": "en", - "1025": "';\u010a", - "388": "ers", - "3007": "react", - "1747": "Model", - "1143": "ument", - "1774": "45", - "926": "ID", - "490": "\u0120tr", - "2039": "private", - "270": "\u0120th", - "2319": "oh", - "3510": "isc", - "1252": "\u0120Un", - "2970": "58", - "3755": "\u0120\",", - "1063": "\u0120some", - "3533": "_ID", - "3663": "\u0120face", - "1379": "input", - "2123": "Code", - "1555": "\u0120through", - "3726": "images", - "4020": "\u0120cou", - "3125": "ifier", - "2356": "Le", - "3032": "Client", - "2846": "'m", - "2560": "\u0120proble", - "3537": "_se", - "23": "8", - "3015": "ILE", - "370": "ab", - "3718": "\u0120};\u010a\u010a", - "712": "\u0120bo", - "1567": "\u0120event", - "3002": "side", - "3958": "\u0120bad", - "2004": "\u0120ke", - "2461": "ification", - "4030": "region", - 
"3517": "player", - "2118": "\u00e2\u0122\u013e", - "1127": "reak", - "2674": "Point", - "706": "\u0120has", - "1298": "rror", - "248": "\u013c", - "2611": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2864": "UM", - "2262": "\u0120content", - "3892": "\u0120several", - "796": "tp", - "2596": "oney", - "288": "es", - "34": "C", - "2700": "\u0120port", - "3439": "\u0120adv", - "4073": "\u0120Us", - "1487": "\u0120std", - "141": "\u00d1", - "311": "\u0120to", - "3874": "'))", - "3783": "\u0120delete", - "482": "\u0120-", - "4052": "(err", - "3249": "\u0120why", - "1594": "face", - "648": "ie", - "3083": "\u0120OF", - "3240": "\u0120begin", - "3583": "leep", - "594": "\u0120res", - "1610": "reg", - "1354": "ins", - "2186": "},", - "2792": "posit", - "1808": "\u0120NULL", - "3795": "_by", - "113": "\u00b5", - "3853": ".swing", - "360": "ul", - "243": "\u0137", - "483": "ine", - "614": "div", - "3505": "\\\\", - "1945": "Image", - "3163": "event", - "3805": "\u0120air", - "3134": "================================", - "1923": "arn", - "2594": ".out", - "346": "ce", - "3172": "mb", - "942": "son", - "1001": "lement", - "275": "it", - "2472": "\u0120map", - "1240": ")\u010d\u010a", - "2761": "\u0120der", - "1683": "atter", - "2208": "button", - "3302": "bsite", - "2113": "api", - "1973": "\u0120max", - "209": "\u0115", - "2571": "\u0120throw", - "3604": "\u0120actually", - "3549": "\u0120created", - "1637": "ward", - "2018": "\u0120contin", - "1537": "\u0120item", - "1362": "ession", - "2612": "\u0120output", - "3484": "\u00e3\u0124", - "292": "ic", - "2916": "\u0120distrib", - "2979": "ava", - "3035": "erc", - "2164": "args", - "3578": "ABLE", - "2124": ".is", - "3816": "\u0120Red", - "1590": "ars", - "2437": "02", - "1842": "ypes", - "1732": "\u0120person", - "3057": "\u0120appro", - "3618": "\";\u010d\u010a", - "2043": "\"><", + "3162": "structor", + "3273": "\u0120po", + "1719": 
"\u0120param", + "3471": "umer", + "193": "\u0105", + "3249": "\u0120why", + "2407": "cur", + "129": "\u00c5", + "2363": "\u0120book", + "2477": "\u0120pop", + "355": "us", + "2548": "resh", + "1808": "\u0120NULL", + "2793": "\u0120invest", + "58": "[", + "1058": "class", + "3101": "300", + "2987": "ulation", + "3590": "DB", "218": "\u011e", - "3192": "255", - "3773": "cover", - "704": "\u0120out", - "3573": "\u0120(\"", - "850": "error", - "55": "X", - "2454": "ched", - "1417": "\u0120arg", - "3420": "\u0120Trump", - "1933": "\u0120color", - "3411": "\u0120looking", - "1251": "ning", - "3246": "Trans", - "125": "\u00c1", - "3931": "IST", - "202": "\u010e", - "3199": "omain", - "2367": "Hand", - "3562": "\u0120Color", - "1754": "\u0120()", - "2359": "ready", - "3864": "aid", - "3371": "\u0120tell", + "3684": "next", + "248": "\u013c", + "375": "pe", + "1885": "off", + "247": "\u013b", + "172": "\u00f0", + "3775": "\u0120fail", + "1881": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "198": "\u010a", + "2302": "xt", + "811": "ations", + "1110": "Com", + "1800": "mit", + "3883": "Options", + "960": ".b", + "3386": "Inter", + "1331": "mp", + "4027": "\u0120fire", + "3643": "swer", + "2532": "\u0120service", + "1147": "als", + "1603": "\u0120before", + "1354": "ins", + "1599": "ected", + "653": "\u0120un", + "1019": "void", "266": "at", - "3096": "CK", - "3180": "ohn", - "2768": "\u0120following", - "1518": "\u0120see", - "3633": "ony", - "3104": "active", - "3162": "structor", - "2173": "Of", - "1065": "ys", - "3890": "ensor", - "903": "****************", - "1845": "\u0120bool", - "3625": "\u0120les", - "1680": "):", - "1208": "\u0120la", - "2963": "len", - "191": "\u0103", - "1400": "\u0120col", - "2345": "\u00e2\u0122\u0136", - "1242": "SS", - "1114": "17", - "1895": "Array", - "1457": "\u0120now", - "3777": "\u0120component", - "165": "\u00e9", - "3904": "iment", - "733": "\u0120go", - "1983": "uck", - "2297": "\u00d1\u0123", - "312": "\u0120re", 
- "2687": "\u0120uint", - "2281": "_O", - "3566": "\u0120web", - "1014": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "1744": "\u0120que", - "3712": "\u0120params", - "3619": "\u0120understand", - "1349": "ersion", - "1728": "lication", - "1766": "\u0120found", - "3132": "\u0120[]", - "442": "rom", - "622": "\u0120J", - "1180": "\u0120act", - "1961": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "217": "\u011d", - "2289": "\u0120extends", - "297": "\u0120o", - "3869": "\u0120\u00c3\u0142", - "2269": "-f", - "3820": "\u0120pick", - "155": "\u00df", - "2381": "init", - "2354": "lation", - "2448": "\u00c3\u00bc", - "1607": "roid", - "1622": "Key", - "3226": "57", - "1627": "26", - "2465": "my", - "183": "\u00fb", - "3399": "\u0120ben", - "3513": "Be", - "1009": "ove", - "1234": "\u0120under", - "1188": "ient", - "3201": "\u0120away", - "2610": "\u0120ask", - "3669": "AGE", - "550": "\u0120pr", - "2701": "eh", - "1283": "\u0120He", - "3168": "iff", - "2337": ".de", - "1987": "38", - "1847": "\u0120game", - "3131": "\u0120once", - "3909": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2342": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "1179": "\u0120import", - "2740": "ically", - "3005": "\u0120She", - "870": ".h", - "3806": "ops", - "3108": "(){\u010a", - "98": "\u00a5", - "15": "0", - "2707": "bool", - "1575": "sole", - "2466": "\u0120big", - "3137": "\u0120talk", - "3945": "'];\u010a", - "1290": "elp", + "176": "\u00f4", + "503": "\u0120j", "501": "pl", - "73": "j", - "4049": "inding", - "3882": "\u0120session", - "1802": "aking", - "2616": 
"\u0120}\u010d\u010a\u010d\u010a", - "1831": "OT", - "3115": "\u0120times", - "1392": "ments", - "3601": "pg", - "3127": "\u0120lib", - "2056": ".R", - "2835": "\u0120video", - "2941": "\u0120javax", - "439": "\u0120as", - "285": "is", - "1287": "){\u010a", - "1519": "indows", - "272": "\u0120c", - "1435": "ager", - "3602": "})\u010a", - "1712": "time", - "916": ".com", - "994": "\u0120when", - "2720": "uction", - "1214": "(self", - "3534": "97", - "3051": "ernal", - "1072": "du", - "914": "25", - "4084": "cket", - "1992": "enter", - "1314": "\u0120right", - "1542": "Se", - "3947": "There", - "2447": "\u0120project", - "2661": "ugg", - "2908": "ament", - "1210": ".\"", - "1980": "?\u010a\u010a", - "2926": "\u0120initial", - "562": "ption", - "1126": "der", - "675": "aw", - "861": "List", - "2282": "\u0109b", - "929": ".l", - "1041": "100", - "970": "value", - "310": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "3834": "\u0120interface", - "3147": "\");\u010a\u010a", - "3232": "(name", - "1924": "ives", - "2114": "OD", - "2654": "Ad", - "1617": "att", - "668": "te", - "840": ".f", - "1763": "\u0120)\u010a", - "3012": "ention", - "231": "\u012b", - "3556": "while", - "1250": "cript", - "2920": "reated", - "3817": "lin", - "2213": "App", - "2745": "ording", - "3257": "ograph", - "2972": "pose", - "3017": "raft", - "992": "inal", - "2246": "\u0120document", - "1497": "lose", - "2452": ".sh", - "2766": "\u0120bit", - "3705": "comm", - "364": "\u0120'", - "2495": "78", - "831": "fo", - "2517": "_file", - "902": "\u0120which", - "3167": "\u0120mus", - "2991": "\u0120Text", - "2945": "options", - "1232": "':", - "3219": "\u0120render", - "1448": "\u0120fr", - "2133": "\u0120going", - "3880": "-r", - "2879": "section", - "760": "\">", - "477": "\u0120or", - "2463": "\u0120tri", - "2830": "\"))", - "2723": "\u0120link", - "1609": "ik", - "989": "ely", - "981": "pos", - "3698": "\u0120comb", - "3193": "\"/", - "1605": "other", - "1243": 
"\u0120then", - "1626": "thing", - "1681": "Set", - "1167": "\u0120z", - "387": "\u0120be", - "59": "\\", - "2893": "\u0120Be", - "1044": "++", - "2847": "\u0120cert", - "2606": "\u0120mill", - "3918": "password", - "133": "\u00c9", - "2170": "As", - "3631": "_tr", - "899": "vel", - "1872": "std", - "1455": "\u0120most", - "2252": "post", - "3121": "uest", - "3570": "Integer", - "2168": "oci", - "1806": "37", - "949": "\u0120?", - "661": "ary", - "2964": "page", - "538": "\u0120class", - "3433": "roy", - "1665": "\u0120object", + "3687": "\u0120indiv", + "2380": "\u0120three", + "2870": "upport", + "3777": "\u0120component", + "3737": "typ", + "3953": "\u0120games", + "3071": "adding", + "2929": "Query", + "2601": "odes", + "1377": "\u0120dr", + "3286": "\u0120btn", + "1890": "\u0120same", + "3906": "mm", + "2078": "US", + "2039": "private", + "3731": "\u0120abs", + "2723": "\u0120link", + "2262": "\u0120content", + "6": "'", + "3178": "uk", + "234": "\u012e", + "3519": "amera", + "59": "\\", + "697": "Re", + "1724": "********************************", + "1332": "\u0120min", + "1504": "\u0120{\u010a\u010a", + "3852": "any", + "2272": ".ex", + "51": "T", + "2572": "uilder", "308": "\u0120n", - "2718": "ately", - "1583": "\u0120/**\u010a", - "3340": "ued", + "764": "ph", + "143": "\u00d3", + "4067": "________", + "3869": "\u0120\u00c3\u0142", + "1909": ",\u010d\u010a", + "3512": "\u0120div", + "1227": "64", + "558": ".p", + "776": "----------------", + "3888": "Init", + "1613": "},\u010a", + "2462": "\u0109var", + "185": "\u00fd", + "739": ".t", + "2190": "size", + "3318": "\u0120working", + "2053": "els", + "3307": "attern", + "3756": "\u0120device", + "1170": "DE", + "624": "\u0120==", "2928": "\u0120\u00e2", - "2035": "\u0120place", - "304": "\u0120in", - "1363": ">\u010a\u010a", - "2737": "\u0120including", - "4054": "One", - "2636": "500", - "1270": "'\u010a", - "2776": ".pro", - "3361": "\u0120special", - "3809": "(id", - "2308": "bar", - "545": "();\u010a", - 
"2327": "UI", - "777": "19", - "514": "\u0120le", - "2638": "\u0120Set", - "1475": "\u0120every", - "1074": "line", - "3855": "\\u", - "860": "View", - "2444": "anel", - "82": "s", - "592": "ust", - "922": "\u0120about", - "3030": "avig", - "3290": "\u0120command", - "770": "ree", - "175": "\u00f3", + "1092": "loc", + "3079": "itional", + "430": "\u0120that", + "2016": "\u0120mov", + "2533": "\u0120since", + "2966": "_str", + "3796": "mail", + "3736": "Child", + "2201": "no", + "2304": "05", + "2228": "=>", + "469": "\u0120E", + "3527": "ipe", + "2182": "oute", + "2006": "\u0120IN", + "368": "()", + "152": "\u00dc", + "3661": "dated", + "3742": "iles", + "1886": "\u0120err", + "3338": "\u0120dat", + "3595": "pond", + "3436": "\u0120''", + "1381": "########", + "3119": "\u0120NS", + "3554": "/b", + "568": "\u0120he", + "3982": "_back", + "2028": "This", + "233": "\u012d", + "3459": "\u0120Form", + "1614": "\u0120state", + "1226": "\u0120We", + "1402": "ember", + "2166": "48", + "1625": "pty", + "591": "\u0120=>", + "1633": "\u0120very", + "2248": "tribute", + "774": "eth", + "1385": "ities", + "1621": "\u0120\"\"", + "84": "u", + "2410": "\u0120power", + "3465": "\u0120task", + "2099": "ever", + "2141": "ailable", + "62": "_", + "667": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "1493": "\u0120error", + "4037": "\u0120token", + "935": "\u0120String", + "1671": "Wh", + "2464": "chem", + "1974": "land", + "1858": "Path", + "1991": "Get", + "3726": "images", + "2981": ".label", + "513": "\u0120se", + "3908": "ursor", + "3085": "\u0120offer", + "987": "\u0120{\u010d\u010a", + "3466": "ocus", + "3813": "\u0120location", + "2395": "\u0120spe", + "3333": "eral", + "3715": "ugin", + "926": "ID", + "2867": "\u0120clear", + "118": "\u00ba", + "224": "\u0124", + "1214": "(self", "754": "bo", + "1604": "ale", + "2678": "\u0120small", + "2985": "opt", + "106": "\u00ae", + "3365": 
"\u0120Eng", + "3366": "ixed", + "3220": "_value", + "35": "D", + "1827": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a", + "3428": "\u0120low", + "3353": "Control", + "394": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "1954": "90", + "1752": "less", + "3329": "aring", + "1964": "her", + "3849": "*)", + "609": "name", + "1658": "fr", + "3067": "\u0120da", + "783": "ton", + "1535": "(),", + "412": "out", + "69": "f", + "1569": "\u00ef\u00bc", + "1084": "\u0120\u0120\u0120\u0120\u010a", + "4045": ".Control", + "3240": "\u0120begin", + "939": "sh", + "2460": "All", + "2819": "\u0120values", + "3724": "Height", + "1876": "Form", "2357": "\u00c3\u00a4", - "2932": ".con", - "1756": "ync", - "137": "\u00cd", - "1112": "UL", - "1703": "\u0120:=", - "862": "\u0109return", - "2100": "\u0120So", - "1136": "ush", - "1170": "DE", - "961": "\u0120part", - "3470": "\u0109break", - "149": "\u00d9", - "3141": "eb", - "1829": "IM", - "943": "new", - "3526": "\u0120++", - "1662": "aj", - "2206": "\u0120Me", - "3782": "\u0120came", - "643": "ER", - "6": "'", - "1781": "\u0120think", - "2593": "\u0120eas", - "797": "eg", - "2770": "SON", - "2532": "\u0120service", - "403": "port", - "1666": "\u0120As", - "1042": "ponse", - "4015": "\u0120ST", - "1597": "Button", - "3282": "\u00c3\u00a3", - "3831": "\u0120strong", - "1671": "Wh", - "1634": "ces", - "3656": ".Collections", - "1106": "_P", - "1212": "\u0120start", - "244": "\u0138", - "1217": "\u0120user", - "605": "10", - "116": "\u00b8", - "2600": "ublish", + "211": "\u0117", + "3772": "\u0120media", + "1024": "\u0120&&", + "2814": "\u0120mult", + "3779": "\u0120added", + "2129": "\u0109int", + "2320": "-p", + "2118": "\u00e2\u0122\u013e", + "3966": "\u0120needs", + "1990": "\u0120between", + "3502": "-in", + "810": "\u0120more", + "2191": "ism", + "2959": "root", + "318": "im", + "3373": "\u0120select", + "3802": "\u0120claim", + "2261": "Add", + "1279": "ector", + "1447": 
"\u0120+=", + "1884": "\u0120those", + "2603": "ply", + "803": "\");\u010a", + "2894": ".value", + "2992": "actory", + "3929": "\u0120-->", + "2025": "head", + "114": "\u00b6", + "598": "ans", + "53": "V", + "3555": "(!", + "3219": "\u0120render", + "2437": "02", + "1944": "oot", + "3394": "ino", + "674": "\u0120#", + "912": "\u0120no", + "4024": "\u0120went", + "3873": "FO", + "560": "ice", + "3129": "translation", + "2926": "\u0120initial", + "3256": "arning", + "188": "\u0100", + "1646": "\u0120model", + "2417": "\u0120Im", + "790": "ST", + "1153": "\u0120pos", "997": "):\u010a", - "1822": "><", - "469": "\u0120E", - "2637": ".,", - "3802": "\u0120claim", - "2816": "\u0120site", - "2333": "\u0120incre", - "2438": "rl", - "3058": "thers", - "1319": "(p", - "2176": "ee", - "2397": "65", - "1798": "\u0120inst", - "3197": "\u0120plan", - "246": "\u013a", - "1219": "\u0120Com", - "596": "'s", - "2777": "\u0120boolean", - "980": "oll", - "1997": "Node", - "2812": "arge", - "4071": "But", - "2121": "ght", - "3920": "\u0120esc", - "2157": "\u0109m", - "1684": "\u0120view", - "3912": "ithub", - "885": ".A", - "1288": "\u0120should", - "2588": "location", - "2617": "ination", - "834": "\u0120dis", - "2522": "\u0120Sc", - "3969": "\u0120exec", - "1677": "mon", - "3378": "IP", - "1204": "[\"", - "194": "\u0106", - "3893": "ii", - "1050": "\u0120Re", - "357": "\u0120st", - "278": "al", - "4023": "ival", - "102": "\u00a9", - "2841": "erved", - "431": "****", - "2184": "chool", - "1156": "\u0120sc", - "2988": "\u0120func", - "3170": "author", - "166": "\u00ea", - "2172": "ado", - "3942": "\u0120'<", - "8": ")", - "2153": "\\\"", - "1508": "span", - "734": "\u0120function", - "2492": "aces", - "3423": "rt", - "3992": "btn", - "2802": "\u0120interest", - "1875": "\"\u010a\u010a", - "1639": "ited", - "68": "e", - "2138": "\u0120fam", - "486": "ere", - "4036": "\u0120received", - "4026": "Link", - "1530": "label", - "12": "-", - "1592": "\u0120tem", - "3323": "only", - "1918": 
"iness", - "3585": "\u0120points", - "652": "pr", - "433": "\u0120it", - "3994": "Attribute", - "2102": ".append", + "3068": "etch", + "592": "ust", + "1567": "\u0120event", + "3657": "$this", + "3531": "agement", + "304": "\u0120in", + "2059": "Sh", + "1384": "\u0120aw", + "1892": "uss", + "4019": "():\u010a", + "1586": "_D", + "1471": "(n", + "2656": "used", + "895": "========", + "156": "\u00e0", + "2212": "\u0120around", + "2116": "iven", + "3040": ".Name", + "2788": "bed", + "975": "14", + "979": "reate", + "963": "urrent", + "1430": "eter", + "2009": "\u0120Le", + "3696": "dr", "1971": "OL", - "1476": "ustom", - "2286": "etails", - "781": "\u0120lo", - "2275": "\u00d0\u00be\u00d0", - "2672": "\u0120Gener", - "1577": "(int", - "645": "les", - "2296": "cale", - "1122": "ian", - "512": ":\u010a", - "3803": "'=>", - "1985": "test", - "452": "\u0120N", - "454": "op", - "3157": "\u0120market", + "126": "\u00c2", + "3423": "rt", + "3372": "avor", + "2474": "cope", + "3055": "do", + "737": "\u0120im", + "1317": "\u0120long", + "2162": "\u0120home", + "3083": "\u0120OF", + "3884": "uggest", + "1926": "ideo", + "1104": "ific", + "745": "cri", + "4009": "\u0120network", + "2594": ".out", + "2428": "ique", + "2069": "(e", + "3876": "ising", + "1839": "\u0120href", + "2685": "Def", + "3745": "values", + "1401": "\u0120key", + "3815": "\u0120doing", + "440": "ect", + "271": "\u010a\u010a", + "3532": "lev", + "2809": "(const", + "3504": "\u0120cover", + "2734": "enn", + "4013": "NU", + "1952": "\u0120On", + "1780": "ainer", + "3303": "\u0109p", + "2202": "core", + "2702": "pring", + "1259": "ream", + "1057": "\u0120our", + "1276": ".n", + "2260": "atabase", + "399": "urn", + "1835": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2704": "\u0120status", + "3213": "izer", + "45": "N", + "3383": "\u0120Am", + "1338": "util", + "893": "\u0120man", + "3422": "ides", + "3940": "\u0120started", + "2282": "\u0109b", + "1574": "icense", + "2111": 
"ved", + "2514": "\u0109private", "3044": "\u0120condition", - "2502": "ogle", - "3177": "\u0120light", - "2274": "\u0120develop", - "680": "app", - "527": "\u0120are", - "1659": "formation", - "1770": ".e", - "1423": "State", + "3381": "\u0120font", + "1136": "ush", + "2386": "email", + "3157": "\u0120market", + "3921": ".write", + "2525": "\u0109else", + "570": ").", + "438": "and", + "2365": "bug", + "3721": "\u0120ev", + "1978": "leg", + "4034": "\u0120visit", + "1630": "\u0120X", + "3295": "vector", + "2286": "etails", + "2847": "\u0120cert", + "3925": "\u0120history", + "2271": "\u0120quest", + "2605": "\u0120prov", + "3855": "\\u", + "453": "\u0120al", + "2314": "\u0120Ind", + "546": "ont", + "2824": "\u0120beh", + "1760": "\u0120prot", + "1557": "che", + "1364": "\u0120she", + "1791": "uc", + "1207": "\u0120sub", + "3492": "\u0120word", + "2288": "\u0120too", + "628": "\u0120pl", + "2180": ".ph", + "1275": "index", + "2581": ".create", + "1121": "\u0120result", + "484": "ill", + "3473": "then", + "3564": "\u0120porn", + "343": "ig", + "918": "_l", + "3983": "first", + "2131": "55", + "3275": "artment", + "500": "ype", + "1414": "ger", + "1199": "Text", + "3990": "find", + "2729": "Out", + "2229": "aterial", + "1175": "(t", + "2802": "\u0120interest", + "2590": "model", + "1486": "\u0120point", + "3517": "player", + "516": ".s", + "3617": "Pr", + "1071": "\u0120said", + "864": "\u0120pre", + "3978": "iet", + "2635": "lex", + "44": "M", + "3098": "\u00e0\u00b8", + "692": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "3912": "ithub", "3821": "\u0120watch", - "142": "\u00d2", - "2489": "\u0120match", - "1901": "\u0120Z", - "3702": "=True", - "2470": "num", - "1618": "\u0120here", - "3319": "\u0120query", - "1344": "\u0120fl", - "3308": "\u0120Class", - "1859": "nection", - "457": "\u0120}\u010a", - "3660": "(false", - "1569": "\u00ef\u00bc", - "3320": "\u0120Br", - "2668": "ird", - "2076": "ags", - "764": "ph", - "3496": "\u00c3\u00a3o", - "3272": 
".model", - "540": "og", - "1109": "\u0120than", - "2973": "irtual", - "3133": "center", - "2662": "\u0120/>\u010a", - "1907": "rid", - "2601": "odes", - "3341": "\u0120Car", - "2978": "\u0120school", - "367": "ation", - "3213": "izer", - "2633": "\u0120govern", - "3799": "\u0120Dec", - "2852": "ais", - "3218": "CL", - "3489": "\u0120Qu", - "322": "//", - "3286": "\u0120btn", - "4062": "\u0120quick", - "2762": "resent", - "2639": "urch", - "816": "\u0120Y", - "3061": "\u0120Fl", - "1402": "ember", - "156": "\u00e0", - "912": "\u0120no", - "3380": "esc", - "2525": "\u0109else", - "3482": "patch", - "1587": "\u0120does", - "255": "\u0143", - "2058": "iter", - "3288": "\u0120sent", - "1154": "ages", - "897": "rop", - "737": "\u0120im", - "1161": "(s", - "1120": "\u0120just", - "2614": "68", - "3321": "\u0120window", - "3746": "oss", - "671": "\u0120ab", - "1381": "########", - "2667": "_set", - "2022": "\u0120className", - "3900": "\u0120il", - "3359": "aged", - "1129": "://", - "1964": "her", - "56": "Y", - "319": "\u010d\u010a", - "2603": "ply", - "1737": "En", - "3790": "\u0120handle", - "418": "pt", - "3264": "98", + "1456": "\u0120try", "3635": "umb", - "793": "RE", - "843": "32", - "856": "\u0120my", - "1300": "\u00e3\u0122", - "884": "com", - "3885": "\u0120others", + "2681": "anguage", + "1051": "\u0120were", + "1485": "trans", + "548": "are", + "2503": "\u0120sit", + "729": "ens", + "3683": "Cell", + "2135": "_E", + "2872": "\u0120row", + "2426": "otal", + "1851": "velop", + "1744": "\u0120que", + "626": "St", + "736": "ivate", + "3144": "ecute", + "1997": "Node", + "2087": "Manager", + "1530": "label", + "976": "\u0120!=", + "1653": "vert", + "3575": "\u0120problem", + "3238": ".'", + "2443": "update", + "2086": "lobal", + "2767": "ponents", + "2705": "06", + "741": ");\u010d\u010a", + "4002": "ARR", + "2278": "\u0120poss", + "223": "\u0123", + "2217": "\u0120image", + "631": "put", + "2195": "...\u010a\u010a", + "2436": "ences", + "1359": ",\"", + "2276": 
"Map", + "3439": "\u0120adv", + "1194": "\u0120print", + "3567": "\u0120dev", + "2296": "cale", + "3150": ".class", + "590": "oid", + "3641": "\u0120False", + "3135": "\u0120results", + "2764": "\u0120valid", + "2784": "aut", + "556": "ire", + "289": "\u0120w", + "777": "19", + "2980": "\u0120consider", + "3115": "\u0120times", + "3968": "Fl", + "4062": "\u0120quick", + "2747": "orage", + "3793": "emplate", + "474": "ack", + "660": "ated", + "48": "Q", + "2778": "\u0120search", "1097": "\u0120am", - "905": "\u0120false", - "699": "ild", - "1038": "\u010a\u010a\u010a\u010a", - "2814": "\u0120mult", - "742": "\u0120void", - "890": "_b", - "551": "\u0120:", - "1357": "\u0120Int", - "1386": "uff", - "1884": "\u0120those", - "532": "ial", - "3728": "\u0120global", - "655": "ber", - "3171": "ounds", - "3599": "('#", - "3504": "\u0120cover", - "4057": "fort", - "46": "O", - "1416": "\u0120/*", - "731": "ake", - "3468": "\u0120Min", - "496": "str", - "1785": "FF", - "2348": ".php", - "1145": "],", - "1697": "ational", - "2789": "\u0109case", - "1376": "\u0120form", - "810": "\u0120more", - "3241": "\u0120software", - "1912": "\u0120group", - "1758": "35", - "2406": "_L", - "2716": "ERR", - "654": "row", - "3951": "icult", - "32": "A", - "3704": "_SE", - "4037": "\u0120token", - "424": "ver", - "1422": "\u0120dif", - "475": "import", - "317": ");\u010a", - "2934": "Url", - "1581": "Index", - "1052": "\u0120file", - "181": "\u00f9", + "79": "p", + "2562": "mt", + "2193": "tract", + "3343": "\".", + "3285": "\u0120sever", + "3360": "Null", + "2634": "}\u010d\u010a\u010d\u010a", + "1820": "the", + "2398": "path", + "4080": "pc", + "3854": "\u0120pack", + "1473": ":\u010a\u010a", + "3500": "}}", + "443": "\u0120//", + "603": "\u0120us", + "1844": "Un", + "1572": "ines", + "2146": "(&", + "3609": "unic", + "3025": "\u0120able", + "3039": "room", + "4051": "\u0120decl", + "2786": "ality", + "1307": "arget", + "1220": "its", + "1837": "HE", + "1215": "ner", + "1452": "ULL", + 
"2869": "\u0120techn", + "3608": "\u0120ident", + "1063": "\u0120some", + "1511": "\u0120used", + "1829": "IM", + "3463": "\u0120thought", + "1068": "view", + "1629": "\u0120run", + "400": "\u0120$", + "3069": "entity", + "1862": "\u0120support", + "162": "\u00e6", + "3863": "iple", + "2216": "\u0120really", + "80": "q", + "3122": "\u0120bas", + "3521": "\u0120exc", + "1568": "try", + "1004": "[i", + "1260": "ible", + "4070": "\u00e2\u0122\u013bve", + "3000": "\u0120db", + "3904": "iment", + "2596": "oney", + "717": "12", + "3735": "\u0120Pol", + "3868": "\u0120wait", + "381": "----", + "1793": "odule", + "4043": "\u0120ess", + "3804": "\u0120Sub", + "189": "\u0101", + "823": "ield", + "2509": "ought", + "887": "\u0120id", + "1594": "face", + "3430": "\u0120price", + "2064": "\u0120sw", + "971": "cho", + "1001": "lement", + "3740": "OUT", + "2359": "ready", + "151": "\u00db", + "1678": "til", + "3773": "cover", + "2955": "\u0120design", + "2171": "ret", + "2626": "\u0120business", + "194": "\u0106", + "2422": "quals", + "2": "#", + "4050": "Var", "2825": "\u0120'./", - "245": "\u0139", - "1714": "amespace", - "451": "de", - "40": "I", - "4039": "\u0120anim", - "307": "id", - "1218": "\u0120ent", - "430": "\u0120that", - "303": "nd", - "3988": "uly", + "3640": "ged", + "925": "\u0120string", + "727": "ery", + "2333": "\u0120incre", + "1358": "\u0120array", + "1437": "\u0120br", "2855": "(R", - "1098": "rol", - "3925": "\u0120history", - "1830": "\u00d1\u0124", + "898": "public", + "1439": "ures", + "1422": "\u0120dif", + "2173": "Of", + "1000": "indow", + "2199": "\u0120page", + "3515": "\u0120having", + "1211": "Object", + "2048": "table", + "1119": "roll", + "1515": "\u0120log", + "3482": "patch", + "476": "ublic", + "419": "==", + "1730": "Size", + "3174": "41", + "1283": "\u0120He", + "3118": "\u0120present", + "1280": "\u0120ind", + "2355": "\u0120\u0120\u010a", + "294": "\u0120d", + "1713": "oken", + "787": "oint", + "3499": "chema", + "2905": "\u0120Log", + 
"1323": "\u0120rem", + "2823": "icro", + "1906": "\u0120mark", + "2472": "\u0120map", + "183": "\u00fb", + "2524": ".Size", + "3669": "AGE", + "230": "\u012a", + "3747": "/**", + "1679": "();\u010d\u010a", + "1738": "File", + "701": "\u0120your", + "3397": "andle", + "1672": "\u0120ins", + "2306": "\u0120happ", + "413": "turn", + "4052": "(err", + "607": "ail", + "3604": "\u0120actually", + "3545": "ral", + "203": "\u010f", + "1436": "\u0120could", + "1869": "miss", + "1660": "etail", + "3754": "\u0120news", + "465": "\u0109\u0109\u0109\u0109", + "4069": "INE", + "1982": "ook", + "2068": "\u0120program", + "2771": "\u0120sure", + "3623": "\u0120Co", + "1435": "ager", + "1138": "arr", "752": "ep", - "2302": "xt", - "3001": "!!", - "3335": "\u0120record", - "232": "\u012c", - "3313": "\u0120auto", - "323": "\u0120and", - "242": "\u0136", - "2971": "/c", - "324": "ur", + "652": "pr", + "3429": "\u0120para", + "3690": "amed", + "1061": "Data", + "3455": "change", + "3003": "\u0120init", + "943": "new", + "1181": "\u0120char", + "2886": "\u0120gre", + "2927": ".js", + "2600": "ublish", + "1564": "uto", + "1727": "alk", + "3419": "\u0120pot", + "1959": "Over", + "3175": "width", + "2458": "\u0120tot", + "3268": "\u0120rights", + "415": "\u0120\u0120\u0120\u0120\u0120", + "491": "elf", + "1533": "\u0120(!", + "3557": "ias", + "1303": "ting", + "3145": "ols", + "1715": "\u0120request", + "1540": "orn", + "2446": ".to", + "1722": "\u0120echo", + "622": "\u0120J", + "3676": "\u0120React", + "2346": "\u0120coun", + "2274": "\u0120develop", + "3021": "\u0120love", + "2211": "\u0120ca", + "2014": "Context", + "433": "\u0120it", + "417": "res", + "278": "al", + "1913": "\u0120fin", "1382": "order", - "2034": "sg", - "3343": "\".", - "3375": "imum", - "3572": ".on", - "2882": "\u0120override", - "1206": "\u0120cl", - "3417": ".Data", - "1553": "iver", - "696": ")\u010a\u010a", - "3144": "ecute", - "2704": "\u0120status", - "2150": "title", - "456": "get", - "498": "\",", - "876": 
"gh", - "1071": "\u0120said", - "3837": "_at", - "3480": "\u0120switch", - "848": "log", - "780": "atic", - "1892": "uss", - "2417": "\u0120Im", - "2290": "\u0120None", - "4051": "\u0120decl", - "2803": "Arg", - "3554": "/b", - "377": "ck", - "263": "on", - "1865": "\").", - "3252": "ging", - "3351": "\u0120move", - "4035": "olve", - "1813": "org", - "1458": "rough", - "2191": "ism", - "1341": "IC", - "887": "\u0120id", - "894": "ry", - "3116": "\u0120four", - "173": "\u00f1", - "3732": "\u00d0\u00b0\u00d0", - "1411": "ood", - "1603": "\u0120before", - "2903": "Number", - "610": "\u0120str", - "2652": "\u0120;", - "3672": "QL", - "1650": "\u0120call", - "1889": "code", - "2685": "Def", - "1799": "char", - "2589": "07", - "2309": "default", - "3613": "\u0120conc", - "1974": "land", - "3398": "_user", - "604": "pp", - "3365": "\u0120Eng", - "1523": "\u0120down", - "990": "\u0120work", + "1963": "\u0120index", + "680": "app", + "1174": "\u0120,", + "2779": "ken", + "4047": "ball", + "335": "\u0120}", + "1123": "idth", + "3027": "AX", + "2896": "ottom", + "1756": "ync", + "3244": "\u00c3\u00b3n", + "67": "d", + "904": "\u0120any", + "2913": "IZ", + "3757": "cel", + "1976": "raph", + "1274": "\u0120people", + "3621": "\u0120happen", + "1479": "cription", + "3566": "\u0120web", + "1423": "State", + "1232": "':", + "432": "\u0120R", + "279": "\u0120the", + "3369": "\u0120eff", + "4017": "\u0120limit", + "429": "=\"", + "3730": "':\u010a", + "449": "\u0120with", + "2221": "tra", + "184": "\u00fc", + "2898": "PT", + "1552": "_h", + "1585": "Event", + "2939": "\u0120Fr", + "2266": ".\"\u010a\u010a", + "722": "_m", + "3743": "\u0120-->\u010a", + "2527": "start", + "870": ".h", + "665": "\u0120en", + "1589": "\u0120cr", + "1363": ">\u010a\u010a", + "1421": "\u0120rec", + "2394": "\u0120Pr", + "163": "\u00e7", + "3551": ".state", + "3279": "\u0120Ed", + "1190": ".j", + "2230": "iled", + "2534": "iron", + "2189": "())", + "1962": "not", + "1689": "\u0120bel", + "3342": "emp", + 
"395": "ass", + "1353": "\u0120loc", + "3490": "\u00e3\u0122\u0124\u010a\u010a", + "991": "\u0120em", + "2709": "\u0120os", + "2671": "\u0120options", + "1158": "\")\u010a", + "2393": "\u0120console", + "2349": "\u0120change", + "2019": "\u0120say", + "3195": "way", + "3243": "\u0120win", + "1821": "ants", + "2367": "Hand", + "2592": "\u0120source", + "2846": "'m", + "3991": "_string", + "1731": "rawing", + "2742": "lean", + "2438": "rl", + "650": "\u0120V", + "161": "\u00e5", + "2425": "\u0120iss", + "860": "View", + "3112": "And", + "1457": "\u0120now", + "896": "roup", + "2801": "info", + "2933": "au", + "2347": "obj", + "3650": "\u0120prob", + "531": "ert", + "1524": "\u0120even", "526": "ang", - "148": "\u00d8", - "3776": "\u0120black", - "1724": "********************************", - "3347": "\u0120past", - "2101": "aining", - "2853": "\u0120cost", - "827": "####", - "2998": "\u0120En", - "1851": "velop", - "2836": "adi", + "1861": "').", + "950": "ical", + "2290": "\u0120None", + "3093": "ored", + "1376": "\u0120form", + "910": "nder", + "2938": "ORT", + "1062": "\u0120val", + "2822": "No", + "1983": "uck", + "3390": "iddle", + "1528": "(d", + "2988": "\u0120func", + "2821": "pen", + "142": "\u00d2", + "1173": "\u0120},\u010a", + "3047": ":\"", + "2264": "signed", + "2313": "({\u010a", + "4008": "ili", + "2130": "ift", + "262": "\u0120\u0120\u0120", + "235": "\u012f", + "966": "30", + "838": "val", + "2297": "\u00d1\u0123", + "296": "\u0120m", + "2646": "\u0120never", + "1042": "ponse", + "1371": "unc", + "5": "&", + "1390": "\u0120want", + "468": "\u0120W", + "756": "',\u010a", + "3716": "fa", + "942": "son", + "3637": "\u0120store", + "495": "ult", + "323": "\u0120and", + "1652": "\u0120cal", + "3451": "\u0120\u00e2\u0122\u013a", + "3143": "OP", + "2326": "\u0120US", + "612": "\u0120&", + "3377": "Builder", + "102": "\u00a9", + "3415": "vo", + "748": "\u0109if", "1784": "/**\u010a", - "1147": "als", - "2732": "Page", - "3761": "@\"", - "1019": "void", - 
"769": "Id", - "776": "----------------", - "345": ",\u010a", - "70": "g", - "3967": "\u0120known", - "2533": "\u0120since", - "1524": "\u0120even", - "3173": "_key", - "205": "\u0111", - "197": "\u0109", - "1883": "\u0120App", + "1922": "\u0120cre", + "3348": "(g", + "3043": "ike", + "3212": "imer", "836": "\u0120name", - "461": "ore", - "2497": "local", - "2813": "77", - "3848": "VER", - "2898": "PT", - "877": "LE", - "2128": "\u0120team", - "4032": "Ref", - "2937": "\u0120instance", - "301": "el", - "2994": "null", - "3970": "\u0120seen", - "204": "\u0110", - "464": ";\u010d\u010a", - "1658": "fr", - "2482": "ules", - "1425": "ride", - "1477": "lient", - "214": "\u011a", - "3664": "CTION", - "1692": "Man", - "459": "\u0120an", - "569": "ard", - "1094": "ody", - "1559": "ouse", - "293": "\u0120b", - "2424": "_size", - "273": "le", - "1420": "(this", - "1013": "ader", - "3858": "ctx", - "3486": "itive", - "3536": "ades", - "1081": "\u0120comm", - "3762": "\u0109\u0120", - "1073": "of", - "1152": "olum", - "3775": "\u0120fail", - "3467": "\u0120DE", - "1873": "ience", - "355": "us", - "2019": "\u0120say", - "3382": "\";\u010a\u010a", - "2197": "\u0120beg", - "1061": "Data", - "3322": "auth", - "49": "R", - "956": "'t", - "2866": "ights", - "2863": "\u0120stat", - "711": "\u0120def", - "1976": "raph", - "2878": "Group", - "2235": "map", - "2848": "\u0120prof", - "3764": "(value", - "1574": "icense", - "1805": "vice", - "412": "out", - "19": "4", - "3940": "\u0120started", - "2975": "_url", - "1611": "\u0120De", - "757": "\u0120me", - "284": "\u0120=", - "89": "z", - "1011": "ength", - "203": "\u010f", - "917": "\u0120app", - "2624": "\u0120disc", - "1447": "\u0120+=", - "971": "cho", - "3372": "avor", - "720": "\u0120\u010a", - "2953": "admin", - "2552": "yles", - "1674": "\u0120java", - "2165": "meric", - "3661": "dated", - "16": "1", - "2623": 
"\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "18": "3", - "1100": "yp", + "4091": "anger", + "3425": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2796": "mission", + "387": "\u0120be", + "1595": "\u0120`", + "236": "\u0130", + "2110": "tings", + "1267": "_M", + "2411": "ium", + "4003": "ictionary", + "2079": "request", + "1807": "_data", + "3237": "\u0120express", + "254": "\u0142", + "1702": "_B", + "2568": "Count", + "1723": "function", + "4073": "\u0120Us", + "361": "ue", + "40": "I", + "2371": "04", + "929": ".l", + "2312": "ln", + "3808": "ements", + "131": "\u00c7", + "2513": "state", + "2222": "ingle", + "3346": "\u0120percent", + "3974": "\u0120live", + "2185": "www", + "1555": "\u0120through", + "2580": "js", + "3334": "rior", + "3086": "\u0120});\u010a\u010a", + "2353": "los", + "2526": "])", + "3072": "\u0120option", + "3572": ".on", + "3890": "ensor", + "1450": "\u0120hand", + "2667": "_set", + "164": "\u00e8", + "492": "ight", + "2538": "iction", + "932": "_C", + "708": "so", + "3511": "Bar", + "3881": "\u0120apply", + "3859": "\u0120unsigned", + "857": "pan", + "599": "yst", + "1550": "\u0120did", + "4081": "loor", + "825": "Cont", + "2655": "\u0120intern", + "555": "\u0120by", + "767": "\u0120var", + "2565": "\u0120block", + "2058": "iter", + "4057": "fort", + "1010": "ception", + "113": "\u00b5", + "3914": "cast", + "2439": "ons", + "2374": "System", "886": "\";\u010a", - "2644": "istory", - "2032": "loy", - "2379": "angu", - "2064": "\u0120sw", - "703": "($", - "2313": "({\u010a", - "419": "==", - "2304": "05", - "788": "ous", - "3422": 
"ides", - "2619": "atrix", - "3930": "\u0120du", - "2105": "\u0120assert", - "3552": "\u0120host", - "1177": "');\u010a", - "1092": "loc", - "1193": "\u0120only", - "2554": "pi", - "2625": "([", - "2163": "\u0120left", - "3253": "\u0109g", - "407": "ction", - "3390": "iddle", - "3027": "AX", - "957": "ault", - "3964": "(:", - "1834": "content", - "408": "end", - "3231": "base", - "1585": "Event", - "1641": "itch", - "1040": "const", - "1039": "lock", - "1960": "itor", - "952": "uth", - "3913": "cer", - "1631": "ym", - "2049": "idget", - "2147": "\u0120AN", - "3591": "++;\u010a", - "2818": "EL", - "2250": "Log", - "2144": "\u0120fact", - "2091": "option", + "3518": "abs", + "1994": "ene", + "3076": "69", + "3041": "\u0120give", + "2908": "ament", + "422": "\u0120if", + "445": "\u0120L", + "2557": "uint", + "3227": "(),\u010a", + "1166": "ponent", + "1148": "\u0120what", "968": "(f", - "1643": "ife", - "500": "ype", - "1399": "60", - "530": "_t", - "3815": "\u0120doing", - "3123": "echo", - "484": "ill", - "3615": "ignment", - "599": "yst", - "1963": "\u0120index", - "3734": "\u0120except", - "817": "use", - "3511": "Bar", - "2499": "};\u010a", - "2547": "\u0120body", - "2943": "apter", - "2098": "\u0120ref", - "1101": "\u0120also", - "3255": "offset", - "2436": "ences", - "415": "\u0120\u0120\u0120\u0120\u0120", - "2493": "\u0120Cl", - "4033": "\u0120official", - "1936": "rib", - "3477": "\u0120review", - "2093": "\u0120===", - "1359": ",\"", - "3455": "change", - "2227": "arm", - "2358": "({", - "2132": "\u0120second", - "3681": ".put", - "3438": "Ph", - "1437": "\u0120br", - "1183": "\u0120Tr", - "967": "enc", - "3020": "elper", - "3606": "Empty", - "1661": "\u0120num", - "3427": "go", - "3304": ".\u010d\u010a", - "2484": "source", - "1821": "ants", - "732": ".C", - "3624": "SC", + "3723": "\u0120United", + "2826": "acy", + "140": "\u00d0", + "3406": "(j", + "2520": "For", + "3698": "\u0120comb", + "3161": "\u0120With", + "2032": "loy", + "2031": "70", + "2332": 
"sw", + "1191": "lection", + "606": "one", + "1038": "\u010a\u010a\u010a\u010a", + "1841": "\u0120car", + "2930": "\u0120online", + "974": "ose", + "1544": "27", + "2325": "_v", + "2815": "\u0120die", + "1667": "\u0120years", + "1293": "erson", + "291": "ed", + "2739": "ru", + "221": "\u0121", + "405": "->", + "365": "ew", + "2145": "\u0120vis", + "1328": "\u0120__", + "2022": "\u0120className", + "1903": "\u0120made", + "2763": "\u0120lot", + "2150": "title", + "2899": "status", "2194": "ane", - "1365": "\"]", - "3736": "Child", - "277": "ar", - "1620": "\u0120final", - "799": "ude", - "3963": "avel", - "2868": "uro", - "3189": "\u0120wr", "78": "o", - "3527": "ipe", - "1801": ".id", + "1992": "enter", + "2912": "icy", + "3817": "lin", + "532": "ial", "1624": "\u0120del", - "1082": "ise", - "2990": "\u0120\"\\", - "3113": "\u0120display", - "3088": "egin", - "2819": "\u0120values", - "3711": "FA", - "3542": "\u0120'@", + "2961": "\u0120arr", + "1081": "\u0120comm", + "1596": "_A", + "3800": "(true", + "3211": ".v", + "1336": "wh", + "2392": "tes", + "3183": "apper", + "619": "ould", + "2608": "])\u010a", + "625": "ign", + "3582": "\u0120though", + "3236": "year", + "325": "se", + "1198": "\u0120--", + "1039": "lock", + "693": "return", + "3895": "IO", + "3431": "ury", + "1241": "\u0109public", + "726": "self", + "267": "st", + "3234": "\u0120Do", + "3688": "tributes", + "487": "::", + "1484": "99", + "771": "ink", + "2497": "local", + "872": "\u0120their", + "3541": "\u0120fre", + "2034": "sg", + "384": "\u0120e", + "2400": "dition", + "3485": "\u0120above", + "2108": "usiness", + "735": "\u0120K", + "1309": "olumn", + "423": "\u0120D", + "1916": "eger", + "3642": "Us", + "3915": "From", + "1856": "\u0120Pl", + "282": "\u0120f", + "843": "32", + "3434": "\u0120'/", + "3654": "\u0120${", + "3171": "ounds", + "3438": "Ph", + "408": "end", + "3851": "\u0120application", + "1431": "\u00e2\u0122\u013bt", + "1501": "\u0120show", + "421": "\u0120wh", + "2977": "Http", + 
"3484": "\u00e3\u0124", + "2725": ".find", + "2911": "\u0120children", + "2564": "\u0120...", + "2620": "Table", + "2051": "Prop", + "133": "\u00c9", + "842": "\u0120end", + "1152": "olum", + "2727": "me", + "1443": "\u0120Sh", + "1009": "ove", + "1691": "21", + "2648": "arse", + "1397": "\u0120require", + "1866": "\u0120own", + "3140": ".sw", + "2406": "_L", + "228": "\u0128", + "1721": "01", + "1365": "\"]", + "2569": "open", + "2499": "};\u010a", + "759": "atch", + "4046": "\u0120\u00c3\u00a9", + "1176": "\u0120first", + "1618": "\u0120here", + "1090": "Int", + "2126": "\u0120cour", + "1811": "\u00e3\u0122\u0124", + "2953": "admin", + "1350": "),\u010a", + "1432": "\u010a\u010a\u010a", + "3732": "\u00d0\u00b0\u00d0", + "3132": "\u0120[]", + "137": "\u00cd", + "2485": "https", + "1661": "\u0120num", + "3685": "\u0120expected", + "3866": "Vector", + "1531": "else", + "2640": "///", + "632": "ount", + "1145": "],", + "1824": "-c", + "3245": "\u0120thing", + "930": "ource", + "1408": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "3011": "\u0120That", + "3207": "\u00e2\u0122\u013bre", + "3131": "\u0120once", + "3407": "Sc", + "1264": "sum", + "2770": "SON", + "3768": "ocal", "2177": "ross", - "3539": "\u0120employ", - "3493": "\u0120provide", - "92": "}", - "3717": "\u0120connection", - "651": "erv", - "2158": "ftware", - "463": "\u0120pro", - "2968": "ability", - "1979": "assword", - "2799": "oad", - "3421": "idual", - "738": "\u0120const", - "3475": "\u0120Test", - "2234": "ung", + "265": "re", + "3647": "sm", + "166": "\u00ea", + "3020": "elper", + "259": "\u0120t", + "450": "iz", + "447": "qu", + "3470": "\u0109break", + "1920": "\u0120process", + "124": "\u00c0", + "319": "\u010d\u010a", + "1295": "ote", + "2567": "\u0120keep", + "938": "\u0120ro", + "2089": "PO", + "3340": "ued", + "3205": "iod", + "2301": "ency", + 
"2113": "api", + "3668": "(\"#", + "2293": "format", + "1240": ")\u010d\u010a", + "3616": "ULT", + "1221": "\u0120Con", + "3293": "\u0120recent", + "2119": "col", + "840": ".f", + "171": "\u00ef", + "258": "in", + "3995": "\u0120young", + "3401": "\u0120$_", + "1299": "ular", + "1022": "\u0120off", + "1043": "\u0120sup", + "3257": "ograph", + "917": "\u0120app", + "523": "\u0120ch", + "3600": "\u0120services", + "435": "\u0120F", + "1391": "\u0120comp", + "3948": "ught", + "3910": "GL", + "4042": "ico", + "2372": "\u0120gover", + "2047": "\u0120prop", + "616": "ell", + "821": "_d", + "1631": "ym", + "456": "get", + "1411": "ood", + "1904": "true", + "144": "\u00d4", + "1454": "();\u010a\u010a", + "1366": "aph", + "2348": ".php", + "242": "\u0136", + "12": "-", + "3568": "\u0120deal", + "2680": "\u0120access", + "261": "er", + "559": "\u0120sh", "2844": "uted", - "3491": "gle", - "3194": "\u0120miss", - "2722": "\u0120Te", - "3370": "ario", - "3968": "Fl", - "320": "\u0120(", - "2082": "\u0120code", - "632": "ount", - "2703": "_path", + "3776": "\u0120black", + "1598": "ature", + "1546": "tn", + "994": "\u0120when", + "1720": "\u0120}\u010d\u010a", + "3408": "Width", + "1441": "ets", + "1918": "iness", + "3645": "\u0120format", + "3606": "Empty", + "2838": "%%", + "853": "ange", + "504": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2850": "ari", + "284": "\u0120=", + "3468": "\u0120Min", + "3782": "\u0120came", + "186": "\u00fe", + "639": "\u0120\u00e2\u0122", + "1798": "\u0120inst", + "1399": "60", + "392": "ub", + "2073": "\u0120und", + "3050": "iversity", + "882": "user", + "587": "ak", + "3733": "float", + "3437": "ymb", + "1638": "list", + "2117": "oto", + "2915": "ably", + "2615": ".push", + "2345": "\u00e2\u0122\u0136", + "906": "we", + "3762": "\u0109\u0120", + "174": "\u00f2", + "226": "\u0126", + "2017": "over", + "4004": "urther", + "900": "umber", + "141": "\u00d1", + "3384": 
"ished", + "1006": ".F", + "1345": ".M", + "1412": "uch", + "87": "x", + "2805": "pons", + "237": "\u0131", + "2088": "ilter", + "3191": "cribe", + "150": "\u00da", + "3662": "\u0120>>", + "1045": "date", + "2937": "\u0120instance", "0": "!", - "136": "\u00cc", - "987": "\u0120{\u010d\u010a", - "619": "ould", - "410": "00", - "349": "ate", - "1155": "ayer", - "1988": "\u0120input", - "3525": "[$", - "3557": "ias", - "422": "\u0120if", - "3350": "\u0120write", - "2717": "\u0120<=", - "1205": "\u0120need", - "3934": "false", + "3414": "ITY", + "1512": "cept", + "75": "l", + "47": "P", + "2440": "\u0120label", + "2232": "Qu", + "3010": "\u0120later", + "441": "ke", + "4025": ".z", + "654": "row", + "1327": "\u0120ext", + "2559": "\u0120stand", + "3728": "\u0120global", + "1683": "atter", + "2837": "point", + "3327": "\u0120expl", + "1343": "\u0120ph", + "108": "\u00b0", + "1579": "\u0120high", + "740": "\u0120*/\u010a", + "3108": "(){\u010a", + "2030": "\u0120But", + "3371": "\u0120tell", + "1781": "\u0120think", + "3659": "_of", + "2142": "utes", + "1500": ">", - "1276": ".n", - "2876": "\u0120Not", - "1076": "duct", - "1085": "#include", - "3126": "Handler", - "4004": "urther", - "710": "ax", - "3628": "sql", - "254": "\u0142", - "2249": "ux", - "3130": "\u0120room", - "1796": "\u0120List", - "4069": "INE", - "2130": "ift", - "1647": "\u0120mon", - "3497": "ustr", - "2938": "ORT", - "3770": "\u0120below", - "3781": "stit", - "279": "\u0120the", - "3136": "\u0120continue", - "724": "quest", - "3242": "\u0120imp", - "3976": "Per", - "1187": "24", - "4002": "ARR", - "3301": "INT", - "2285": "json", - "1810": "idd", - "3415": "vo", - "1140": "cess", + "99": "\u00a6", + "3262": "/m", + "2737": "\u0120including", + "3153": "ips", + "1680": "):", + "2891": "ANT", + "1527": "from", + "3287": "\u0120didn", + "970": "value", + "1357": "\u0120Int", + "1372": "ET", + "139": "\u00cf", + "4049": "inding", + "641": "cc", + "786": "abel", + "1912": "\u0120group", + "2710": ".name", 
+ "1880": "\u0120et", + "784": "ss", + "330": "\u0120\"", + "1804": "px", + "401": ";\u010a\u010a", + "800": "\u0120St", + "3059": "ze", + "3184": "ollections", + "3797": "_sh", + "899": "vel", + "2003": "oin", + "3907": "\u0120University", + "3959": "\u0120Des", + "3231": "base", + "2192": "ai", + "1659": "formation", + "3993": "itude", + "1553": "iver", + "288": "es", + "832": "\u0120one", + "3764": "(value", + "3926": "\u0120Fe", + "3826": "dir", + "2657": "(data", + "3226": "57", + "3121": "uest", + "769": "Id", + "720": "\u0120\u010a", + "2542": "gress", + "1902": "alth", + "3862": "\u00c4\u00b1", + "1339": "ait", + "2401": "_to", + "2391": "\u0120during", + "869": "ov", + "1877": "\u0120sex", + "28": "=", + "4022": "\u0120sizeof", + "3882": "\u0120session", + "3546": "\\x", + "386": "\u0120M", + "3110": "\u0120ID", + "145": "\u00d5", + "116": "\u00b8", + "719": "\u0120but", + "3147": "\");\u010a\u010a", + "1294": "ION", + "1522": "\u0120pass", + "339": "th", + "2055": ".\u010a\u010a\u010a\u010a", + "3376": "oper", + "834": "\u0120dis", + "1908": "ified", + "2588": "location", + "2414": "left", + "640": "aram", + "2052": "\u0120All", + "3538": "ause", + "2716": "ERR", + "3127": "\u0120lib", + "331": "ch", + "2619": "atrix", + "583": "rou", + "3410": "atform", + "2839": "03", "647": "ong", - "3397": "andle", - "3013": "df", - "2530": "ours", - "3579": "ATION", - "3391": "43", - "3713": "apping", - "2088": "ilter", - "3110": "\u0120ID", - "2498": "unity", + "1537": "\u0120item", + "1300": "\u00e3\u0122", + "3305": "\u00ef\u00bb\u00bf", + "4085": ")->", + "679": "201", + "3984": "\u0120provided", + "3281": "\u0120dest", + "2540": "acter", + "3015": "ILE", + "3989": "auto", + "478": "est", + "122": "\u00be", + "1427": "\u0120look", + "3341": "\u0120Car", + "3082": "\u0120True", + "2901": "uments", + "3977": "\u0120variable", + "3169": "\u0120kind", + "684": "ition", + "2919": "\u0120days", + "788": "ous", + "1898": "Service", + "4020": "\u0120cou", + "3584": 
"\u0120\u010a", - "2586": "\u0120come", - "26": ";", - "2620": "Table", - "3981": "eme", - "3905": "Rec", - "3772": "\u0120media", - "1185": "={", - "3903": "elt", - "3310": "udio", - "2650": "\u0120How", - "3098": "\u00e0\u00b8", - "659": "\u0120self", - "1794": "\u0120som", - "2627": "height", - "39": "H", - "3165": "\u0120Sp", - "1614": "\u0120state", - "159": "\u00e3", - "1777": "PE", - "911": "gr", - "3653": "ilar", - "2422": "quals", + "486": "ere", + "3631": "_tr", + "2506": ".I", + "3758": "login", + "388": "ers", + "3464": "_con", + "3627": "Grid", + "3592": ".png", + "1099": "\u0120gr", + "2633": "\u0120govern", + "2692": "ained", + "3477": "\u0120review", + "2461": "ification", + "1428": "AM", + "2642": "af", + "3073": "\u0120exist", + "63": "`", + "3689": "atis", + "2973": "irtual", + "1177": "');\u010a", "953": "ix", - "176": "\u00f4", - "698": "ple", - "1512": "cept", + "668": "te", + "3899": "achine", "4038": "\u0120months", - "857": "pan", - "3811": "anged", - "2715": "aff", - "391": "ap", - "2535": "Label", - "3408": "Width", - "120": "\u00bc", - "1635": "_N", - "2568": "Count", - "1507": "ED", - "128": "\u00c4", - "541": "ord", - "2294": "\u0120great", - "789": "eld", - "3758": "login", - "1320": "td", - "845": "16", - "2228": "=>", - "1699": ".log", - "1064": "fter", - "3263": "\u0120Reg", - "2055": ".\u010a\u010a\u010a\u010a", - "563": "tring", - "842": "\u0120end", - "69": "f", - "3798": ".assert", - "3360": "Null", - "1024": "\u0120&&", - "4024": "\u0120went", - "3589": "ptr", - "3159": "uit", - "2936": "irection", - "4053": "LECT", - "3787": "rops", - "3499": "chema", - "3509": "imate", - "2442": "-t", + "743": "\u0120set", + "2186": "},", + "980": "oll", + "3291": "\u0120],\u010a", + "704": "\u0120out", + "637": "rr", + "2688": "Not", + "3535": "rary", + "948": "].", + "3536": "ades", + "584": "\u0120we", "402": "av", - "3092": "\u0120My", - "3331": "\u0120General", - "1520": "\u0120help", - "3522": "gether", - "3520": "ney", + "1980": 
"?\u010a\u010a", + "1559": "ouse", + "2602": "_pro", + "1988": "\u0120input", + "29": ">", + "2816": "\u0120site", + "485": "ind", + "2663": "\u0120called", + "3310": "udio", + "2948": "(a", + "3404": "Token", + "986": "ames", + "2940": "where", + "1606": "\u0120because", + "2712": "_G", + "3774": "\u0120UI", + "1516": "ution", + "3549": "\u0120created", + "3507": "\u0120IS", + "2941": "\u0120javax", + "3180": "ohn", + "728": "ft", + "1860": "\u0120av", + "3498": "\u0120publish", + "366": "\u0120<", + "794": "\":", + "3960": ".path", + "1122": "ian", + "818": "ne", + "2662": "\u0120/>\u010a", + "2968": "ability", + "1635": "_N", + "1059": "raw", + "256": "\u0120\u0120", + "3792": "Back", + "3938": "\u0120future", + "2618": "47", + "878": "OR", + "2102": ".append", + "566": "cl", + "1759": "urs", + "3710": "unsigned", + "1639": "ited", + "574": "\u0120was", + "1654": "\u0120dec", + "3871": "\u0120together", + "1656": "oun", + "3559": "\u0120);\u010a\u010a", + "24": "9", + "614": "div", + "3933": "fs", + "527": "\u0120are", + "428": "ext", + "3290": "\u0120command", + "1212": "\u0120start", + "1113": "ating", + "1025": "';\u010a", + "2674": "Point", + "1460": "ackage", + "2587": "\u0120custom", + "301": "el", + "1879": "\u0120sign", + "890": "_b", + "30": "?", + "1929": "AG", + "1065": "ys", + "3094": "\u0120step", + "933": "]\u010a", + "2419": "orth", + "1882": "','", + "246": "\u013a", + "2733": "\u0120feel", + "1931": "img", + "496": "str", + "2728": "\u0120given", "3217": "\u0120experience", - "2767": "ponents", - "3784": "property", - "2894": ".value", + "3667": "dev", + "1188": "ient", + "2726": ".org", + "3058": "thers", + "4026": "Link", + "2107": "\u0120cap", + "1673": "aint", + "1592": "\u0120tem", "3250": "\u0120doesn", - "2999": "\u0120Dr", - "238": "\u0132", - "353": "\u0120*", - "487": "::", - "359": "un", - "1444": ".w", - "1749": "\u0120method", - "634": "dd", - "1279": "ector", - "2923": "irm", - "557": "\u0120}\u010a\u010a", - "2374": "System", - 
"1405": "\u0120where", + "2377": "Style", + "1239": "\u0120reg", + "3033": "});\u010a", + "2827": "rec", + "1514": "\u0120play", + "4012": "OK", + "1481": "-b", + "780": "atic", + "3364": "/*\u010a", + "2140": "\u0120Cont", + "2553": "ormal", + "1636": "ared", + "3253": "\u0109g", + "3456": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a", + "2340": "\u0120Americ", + "1448": "\u0120fr", "314": "\u0120{", - "1236": ".L", - "3883": "Options", - "2587": "\u0120custom", - "2126": "\u0120cour", - "3614": "ios", - "2010": "\u0120head", - "611": "\u0120/", - "2212": "\u0120around", - "3571": "position", - "1548": "He", - "1844": "Un", - "2307": "\u0120super", - "2443": "update", - "2865": "\u0120load", - "3744": "\u0120Part", - "2052": "\u0120All", - "1953": "IG", - "1999": "Class", - "1529": "\u0120bl", - "30": "?", - "708": "so", - "2368": "};\u010a\u010a", - "5": "&", - "3886": "witter", + "3917": "\u0120subject", + "649": "\u0120can", + "822": "io", + "515": "--------", + "3168": "iff", + "1571": "igned", + "549": "\u0120U", + "2268": "!\u010a\u010a", + "3315": "da", + "4036": "\u0120received", + "1316": "day", + "3141": "eb", + "2845": "\u00e3\u0125", + "2170": "As", + "2810": "Util", + "4035": "olve", + "1845": "\u0120bool", + "2960": "\u0120cent", + "3734": "\u0120except", + "2023": "pany", + "3400": "\u0120Cal", + "984": "AL", + "1348": "fore", + "2711": "ension", + "2820": "\u0120})\u010a", + "1987": "38", + "1867": "\u0120cor", + "3766": "\u0120previous", + "3835": "\u0120msg", + "1785": "FF", + "779": "\u0120so", "753": "\u00e2\u0122\u013bs", - "17": "2", - "2933": "au", - "123": "\u00bf", - "3687": "\u0120indiv", - "1881": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "979": "reate", - "3515": "\u0120having", - "3222": "URL", + "3799": "\u0120Dec", + "757": "\u0120me", + "1108": "igh", + "2481": "eters", + "3479": "move", + "2584": "Instance", + "3893": "ii", + "642": "\u0120*/", + "148": "\u00d8", + "119": 
"\u00bb", + "2431": "erg", + "880": "tem", + "3652": "()\u010d\u010a", + "3529": "/d", + "947": "];\u010a", + "1210": ".\"", + "1229": "\u0120Q", + "3523": "plied", + "2420": "${", + "3814": "\u0120night", + "3309": "\u0120told", + "406": "nt", + "346": "ce", + "2210": "ervices", + "2200": "hip", + "3816": "\u0120Red", "1941": "bers", - "809": "fer", - "962": ".d", - "1701": "\u0120using", - "1395": "be", - "1570": "ily", - "1716": "\u0120child", - "3457": "ici", - "1221": "\u0120Con", - "1879": "\u0120sign", - "3975": "temp", - "1053": "\u0120would", - "2201": "no", - "223": "\u0123", - "660": "ated", - "3191": "cribe", - "3224": "\u0120country", - "2651": "\u0120*)", - "749": ".m", - "423": "\u0120D", + "2991": "\u0120Text", + "2993": "(function", + "889": "\u0120who", + "3570": "Integer", + "1261": "loat", + "1451": "Ind", + "1139": "\u0120into", + "3729": "\u0120contact", + "272": "\u0120c", + "977": "ater", + "2387": "(l", + "1505": "\u0120find", + "3254": "\u0120single", + "1590": "ars", + "602": "\u0120i", + "646": "_s", + "167": "\u00eb", + "1182": "CT", + "483": "ine", + "530": "_t", + "2523": "\u0120fun", + "2094": "akes", + "4075": "mark", + "2033": "\u0120double", + "3612": "style", + "3896": "\u0120template", + "2570": "\u0120['", + "1842": "ypes", + "1746": "\u0120($", + "2259": "ina", + "2305": "\u0120month", + "340": ")\u010a", + "772": "lect", + "1735": "object", + "730": "////", + "524": "", - "2688": "Not", - "1818": "ween", - "3720": "Mode", - "3024": "\u0120json", - "2011": "\u0120must", - "3175": "width", - "3850": "parent", - "511": "oc", - "3037": "pecial", - "2896": "ottom", - "739": ".t", - "1874": "search", - "626": "St", - "639": "\u0120\u00e2\u0122", - "3779": "\u0120added", - "1521": "\u0120these", - "1925": "\u0120main", - "641": "cc", - "2376": "response", - "3662": "\u0120>>", - "730": "////", - "81": "r", - "1373": "\u0120read", - "1489": "Time", - "3342": "emp", - "318": "im", - "257": "\u0120\u0120\u0120\u0120", - "2185": "www", 
- "3329": "aring", - "3771": "\u0120Christ", - "1335": "action", - "1192": "))\u010a", - "3046": "ators", - "2048": "table", - "2193": "tract", - "2511": ">>", - "1230": "php", - "1158": "\")\u010a", - "112": "\u00b4", - "1843": "image", - "3555": "(!", - "814": "\u0120they", - "1500": ">", + "1324": "ptions", + "2490": "ories", + "3711": "FA", + "2040": "\u0120oper", + "1681": "Set", + "916": ".com", "2860": "\u0120total", - "1260": "ible", - "368": "()", - "305": "\u0120h", - "2071": "check", - "2390": "ATE", - "401": ";\u010a\u010a", - "866": "clude", - "2380": "\u0120three", - "685": "ance", - "3318": "\u0120working", - "1691": "21", - "2553": "ormal", - "2618": "47", - "2085": "\u0120without", - "3296": "\u0120By", - "3487": "56", - "2657": "(data", - "2003": "oin", - "542": "alue", - "2318": "08", - "506": "\u0120ex", - "3812": "End", - "1340": "", + "817": "use", + "1416": "\u0120/*", + "1489": "Time", + "538": "\u0120class", + "337": "ol", + "1619": "ined", + "1095": "\u0120let", + "3620": "uary", + "50": "S", + "3665": "\u0120save", + "908": "\u0120li", + "608": "====", + "816": "\u0120Y", + "3794": "\u0120getting", + "2351": "'re", "127": "\u00c3", - "1869": "miss", - "2054": "box", - "3077": "'ve", - "1740": "ffect", - "36": "E", - "2119": "col", - "553": "ize", - "2089": "PO", - "3367": "inary", - "1944": "oot", - "1613": "},\u010a", - "766": "_f", - "3129": "translation", - "3474": "\u0120det", + "372": "um", + "3031": "req", + "3903": "elt", + "307": "id", + "3440": "\u0120attack", + "1765": "\u0120sur", + "3870": "\u00c3\u00a5", + "2843": "IF", + "955": "\u0120type", + "1910": "yn", + "1690": "\u0120many", + "3124": "child", + "3442": "ROM", + "551": "\u0120:", + "3349": ":(", + "2280": "namespace", + "3971": "51", + "2769": "_ex", + "2127": "An", + "146": "\u00d6", + "868": "15", + "1455": "\u0120most", + "327": "ex", + "2920": "reated", + "2965": ",'", + "3958": "\u0120bad", + "1426": "_F", + "795": "olor", + "2453": "ram", + "1840": 
"\u00d0\u00b8", + "1014": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "159": "\u00e3", + "2529": "\u0120},", + "1268": "\u0120how", + "518": "',", + "349": "ate", + "3610": "\u0120million", + "2918": "erial", + "3192": "255", + "849": "Ex", + "2512": "\u0120care", + "1768": "par", + "1582": "uthor", + "3987": "\u0120hope", + "2781": "\u0120er", + "2864": "UM", + "376": "tr", + "320": "\u0120(", + "3096": "CK", + "379": "\u0120y", + "2336": "ways", + "1386": "uff", + "2856": "\u0120offic", + "1225": "very", + "525": "ave", + "2766": "\u0120bit", + "2776": ".pro", + "292": "ic", + "205": "\u0111", + "2270": "ived", + "699": "ild", + "3029": "\u00c3\u00b6", + "3578": "ABLE", + "2457": "\u0120date", + "1758": "35", + "3367": "inary", + "2470": "num", + "3113": "\u0120display", + "973": "utton", + "1900": "ats", + "4000": ">();\u010a", + "2591": "\u0120\u010d\u010a", + "3166": "IV", + "3324": "\u0109t", + "1828": "\u0120next", + "1577": "(int", + "1020": "().", + "88": "y", + "3304": ".\u010d\u010a", + "2599": "AP", + "481": "able", + "888": "read", + "3476": "ports", + "3562": "\u0120Color", + "3230": "\u0120specific", + "3457": "ici", + "534": "}\u010a", + "1370": "\u0120par", + "3336": "IB", + "633": "}\u010a\u010a", + "298": "\u0109\u0109", + "4015": "\u0120ST", + "305": "\u0120h", + "550": "\u0120pr", + "2756": "apt", + "1160": "\u0120list", + "1488": "\u0120fil", + "2415": "ources", + "2644": "istory", + "83": "t", + "2639": "urch", + "3807": "nav", + "1319": "(p", + "802": "\u0120ar", + "2376": "response", + "3836": "fect", "3946": "ires", - "3830": "\u0120box", - "2550": "199", - "1809": "}\u010d\u010a", - "1031": "nc", - "3270": "\u0120\"\"\"\u010a", - "168": "\u00ec", - "2159": "ond", - "517": "{\u010a", - "585": "ject", - "37": "F", + "1597": "Button", + "1462": "._", + "1612": "\u0120dist", + "1265": "_in", + "1185": "={", + "891": 
"\">\u010a", + "2252": "post", + "2090": "\u0109this", + "229": "\u0129", + "1617": "att", + "1581": "Index", + "3516": "lem", + "2256": "assert", + "1907": "rid", + "4083": "\u0120opp", + "1048": "\u0120bu", "416": "lic", - "1848": ".Form", - "2808": "lib", + "4076": "\u0120Name", + "2041": "ced", + "3629": "\u0120often", + "49": "R", + "3846": "\u0120ce", + "3875": "host", + "9": "*", + "2631": "\u0120required", + "2783": "EM", + "1369": "\u0120Se", + "3399": "\u0120ben", + "897": "rop", + "1091": "ask", + "1278": "(m", + "3681": ".put", + "659": "\u0120self", + "3032": "Client", + "1470": "\u0109c", + "451": "de", + "1007": "pec", + "1847": "\u0120game", + "11": ",", + "2257": "Val", + "3486": "itive", + "1534": "ized", + "664": "ud", + "1322": "\u0120Pro", + "3004": "quired", + "2455": "())\u010a", + "3103": "Stream", + "2547": "\u0120body", + "2495": "78", + "1015": "atus", + "473": "\u0120H", + "3159": "uit", + "309": "am", + "1736": "ince", + "1580": "html", + "2719": "#if", + "3599": "('#", + "3215": "\u0120button", + "3887": "\u0120fund", + "1899": "UN", + "2148": "31", + "425": "age", + "1611": "\u0120De", + "2226": "Override", + "3847": "itted", + "2895": "\u0120Gr", + "1695": "\u0120good", + "1292": "_name", + "3952": "\u0120took", + "3705": "comm", + "3475": "\u0120Test", + "3136": "\u0120continue", + "2800": "\u0120rest", + "1046": "\u0120acc", + "669": "_c", + "2465": "my", + "324": "ur", + "1029": "ml", + "2234": "ung", + "3185": "\u0120side", + "1647": "\u0120mon", + "1836": "\u0120includ", + "1310": "rc", + "3173": "_key", + "1106": "_P", + "581": "ib", + "4016": "icrosoft", + "2935": "control", + "3844": "(S", + "2240": "creen", + "1716": "\u0120child", + "621": "pon", + "569": "ard", + "1747": "Model", + "3317": "));\u010a\u010a", + "711": "\u0120def", + "2751": "\u0120got", + "1972": "\u0120real", + "2060": "uration", + "1648": "\u0120way", + "1231": "ttp", + "3028": "\u0120Copyright", + "121": "\u00bd", + "3255": "offset", + "2797": "AME", + 
"3247": "\u0120THE", + "1859": "nection", + "2875": "\u0120short", + "3980": "\u0120ge", + "2218": "\u0120target", + "385": "lo", + "4079": "amic", + "3639": "\u0120What", + "3301": "INT", + "972": "18", + "499": "\u0120you", + "2622": "]);\u010a", + "3969": "\u0120exec", + "2835": "\u0120video", + "1311": "_re", + "2390": "ATE", + "927": "\u0120over", + "2050": "place", + "2670": "\u0120};\u010a", + "2982": "\u0120Array", + "147": "\u00d7", + "3879": "framework", + "2522": "\u0120Sc", + "2693": "atur", + "1334": "air", + "2488": "################", + "1969": "mand", + "471": "\u0120return", + "1131": "...", + "852": "urre", + "3558": "BUG", + "2645": "Str", + "3319": "\u0120query", + "3769": "\u0120material", + "1883": "\u0120App", + "3874": "'))", + "3095": "ances", + "347": "od", + "1545": "ample", + "2576": "\u0120url", + "666": "\u0120Th", + "1526": "uild", + "1049": "200", + "3316": "msg", + "1697": "ational", + "494": "orm", + "3045": "client", + "2020": "static", + "2629": "ta", + "2056": ".R", + "2561": "\u0120available", + "3550": "uper", + "3806": "ops", + "497": "..", + "1930": "valid", + "3280": "\u0120season", + "1105": "ors", + "4094": "efined", + "1896": ".in", + "1352": "ateg", + "578": "\u0120The", + "2552": "yles", + "2612": "\u0120output", + "3326": "agn", + "877": "LE", + "1247": "ek", + "2923": "irm", + "3246": "Trans", + "2331": "\u0120]", + "222": "\u0122", + "1405": "\u0120where", + "3783": "\u0120delete", + "2452": ".sh", + "2018": "\u0120contin", + "2444": "anel", + "207": "\u0113", + "2223": "ital", + "3634": "\u0120space", + "3936": "Title", + "2515": "\u0120effect", + "1551": "wn", + "3450": "\u0120mom", + "1013": "ader", + "3375": "imum", + "620": "ull", + "2510": ".Cont", + "3770": "\u0120below", + "1135": "50", + "2873": "\u0120seem", + "1773": "-d", + "3151": "omic", + "1418": "\u0120while", + "1762": "pre", + "3260": "\u0120anal", + "2914": "ands", + "232": "\u012c", + "1915": "Field", + "1167": "\u0120z", + "1849": "rows", + 
"2421": "88", + "2159": "ond", + "380": "ist", + "420": "\u0120this", + "2752": "stand", + "1682": "29", + "3248": "\u0120\u010d\u010a", - "2103": "\u0120still", - "1277": "http", - "2148": "31", - "1849": "rows", - "1452": "ULL", - "3675": "ien", - "1202": "\u0120its", - "2141": "ailable", - "31": "@", - "2131": "55", - "260": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "1104": "ific", - "2080": "\u0120struct", - "2291": "\u0120fore", - "2005": "ui", - "1719": "\u0120param", - "1196": ".W", - "2779": "ken", - "2160": "\u0120ret", - "399": "urn", - "3979": "\u0120Def", - "2328": "ocation", - "3665": "\u0120save", - "2226": "Override", - "1725": "ps", - "3075": "\u0120Object", - "3928": "unit", - "2200": "hip", - "3701": "TH", - "2395": "\u0120spe", - "3926": "\u0120Fe", - "3276": "\u0120ant", - "1864": "rem", - "3750": "curity", - "2598": "\u0120partic", - "1269": "rite", - "227": "\u0127", - "2303": "oy", - "1807": "_data", - "4044": "87", - "2270": "ived", - "1116": "ref", - "2940": "where", - "3731": "\u0120abs", - "682": "\u0120all", - "1000": "indow", - "199": "\u010b", - "590": "oid", - "2944": "\u0120reason", + "1812": "\u0120med", + "799": "ude", + "2536": "\u0120non", + "3199": "omain", + "3393": "\u0120async", + "3196": "\u0120based", + "1711": "uring", + "3018": "ffer", + "1429": "ility", + "1344": "\u0120fl", + "3235": "\u0120along", + "3125": "ifier", + "3165": "\u0120Sp", + "1132": "arch", + "3791": "CC", + "3722": "\u0120diff", + "691": "IN", + "3524": "ao", + "4032": "Ref", + "149": "\u00d9", + "1865": "\").", + "671": "\u0120ab", + "2881": "irl", + "695": "data", + "4021": "ogn", + "1325": "vid", + "2233": "\u00d1\u0122", + "2149": "Al", + "3997": "\u0120website", + "3009": "\u0120stop", + "845": "16", + "169": "\u00ed", "3100": "(C", - "2293": "format", - "1654": "\u0120dec", - "3166": "IV", - "3889": "__(", - "3699": "items", - "3443": "400", - "2203": "sp", - "1162": "\u0120case", - "328": "\u0120S", - "1764": "ma", - "182": "\u00fa", 
- "1227": "64", - "3836": "fect", - "1278": "(m", - "2378": "Up", - "697": "Re", - "52": "U", - "1332": "\u0120min", - "3086": "\u0120});\u010a\u010a", - "1119": "roll", - "2514": "\u0109private", - "154": "\u00de", - "3842": "\u0120John", - "1223": "ower", - "2152": "CE", - "4091": "anger", - "449": "\u0120with", - "1731": "rawing", - "3283": "(h", - "465": "\u0109\u0109\u0109\u0109", - "1184": "ople", - "1394": "az", - "936": "ca", - "3014": "UG", - "1293": "erson", - "3184": "ollections", - "2731": "\u0120better", - "1310": "rc", - "2911": "\u0120children", - "3679": "\u0120redu", - "3810": "\u0120enter", - "2507": "NS", - "2386": "email", - "3897": "Position", - "4046": "\u0120\u00c3\u00a9", - "806": "11", - "2092": "\u0120sol", - "1144": "\u0120\\", - "3642": "Us", - "878": "OR", - "1307": "arget", - "913": "param", - "2198": "CH", - "3543": "\u0120complet", - "1084": "\u0120\u0120\u0120\u0120\u010a", - "1342": "text", - "1302": "bs", - "1934": "\u0120report", - "3740": "OUT", - "2446": ".to", - "216": "\u011c", - "1337": "type", - "413": "turn", - "3752": "\u0120character", - "3078": "vers", - "2527": "start", - "2902": "-h", - "1377": "\u0120dr", - "2500": "\u0120another", - "627": ".\u010a", - "2772": "ocial", - "259": "\u0120t", - "4070": "\u00e2\u0122\u013bve", - "1522": "\u0120pass", - "1678": "til", - "265": "re", - "2843": "IF", - "4034": "\u0120visit", - "2597": "\u0120await", - "3793": "emplate", - "539": "\u0120not", - "1473": ":\u010a\u010a", - "429": "=\"", - "1850": "*/", - "908": "\u0120li", - "3111": "ajor", - "2414": "left", - "2188": "\u0120\u00c2", - "1713": "oken", - "3551": ".state", - "2090": "\u0109this", - "2189": "())", - "397": ">\u010a", - "351": "ag", - "3034": ".Com", - "1216": "ey", - "2423": "ese", - "2073": "\u0120und", - "3229": "\u0120author", - "521": "ase", - "2570": "\u0120['", - "2325": "_v", - "714": "alse", - "1565": "ler", - "1636": "ared", - "1932": "PI", - "2477": "\u0120pop", - "196": "\u0108", - "2915": "ably", - 
"588": "ve", - "219": "\u011f", - "2796": "mission", - "2980": "\u0120consider", - "474": "ack", - "826": "ings", - "1378": "Exception", - "3763": "click", - "958": "\u0120inter", - "2962": "_add", - "3879": "framework", - "1351": ".re", + "3824": "\u0120dam", + "4054": "One", + "3496": "\u00c3\u00a3o", + "2775": "header", + "2790": "46", + "2675": "You", + "3576": "sv", + "190": "\u0102", + "1796": "\u0120List", + "1620": "\u0120final", + "3679": "\u0120redu", + "1940": "\u0120sk", + "1254": "\u0120Wh", + "2504": "ework", + "789": "eld", + "2178": "();", "480": "\u0120G", - "3188": "imary", - "4082": "FT", - "2831": "Content", - "3989": "auto", - "1863": "ENT", - "676": "ress", - "4048": "\u0120learn", - "3074": "ica", - "276": "an", - "2635": "lex", - "352": "ay", - "3418": "yc", - "2402": "ards", - "674": "\u0120#", - "1261": "loat", - "1820": "the", - "2689": "oo", - "1056": "iss", - "3349": ":(", - "3306": "sl", - "1016": "Th", - "338": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "940": "ual", - "3212": "imer", - "3373": "\u0120select", - "174": "\u00f2", - "2996": "down", - "3870": "\u00c3\u00a5", - "3564": "\u0120porn", - "126": "\u00c2", - "865": "\u0120x", - "1439": "ures", - "395": "ass", - "3832": "(v", - "838": "val", - "864": "\u0120pre", - "3974": "\u0120live", - "1360": "Pro", - "3205": "iod", - "2074": "ategory", - "1066": "\u0120///", - "3389": "\u0120late", - "3581": "anch", - "1734": "\\n", - "1338": "util", - "3045": "client", - "3080": "67", - "2075": "75", - "256": "\u0120\u0120", - "3292": "_x", - "3954": "\u0120}}", + "1050": "\u0120Re", + "2829": "\u0109\u0120\u0120\u0120", + "3534": "97", + "1281": "\u0120jav", + "627": ".\u010a", + "2543": "\u0120turn", + "1458": "rough", + "2573": "Action", + "874": "irst", + "2765": "\u0109d", + "2736": "\u0120already", + "682": "\u0120all", + "1026": "ms", + "3202": "UP", + "562": "ption", + "4061": "Check", + "1417": "\u0120arg", + "1935": 
"\u0120take", + "638": "ome", + "3694": "\u0120+\u010a", + "2507": "NS", + "96": "\u00a3", + "3433": "roy", + "3441": "\u0120Ste", + "576": "this", + "750": "ally", + "1070": "\u0120there", + "1053": "\u0120would", + "1286": "ole", + "1513": "clud", "969": "ra", - "1673": "aint", - "1461": "\u0120him", - "927": "\u0120over", - "2392": "tes", - "602": "\u0120i", - "3220": "_value", - "3456": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a", - "544": "ff", - "938": "\u0120ro", - "2604": "riter", - "3419": "\u0120pot", - "1086": "ethod", - "221": "\u0121", - "1415": "arent", - "1931": "img", - "1876": "Form", - "2420": "${", - "4014": "rest", - "3780": "\u0120buy", - "2079": "request", - "824": "\u0120per", - "2326": "\u0120US", - "3118": "\u0120present", - "2518": "ending", - "1755": "\u0120expect", - "1330": "pace", - "392": "ub", - "1563": "mary", - "794": "\":", - "3733": "float", - "1291": "ier", - "1977": "\u0120build", - "3565": "top", - "2407": "cur", - "2713": "\u0120update", - "1612": "\u0120dist", - "2529": "\u0120},", + "175": "\u00f3", "945": "\u0120ag", - "2599": "AP", + "3941": "\u0120mode", + "1509": "enu", + "3355": "\u0120vi", + "1565": "ler", + "3224": "\u0120country", + "3007": "react", + "3581": "anch", + "2275": "\u00d0\u00be\u00d0", + "217": "\u011d", + "3114": "\u00d0\u00bb", + "2774": "erties", + "3008": "\u0120ann", + "92": "}", + "1934": "\u0120report", + "2976": "argin", + "1055": "ven", + "1699": ".log", + "2638": "\u0120Set", + "2215": "\u0120rece", + "3458": "na", + "3944": "\u0120ax", + "2508": "ology", + "2114": "OD", + "1492": "\u0120->", + "3781": "stit", + "1169": "let", + "3403": "\u0120enough", + "1685": "\u0120pres", + "907": "\u0120value", + "1154": "ages", + "2753": "\u0120less", + "1073": "of", + "670": "uct", + "2214": "ration", + "594": "\u0120res", + "2237": "\u0120level", + "2300": "Ent", + "3614": "ios", + "3210": "ively", + "3449": "\u0120meet", + "1924": "ives", + "215": "\u011b", + 
"2246": "\u0120document", + "2316": "\u0120title", + "3328": "itter", + "1645": "\u0120ac", + "996": "\u0120\u0120\u0120\u0120\u0120\u0120", + "3649": "\u0120details", + "253": "\u0141", + "2995": "\u0120eng", + "1230": "php", + "3278": "\u0120women", + "573": "\u0109\u0109\u0109", + "389": "\u0120on", + "404": "ir", + "241": "\u0135", + "2746": "If", + "3898": "\u0120econ", + "861": "List", + "949": "\u0120?", + "180": "\u00f8", + "4063": "\"),", + "1739": "uf", + "1936": "rib", + "2327": "UI", + "128": "\u00c4", + "2143": "atab", + "1403": "\u0120two", + "15": "0", + "2999": "\u0120Dr", + "3934": "false", + "1889": "code", + "4048": "\u0120learn", + "477": "\u0120or", + "3391": "43", + "3707": ")\u010a\u010a\u010a", + "1321": "cond", + "3702": "=True", + "2535": "Label", + "785": "own", + "371": "ort", + "136": "\u00cc", + "3663": "\u0120face", + "1120": "\u0120just", + "2005": "ui", + "2998": "\u0120En", + "2247": "\",\"", + "758": "\u0120!", + "582": "ac", + "2871": "IR", + "2384": "endif", + "1763": "\u0120)\u010a", + "1467": "#define", + "3469": "\u0120opt", + "1192": "))\u010a", + "3339": "\u0120making", + "3586": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "1340": "<", + "2574": "\u0120things", + "3588": "\u0120::", + "382": ".\u010a\u010a", + "2720": "uction", + "1271": "To", + "655": "ber", + "3787": "rops", + "3208": "\u0120lik", + "2874": "\u0120q", + "1795": "\u0120http", + "55": "X", + "153": "\u00dd", + "3053": "\u0120Can", + "1873": "ience", + "4001": "ional", + "3325": "\u0120least", + "2310": "_de", + "1305": "Tr", + "367": "ation", + "89": "z", + "2969": "outh", + "3699": "items", + "2798": "\u0120temp", + "1754": "\u0120()", + "760": "\">", + "3748": "ilit", + "2972": "pose", + "2123": "Code", + "2456": "\u0120(\u010a", + "1706": "lish", + "2866": "ights", + "2101": "aining", + "706": "\u0120has", + "3405": ".user", "3297": 
"\u0120May", - "3503": "allback", + "2021": ".Text", + "3160": "\u0120length", + "2330": "\u0120[\u010a", + "1818": "ween", + "1195": "mer", + "623": "_p", + "2857": "ks", + "3916": "ales", + "2978": "\u0120school", + "1539": "UR", + "781": "\u0120lo", + "1643": "ife", + "2934": "Url", + "1149": "anc", + "989": "ely", + "3312": "\u0120equ", + "3749": "\u0120Event", + "3378": "IP", + "1518": "\u0120see", + "362": "\u0120A", + "2501": "<<", + "1925": "\u0120main", + "2176": "ee", + "322": "//", + "3078": "vers", + "2892": "()\u010a\u010a", + "3070": "\u0120family", + "3216": "\u0120Up", + "714": "alse", + "3811": "anged", + "2974": "earch", + "1669": "IS", + "778": "cont", + "2560": "\u0120proble", + "3973": "\u0120border", + "60": "]", + "648": "ie", + "1413": "ative", + "73": "j", + "281": "\u0120p", + "3163": "event", + "3218": "CL", + "3872": "\u0120throws", + "2758": "\u0120Add", + "2100": "\u0120So", + "3370": "ario", + "1496": "roller", + "617": "\u0120have", + "2889": "rist", + "3542": "\u0120'@", + "850": "error", + "1088": "ute", + "1187": "24", + "2263": ".print", + "1986": "ivers", + "2686": "\u0120address", + "3622": "\u0120server", + "2808": "lib", + "3714": "ros", + "1663": "query", + "8": ")", + "434": "ess", + "956": "'t", + "547": "ime", + "2164": "args", + "2012": "////////////////", + "2158": "ftware", + "2292": "(String", + "1263": "und", + "2994": "null", + "2630": "field", + "2001": "\u0120\u00e2\u0122\u0136", + "3337": "EX", + "3964": "(:", + "1872": "std", + "200": "\u010c", + "359": "un", + "2511": ">>", + "1246": ".G", + "3142": "\u0120Public", + "1146": "wo", + "3347": "\u0120past", + "1404": "\u0120size", + "293": "\u0120b", + "3750": "curity", + "1284": "_st", + "3625": "\u0120les", + "1549": "gram", + "3962": "\u0120Post", + "3658": "\u0120Free", + "3992": "btn", + "2811": "Listener", + "1124": "\u0120them", + "3897": "Position", + "3395": "cul", + "1668": "Color", + "3703": "\u0120bar", + "1766": "\u0120found", + "2007": "\u0120table", 
+ "2906": "erver", + "2115": "\u0120field", + "731": "ake", + "873": "')", + "2408": "imit", + "3526": "\u0120++", + "3739": "i\u00c3\u00b3n", + "1615": "\u0120cons", + "1576": "lear", + "3638": ",\u010a\u010a", + "2404": "etwork", + "1030": "co", + "179": "\u00f7", + "2138": "\u0120fam", + "2013": ".util", + "1895": "Array", + "3350": "\u0120write", + "856": "\u0120my", + "990": "\u0120work", + "3374": "level", + "2703": "_path", + "1094": "ody", + "806": "11", + "3221": "\u0120near", + "3137": "\u0120talk", + "3420": "\u0120Trump", + "751": "set", + "2718": "ately", + "1333": "stance", + "4028": "\u0120across", + "333": "if", + "3778": "\u0120American", + "1341": "IC", + "3942": "\u0120'<", + "1815": "ache", + "3063": "\u0120lead", + "738": "\u0120const", + "390": "\u0120con", + "1087": ".P", + "629": ");\u010a\u010a", + "3867": "byte", + "1675": "\u0120sim", + "1985": "test", + "1523": "\u0120down", + "269": "or", + "3928": "unit", + "1196": ".W", + "3181": "alf", + "3060": "\u0120either", + "1482": "\u00d0\u00be", + "64": "a", + "4068": "\u0120comment", + "227": "\u0127", + "1787": "plate", + "686": "ear", + "2207": "_IN", + "1801": ".id", + "2082": "\u0120code", + "3480": "\u0120switch", + "3075": "\u0120Object", + "3323": "only", + "2687": "\u0120uint", + "2997": "\u0120include", + "2834": "\u0120won", + "1687": "We", + "3271": "aper", + "2550": "199", + "2093": "\u0120===", + "4006": "ALL", + "2480": "uid", + "2002": "ten", + "3054": "ounc", + "3261": "create", + "2317": "\u0120context", + "2106": "med", "2360": "\u0120No", - "1905": "AB", - "575": "ip", - "2215": "\u0120rece", - "208": "\u0114", + "1538": "ump", + "1351": ".re", + "2832": "_W", + "828": "\u0120data", + "3335": "\u0120record", + "1560": "\u0120es", + "3038": "_com", + "101": "\u00a8", + "1468": "itt", + "3537": "_se", + "1287": "){\u010a", + "2109": "\u0120rep", + "3786": "\u0120card", + "3362": "IZE", + "1728": "lication", + "170": "\u00ee", + "2664": "body", + "954": "ays", + "1495": 
"\u0120text", + "2708": "\u0120appe", + "1228": "lick", + "2613": "\u0120email", + "1066": "\u0120///", + "464": ";\u010d\u010a", + "3923": "What", + "2851": "\u0120player", + "875": "ick", + "2334": "script", + "3388": "\u0120course", + "1772": "\u0120post", + "3084": "_time", + "724": "quest", + "1102": "\u0120It", + "3833": "rel", + "3296": "\u0120By", + "605": "10", + "3501": "\u0120CON", + "2744": "\u0120always", + "1868": "count", + "1707": "\u0120stud", + "2773": "\u0120prom", + "91": "|", + "3697": "\u0120members", + "657": "ll", + "676": "ress", + "2946": "59", + "1258": ".st", + "1171": "red", + "2074": "ategory", + "3902": "main", + "56": "Y", + "3023": "oud", + "1888": "\u0120best", + "2471": "Act", + "1709": "\u0120gu", "2528": "vious", - "261": "er", - "633": "}\u010a\u010a", - "825": "Cont", - "2483": "\u00c3\u0143", - "1773": "-d", - "1552": "_h", - "1350": "),\u010a", - "3432": "\u0120today", - "2469": "ots", - "504": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2840": "\u0120gl", - "3261": "create", - "3647": "sm", - "3393": "\u0120async", - "421": "\u0120wh", - "3595": "pond", - "2867": "\u0120clear", + "941": "Type", + "2196": "context", + "881": "\u010d\u010a\u010d\u010a", + "1871": "\u0120mem", + "3935": "unt", + "611": "\u0120/", + "2133": "\u0120going", + "734": "\u0120function", + "1236": ".L", + "1475": "\u0120every", + "1874": "search", + "773": "ug", + "1346": "ccess", + "3540": "\u0120*/\u010d\u010a", + "1469": "\u0120ap", + "2338": "\u0120src", + "3229": "\u0120author", + "713": "ge", + "3052": "{{", + "1803": "\u0120gener", + "1289": "opy", + "1406": "UT", + "2368": "};\u010a\u010a", + "1961": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "3258": "ley", + "733": "\u0120go", + "2072": ".Forms", 
+ "3784": "property", + "4039": "\u0120anim", + "1878": "ename", + "1168": "\u0120kn", + "2096": "44", + "1616": "rint", + "2447": "\u0120project", + "3358": "'ll", + "2925": "omb", + "3939": "ached", "3700": "\u0120Per", - "3748": "ilit", - "2531": "\u0120leg", - "77": "n", - "1339": "ait", - "2393": "\u0120console", - "1811": "\u00e3\u0122\u0124", - "577": "\u0120u", - "2656": "used", - "629": ");\u010a\u010a", - "965": "EN", - "2151": "\u0120android", - "574": "\u0120was", + "2181": "It", + "98": "\u00a5", + "796": "tp", + "3673": "\u0120items", + "2518": "ending", + "2324": "\u0120life", + "2517": "_file", + "138": "\u00ce", + "2409": "With", + "1977": "\u0120build", + "936": "ca", + "2627": "height", + "3947": "There", + "204": "\u0110", + "2700": "\u0120port", + "1116": "ref", + "1465": "\u0120);\u010a", + "4044": "87", + "2628": ".html", + "2120": "(x", + "3514": "ourn", + "871": "\u0120>", + "1607": "roid", + "1993": "ground", + "2269": "-f", + "2743": "outer", "1995": "\u00c3\u00a1", - "2959": "root", - "2828": "()->", - "3538": "ause", - "2476": ",\u00e2\u0122\u013f", - "2277": "ading", - "1664": "\u0120well", - "3535": "rary", - "3714": "ros", - "2387": "(l", - "4094": "efined", - "72": "i", - "1368": "erty", - "3374": "level", - "2347": "obj", - "86": "w", - "333": "if", - "2471": "Act", - "2081": "escription", - "2781": "\u0120er", - "2355": "\u0120\u0120\u010a", - "1265": "_in", + "837": "\u0120true", + "2281": "_O", "567": "##", - "2822": "No", - "1972": "\u0120real", - "3844": "(S", - "1479": "cription", - "2490": "ories", - "3941": "\u0120mode", - "1651": "\u0120att", - "3062": "\u0120important", - "892": "\u0120time", - "447": "qu", - "3514": "ourn", - "1166": "ponent", - "381": "----", - "118": "\u00ba", - "3084": "_time", - "3914": "cast", - "978": "\u00c3\u00a9", - "846": "cre", - "3105": "ama", - "1505": "\u0120find", - "904": "\u0120any", - "561": "ast", - "2974": "earch", - "4013": "NU", - "462": "ri", - "1117": "_S", - "1124": "\u0120them", 
- "2408": "imit", - "1689": "\u0120bel", - "1486": "\u0120point", - "1336": "wh", - "4009": "\u0120network", - "3547": "\u0120fac", - "1472": "\u0120You", - "2924": "idden", - "1266": "../", - "1099": "\u0120gr", - "1670": "\u0120default", - "2142": "utes", - "3156": "\u0120until", - "3446": "\u0120story", - "3106": "Entity", - "1273": "ww", - "2885": "\");\u010d\u010a", - "603": "\u0120us", - "3630": "\u0120info", - "607": "ail", - "3791": "CC", - "1940": "\u0120sk", - "220": "\u0120", - "3640": "ged", - "1317": "\u0120long", - "3095": "ances", - "2895": "\u0120Gr", - "930": "ource", - "3859": "\u0120unsigned", - "3429": "\u0120para", - "4017": "\u0120limit", - "394": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "910": "nder", - "172": "\u00f0", - "4008": "ili", + "3751": "ster", "3269": "(M", - "1852": ".Drawing", - "1700": "{\u010d\u010a", - "2837": "point", - "3239": "env", - "889": "\u0120who", - "2631": "\u0120required", - "437": "os", - "1779": "select", - "2907": "\u0120),\u010a", - "996": "\u0120\u0120\u0120\u0120\u0120\u0120", - "3569": "++)\u010a", - "1775": "olean", - "1244": "reat", - "713": "ge", - "2626": "\u0120business", - "2780": "\u0120orig", - "1441": "ets", - "2730": "#endif", - "1493": "\u0120error", - "2510": ".Cont", - "839": "led", - "11": ",", - "2409": "With", - "2045": "icle", - "494": "orm", - "576": "this", - "3984": "\u0120provided", - "2630": "field", - "3857": "\u0120section", - "3749": "\u0120Event", - "3185": "\u0120side", - "3401": "\u0120$_", - "3862": "\u00c4\u00b1", - "1629": "\u0120run", - "1111": "AN", - "3561": "Reg", - "1498": "ql", - "1893": "\u0120create", - "4043": "\u0120ess", - "1393": "\u0120||", - "3452": "\u0120?>", - "2508": "ology", - "1913": "\u0120fin", - "1432": "\u010a\u010a\u010a", - "3719": "\u0120become", - "2205": "iable", - "631": "put", - "3915": "From", - "2059": "Sh", - "2775": "header", - "3476": "ports", - "2969": "outh", - "1645": "\u0120ac", - 
"1739": "uf", - "380": "ist", - "2595": "]\u010a\u010a", - "3590": "DB", - "1306": "\u0120after", - "1062": "\u0120val", + "1255": "Bo", + "3951": "icult", + "1710": "config", + "999": "quire", + "553": "ize", + "1205": "\u0120need", + "2366": "202", + "1189": "cent", + "1740": "ffect", + "1932": "PI", + "634": "dd", + "2679": "\u0120ide", + "2598": "\u0120partic", + "2586": "\u0120come", + "1201": "ank", + "3913": "cer", + "1749": "\u0120method", + "178": "\u00f6", + "1184": "ople", + "2065": "ization", + "3539": "\u0120employ", + "848": "log", + "3738": "\u0120certain", + "3569": "++)\u010a", + "2878": "Group", + "1543": "man", + "3994": "Attribute", + "1826": "\u0120est", + "1802": "aking", + "1966": "On", + "564": "ok", + "1600": "ES", + "2754": "/s", + "725": "odel", + "2004": "\u0120ke", + "3435": "\u0120polit", + "2637": ".,", + "2213": "App", + "2011": "\u0120must", + "3311": "current", + "1967": ";\u010d\u010a\u010d\u010a", + "1298": "rror", + "3117": "\u0120far", + "2416": "atri", + "855": "rray", + "2403": "\u0120against", + "2478": "\u0120few", + "521": "ase", + "1753": "ING", + "3918": "password", + "3660": "(false", + "517": "{\u010a", + "160": "\u00e4", + "3313": "\u0120auto", + "3611": "\u0120aff", + "2132": "\u0120second", + "1733": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2361": "\u0120position", + "3074": "ica", + "707": "String", + "2668": "ird", + "2105": "\u0120assert", + "1814": "ider", + "812": "ents", + "1979": "assword", + "2124": ".is", + "3002": "side", + "791": "The", + "3146": "\u0120**", + "715": "ON", + "3509": "imate", + "985": "using", + "3088": "egin", + "663": "']", + "112": "\u00b4", + "27": "<", + "2188": "\u0120\u00c2", + "3446": "\u0120story", + "3123": "echo", + "529": "\u00e2\u0122\u013b", + "352": "ay", + "835": "AT", + "2812": "arge", + "4095": "........", + "2209": "\u0120Is", + "522": ".c", + "1036": "ann", + "2049": "idget", + "1541": "\u0120don", + 
"219": "\u011f", + "520": "\u0120at", + "2328": "ocation", + "2169": "Pl", + "2172": "ado", "2095": "Controller", - "1527": "from", - "1139": "\u0120into", - "3366": "ixed", - "2287": "66", - "630": "form", - "3865": "\u00d1\u0125", - "2714": "Config", - "2190": "size", - "2012": "////////////////", - "3814": "\u0120night", - "1353": "\u0120loc", - "4003": "ictionary", - "2025": "head", - "3271": "aper", - "1693": "ender", - "458": "ile", - "1421": "\u0120rec", - "2793": "\u0120invest", - "847": "ark", - "3158": "\u0120area", - "1297": "\u0120bet", - "2849": "\u0120cell", - "1576": "lear", - "2377": "Style", - "331": "ch", - "2265": "ric", - "1271": "To", - "1921": "(b", - "1165": "////////", - "393": "\u0120P", - "3237": "\u0120express", - "2733": "\u0120feel", - "2622": "]);\u010a", - "105": "\u00ac", - "1356": "trib", - "2747": "orage", - "3031": "req", - "4018": "\u0120cut", - "546": "ont", - "556": "ire", - "2428": "ique", - "3006": "Column", - "2605": "\u0120prov", - "492": "ight", - "2301": "ency", - "2070": "ape", - "332": "ut", - "3279": "\u0120Ed", - "1928": "_g", - "2977": "Http", - "428": "ext", - "3234": "\u0120Do", - "3248": "\u0120", - "1943": ".Add", - "38": "G", - "3697": "\u0120members", - "2435": "\u0120They", - "339": "th", - "109": "\u00b1", - "3403": "\u0120enough", - "2155": "\u0120writ", - "192": "\u0104", - "1546": "tn", - "2564": "\u0120...", - "1672": "\u0120ins", - "1113": "ating", - "3275": "artment", - "900": "umber", - "48": "Q", - "1495": "\u0120text", - "1049": "200", - "1465": "\u0120);\u010a", - "3684": "next", - "3757": "cel", - "1181": "\u0120char", - "3652": "()\u010d\u010a", - "759": "atch", - "1324": "ptions", - "4021": "ogn", - "2344": "plit", - "1263": "und", - "954": "ays", + "2362": "\u0120old", + "2759": "\u0120account", "1079": "Res", - "516": ".s", - "3516": "lem", - "398": "ly", - "3927": "\u0120individual", - "1318": "[]", - "1246": ".G", - "985": "using", - "591": "\u0120=>", - "2112": ".N", - "746": "()\u010a", + 
"820": "old", + "2792": "posit", + "3344": "\u0120Med", + "540": "og", + "354": "ot", + "3111": "ajor", + "1301": "\u0120\u00d0", + "2071": "check", + "1665": "\u0120object", + "3812": "End", + "2585": "\u0120control", + "1134": "\u0120<<", + "4011": "istance", + "411": "ith", + "1726": "Element", + "2045": "icle", + "363": "ow", + "316": "om", + "2879": "section", + "940": "ual", + "1651": "\u0120att", + "240": "\u0134", + "442": "rom", + "636": "\u0120get", + "2279": "UE", + "1921": "(b", + "3090": "\u0120water", + "2084": "uffer", + "2967": "\u0120bro", + "2322": "\u0120met", + "123": "\u00bf", + "2852": "ais", + "1950": "AS", + "3628": "sql", + "2731": "\u0120better", + "3646": "most", + "3225": "ibility", + "457": "\u0120}\u010a", + "2061": "older", + "2057": "\u0120To", + "1383": "By", + "4077": "`\u010a", + "2157": "\u0109m", + "297": "\u0120o", + "3057": "\u0120appro", "2136": "ness", - "2823": "icro", - "1825": "\u0120open", - "1718": "lob", - "1937": "OM", - "1443": "\u0120Sh", - "4010": "yl", - "1595": "\u0120`", - "2221": "tra", - "2680": "\u0120access", - "2496": "irc", - "3120": "\u0120\\\u010a", - "3627": "Grid", - "663": "']", - "2815": "\u0120die", - "2311": "ords", - "226": "\u0126", - "313": "--", - "828": "\u0120data", - "1509": "enu", - "1564": "uto", - "4066": "Base", - "3436": "\u0120''", - "1856": "\u0120Pl", - "3070": "\u0120family", - "982": "yle", - "1463": "ify", - "489": "\u0120+", - "1093": "\u0120like", - "3042": "amb", - "2986": ".println", - "2785": "ling", - "2576": "\u0120url", - "1200": "\u0120op", - "4": "%", - "3000": "\u0120db", - "1602": "\u0109\u010a", - "190": "\u0102", - "1702": "_B", - "1786": "ool", - "131": "\u00c7", - "2551": "(_", - "1827": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a", - "3845": "ube", - "918": "_l", - "91": "|", - "2692": "ained", - "2129": "\u0109int", - "637": "rr", - "1642": "Box", - "1839": "\u0120href", - "3644": "_index", - "3985": "////////////////////////////////", - "3594": 
"soft", - "2721": "95", - "2370": "aving", - "807": "bject", - "2284": ">(", - "3797": "_sh", - "2068": "\u0120program", - "2579": "\u0120red", - "1791": "uc", - "1981": "\u00e2\u0122\u00a6", - "2323": "Test", - "4074": "\u0120admin", - "1305": "Tr", - "2340": "\u0120Americ", - "1237": ");", - "1511": "\u0120used", + "276": "an", + "3655": "rypt", + "2381": "init", + "3945": "'];\u010a", + "283": "ou", + "2435": "\u0120They", + "541": "ord", + "290": "ion", "658": "\u0120el", - "2810": "Util", - "1004": "[i", - "667": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "2051": "Prop", - "3059": "ze", - "3139": "\u0120grow", - "1682": "29", - "2400": "dition", - "505": "\u0120from", - "3530": "[:", + "615": "ystem", + "3201": "\u0120away", + "3019": "POST", + "3533": "_ID", + "275": "it", + "2203": "sp", + "2701": "eh", + "2121": "ght", + "1183": "\u0120Tr", + "1497": "lose", + "460": "ata", + "3597": "----------------------------------------------------------------", + "173": "\u00f1", + "2768": "\u0120following", + "2489": "\u0120match", + "2672": "\u0120Gener", + "3077": "'ve", + "2505": "TE", + "1016": "Th", + "3064": "attr", + "32": "A", + "2888": "\u0120belie", + "3651": "\u0120AND", + "2880": "max", + "3097": "AY", + "846": "cre", + "3062": "\u0120important", + "3187": "\u0120example", + "77": "n", + "1101": "\u0120also", + "1155": "ayer", + "1398": "\u0120Ex", + "3949": "tml", "7": "(", - "664": "ud", - "179": "\u00f7", - "1538": "ump", - "2584": "Instance", - "3710": "unsigned", - "212": "\u0118", - "898": "public", - "2445": "sc", - "620": "ull", - "1484": "99", - "99": "\u00a6", - "1149": "anc", - "3765": "\u0120Ab", - "3523": "plied", - "677": "ob", - "2120": "(x", - "3210": "ively", - "1623": "LO", - "115": "\u00b7", - "3600": "\u0120services", - "2967": "\u0120bro", - "3153": "ips", - "3336": "IB", - "702": "\"\u010a", - "2108": "usiness", - "3424": 
"\u0120property", - "1909": ",\u010d\u010a", - "1089": "\u0120ass", - "3103": "Stream", - "2899": "status", - "2565": "\u0120block", - "1536": "\u0120ob", - "1137": "ense", - "3801": "\u0120comput", - "2684": "\u0120There", - "170": "\u00ee", - "3021": "\u0120love", - "3878": "\u0120terms", - "2519": "Line", - "4011": "istance", - "772": "lect", - "1304": "\u0120make", - "4090": "uation", - "1956": "Date", - "638": "ome", - "1615": "\u0120cons", - "3978": "iet", - "624": "\u0120==", - "3759": "heet", - "670": "uct", - "1867": "\u0120cor", - "1499": "\u0120pol", - "193": "\u0105", - "694": "\u0120\u010a", + "489": "\u0120+", + "2932": ".con", + "3149": "ured", + "1180": "\u0120act", "2430": "\u0120width", - "2719": "#if", - "1245": "ious", - "164": "\u00e8", - "3707": ")\u010a\u010a\u010a", - "2029": ".\u00e2\u0122\u013f", - "895": "========", - "3696": "dr", - "966": "30", - "3008": "\u0120ann", - "3524": "ao", - "3829": "reet", - "3871": "\u0120together", - "3426": "amework", - "3629": "\u0120often", - "535": "ive", + "2339": "\u0120catch", + "2813": "77", + "2054": "box", + "95": "\u00a2", + "510": "\u0120[", + "967": "enc", + "3823": "\u0120human", + "961": "\u0120part", + "981": "pos", + "1745": "_r", + "2448": "\u00c3\u00bc", + "613": "ach", + "39": "H", + "1943": ".Add", + "391": "ap", + "165": "\u00e9", + "3594": "soft", + "493": "('", + "1318": "[]", + "299": "ro", + "2358": "({", + "1157": "ues", + "1387": "play", + "3922": "\u00ef\u00bc\u012e", + "2168": "oci", + "1741": "AC", + "3302": "bsite", + "3066": "Equal", + "3065": "ART", + "3361": "\u0120special", + "2483": "\u00c3\u0143", + "3421": "idual", + "700": "ther", + "1117": "_S", + "2445": "sc", + "3056": "\u0120occ", + "4066": "Base", + "2854": "\u0120hum", + "168": "\u00ec", + "2689": "oo", + "2604": "riter", + "68": "e", + "4072": "\u0120El", + "1853": "\u0120path", + "1973": "\u0120max", + "1662": "aj", + "2571": "\u0120throw", + "937": "SE", + "702": "\"\u010a", + "2780": "\u0120orig", + "2698": 
"\u0120fri", + "1200": "\u0120op", + "4060": "\u0120End", + "946": "AR", + "3619": "\u0120understand", + "1012": "include", + "3763": "click", + "3359": "aged", + "448": "lass", + "3416": "ison", + "1521": "\u0120these", + "1674": "\u0120java", + "2379": "angu", + "2255": "andom", + "1461": "\u0120him", + "3580": "\u0120See", + "344": "iv", + "3831": "\u0120strong", + "1698": "ware", + "373": "ame", + "3081": "output", + "1850": "*/", + "1367": "\u0120exp", + "2000": "for", + "1696": "\u0109\u0109\u0109\u0109\u0109\u0109", + "1115": "\u0120This", + "3837": "_at", + "571": "\u0120@", + "3418": "yc", + "115": "\u00b7", + "1003": "ash", + "1960": "itor", + "2650": "\u0120How", + "3548": "", "252": "\u0140", - "635": "ite", - "1528": "(d", - "75": "l", - "1860": "\u0120av", - "3216": "\u0120Up", - "1470": "\u0109c", - "2939": "\u0120Fr", - "2220": "opyright", - "863": "\u00e2\u0122\u013f", - "2187": "\u0120allow", - "2857": "ks", + "3186": "_info", + "3848": "VER", + "865": "\u0120x", + "514": "\u0120le", + "1272": "40", + "2748": "\u0120parent", "2134": "\u0120range", - "1347": "br", - "3567": "\u0120dev", - "1153": "\u0120pos", - "4056": "\u0120~", - "3018": "ffer", - "3050": "iversity", - "3725": "fully", - "3919": "ny", - "1282": "],\u010a", - "2273": "\u0120float", - "3971": "51", - "2712": "_G", - "1787": "plate", - "1880": "\u0120et", - "3149": "ured", - "2217": "\u0120image", - "3469": "\u0120opt", + "2429": "rame", + "3272": ".model", + "2859": "atures", + "4031": "NE", + "277": "ar", + "2352": "aster", + "2402": "ards", + "338": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", + "2026": "rg", + "2498": "unity", + "2989": "PL", + "1610": "reg", + "1655": "lease", + "1846": "AD", + "3472": "KE", + "1056": "iss", + "2454": "ched", + "2311": "ords", + "2184": "chool", + "418": "pt", + "192": "\u0104", + "3653": "ilar", + "2029": ".\u00e2\u0122\u013f", + "1277": "http", + "1938": "\u0120day", + "3105": "ama", + 
"187": "\u00ff", + "397": ">\u010a", + "3042": "amb", + "687": "\u0120cont", + "3965": "150", + "1067": "round", + "847": "ark", + "1374": "print", + "1901": "\u0120Z", + "2236": "\u0109\u0109\u0109\u0109\u0109\u0109\u0109\u0109", + "321": "il", + "597": "\u0120k", + "3920": "\u0120esc", + "1525": "ason", + "2677": "Click", + "1011": "ength", + "300": "as", + "202": "\u010e", + "3632": "src", + "672": "ork", + "1076": "duct", + "914": "25", + "577": "\u0120u", + "958": "\u0120inter", + "2858": ".text", + "2153": "\\\"", + "2917": "\u0120cur", + "3636": "\u0120password", + "689": "ia", + "1425": "ride", + "4059": "\u0120mind", + "2139": "\u0120nil", "1159": "_T", - "2569": "open", - "2365": "bug", - "2839": "03", - "274": "\u0120s", - "306": "ent", - "2872": "\u0120row", - "2433": "ittle", - "3924": ".H", - "2765": "\u0109d", - "4075": "mark", - "3392": "\u0120amount", - "1286": "ole", - "1675": "\u0120sim", - "1887": "\u0120system", - "2602": "_pro", - "3548": "=", + "2037": "message", "3682": "\u0120major", - "1440": "\u0120know", - "1644": "33", - "972": "18", - "1080": "\u0120co", - "3140": ".sw", - "1541": "\u0120don", - "692": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", - "1652": "\u0120cal", - "2411": "ium", - "3529": "/d", - "998": "to", - "837": "\u0120true", - "595": "))", - "3841": "je", - "386": "\u0120M", - "771": "ink", - "1169": "let", - "3839": "\u0120track", - "2961": "\u0120arr", - "3135": "\u0120results", - "1308": "RO", - "3203": " Date: Mon, 1 Jul 2024 11:08:12 -0400 Subject: [PATCH 06/10] created the split of string and list of codes --- codec-to-token.py | 4 +- research/two_byte_encoding.ipynb | 1466 +++++++++++------------------- 2 files changed, 536 insertions(+), 934 deletions(-) diff --git a/codec-to-token.py b/codec-to-token.py index 749a522..9f2aef0 100644 --- a/codec-to-token.py +++ b/codec-to-token.py @@ -50,8 +50,10 @@ def save(self, file_path): if __name__ == "__main__": test_dir = 
[f"dataset/default/partial-train/000{i}.parquet" for i in range(10)] vocab = Vocabulary() - + # buidling the vocabulary vocab.build_vocabulary(test_dir) + + # saving it to file vocab.save("snac-to-llama.json") diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index a284d41..766fc13 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ngkuissi/miniforge3/envs/laion/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import os, json\n", "from datasets import load_dataset\n" @@ -12,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -86,7 +95,7 @@ "list" ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -106,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -115,7 +124,7 @@ "str" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -126,66 +135,162 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, + "metadata": 
{}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "def add_special_character(corpus, special_char='▁'):\n", + " modified_corpus = []\n", + " for sentence in corpus:\n", + " modified_sentence = ''\n", + " words = []\n", + " previous_char_is_space = False\n", + " \n", + " for char in sentence:\n", + " if char == ' ':\n", + " previous_char_is_space = True\n", + " words.append(modified_sentence)\n", + " modified_sentence = ''\n", + " elif previous_char_is_space:\n", + " modified_sentence += special_char + char\n", + " previous_char_is_space = False\n", + " else:\n", + " modified_sentence += char\n", + " \n", + " modified_corpus.extend(words)\n", + " \n", + " return modified_corpus\n", + "\n", + "# Example usage\n", + "corpus = [t for t in txt]\n", + "modified_corpus = add_special_character(corpus)\n", + "word_freqs = defaultdict(int)\n", + "for word in modified_corpus:\n", + " word_freqs[word] += 1\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['896',\n", + " '▁2029',\n", + " '▁935',\n", + " '▁679',\n", + " '▁1115',\n", + " '▁3601',\n", + " '▁3000',\n", + " '▁222',\n", + " '▁3446',\n", + " '▁2218']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "modified_corpus[0:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4435" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(word_freqs)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "▁this ▁is ▁a ▁test\n", - "▁another ▁example ▁sentence\n" + "('896', 5)\n", + "('▁2029', 4261)\n", + "('▁935', 6058)\n", + "('▁679', 8699)\n", + "('▁1115', 1982)\n", + "('▁3601', 639)\n", + "('▁3000', 635)\n", + 
"('▁222', 27691)\n" ] } ], "source": [ - "def add_special_character(corpus, special_char='▁'):\n", - " modified_corpus = []\n", - " for sentence in corpus:\n", - " # Split the sentence into words\n", - " words = sentence.split()\n", - " # Add the special character to the beginning of each word\n", - " modified_words = [special_char + word for word in words]\n", - " # Join the modified words back into a sentence\n", - " modified_sentence = ' '.join(modified_words)\n", - " # Append the modified sentence to the new corpus\n", - " modified_corpus.append(modified_sentence)\n", - " return modified_corpus\n", - "\n", - "# Example usage\n", - "corpus = [\"this is a test\", \"another example sentence\"]\n", - "modified_corpus = add_special_character(corpus)\n", - "for sentence in modified_corpus:\n", - " print(sentence)\n" + "for i, item in enumerate(word_freqs.items()):\n", + " print(item)\n", + " if i == 7:\n", + " break" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "▁2029\n" + "4435\n", + "['▁990', '▁991', '▁992', '▁993', '▁994', '▁995', '▁996', '▁997', '▁998']\n" ] } ], "source": [ - "from collections import defaultdict\n", + "alphabet = set()\n", "\n", - "def add_special_character(corpus, special_char='▁'):\n", - " modified_corpus = []\n", - " for sentence in corpus:\n", + "for word in word_freqs.keys():\n", + " alphabet.add(word)\n", + "alphabet = sorted(alphabet)\n", + "\n", + "print(len(alphabet))\n", + "print(alphabet[-10:-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_corpus(text, special_char='_'):\n", + " output = []\n", + " for sentence in text:\n", " modified_sentence = ''\n", " words = []\n", + " full_sent = \"\"\n", " previous_char_is_space = False\n", " \n", " for char in sentence:\n", " if char == ' ':\n", " previous_char_is_space = True\n", " 
words.append(modified_sentence)\n", + " full_sent += modified_sentence\n", " modified_sentence = ''\n", " elif previous_char_is_space:\n", " modified_sentence += special_char + char\n", @@ -193,922 +298,417 @@ " else:\n", " modified_sentence += char\n", " \n", - " modified_corpus.extend(words)\n", + " output.append((full_sent, words))\n", " \n", - " return modified_corpus\n", - "\n", - "# Example usage\n", - "corpus = [txt[0], txt[1], txt[2]]\n", - "modified_corpus = add_special_character(corpus)\n", - "word_freqs = defaultdict(int)\n", - "for word in modified_corpus:\n", - " word_freqs[word] += 1\n" + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "896 2029 935 679 1115 3601 3000 222 3446 2218 3072 550 3652 665 2596 2809 3649 251 2610 2536 47 2852 2940 3353 3400 3336 325 2647 4076 3653 3253 58 3664 1424 1388 222 278 897 447 2355 2453 2531 2712 828 2895 2398 2908 901 2536 222 3686 2620 3254 3962 0 1448 222 863 3593 124 124 1048 1593 222 4086 2647 3236 1767 2800 697 514 3648 2337 1338 1114 340 3514 4076 2658 1954 3867 2300 251 317 7 1091 1768 1440 3167 672 1253 188 3544 2934 1368 479 3951 3387 514 2438 1262 3166 462 3530 333 2596 3808 2796 1920 794 263 2626 2596 1949 57 3990 3785 146 404 3731 479 3840 3840 3664 940 2550 4076 544 3465 3232 269 79 2159 3879 1734 3900 755 1756 818 800 1249 171 319 727 171 3698 3683 2596 3969 2431 1838 3969 126 2673 2596 4012 1010 2151 3437 417 2386 2712 3705 1838 3428 1168 1838 1527 3885 1952 2443 3997 3562 1667 3651 3981 2426 1494 1532 2426 3602 1855\n" + ] + } + ], + "source": [ + "print(str(txt[0]))\n" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "defaultdict(int,\n", - " {'896': 1,\n", - " '▁2029': 4,\n", - " '▁935': 2,\n", - " '▁679': 4,\n", - " '▁1115': 1,\n", - " '▁3601': 1,\n", - " '▁3000': 1,\n", - " '▁222': 
21,\n", - " '▁3446': 2,\n", - " '▁2218': 1,\n", - " '▁3072': 1,\n", - " '▁550': 3,\n", - " '▁3652': 1,\n", - " '▁665': 8,\n", - " '▁2596': 5,\n", - " '▁2809': 2,\n", - " '▁3649': 1,\n", - " '▁251': 6,\n", - " '▁2610': 3,\n", - " '▁2536': 2,\n", - " '▁47': 1,\n", - " '▁2852': 2,\n", - " '▁2940': 2,\n", - " '▁3353': 3,\n", - " '▁3400': 1,\n", - " '▁3336': 1,\n", - " '▁325': 1,\n", - " '▁2647': 4,\n", - " '▁4076': 8,\n", - " '▁3653': 1,\n", - " '▁3253': 2,\n", - " '▁58': 1,\n", - " '▁3664': 2,\n", - " '▁1424': 3,\n", - " '▁1388': 1,\n", - " '▁278': 3,\n", - " '▁897': 1,\n", - " '▁447': 4,\n", - " '▁2355': 1,\n", - " '▁2453': 4,\n", - " '▁2531': 4,\n", - " '▁2712': 20,\n", - " '▁828': 1,\n", - " '▁2895': 2,\n", - " '▁2398': 4,\n", - " '▁2908': 1,\n", - " '▁901': 4,\n", - " '▁3686': 2,\n", - " '▁2620': 1,\n", - " '▁3254': 2,\n", - " '▁3962': 1,\n", - " '▁0': 1,\n", - " '▁1448': 1,\n", - " '▁863': 1,\n", - " '▁3593': 4,\n", - " '▁124': 3,\n", - " '▁1048': 2,\n", - " '▁1593': 3,\n", - " '▁4086': 3,\n", - " '▁3236': 2,\n", - " '▁1767': 1,\n", - " '▁2800': 1,\n", - " '▁697': 2,\n", - " '▁514': 5,\n", - " '▁3648': 1,\n", - " '▁2337': 6,\n", - " '▁1338': 1,\n", - " '▁1114': 4,\n", - " '▁340': 1,\n", - " '▁3514': 6,\n", - " '▁2658': 1,\n", - " '▁1954': 1,\n", - " '▁3867': 3,\n", - " '▁2300': 1,\n", - " '▁317': 2,\n", - " '▁7': 15,\n", - " '▁1091': 1,\n", - " '▁1768': 3,\n", - " '▁1440': 1,\n", - " '▁3167': 1,\n", - " '▁672': 1,\n", - " '▁1253': 1,\n", - " '▁188': 8,\n", - " '▁3544': 1,\n", - " '▁2934': 1,\n", - " '▁1368': 1,\n", - " '▁479': 3,\n", - " '▁3951': 1,\n", - " '▁3387': 2,\n", - " '▁2438': 1,\n", - " '▁1262': 1,\n", - " '▁3166': 2,\n", - " '▁462': 2,\n", - " '▁3530': 1,\n", - " '▁333': 1,\n", - " '▁3808': 4,\n", - " '▁2796': 1,\n", - " '▁1920': 2,\n", - " '▁794': 1,\n", - " '▁263': 4,\n", - " '▁2626': 2,\n", - " '▁1949': 1,\n", - " '▁57': 1,\n", - " '▁3990': 1,\n", - " '▁3785': 1,\n", - " '▁146': 2,\n", - " '▁404': 1,\n", - " '▁3731': 15,\n", - " '▁3840': 2,\n", - " 
'▁940': 2,\n", - " '▁2550': 1,\n", - " '▁544': 1,\n", - " '▁3465': 1,\n", - " '▁3232': 4,\n", - " '▁269': 1,\n", - " '▁79': 2,\n", - " '▁2159': 1,\n", - " '▁3879': 2,\n", - " '▁1734': 1,\n", - " '▁3900': 1,\n", - " '▁755': 3,\n", - " '▁1756': 1,\n", - " '▁818': 1,\n", - " '▁800': 1,\n", - " '▁1249': 1,\n", - " '▁171': 2,\n", - " '▁319': 1,\n", - " '▁727': 2,\n", - " '▁3698': 2,\n", - " '▁3683': 1,\n", - " '▁3969': 4,\n", - " '▁2431': 3,\n", - " '▁1838': 6,\n", - " '▁126': 1,\n", - " '▁2673': 8,\n", - " '▁4012': 1,\n", - " '▁1010': 3,\n", - " '▁2151': 3,\n", - " '▁3437': 5,\n", - " '▁417': 1,\n", - " '▁2386': 5,\n", - " '▁3705': 1,\n", - " '▁3428': 1,\n", - " '▁1168': 1,\n", - " '▁1527': 2,\n", - " '▁3885': 1,\n", - " '▁1952': 2,\n", - " '▁2443': 1,\n", - " '▁3997': 1,\n", - " '▁3562': 1,\n", - " '▁1667': 2,\n", - " '▁3651': 1,\n", - " '▁3981': 2,\n", - " '▁2426': 3,\n", - " '▁1494': 2,\n", - " '▁1532': 2,\n", - " '▁3602': 2,\n", - " '2712': 1,\n", - " '▁1604': 5,\n", - " '▁3458': 3,\n", - " '▁4031': 1,\n", - " '▁1669': 11,\n", - " '▁2008': 6,\n", - " '▁857': 1,\n", - " '▁3123': 2,\n", - " '▁3321': 2,\n", - " '▁1887': 1,\n", - " '▁846': 1,\n", - " '▁763': 2,\n", - " '▁612': 1,\n", - " '▁3846': 1,\n", - " '▁1060': 3,\n", - " '▁312': 1,\n", - " '▁859': 3,\n", - " '▁3638': 1,\n", - " '▁2238': 5,\n", - " '▁590': 1,\n", - " '▁2458': 2,\n", - " '▁3847': 1,\n", - " '▁304': 3,\n", - " '▁1888': 1,\n", - " '▁1986': 1,\n", - " '▁3412': 3,\n", - " '▁1885': 1,\n", - " '▁375': 5,\n", - " '▁2176': 1,\n", - " '▁887': 1,\n", - " '▁3051': 4,\n", - " '▁3374': 1,\n", - " '▁3485': 1,\n", - " '▁973': 2,\n", - " '▁622': 1,\n", - " '▁2557': 1,\n", - " '▁3750': 3,\n", - " '▁240': 3,\n", - " '▁2452': 2,\n", - " '▁2913': 4,\n", - " '▁2525': 3,\n", - " '▁691': 2,\n", - " '▁1363': 1,\n", - " '▁796': 1,\n", - " '▁1232': 1,\n", - " '▁1332': 1,\n", - " '▁3282': 1,\n", - " '▁966': 4,\n", - " '▁3883': 1,\n", - " '▁1774': 3,\n", - " '▁2559': 2,\n", - " '▁748': 1,\n", - " '▁2975': 1,\n", - " '▁2608': 
1,\n", - " '▁3345': 4,\n", - " '▁868': 3,\n", - " '▁731': 2,\n", - " '▁2872': 1,\n", - " '▁1336': 4,\n", - " '▁2488': 1,\n", - " '▁3706': 2,\n", - " '▁2276': 2,\n", - " '▁3739': 1,\n", - " '▁434': 1,\n", - " '▁2203': 2,\n", - " '▁2019': 3,\n", - " '▁873': 1,\n", - " '▁1273': 1,\n", - " '▁3627': 1,\n", - " '▁2912': 1,\n", - " '▁4046': 1,\n", - " '▁120': 1,\n", - " '▁2888': 1,\n", - " '▁1707': 2,\n", - " '▁1153': 2,\n", - " '▁2927': 1,\n", - " '▁1188': 3,\n", - " '▁1400': 2,\n", - " '▁397': 2,\n", - " '▁1140': 1,\n", - " '▁792': 1,\n", - " '▁20': 2,\n", - " '▁3452': 4,\n", - " '▁1247': 4,\n", - " '▁3297': 3,\n", - " '▁326': 1,\n", - " '▁2813': 4,\n", - " '▁2365': 4,\n", - " '▁3368': 2,\n", - " '▁1129': 2,\n", - " '▁3260': 1,\n", - " '▁186': 3,\n", - " '▁1814': 2,\n", - " '▁1445': 2,\n", - " '▁438': 1,\n", - " '▁3323': 2,\n", - " '▁3744': 2,\n", - " '▁2392': 2,\n", - " '▁448': 2,\n", - " '▁1953': 5,\n", - " '▁2204': 7,\n", - " '▁2430': 1,\n", - " '▁1094': 3,\n", - " '▁1702': 1,\n", - " '▁2688': 3,\n", - " '▁2380': 1,\n", - " '▁3714': 1,\n", - " '▁3391': 1,\n", - " '▁2738': 5,\n", - " '▁2312': 3,\n", - " '▁3866': 3,\n", - " '▁4069': 1,\n", - " '▁752': 3,\n", - " '▁1861': 2,\n", - " '▁73': 3,\n", - " '▁3403': 1,\n", - " '▁3825': 2,\n", - " '▁685': 5,\n", - " '▁332': 1,\n", - " '▁305': 1,\n", - " '▁4049': 1,\n", - " '▁1004': 1,\n", - " '▁3468': 2,\n", - " '▁2885': 2,\n", - " '▁740': 1,\n", - " '▁1001': 1,\n", - " '▁3367': 1,\n", - " '▁48': 2,\n", - " '▁2617': 2,\n", - " '▁793': 2,\n", - " '▁927': 1,\n", - " '▁2801': 2,\n", - " '▁3229': 3,\n", - " '▁3896': 1,\n", - " '▁886': 1,\n", - " '▁773': 1,\n", - " '▁3396': 4,\n", - " '▁1160': 1,\n", - " '▁1968': 1,\n", - " '▁3272': 1,\n", - " '▁381': 1,\n", - " '▁1307': 3,\n", - " '▁2240': 1,\n", - " '▁215': 1,\n", - " '▁726': 1,\n", - " '▁3195': 2,\n", - " '▁3026': 1,\n", - " '▁903': 1,\n", - " '▁2317': 2,\n", - " '▁1484': 2,\n", - " '▁2182': 4,\n", - " '▁3688': 1,\n", - " '▁234': 1,\n", - " '▁3009': 1,\n", - " '▁370': 1,\n", - " 
'▁3871': 1,\n", - " '▁3551': 2,\n", - " '▁499': 7,\n", - " '▁1742': 1,\n", - " '▁582': 1,\n", - " '▁862': 1,\n", - " '▁930': 2,\n", - " '▁1097': 1,\n", - " '▁688': 2,\n", - " '▁2450': 2,\n", - " '▁1658': 1,\n", - " '▁97': 1,\n", - " '▁2502': 2,\n", - " '▁308': 2,\n", - " '▁746': 2,\n", - " '▁488': 1,\n", - " '▁608': 2,\n", - " '▁2948': 1,\n", - " '▁3919': 2,\n", - " '▁3219': 1,\n", - " '▁1941': 1,\n", - " '▁845': 1,\n", - " '▁1398': 1,\n", - " '▁2440': 1,\n", - " '▁3765': 2,\n", - " '▁1644': 2,\n", - " '▁2206': 2,\n", - " '▁3795': 1,\n", - " '▁2076': 2,\n", - " '▁3089': 1,\n", - " '▁2331': 2,\n", - " '▁2807': 1,\n", - " '▁1660': 2,\n", - " '▁95': 1,\n", - " '▁3671': 4,\n", - " '▁2918': 4,\n", - " '▁3008': 2,\n", - " '▁152': 1,\n", - " '▁117': 2,\n", - " '▁1924': 2,\n", - " '▁365': 3,\n", - " '▁3893': 3,\n", - " '▁3069': 3,\n", - " '▁1725': 1,\n", - " '▁3258': 1,\n", - " '▁1794': 1,\n", - " '▁2718': 2,\n", - " '▁829': 4,\n", - " '▁575': 1,\n", - " '▁2326': 2,\n", - " '▁294': 1,\n", - " '▁4054': 1,\n", - " '▁1349': 1,\n", - " '▁3177': 2,\n", - " '▁3872': 1,\n", - " '▁3281': 1,\n", - " '▁588': 1,\n", - " '▁992': 1,\n", - " '▁3687': 2,\n", - " '▁3087': 1,\n", - " '▁3786': 1,\n", - " '▁450': 1,\n", - " '▁1788': 14,\n", - " '▁831': 1,\n", - " '▁2984': 1,\n", - " '▁3180': 2,\n", - " '▁206': 1,\n", - " '▁100': 1,\n", - " '▁2506': 1,\n", - " '▁3230': 1,\n", - " '▁399': 2,\n", - " '▁204': 4,\n", - " '▁1806': 5,\n", - " '▁3603': 1,\n", - " '▁2779': 2,\n", - " '▁289': 5,\n", - " '▁572': 1,\n", - " '▁1032': 2,\n", - " '▁1932': 5,\n", - " '▁990': 2,\n", - " '▁3702': 4,\n", - " '▁1046': 3,\n", - " '▁3161': 2,\n", - " '▁2085': 1,\n", - " '▁3350': 1,\n", - " '▁702': 1,\n", - " '▁489': 1,\n", - " '▁2434': 3,\n", - " '▁3693': 1,\n", - " '▁2788': 4,\n", - " '▁1026': 1,\n", - " '▁3251': 1,\n", - " '▁1701': 1,\n", - " '▁1477': 1,\n", - " '▁318': 1,\n", - " '▁1930': 2,\n", - " '▁1325': 1,\n", - " '▁1595': 1,\n", - " '▁237': 2,\n", - " '▁1054': 1,\n", - " '▁3820': 2,\n", - " '▁931': 
1,\n", - " '▁1863': 2,\n", - " '▁3218': 5,\n", - " '▁2094': 3,\n", - " '▁3937': 1,\n", - " '▁1229': 1,\n", - " '▁1408': 2,\n", - " '▁153': 1,\n", - " '▁1990': 1,\n", - " '▁1435': 1,\n", - " '▁427': 4,\n", - " '▁961': 1,\n", - " '▁3030': 4,\n", - " '▁1516': 2,\n", - " '▁3775': 1,\n", - " '▁3013': 1,\n", - " '▁267': 1,\n", - " '▁477': 1,\n", - " '▁1134': 4,\n", - " '▁2083': 2,\n", - " '▁1217': 1,\n", - " '▁243': 1,\n", - " '▁2070': 2,\n", - " '▁695': 1,\n", - " '▁3566': 2,\n", - " '▁3075': 1,\n", - " '▁2167': 1,\n", - " '▁616': 1,\n", - " '▁3574': 1,\n", - " '▁3375': 1,\n", - " '▁1655': 3,\n", - " '▁457': 3,\n", - " '▁1131': 1,\n", - " '▁3316': 1,\n", - " '▁3498': 2,\n", - " '▁1261': 1,\n", - " '▁1369': 1,\n", - " '▁2516': 1,\n", - " '▁435': 2,\n", - " '▁890': 1,\n", - " '▁951': 1,\n", - " '▁1443': 3,\n", - " '▁2003': 2,\n", - " '▁1648': 2,\n", - " '▁1299': 1,\n", - " '▁1395': 1,\n", - " '▁3324': 3,\n", - " '▁2593': 1,\n", - " '▁719': 1,\n", - " '▁2939': 4,\n", - " '▁3592': 1,\n", - " '▁3613': 1,\n", - " '▁3487': 2,\n", - " '▁2786': 1,\n", - " '▁3318': 1,\n", - " '▁231': 1,\n", - " '▁1346': 1,\n", - " '▁3828': 1,\n", - " '▁1482': 1,\n", - " '▁175': 4,\n", - " '▁1017': 2,\n", - " '▁99': 2,\n", - " '▁2469': 1,\n", - " '▁584': 2,\n", - " '▁2001': 2,\n", - " '▁2750': 5,\n", - " '▁2573': 1,\n", - " '▁784': 1,\n", - " '▁1524': 3,\n", - " '▁1580': 1,\n", - " '▁1793': 1,\n", - " '▁2874': 1,\n", - " '▁1574': 2,\n", - " '▁2160': 4,\n", - " '▁1316': 2,\n", - " '▁2655': 1,\n", - " '▁1675': 1,\n", - " '▁1052': 1,\n", - " '▁2147': 7,\n", - " '▁1351': 1,\n", - " '▁527': 2,\n", - " '▁26': 1,\n", - " '▁3286': 1,\n", - " '▁2228': 1,\n", - " '▁50': 5,\n", - " '▁1112': 1,\n", - " '▁2342': 2,\n", - " '▁761': 1,\n", - " '▁1093': 1,\n", - " '▁670': 1,\n", - " '▁1312': 1,\n", - " '▁3044': 1,\n", - " '▁3199': 1,\n", - " '▁295': 2,\n", - " '▁2259': 3,\n", - " '▁3384': 5,\n", - " '▁354': 3,\n", - " '▁2943': 2,\n", - " '▁2952': 1,\n", - " '▁3194': 1,\n", - " '▁1231': 1,\n", - " '▁3288': 1,\n", 
- " '▁3672': 1,\n", - " '▁1539': 2,\n", - " '▁1161': 3,\n", - " '▁2675': 1,\n", - " '▁2876': 1,\n", - " '▁3979': 1,\n", - " '▁2444': 2,\n", - " '▁2013': 2,\n", - " '▁2224': 3,\n", - " '▁926': 3,\n", - " '▁580': 1,\n", - " '▁1430': 1,\n", - " '▁1907': 3,\n", - " '▁1059': 2,\n", - " '▁1544': 1,\n", - " '▁2080': 1,\n", - " '▁114': 1,\n", - " '▁1130': 2,\n", - " '▁2066': 1,\n", - " '▁1848': 2,\n", - " '▁1726': 1,\n", - " '▁1567': 1,\n", - " '▁833': 3,\n", - " '▁3724': 3,\n", - " '▁3910': 2,\n", - " '▁568': 1,\n", - " '▁1423': 2,\n", - " '▁1305': 1,\n", - " '▁1631': 1,\n", - " '▁536': 2,\n", - " '▁2174': 1,\n", - " '▁979': 1,\n", - " '▁1688': 1,\n", - " '▁484': 1,\n", - " '▁3708': 1,\n", - " '▁3875': 1,\n", - " '▁1148': 1,\n", - " '▁2700': 1,\n", - " '▁3761': 1,\n", - " '▁1420': 1,\n", - " '▁1761': 1,\n", - " '▁2115': 1,\n", - " '▁2419': 1,\n", - " '▁1283': 2,\n", - " '▁2310': 1,\n", - " '▁756': 1,\n", - " '▁1164': 1,\n", - " '▁92': 1,\n", - " '▁2328': 1,\n", - " '▁1877': 1,\n", - " '▁85': 1,\n", - " '▁2662': 2,\n", - " '▁4093': 1,\n", - " '▁3415': 1,\n", - " '▁2408': 1,\n", - " '▁3269': 1,\n", - " '▁1462': 3,\n", - " '▁2435': 1,\n", - " '▁3205': 3,\n", - " '▁230': 1,\n", - " '▁1594': 1,\n", - " '▁3881': 1,\n", - " '▁1934': 1,\n", - " '▁2676': 1,\n", - " '▁2891': 1,\n", - " '▁782': 2,\n", - " '▁4071': 2,\n", - " '▁2338': 2,\n", - " '▁623': 1,\n", - " '▁1733': 2,\n", - " '▁757': 1,\n", - " '▁3467': 1,\n", - " '▁3352': 3,\n", - " '▁2467': 1,\n", - " '▁1912': 1,\n", - " '▁775': 2,\n", - " '▁2254': 1,\n", - " '▁216': 1,\n", - " '▁2736': 1,\n", - " '▁3106': 3,\n", - " '▁4036': 1,\n", - " '▁4080': 1,\n", - " '▁2613': 1,\n", - " '▁2870': 1,\n", - " '▁1970': 1,\n", - " '▁3183': 1,\n", - " '▁2782': 5,\n", - " '▁1851': 1,\n", - " '▁599': 1,\n", - " '▁3028': 1,\n", - " '▁105': 3,\n", - " '▁764': 1,\n", - " '▁3450': 1,\n", - " '▁1905': 1,\n", - " '▁3806': 1,\n", - " '▁654': 1,\n", - " '▁2831': 1,\n", - " '▁1343': 1,\n", - " '▁1813': 1,\n", - " '▁2415': 1,\n", - " '▁2558': 2,\n", - 
" '▁657': 1,\n", - " '▁3507': 2,\n", - " '▁1581': 1,\n", - " '▁366': 2,\n", - " '▁3005': 1,\n", - " '▁2124': 1,\n", - " '▁1446': 1,\n", - " '▁2817': 1,\n", - " '▁802': 1,\n", - " '▁2462': 2,\n", - " '▁2448': 1,\n", - " '▁2068': 1,\n", - " '▁928': 1,\n", - " '▁3547': 1,\n", - " '▁2992': 1,\n", - " '▁3463': 1,\n", - " '▁2100': 1,\n", - " '▁2909': 2,\n", - " '▁2689': 1,\n", - " '▁933': 2,\n", - " '▁2473': 1,\n", - " '▁3856': 1,\n", - " '▁3662': 1,\n", - " '▁2900': 1,\n", - " '▁208': 1,\n", - " '▁1106': 1,\n", - " '▁2513': 1,\n", - " '▁3849': 2,\n", - " '▁981': 1,\n", - " '▁3181': 1,\n", - " '▁1818': 3,\n", - " '▁2470': 1,\n", - " '▁680': 3,\n", - " '▁77': 1,\n", - " '▁2946': 1,\n", - " '▁3149': 1,\n", - " '▁529': 1,\n", - " '▁162': 1,\n", - " '▁1055': 1,\n", - " '▁2867': 1,\n", - " '▁1904': 2,\n", - " '▁2511': 1,\n", - " '▁2964': 1,\n", - " '▁3619': 1,\n", - " '▁1310': 1,\n", - " '▁3570': 1,\n", - " '▁1751': 2,\n", - " '▁410': 3,\n", - " '▁660': 1,\n", - " '▁2320': 2,\n", - " '▁344': 1,\n", - " '▁3740': 1,\n", - " '▁3704': 1,\n", - " '▁1519': 1,\n", - " '▁945': 1,\n", - " '▁87': 1,\n", - " '▁429': 1,\n", - " '▁247': 1,\n", - " '▁196': 3,\n", - " '▁3998': 1,\n", - " '▁449': 2,\n", - " '▁721': 1,\n", - " '▁3171': 1,\n", - " '▁1578': 1,\n", - " '▁455': 1,\n", - " '▁3855': 1,\n", - " '▁1678': 1,\n", - " '▁3173': 1,\n", - " '▁3112': 1,\n", - " '▁177': 1,\n", - " '▁4033': 1,\n", - " '▁637': 2,\n", - " '▁3461': 1,\n", - " '▁1041': 1,\n", - " '▁2089': 1,\n", - " '▁2968': 1,\n", - " '▁991': 1,\n", - " '▁3901': 4,\n", - " '▁811': 1,\n", - " '▁1088': 1,\n", - " '▁1875': 2,\n", - " '▁3212': 1,\n", - " '▁1589': 1,\n", - " '▁497': 1,\n", - " '▁4059': 1,\n", - " '▁1269': 1,\n", - " '▁946': 1,\n", - " '▁595': 1,\n", - " '▁684': 1,\n", - " '▁2253': 1,\n", - " '▁31': 1,\n", - " '▁149': 1,\n", - " '▁3877': 1,\n", - " '▁351': 1,\n", - " '▁2991': 1,\n", - " '▁3102': 2,\n", - " '▁4087': 1,\n", - " '▁2221': 2,\n", - " '▁3227': 1,\n", - " '▁1411': 1,\n", - " '▁2158': 1,\n", - " '▁1833': 
1,\n", - " '▁3154': 1,\n", - " '▁60': 1,\n", - " '▁1764': 1,\n", - " '▁2073': 3,\n", - " '▁1565': 1,\n", - " '▁902': 1,\n", - " '▁1143': 1,\n", - " '▁716': 1,\n", - " '▁3936': 1,\n", - " '▁646': 1,\n", - " '▁3799': 1,\n", - " '▁1238': 1,\n", - " '▁2616': 1,\n", - " '▁4026': 1,\n", - " '▁1617': 1,\n", - " '▁2491': 1,\n", - " '▁1659': 1,\n", - " '▁832': 1,\n", - " '▁1425': 1,\n", - " '▁586': 2,\n", - " '▁1042': 1,\n", - " '▁3187': 1,\n", - " '▁39': 1,\n", - " '▁1444': 1,\n", - " '▁2480': 1,\n", - " '▁956': 1,\n", - " '▁1485': 1,\n", - " '▁2410': 1,\n", - " '▁3001': 1,\n", - " '▁3907': 1,\n", - " '▁2634': 1,\n", - " '▁1757': 3,\n", - " '▁1966': 1,\n", - " '▁3062': 3,\n", - " '▁1747': 2,\n", - " '▁2166': 1,\n", - " '▁2097': 2,\n", - " '▁2005': 2,\n", - " '▁219': 1,\n", - " '▁3560': 1,\n", - " '▁1159': 1,\n", - " '▁645': 1,\n", - " '▁3246': 1,\n", - " '▁1246': 1,\n", - " '▁2290': 1,\n", - " '▁3497': 1,\n", - " '▁15': 1,\n", - " '▁12': 1,\n", - " '▁1390': 1,\n", - " '▁4070': 1,\n", - " '▁2485': 1,\n", - " '▁1599': 1,\n", - " '▁3477': 1,\n", - " '▁169': 1,\n", - " '▁777': 1,\n", - " '▁1489': 1,\n", - " '▁2735': 1,\n", - " '▁1882': 1,\n", - " '3446': 1,\n", - " '▁2542': 1,\n", - " '▁1488': 1,\n", - " '▁1810': 1,\n", - " '▁3480': 1,\n", - " '▁3220': 1,\n", - " '▁98': 1,\n", - " '▁1749': 1,\n", - " '▁1893': 1,\n", - " '▁781': 1,\n", - " '▁1993': 1,\n", - " '▁1991': 1,\n", - " '▁3659': 1,\n", - " '▁3047': 1,\n", - " '▁1289': 2,\n", - " '▁2945': 1,\n", - " '▁2280': 1,\n", - " '▁2524': 1,\n", - " '▁2917': 1,\n", - " '▁4042': 1,\n", - " '▁280': 1,\n", - " '▁1375': 1,\n", - " '▁134': 1,\n", - " '▁400': 1,\n", - " '▁1481': 1,\n", - " '▁463': 1,\n", - " '▁3442': 1,\n", - " '▁2357': 1,\n", - " '▁2947': 1,\n", - " '▁258': 1,\n", - " '▁2123': 1,\n", - " '▁571': 1,\n", - " '▁3762': 1,\n", - " '▁3978': 1,\n", - " '▁1670': 1,\n", - " '▁651': 1,\n", - " '▁2694': 2,\n", - " '▁894': 2,\n", - " '▁1061': 1,\n", - " '▁3020': 1,\n", - " '▁218': 1,\n", - " '▁921': 1,\n", - " '▁2265': 1,\n", - " 
'▁1560': 1,\n", - " '▁1573': 1,\n", - " '▁610': 1,\n", - " '▁2862': 1,\n", - " '▁510': 1,\n", - " '▁1739': 1,\n", - " '▁950': 1,\n", - " '▁4009': 1,\n", - " '▁2332': 1,\n", - " '▁442': 1,\n", - " '▁2887': 1,\n", - " '▁853': 1,\n", - " '▁1865': 1,\n", - " '▁2002': 1,\n", - " '▁2356': 2,\n", - " '▁25': 1,\n", - " '▁1507': 1,\n", - " '▁3362': 1,\n", - " '▁2507': 1,\n", - " '▁3983': 2,\n", - " '▁3738': 1,\n", - " '▁1284': 1,\n", - " '▁2249': 1,\n", - " '▁472': 2,\n", - " '▁3717': 1,\n", - " '▁1056': 1,\n", - " '▁3390': 2,\n", - " '▁1554': 1,\n", - " '▁3040': 1,\n", - " '▁168': 1,\n", - " '▁4040': 1,\n", - " '▁3342': 1,\n", - " '▁90': 2,\n", - " '▁2322': 1,\n", - " '▁3165': 1,\n", - " '▁606': 1,\n", - " '▁997': 1,\n", - " '▁2520': 1,\n", - " '▁3356': 2,\n", - " '▁53': 1,\n", - " '▁62': 2,\n", - " '▁1421': 1,\n", - " '▁1650': 1,\n", - " '▁2244': 1,\n", - " '▁1095': 1,\n", - " '▁3684': 1,\n", - " '▁3410': 1,\n", - " '▁2971': 1,\n", - " '▁210': 1,\n", - " '▁700': 1,\n", - " '▁955': 1,\n", - " '▁1769': 1,\n", - " '▁1919': 1,\n", - " '▁2169': 1,\n", - " '▁977': 1,\n", - " '▁2053': 1,\n", - " '▁1234': 1,\n", - " '▁1705': 1,\n", - " '▁1909': 1,\n", - " '▁2133': 1,\n", - " '▁3933': 1,\n", - " '▁2503': 1,\n", - " '▁441': 1,\n", - " '▁2266': 1,\n", - " '▁2609': 1})" + 
"('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602',\n", + " ['896',\n", + " '_2029',\n", + " '_935',\n", + " '_679',\n", + " '_1115',\n", + " '_3601',\n", + " '_3000',\n", + " '_222',\n", + " '_3446',\n", + " '_2218',\n", + " '_3072',\n", + " '_550',\n", + " '_3652',\n", + " '_665',\n", + " '_2596',\n", + " '_2809',\n", + " '_3649',\n", + " '_251',\n", + " '_2610',\n", + " '_2536',\n", + " '_47',\n", + " '_2852',\n", + " '_2940',\n", + " '_3353',\n", + " '_3400',\n", + " '_3336',\n", + " '_325',\n", + " '_2647',\n", + " '_4076',\n", + " '_3653',\n", + " '_3253',\n", + " '_58',\n", + " '_3664',\n", + " '_1424',\n", + " '_1388',\n", + " '_222',\n", + " '_278',\n", + " '_897',\n", + " '_447',\n", + " '_2355',\n", + " '_2453',\n", + " '_2531',\n", + " '_2712',\n", + " '_828',\n", + " '_2895',\n", + " '_2398',\n", + " '_2908',\n", + " '_901',\n", + " '_2536',\n", + " '_222',\n", + " '_3686',\n", + " '_2620',\n", + " '_3254',\n", + " '_3962',\n", + " '_0',\n", + " '_1448',\n", + " '_222',\n", + " '_863',\n", + " '_3593',\n", + " '_124',\n", + " '_124',\n", + " '_1048',\n", + " '_1593',\n", + " '_222',\n", + " '_4086',\n", + " '_2647',\n", + " '_3236',\n", + " '_1767',\n", + " '_2800',\n", + 
" '_697',\n", + " '_514',\n", + " '_3648',\n", + " '_2337',\n", + " '_1338',\n", + " '_1114',\n", + " '_340',\n", + " '_3514',\n", + " '_4076',\n", + " '_2658',\n", + " '_1954',\n", + " '_3867',\n", + " '_2300',\n", + " '_251',\n", + " '_317',\n", + " '_7',\n", + " '_1091',\n", + " '_1768',\n", + " '_1440',\n", + " '_3167',\n", + " '_672',\n", + " '_1253',\n", + " '_188',\n", + " '_3544',\n", + " '_2934',\n", + " '_1368',\n", + " '_479',\n", + " '_3951',\n", + " '_3387',\n", + " '_514',\n", + " '_2438',\n", + " '_1262',\n", + " '_3166',\n", + " '_462',\n", + " '_3530',\n", + " '_333',\n", + " '_2596',\n", + " '_3808',\n", + " '_2796',\n", + " '_1920',\n", + " '_794',\n", + " '_263',\n", + " '_2626',\n", + " '_2596',\n", + " '_1949',\n", + " '_57',\n", + " '_3990',\n", + " '_3785',\n", + " '_146',\n", + " '_404',\n", + " '_3731',\n", + " '_479',\n", + " '_3840',\n", + " '_3840',\n", + " '_3664',\n", + " '_940',\n", + " '_2550',\n", + " '_4076',\n", + " '_544',\n", + " '_3465',\n", + " '_3232',\n", + " '_269',\n", + " '_79',\n", + " '_2159',\n", + " '_3879',\n", + " '_1734',\n", + " '_3900',\n", + " '_755',\n", + " '_1756',\n", + " '_818',\n", + " '_800',\n", + " '_1249',\n", + " '_171',\n", + " '_319',\n", + " '_727',\n", + " '_171',\n", + " '_3698',\n", + " '_3683',\n", + " '_2596',\n", + " '_3969',\n", + " '_2431',\n", + " '_1838',\n", + " '_3969',\n", + " '_126',\n", + " '_2673',\n", + " '_2596',\n", + " '_4012',\n", + " '_1010',\n", + " '_2151',\n", + " '_3437',\n", + " '_417',\n", + " '_2386',\n", + " '_2712',\n", + " '_3705',\n", + " '_1838',\n", + " '_3428',\n", + " '_1168',\n", + " '_1838',\n", + " '_1527',\n", + " '_3885',\n", + " '_1952',\n", + " '_2443',\n", + " '_3997',\n", + " '_3562',\n", + " '_1667',\n", + " '_3651',\n", + " '_3981',\n", + " '_2426',\n", + " '_1494',\n", + " '_1532',\n", + " '_2426',\n", + " '_3602'])" ] }, - "execution_count": 26, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "word_freqs" 
+ "preprocess_corpus(txt)[0]" ] }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602\n", + "['896', '_2029', '_935', '_679', '_1115', '_3601', '_3000', '_222', '_3446', '_2218', '_3072', '_550', '_3652', '_665', '_2596', '_2809', '_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', '_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', 
'_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', '_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673', '_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386', '_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981', '_2426', '_1494', '_1532', '_2426', '_3602']\n" + ] + } + ], + "source": [ + "for sent, word in preprocess_corpus(txt):\n", + " print(sent)\n", + " print(word)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "splits = {sentence: [c for c in word] for (sentence, word) in preprocess_corpus(txt)}" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602', ['896', '_2029', '_935', '_679', '_1115', '_3601', 
'_3000', '_222', '_3446', '_2218', '_3072', '_550', '_3652', '_665', '_2596', '_2809', '_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', '_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', '_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', '_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673', '_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386', '_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981', '_2426', '_1494', '_1532', '_2426', '_3602'])\n", + 
"('2712_1604_3458_4031_1604_1669_2008_2337_857_3123_3321_1887_846_2398_763_612_3846_1060_312_859_3638_763_2238_590_2458_3847_304_1888_1986_2238_3412_1885_375_2176_887_3051_2238_3374_3485_973_251_622_3051_2557_727_3750_240_2386_2452_2712_2913_2525_691_1114_1363_796_3731_1232_1332_3282_966_3883_2431_1774_2559_755_748_2975_2608_3345_7_868_731_2872_1336_2488_3706_2276_3739_434_2203_2019_873_1273_3627_2912_4046_120_2888_1707_1153_3731_2927_1188_1400_966_397_1153_2712_1140_792_3412_20_3452_2452_1247_3297_1669_326_2813_2365_3368_1774_1129_3260_186_1814_1445_438_1247_3323_3368_3744_2392_448_1953_1247_2204_2430_1094_1702_2688_1953_2712_966_2380_3714_3446_3391_2531_2738_2312_3866_1952_2238_4069_752_1861_73_3403_3825_685_1707_332_2738_685_305_4049_1004_186_1188_4076_3468_2885_740_1001_251_3367_2712_48_752_2673_2617_793_927_2712_2801_3229_3896_886_773_3229_3396_1160_1968_3272_381_3452_1307_3396_2240_1307_215_726_679_3195_2712_3026_903_2317_1114_901_1484_665_2182_3688_7_234_3009_370_2712_3871_3551_499_240_1742_2531_582_862_930_1097_688_2450_1658_2738_97_3698_2502_308_746_488_608_2948_1669_3919_2204_2431_3219_1774_1941_845_1398_2440_3765_1644_2206_3795_2076_1953_685_1953_1953_3879_3089_2331_2807_2238_1660_95_222_3671_2386_2918_1094_3008_152_117_1924_365_3893_3069_1725_499_3731_3258_1794_2718_2502_829_575_2326_294_4054_1349_1814_3177_1188_3872_3281_588_3750_2813_992_3687_3731_3087_3786_2453_450_2365_930_1788_831_1644_2984_3180_1010_206_1788_3808_100_2506_3230_399_204_1806_48_3603_1669_2779_289_3514_572_1032_1932_1060_990_3702_1046_3161_2085_1932_1932_3350_702_263_665_1032_2895_901_489_859_2434_2712_3693_2788_1838_1026_3251_1701_665_1477_204_2008_318_289_2788_1930_1325_1595_237_1054_3820_1669_665_931_1863_3218_2094_859_289_2712_3937_1920_1229_1408_153_1990_2712_1435_427_1838_961_901_2450_3030_1516_3775_3013_267_204_2626_222_477_1134_2083_1217_243_2070_695_550_2434_2525_3566_3253_3075_222_2167_616_3574_3375_1655_457_1131_3316_3702_2076_990_3498_1261_1369_2516_435_890_3969_951_3867_2
22_1443_1134_1838_2003_1648_447_2647_1299_1395_3324_3514_1046_1060_188_2593_3498_3514_1648_2813_3353_2647_1048_719_3353_2939_3592_3613_2788_3487_499_2786_2801_3318_3396_7_2206_231_1346_240_3828_1482_188_966_175_1017_868_99_2469_222_584_2001_2750_2573_784_2001_1524_2913_3593_1580_1793_2874_1574_2160_1316_3254_2655_1675_2750_1052_2147_2809_1351_3008_1443_527_3321_3030_26_3286_2228_50_1112_2342_761_2559_3702_3702_1093_670_527_222_2003_2750_1312_3044_3199_295_222_4086_2673_263_1443_251_2259_222_4086_3384_263_354_2943_2943_222_2952_3194_1231_354_1932_3288_222_354_1593_3384_3672_1539_3551_222_1161_3324_3458_2675_1307_2876_665_688_3979_1660_2444_3229_2013_665_3345_2224_2224_926_2224_580_1430_1129_1907_3177_1161_1059_4076_188_1544_2080_114_1130_2066_186_665_3671_1848_1726_1567_2673_833_3724_3910_289_568_1423_2008_1305_188_375_1631_3724_20_536_2174_1788_124_979_1688_484_679_3708_188_3875_1516_3236_1148_2700_3761_1524_1420_1761_2115_2419_457_2813_222_3808_175_1283_2310_2673_756_1806_1164_2083_499_146_92_2328_1788_1877_2750_1768_85_2662_4093_2147_2317_1134_204_2779_1768_3415_2147_2408_3269_1462_2435_2434_3205_2147_230_1594_3881_1114_1934_2676_2891_782_4071_3687_3195_2338_623_1733_757_3467_3706_3352_1806_2467_188_3161_1912_3724_1907_2750_3384_3030_775_2254_3514_427_3893_1848_7_216_3205_2736_3106_2331_4036_2160_2939_4080_2613_73_3412_2870_3731_3566_1970_3183_2782_1851_599_222_308_3028_1863_105_764_3450_1788_1905_1283_3866_462_365_3806_654_2913_2831_1343_3106_365_1813_3731_2415_2558_657_2610_1408_3507_1788_2940_1581_2013_366_2337_3005_1788_2124_1010_755_278_1446_2817_222_3808_802_2462_2448_2068_3507_3731_928_3547_2992_2204_3463_2100_1788_2909_1094_940_3106_2689_933_3731_2392_2473_3856_3662_935_2918_3731_2900_1669_1669_2662_2008_2444_1788_2019_1655_829_2939_208_2365_7_3671_1106_833_2513_3849_1655_981_3181_1818_2453_3352_2738_2470_680_77_2946_3149_926_2738_529_3731_162_1055_448_926_2151_731_7_2204_2867_1904_4071_105_2511_1788_2964_3619_2338_2019_1310_3570_1247_1751_410_2885_660_232
0_237_7_1924_79_3384_3297_2386_344_1788_3740_3704_1519_945_87_1462_2147_429_247_289_2029_196_3218_2147_3998_449_3218_721_196_3171_7_427_1578_455_3855_1678_3173_1733_2913_1400_3112_3323_177_2718_4033_637_697_3461_1130_1041_2398_3030_775_2788_2089_2968_2673_3205_188_366_685_991_3901_811_1527_1088_1875_399_3893_3212_829_1589_2160_449_497_691_782_4059_1269_2160_2939_946_595_684_2253_31_2147_2182_149_3877_1423_3218_317_1524_1336_351_2320_2991_1046_3102_7_2204_4087_2221_3227_304_1411_7_105_3849_2158_637_1833_3154_7_60_1764_2073_1565_3866_2610_7_902_1143_435_716_2525_3936_7_2909_646_3799_1238_2616_2688_3731_4026_1593_3102_1617_2491_550_7_1659_457_832_1425_3218_586_3731_2617_2673_3919_1042_3187_39_3731_73_397_1904_1818_175_1444_680_1161_2204_2480_2182_99_956_680_2204_1485_2410_3001_3901_3907_2326_2634_1757_3744_1751_1059_1445_3731_1966_829_2458_3062_3514_1747_3396_3468_2166_499_3969_2097_2005_1788_219_3560_1159_645_3867_3246_1246_2290_1669_3497_1316_1669_15_1806_479_196_12_2029_427_1390_1788_4070_2398_1747_3166_2008_2918_117_2485_1757_1599_3477_2203_304_169_777_1489_2735_2531_1882', ['2712', '_1604', '_3458', '_4031', '_1604', '_1669', '_2008', '_2337', '_857', '_3123', '_3321', '_1887', '_846', '_2398', '_763', '_612', '_3846', '_1060', '_312', '_859', '_3638', '_763', '_2238', '_590', '_2458', '_3847', '_304', '_1888', '_1986', '_2238', '_3412', '_1885', '_375', '_2176', '_887', '_3051', '_2238', '_3374', '_3485', '_973', '_251', '_622', '_3051', '_2557', '_727', '_3750', '_240', '_2386', '_2452', '_2712', '_2913', '_2525', '_691', '_1114', '_1363', '_796', '_3731', '_1232', '_1332', '_3282', '_966', '_3883', '_2431', '_1774', '_2559', '_755', '_748', '_2975', '_2608', '_3345', '_7', '_868', '_731', '_2872', '_1336', '_2488', '_3706', '_2276', '_3739', '_434', '_2203', '_2019', '_873', '_1273', '_3627', '_2912', '_4046', '_120', '_2888', '_1707', '_1153', '_3731', '_2927', '_1188', '_1400', '_966', '_397', '_1153', '_2712', '_1140', '_792', '_3412', '_20', '_3452', 
'_2452', '_1247', '_3297', '_1669', '_326', '_2813', '_2365', '_3368', '_1774', '_1129', '_3260', '_186', '_1814', '_1445', '_438', '_1247', '_3323', '_3368', '_3744', '_2392', '_448', '_1953', '_1247', '_2204', '_2430', '_1094', '_1702', '_2688', '_1953', '_2712', '_966', '_2380', '_3714', '_3446', '_3391', '_2531', '_2738', '_2312', '_3866', '_1952', '_2238', '_4069', '_752', '_1861', '_73', '_3403', '_3825', '_685', '_1707', '_332', '_2738', '_685', '_305', '_4049', '_1004', '_186', '_1188', '_4076', '_3468', '_2885', '_740', '_1001', '_251', '_3367', '_2712', '_48', '_752', '_2673', '_2617', '_793', '_927', '_2712', '_2801', '_3229', '_3896', '_886', '_773', '_3229', '_3396', '_1160', '_1968', '_3272', '_381', '_3452', '_1307', '_3396', '_2240', '_1307', '_215', '_726', '_679', '_3195', '_2712', '_3026', '_903', '_2317', '_1114', '_901', '_1484', '_665', '_2182', '_3688', '_7', '_234', '_3009', '_370', '_2712', '_3871', '_3551', '_499', '_240', '_1742', '_2531', '_582', '_862', '_930', '_1097', '_688', '_2450', '_1658', '_2738', '_97', '_3698', '_2502', '_308', '_746', '_488', '_608', '_2948', '_1669', '_3919', '_2204', '_2431', '_3219', '_1774', '_1941', '_845', '_1398', '_2440', '_3765', '_1644', '_2206', '_3795', '_2076', '_1953', '_685', '_1953', '_1953', '_3879', '_3089', '_2331', '_2807', '_2238', '_1660', '_95', '_222', '_3671', '_2386', '_2918', '_1094', '_3008', '_152', '_117', '_1924', '_365', '_3893', '_3069', '_1725', '_499', '_3731', '_3258', '_1794', '_2718', '_2502', '_829', '_575', '_2326', '_294', '_4054', '_1349', '_1814', '_3177', '_1188', '_3872', '_3281', '_588', '_3750', '_2813', '_992', '_3687', '_3731', '_3087', '_3786', '_2453', '_450', '_2365', '_930', '_1788', '_831', '_1644', '_2984', '_3180', '_1010', '_206', '_1788', '_3808', '_100', '_2506', '_3230', '_399', '_204', '_1806', '_48', '_3603', '_1669', '_2779', '_289', '_3514', '_572', '_1032', '_1932', '_1060', '_990', '_3702', '_1046', '_3161', '_2085', '_1932', '_1932', '_3350', 
'_702', '_263', '_665', '_1032', '_2895', '_901', '_489', '_859', '_2434', '_2712', '_3693', '_2788', '_1838', '_1026', '_3251', '_1701', '_665', '_1477', '_204', '_2008', '_318', '_289', '_2788', '_1930', '_1325', '_1595', '_237', '_1054', '_3820', '_1669', '_665', '_931', '_1863', '_3218', '_2094', '_859', '_289', '_2712', '_3937', '_1920', '_1229', '_1408', '_153', '_1990', '_2712', '_1435', '_427', '_1838', '_961', '_901', '_2450', '_3030', '_1516', '_3775', '_3013', '_267', '_204', '_2626', '_222', '_477', '_1134', '_2083', '_1217', '_243', '_2070', '_695', '_550', '_2434', '_2525', '_3566', '_3253', '_3075', '_222', '_2167', '_616', '_3574', '_3375', '_1655', '_457', '_1131', '_3316', '_3702', '_2076', '_990', '_3498', '_1261', '_1369', '_2516', '_435', '_890', '_3969', '_951', '_3867', '_222', '_1443', '_1134', '_1838', '_2003', '_1648', '_447', '_2647', '_1299', '_1395', '_3324', '_3514', '_1046', '_1060', '_188', '_2593', '_3498', '_3514', '_1648', '_2813', '_3353', '_2647', '_1048', '_719', '_3353', '_2939', '_3592', '_3613', '_2788', '_3487', '_499', '_2786', '_2801', '_3318', '_3396', '_7', '_2206', '_231', '_1346', '_240', '_3828', '_1482', '_188', '_966', '_175', '_1017', '_868', '_99', '_2469', '_222', '_584', '_2001', '_2750', '_2573', '_784', '_2001', '_1524', '_2913', '_3593', '_1580', '_1793', '_2874', '_1574', '_2160', '_1316', '_3254', '_2655', '_1675', '_2750', '_1052', '_2147', '_2809', '_1351', '_3008', '_1443', '_527', '_3321', '_3030', '_26', '_3286', '_2228', '_50', '_1112', '_2342', '_761', '_2559', '_3702', '_3702', '_1093', '_670', '_527', '_222', '_2003', '_2750', '_1312', '_3044', '_3199', '_295', '_222', '_4086', '_2673', '_263', '_1443', '_251', '_2259', '_222', '_4086', '_3384', '_263', '_354', '_2943', '_2943', '_222', '_2952', '_3194', '_1231', '_354', '_1932', '_3288', '_222', '_354', '_1593', '_3384', '_3672', '_1539', '_3551', '_222', '_1161', '_3324', '_3458', '_2675', '_1307', '_2876', '_665', '_688', '_3979', '_1660', 
'_2444', '_3229', '_2013', '_665', '_3345', '_2224', '_2224', '_926', '_2224', '_580', '_1430', '_1129', '_1907', '_3177', '_1161', '_1059', '_4076', '_188', '_1544', '_2080', '_114', '_1130', '_2066', '_186', '_665', '_3671', '_1848', '_1726', '_1567', '_2673', '_833', '_3724', '_3910', '_289', '_568', '_1423', '_2008', '_1305', '_188', '_375', '_1631', '_3724', '_20', '_536', '_2174', '_1788', '_124', '_979', '_1688', '_484', '_679', '_3708', '_188', '_3875', '_1516', '_3236', '_1148', '_2700', '_3761', '_1524', '_1420', '_1761', '_2115', '_2419', '_457', '_2813', '_222', '_3808', '_175', '_1283', '_2310', '_2673', '_756', '_1806', '_1164', '_2083', '_499', '_146', '_92', '_2328', '_1788', '_1877', '_2750', '_1768', '_85', '_2662', '_4093', '_2147', '_2317', '_1134', '_204', '_2779', '_1768', '_3415', '_2147', '_2408', '_3269', '_1462', '_2435', '_2434', '_3205', '_2147', '_230', '_1594', '_3881', '_1114', '_1934', '_2676', '_2891', '_782', '_4071', '_3687', '_3195', '_2338', '_623', '_1733', '_757', '_3467', '_3706', '_3352', '_1806', '_2467', '_188', '_3161', '_1912', '_3724', '_1907', '_2750', '_3384', '_3030', '_775', '_2254', '_3514', '_427', '_3893', '_1848', '_7', '_216', '_3205', '_2736', '_3106', '_2331', '_4036', '_2160', '_2939', '_4080', '_2613', '_73', '_3412', '_2870', '_3731', '_3566', '_1970', '_3183', '_2782', '_1851', '_599', '_222', '_308', '_3028', '_1863', '_105', '_764', '_3450', '_1788', '_1905', '_1283', '_3866', '_462', '_365', '_3806', '_654', '_2913', '_2831', '_1343', '_3106', '_365', '_1813', '_3731', '_2415', '_2558', '_657', '_2610', '_1408', '_3507', '_1788', '_2940', '_1581', '_2013', '_366', '_2337', '_3005', '_1788', '_2124', '_1010', '_755', '_278', '_1446', '_2817', '_222', '_3808', '_802', '_2462', '_2448', '_2068', '_3507', '_3731', '_928', '_3547', '_2992', '_2204', '_3463', '_2100', '_1788', '_2909', '_1094', '_940', '_3106', '_2689', '_933', '_3731', '_2392', '_2473', '_3856', '_3662', '_935', '_2918', '_3731', '_2900', 
'_1669', '_1669', '_2662', '_2008', '_2444', '_1788', '_2019', '_1655', '_829', '_2939', '_208', '_2365', '_7', '_3671', '_1106', '_833', '_2513', '_3849', '_1655', '_981', '_3181', '_1818', '_2453', '_3352', '_2738', '_2470', '_680', '_77', '_2946', '_3149', '_926', '_2738', '_529', '_3731', '_162', '_1055', '_448', '_926', '_2151', '_731', '_7', '_2204', '_2867', '_1904', '_4071', '_105', '_2511', '_1788', '_2964', '_3619', '_2338', '_2019', '_1310', '_3570', '_1247', '_1751', '_410', '_2885', '_660', '_2320', '_237', '_7', '_1924', '_79', '_3384', '_3297', '_2386', '_344', '_1788', '_3740', '_3704', '_1519', '_945', '_87', '_1462', '_2147', '_429', '_247', '_289', '_2029', '_196', '_3218', '_2147', '_3998', '_449', '_3218', '_721', '_196', '_3171', '_7', '_427', '_1578', '_455', '_3855', '_1678', '_3173', '_1733', '_2913', '_1400', '_3112', '_3323', '_177', '_2718', '_4033', '_637', '_697', '_3461', '_1130', '_1041', '_2398', '_3030', '_775', '_2788', '_2089', '_2968', '_2673', '_3205', '_188', '_366', '_685', '_991', '_3901', '_811', '_1527', '_1088', '_1875', '_399', '_3893', '_3212', '_829', '_1589', '_2160', '_449', '_497', '_691', '_782', '_4059', '_1269', '_2160', '_2939', '_946', '_595', '_684', '_2253', '_31', '_2147', '_2182', '_149', '_3877', '_1423', '_3218', '_317', '_1524', '_1336', '_351', '_2320', '_2991', '_1046', '_3102', '_7', '_2204', '_4087', '_2221', '_3227', '_304', '_1411', '_7', '_105', '_3849', '_2158', '_637', '_1833', '_3154', '_7', '_60', '_1764', '_2073', '_1565', '_3866', '_2610', '_7', '_902', '_1143', '_435', '_716', '_2525', '_3936', '_7', '_2909', '_646', '_3799', '_1238', '_2616', '_2688', '_3731', '_4026', '_1593', '_3102', '_1617', '_2491', '_550', '_7', '_1659', '_457', '_832', '_1425', '_3218', '_586', '_3731', '_2617', '_2673', '_3919', '_1042', '_3187', '_39', '_3731', '_73', '_397', '_1904', '_1818', '_175', '_1444', '_680', '_1161', '_2204', '_2480', '_2182', '_99', '_956', '_680', '_2204', '_1485', '_2410', '_3001', 
'_3901', '_3907', '_2326', '_2634', '_1757', '_3744', '_1751', '_1059', '_1445', '_3731', '_1966', '_829', '_2458', '_3062', '_3514', '_1747', '_3396', '_3468', '_2166', '_499', '_3969', '_2097', '_2005', '_1788', '_219', '_3560', '_1159', '_645', '_3867', '_3246', '_1246', '_2290', '_1669', '_3497', '_1316', '_1669', '_15', '_1806', '_479', '_196', '_12', '_2029', '_427', '_1390', '_1788', '_4070', '_2398', '_1747', '_3166', '_2008', '_2918', '_117', '_2485', '_1757', '_1599', '_3477', '_2203', '_304', '_169', '_777', '_1489', '_2735', '_2531', '_1882'])\n", + "('3446_2542_1488_3452_1810_499_2365_752_3437_2386_2782_1604_410_3480_2712_3220_2259_586_3345_98_1749_1930_1893_2688_781_1993_1991_746_222_3659_447_536_3047_1289_2182_1788_3180_2945_3352_2280_447_3123_222_2524_2917_4042_280_1375_134_2852_400_1481_499_463_3442_1932_4076_3910_2357_2947_1424_258_2123_3051_1861_571_2008_3762_3978_1574_2712_375_2453_3750_278_1670_651_2276_1336_2694_833_894_1669_1061_4076_3020_218_921_3487_2265_1560_4076_3593_1573_610_2862_2005_679_2712_1875_2694_1669_510_1739_1462_3232_950_4009_2782_2332_375_2462_2712_3437_442_2029_3062_2887_2151_853_584_3820_1865_3671_1484_2002_2712_1907_251_3324_2356_608_25_1507_375_3362_2507_3983_2558_3738_1806_2356_3452_1284_1604_2249_175_472_3717_1289_1757_3051_1056_2312_472_3390_1554_3040_2918_168_793_3232_2094_4040_2673_3342_3384_90_2322_1336_3165_606_1667_997_2520_3232_2094_685_1539_3356_973_2097_2712_1424_410_2073_3983_53_3825_2712_2342_62_1421_3356_1650_3765_2712_3069_3387_90_894_2244_3458_514_1095_2070_3684_3062_3410_3345_514_868_1818_2971_3069_210_700_514_3593_2073_955_3390_62_1769_2337_1919_1017_2169_3901_2312_2221_2337_3297_977_2053_3901_1134_3686_1234_1705_1909_295_1604_50_2782_2337_3437_2782_50_933_2259_2133_3933_3437_50_50_2503_441_2266_3981_2609_1494_1532_2426_3602', ['3446', '_2542', '_1488', '_3452', '_1810', '_499', '_2365', '_752', '_3437', '_2386', '_2782', '_1604', '_410', '_3480', '_2712', '_3220', '_2259', '_586', '_3345', '_98', 
'_1749', '_1930', '_1893', '_2688', '_781', '_1993', '_1991', '_746', '_222', '_3659', '_447', '_536', '_3047', '_1289', '_2182', '_1788', '_3180', '_2945', '_3352', '_2280', '_447', '_3123', '_222', '_2524', '_2917', '_4042', '_280', '_1375', '_134', '_2852', '_400', '_1481', '_499', '_463', '_3442', '_1932', '_4076', '_3910', '_2357', '_2947', '_1424', '_258', '_2123', '_3051', '_1861', '_571', '_2008', '_3762', '_3978', '_1574', '_2712', '_375', '_2453', '_3750', '_278', '_1670', '_651', '_2276', '_1336', '_2694', '_833', '_894', '_1669', '_1061', '_4076', '_3020', '_218', '_921', '_3487', '_2265', '_1560', '_4076', '_3593', '_1573', '_610', '_2862', '_2005', '_679', '_2712', '_1875', '_2694', '_1669', '_510', '_1739', '_1462', '_3232', '_950', '_4009', '_2782', '_2332', '_375', '_2462', '_2712', '_3437', '_442', '_2029', '_3062', '_2887', '_2151', '_853', '_584', '_3820', '_1865', '_3671', '_1484', '_2002', '_2712', '_1907', '_251', '_3324', '_2356', '_608', '_25', '_1507', '_375', '_3362', '_2507', '_3983', '_2558', '_3738', '_1806', '_2356', '_3452', '_1284', '_1604', '_2249', '_175', '_472', '_3717', '_1289', '_1757', '_3051', '_1056', '_2312', '_472', '_3390', '_1554', '_3040', '_2918', '_168', '_793', '_3232', '_2094', '_4040', '_2673', '_3342', '_3384', '_90', '_2322', '_1336', '_3165', '_606', '_1667', '_997', '_2520', '_3232', '_2094', '_685', '_1539', '_3356', '_973', '_2097', '_2712', '_1424', '_410', '_2073', '_3983', '_53', '_3825', '_2712', '_2342', '_62', '_1421', '_3356', '_1650', '_3765', '_2712', '_3069', '_3387', '_90', '_894', '_2244', '_3458', '_514', '_1095', '_2070', '_3684', '_3062', '_3410', '_3345', '_514', '_868', '_1818', '_2971', '_3069', '_210', '_700', '_514', '_3593', '_2073', '_955', '_3390', '_62', '_1769', '_2337', '_1919', '_1017', '_2169', '_3901', '_2312', '_2221', '_2337', '_3297', '_977', '_2053', '_3901', '_1134', '_3686', '_1234', '_1705', '_1909', '_295', '_1604', '_50', '_2782', '_2337', '_3437', '_2782', '_50', 
'_933', '_2259', '_2133', '_3933', '_3437', '_50', '_50', '_2503', '_441', '_2266', '_3981', '_2609', '_1494', '_1532', '_2426', '_3602'])\n", + "('2308_2094_1158_2030_2094_29_2782_2308_2094_1755_2169_519_3146_4040_4076_1684_2133_793_1130_443_3612_2692_3106_2025_1626_339_977_768_485_3351_363_2624_4031_1934_1217_536_1875_744_208_2839_3050_2601_3396_3106_2167_1421_745_3229_1767_3396_1962_3187_776_379_363_3650_3396_229_1097_413_1688_2282_1139_3396_1878_2689_2397_3220_2813_247_7_4041_3037_2892_3161_3374_1449_536_3274_1826_1794_3009_118_2442_3051_1922_3359_1723_3662_1527_1565_3396_343_2718_415_2280_204_2243_2712_928_2518_1700_3069_1562_1755_1810_2532_2604_2909_379_3864_1644_40_2944_39_118_933_2386_2750_2160_731_2693_555_2371_109_555_536_2919_2199_887_1962_2693_1924_53_3091_2786_1747_4003_2386_657_1861_3897_248_3253_1130_657_1169_514_1130_3253_903_731_294_2536_1234_379_3787_1728_2873_1644_2750_3232_731_3146_251_2094_2910_1857_2308_2094_2462_2462_1868_2462_2782_2644_857_263_3449_3562_548', ['2308', '_2094', '_1158', '_2030', '_2094', '_29', '_2782', '_2308', '_2094', '_1755', '_2169', '_519', '_3146', '_4040', '_4076', '_1684', '_2133', '_793', '_1130', '_443', '_3612', '_2692', '_3106', '_2025', '_1626', '_339', '_977', '_768', '_485', '_3351', '_363', '_2624', '_4031', '_1934', '_1217', '_536', '_1875', '_744', '_208', '_2839', '_3050', '_2601', '_3396', '_3106', '_2167', '_1421', '_745', '_3229', '_1767', '_3396', '_1962', '_3187', '_776', '_379', '_363', '_3650', '_3396', '_229', '_1097', '_413', '_1688', '_2282', '_1139', '_3396', '_1878', '_2689', '_2397', '_3220', '_2813', '_247', '_7', '_4041', '_3037', '_2892', '_3161', '_3374', '_1449', '_536', '_3274', '_1826', '_1794', '_3009', '_118', '_2442', '_3051', '_1922', '_3359', '_1723', '_3662', '_1527', '_1565', '_3396', '_343', '_2718', '_415', '_2280', '_204', '_2243', '_2712', '_928', '_2518', '_1700', '_3069', '_1562', '_1755', '_1810', '_2532', '_2604', '_2909', '_379', '_3864', '_1644', '_40', '_2944', '_39', 
'_118', '_933', '_2386', '_2750', '_2160', '_731', '_2693', '_555', '_2371', '_109', '_555', '_536', '_2919', '_2199', '_887', '_1962', '_2693', '_1924', '_53', '_3091', '_2786', '_1747', '_4003', '_2386', '_657', '_1861', '_3897', '_248', '_3253', '_1130', '_657', '_1169', '_514', '_1130', '_3253', '_903', '_731', '_294', '_2536', '_1234', '_379', '_3787', '_1728', '_2873', '_1644', '_2750', '_3232', '_731', '_3146', '_251', '_2094', '_2910', '_1857', '_2308', '_2094', '_2462', '_2462', '_1868', '_2462', '_2782', '_2644', '_857', '_263', '_3449', '_3562', '_548'])\n", + "('1083_2968_762_1669_3336_317_2008_3835_1072_3218_3218_3350_1046_104_2596_2944_2511_15_461_204_1326_4076_3278_718_3893_3481_208_2647_3920_2623_1644_1725_3473_2361_1229_4076_3845_3251_2550_2600_4061_890_4033_2511_1838_2662_2622_3374_2331_3920_3912_2001_2132_1579_2363_2700_986_2027_1991_331_3136_4061_503_1970_3452_3092_79_158_3093_1933_2786_3815_2556_2506_3072_2857_368_3137_3575_854_1669_1161_2008_2662_4033_3053_2662_3750_1866_1725_2726_3920_2714_1375_940_3276_940_727_1809_2740_1877_550_1244_2610_2648_2147_2471_2750_2525_1342_2712_2679_1282_2909_3610_2386_723_933_1800_572_2675_1048_3400_685_538_809_4033_2431_2833_1904_3713_3886_1644_4076_1781_683_3541_3129_93_2784_4076_2237_2945_2249_260_2308_2399_1826_262_4069_476_143_950_3374_1826_2975_42_792_3863_2536_2083_2077_3747_427_3462_3418_2107', ['1083', '_2968', '_762', '_1669', '_3336', '_317', '_2008', '_3835', '_1072', '_3218', '_3218', '_3350', '_1046', '_104', '_2596', '_2944', '_2511', '_15', '_461', '_204', '_1326', '_4076', '_3278', '_718', '_3893', '_3481', '_208', '_2647', '_3920', '_2623', '_1644', '_1725', '_3473', '_2361', '_1229', '_4076', '_3845', '_3251', '_2550', '_2600', '_4061', '_890', '_4033', '_2511', '_1838', '_2662', '_2622', '_3374', '_2331', '_3920', '_3912', '_2001', '_2132', '_1579', '_2363', '_2700', '_986', '_2027', '_1991', '_331', '_3136', '_4061', '_503', '_1970', '_3452', '_3092', '_79', '_158', '_3093', '_1933', 
'_2786', '_3815', '_2556', '_2506', '_3072', '_2857', '_368', '_3137', '_3575', '_854', '_1669', '_1161', '_2008', '_2662', '_4033', '_3053', '_2662', '_3750', '_1866', '_1725', '_2726', '_3920', '_2714', '_1375', '_940', '_3276', '_940', '_727', '_1809', '_2740', '_1877', '_550', '_1244', '_2610', '_2648', '_2147', '_2471', '_2750', '_2525', '_1342', '_2712', '_2679', '_1282', '_2909', '_3610', '_2386', '_723', '_933', '_1800', '_572', '_2675', '_1048', '_3400', '_685', '_538', '_809', '_4033', '_2431', '_2833', '_1904', '_3713', '_3886', '_1644', '_4076', '_1781', '_683', '_3541', '_3129', '_93', '_2784', '_4076', '_2237', '_2945', '_2249', '_260', '_2308', '_2399', '_1826', '_262', '_4069', '_476', '_143', '_950', '_3374', '_1826', '_2975', '_42', '_792', '_3863', '_2536', '_2083', '_2077', '_3747', '_427', '_3462', '_3418', '_2107'])\n", + "('1234_3901_1328_1351_3273_331_985_1176_1130_1964_96_1907_3725_3497_3745_3680_619_499_7_1872_1112_1131_2306_1872_2431_203_2453_398_536_2835_3538_410_2530_3232_184_2712_798_363_1222_2139_679_2504_3039_734_3498_3978_2918_2925_3702_682_1565_3508_2251_1566_104_1485_3110_3387_1815_1033_1114_1726_3673_1369_56_1966_951_1106_1506_1629_2859_3303_208_277_3376_773_3764_1083_1756_160_1514_3250_124_3102_1083_1667_1424_2859_1575_490_3551_1083_1667_4057_1924_3278_3109_3109_310_1667_3720_3832_2154_3750_700_1083_2796_412_2333_1684_2739_1406_1234_2542_2259_248_246_9_669_204_3901_2753_2608_2154_3975_3109_2859_175_20_1987_3505_802_2837_187_1106_829_2453_3984_1419_1310_2859_342_3418_3551_3381_1388_3039_3745_2532_792_3775_3265_1112_3178_1926_3549_1485_3232_2179_3787_681_2990_3297_436_1924_2702_1349_4082_754_1423_26_2786_308_1848_3652_3684_3801_3199_2480_3967_727_93_3091_3798_3311_3358_3798_152_1140_682_212_2945_4091_4041_1289_4032_1357_2392_3777_1818_278_1159_396_518_3575_3866_1920_3659_1261_1312_375_2168_740_2782_2356_701_3168_1083_805_1046_2782_1629_3560_535_3119_2094_527_1647_3335_525_3321_939_805_2722_2456_1216_3008_3410_3643_97_3134_313_285_1
229_4054_1369_3984_414_2361_3293_575_56_682_1914_398_3706_1031_3702_2013_3684_1150_3278_3968_3382_977_2575_4076_3369_3638_2920_1242_600_890_1083_2562_2693_3082_1907_890_1307_204_944_714_3990_286_1046_2453_3981_1186_658_424_2609_1513', ['1234', '_3901', '_1328', '_1351', '_3273', '_331', '_985', '_1176', '_1130', '_1964', '_96', '_1907', '_3725', '_3497', '_3745', '_3680', '_619', '_499', '_7', '_1872', '_1112', '_1131', '_2306', '_1872', '_2431', '_203', '_2453', '_398', '_536', '_2835', '_3538', '_410', '_2530', '_3232', '_184', '_2712', '_798', '_363', '_1222', '_2139', '_679', '_2504', '_3039', '_734', '_3498', '_3978', '_2918', '_2925', '_3702', '_682', '_1565', '_3508', '_2251', '_1566', '_104', '_1485', '_3110', '_3387', '_1815', '_1033', '_1114', '_1726', '_3673', '_1369', '_56', '_1966', '_951', '_1106', '_1506', '_1629', '_2859', '_3303', '_208', '_277', '_3376', '_773', '_3764', '_1083', '_1756', '_160', '_1514', '_3250', '_124', '_3102', '_1083', '_1667', '_1424', '_2859', '_1575', '_490', '_3551', '_1083', '_1667', '_4057', '_1924', '_3278', '_3109', '_3109', '_310', '_1667', '_3720', '_3832', '_2154', '_3750', '_700', '_1083', '_2796', '_412', '_2333', '_1684', '_2739', '_1406', '_1234', '_2542', '_2259', '_248', '_246', '_9', '_669', '_204', '_3901', '_2753', '_2608', '_2154', '_3975', '_3109', '_2859', '_175', '_20', '_1987', '_3505', '_802', '_2837', '_187', '_1106', '_829', '_2453', '_3984', '_1419', '_1310', '_2859', '_342', '_3418', '_3551', '_3381', '_1388', '_3039', '_3745', '_2532', '_792', '_3775', '_3265', '_1112', '_3178', '_1926', '_3549', '_1485', '_3232', '_2179', '_3787', '_681', '_2990', '_3297', '_436', '_1924', '_2702', '_1349', '_4082', '_754', '_1423', '_26', '_2786', '_308', '_1848', '_3652', '_3684', '_3801', '_3199', '_2480', '_3967', '_727', '_93', '_3091', '_3798', '_3311', '_3358', '_3798', '_152', '_1140', '_682', '_212', '_2945', '_4091', '_4041', '_1289', '_4032', '_1357', '_2392', '_3777', '_1818', '_278', '_1159', 
'_396', '_518', '_3575', '_3866', '_1920', '_3659', '_1261', '_1312', '_375', '_2168', '_740', '_2782', '_2356', '_701', '_3168', '_1083', '_805', '_1046', '_2782', '_1629', '_3560', '_535', '_3119', '_2094', '_527', '_1647', '_3335', '_525', '_3321', '_939', '_805', '_2722', '_2456', '_1216', '_3008', '_3410', '_3643', '_97', '_3134', '_313', '_285', '_1229', '_4054', '_1369', '_3984', '_414', '_2361', '_3293', '_575', '_56', '_682', '_1914', '_398', '_3706', '_1031', '_3702', '_2013', '_3684', '_1150', '_3278', '_3968', '_3382', '_977', '_2575', '_4076', '_3369', '_3638', '_2920', '_1242', '_600', '_890', '_1083', '_2562', '_2693', '_3082', '_1907', '_890', '_1307', '_204', '_944', '_714', '_3990', '_286', '_1046', '_2453', '_3981', '_1186', '_658', '_424', '_2609', '_1513'])\n", + "('2712_2424_1311_2693_1684_3721_2303_1234_1130_1487_2021_2435_3778_3410_3627_1491_3467_2076_2204_3721_331_40_3105_2673_2442_3345_1270_1138_3051_2134_3820_3230_1076_3368_3893_1986_1161_96_515_1051_3476_224_1733_1114_2669_81_3969_633_1968_117_1114_2984_2046_1565_599_2093_117_3167_2236_92_248_3822_1815_2214_2322_378_2225_249_1294_3082_2214_643_2424_289_3178_196_3999_654_1565_3165_3030_716_748_3737_2738_1114_1671_370_1114_1235_3508_456_1887_1794_1779_1734_629_2230_3731_2909_1266_1381_1719_3183_2431_2647_2675_802_3720_1170_1349_2858_2214_490_168_2626_105_2398_1578_3197_2448_1562_2536_3352_2531_1590_2910_2839_3743_2094_2939_3541_1303_925_1114_1833_950_2939_1220_3379_3648_2617_3665_112_3069_1105_1350_3051_3671_1351_1395_3808_290_9_3051_2310_1970_3623_3297_501_2531_1148_248_1099_1932_450_3744_1186_2910_1978_1424_1310_2617_1661_416_3051_1114_2773_2738_3969_3866_544_3051_1770_2713_2398_64_1669_1168_160_637_2365_910_1129_2018_3958_2439_1129_60_2025_2760_4069_1848_2738_1203_2589_3775_20_1815_200_3051_3297_1651_2673_2144_1462_399_3879_937_2398_2520_1170_3777_1262_3051_2205_1945_2918_2524_1312_586_1925_1170_2386_3856_1170_3092_1604_2712_62_2001_3990_2496_3404_3183_117_2205_1269_2342_658_43_620_3627_
1048_3262_1254_2310_712_1877_3051_3437_1595_874_1684_1453_1234_3051_3163_3764_939_1161_2304_2983_472_3167_1160_1061_2348_1669_2952_1861_248_55_3569_3969_962_661_2738_375_469_2203_2384_2673_1907_2738_3167_1308_3753_3297_2249_890_3197_2782_1351_1763_1170_2750_3495_3731_775_3732_4004_2939_3468_238_456_1491_1912_495_1491_3379_482_3051_3785_3556_442_3297_1229_933_2647_1076_268_1338_1170_3638_3109_3040_1076_1848_1669_3526_3778_1907_2439_1170_2642_3790_1170_9_2265_2738_2310_499_854_2310_2925_802_3040_248_1661_622_2782_571_2673_3051_308_1924_1607_2310_3092_3907_2738_3785_181_3951_2939_2892_2530_2803_3381_463_535_3297_3039_2249_2738_3297_784_3178_3468_132_4092_2214_944_2466_3877_1684_2516_215_766_308_2203_2303_1129_3470_2177_3051_1491_3026_669_3297_3259_3777_3051_1684_1312_2525_3468_372_2918_3051_1338_3721_3458_3297_2304_1872_3627_3468_893_2251_1696_15_1768_1234_3747_2434_2693_248_3426_3724_117_2782_3183_2057_248_861_74_3627_2809_456_1672_248_45_1742_3627_3297_100_2712_2809_1932_2365_3731_62_1742_2386_3686_1594_3329_3051_2496_1970_2750_933_1312_3942_3879_3297_2854_2491_64_1398_2166_2738_2347_755_2002_2573_3867_2554_2738_1219_2096_2843_308_2363_2415_2322_462_416_416_3069_2428_3407_2214_1575_2357_289_461_1845_4009_4016_3177_3618_1739_2492_859_691_2712_3107_2561_1768_2154_2673_1401_1234_944_1920_2008_2948_2169_4003_3885_1567_1563_2579_3562_2030_3977_3981_2609_1494_1532_2426_3602', ['2712', '_2424', '_1311', '_2693', '_1684', '_3721', '_2303', '_1234', '_1130', '_1487', '_2021', '_2435', '_3778', '_3410', '_3627', '_1491', '_3467', '_2076', '_2204', '_3721', '_331', '_40', '_3105', '_2673', '_2442', '_3345', '_1270', '_1138', '_3051', '_2134', '_3820', '_3230', '_1076', '_3368', '_3893', '_1986', '_1161', '_96', '_515', '_1051', '_3476', '_224', '_1733', '_1114', '_2669', '_81', '_3969', '_633', '_1968', '_117', '_1114', '_2984', '_2046', '_1565', '_599', '_2093', '_117', '_3167', '_2236', '_92', '_248', '_3822', '_1815', '_2214', '_2322', '_378', '_2225', '_249', '_1294', 
'_3082', '_2214', '_643', '_2424', '_289', '_3178', '_196', '_3999', '_654', '_1565', '_3165', '_3030', '_716', '_748', '_3737', '_2738', '_1114', '_1671', '_370', '_1114', '_1235', '_3508', '_456', '_1887', '_1794', '_1779', '_1734', '_629', '_2230', '_3731', '_2909', '_1266', '_1381', '_1719', '_3183', '_2431', '_2647', '_2675', '_802', '_3720', '_1170', '_1349', '_2858', '_2214', '_490', '_168', '_2626', '_105', '_2398', '_1578', '_3197', '_2448', '_1562', '_2536', '_3352', '_2531', '_1590', '_2910', '_2839', '_3743', '_2094', '_2939', '_3541', '_1303', '_925', '_1114', '_1833', '_950', '_2939', '_1220', '_3379', '_3648', '_2617', '_3665', '_112', '_3069', '_1105', '_1350', '_3051', '_3671', '_1351', '_1395', '_3808', '_290', '_9', '_3051', '_2310', '_1970', '_3623', '_3297', '_501', '_2531', '_1148', '_248', '_1099', '_1932', '_450', '_3744', '_1186', '_2910', '_1978', '_1424', '_1310', '_2617', '_1661', '_416', '_3051', '_1114', '_2773', '_2738', '_3969', '_3866', '_544', '_3051', '_1770', '_2713', '_2398', '_64', '_1669', '_1168', '_160', '_637', '_2365', '_910', '_1129', '_2018', '_3958', '_2439', '_1129', '_60', '_2025', '_2760', '_4069', '_1848', '_2738', '_1203', '_2589', '_3775', '_20', '_1815', '_200', '_3051', '_3297', '_1651', '_2673', '_2144', '_1462', '_399', '_3879', '_937', '_2398', '_2520', '_1170', '_3777', '_1262', '_3051', '_2205', '_1945', '_2918', '_2524', '_1312', '_586', '_1925', '_1170', '_2386', '_3856', '_1170', '_3092', '_1604', '_2712', '_62', '_2001', '_3990', '_2496', '_3404', '_3183', '_117', '_2205', '_1269', '_2342', '_658', '_43', '_620', '_3627', '_1048', '_3262', '_1254', '_2310', '_712', '_1877', '_3051', '_3437', '_1595', '_874', '_1684', '_1453', '_1234', '_3051', '_3163', '_3764', '_939', '_1161', '_2304', '_2983', '_472', '_3167', '_1160', '_1061', '_2348', '_1669', '_2952', '_1861', '_248', '_55', '_3569', '_3969', '_962', '_661', '_2738', '_375', '_469', '_2203', '_2384', '_2673', '_1907', '_2738', '_3167', '_1308', 
'_3753', '_3297', '_2249', '_890', '_3197', '_2782', '_1351', '_1763', '_1170', '_2750', '_3495', '_3731', '_775', '_3732', '_4004', '_2939', '_3468', '_238', '_456', '_1491', '_1912', '_495', '_1491', '_3379', '_482', '_3051', '_3785', '_3556', '_442', '_3297', '_1229', '_933', '_2647', '_1076', '_268', '_1338', '_1170', '_3638', '_3109', '_3040', '_1076', '_1848', '_1669', '_3526', '_3778', '_1907', '_2439', '_1170', '_2642', '_3790', '_1170', '_9', '_2265', '_2738', '_2310', '_499', '_854', '_2310', '_2925', '_802', '_3040', '_248', '_1661', '_622', '_2782', '_571', '_2673', '_3051', '_308', '_1924', '_1607', '_2310', '_3092', '_3907', '_2738', '_3785', '_181', '_3951', '_2939', '_2892', '_2530', '_2803', '_3381', '_463', '_535', '_3297', '_3039', '_2249', '_2738', '_3297', '_784', '_3178', '_3468', '_132', '_4092', '_2214', '_944', '_2466', '_3877', '_1684', '_2516', '_215', '_766', '_308', '_2203', '_2303', '_1129', '_3470', '_2177', '_3051', '_1491', '_3026', '_669', '_3297', '_3259', '_3777', '_3051', '_1684', '_1312', '_2525', '_3468', '_372', '_2918', '_3051', '_1338', '_3721', '_3458', '_3297', '_2304', '_1872', '_3627', '_3468', '_893', '_2251', '_1696', '_15', '_1768', '_1234', '_3747', '_2434', '_2693', '_248', '_3426', '_3724', '_117', '_2782', '_3183', '_2057', '_248', '_861', '_74', '_3627', '_2809', '_456', '_1672', '_248', '_45', '_1742', '_3627', '_3297', '_100', '_2712', '_2809', '_1932', '_2365', '_3731', '_62', '_1742', '_2386', '_3686', '_1594', '_3329', '_3051', '_2496', '_1970', '_2750', '_933', '_1312', '_3942', '_3879', '_3297', '_2854', '_2491', '_64', '_1398', '_2166', '_2738', '_2347', '_755', '_2002', '_2573', '_3867', '_2554', '_2738', '_1219', '_2096', '_2843', '_308', '_2363', '_2415', '_2322', '_462', '_416', '_416', '_3069', '_2428', '_3407', '_2214', '_1575', '_2357', '_289', '_461', '_1845', '_4009', '_4016', '_3177', '_3618', '_1739', '_2492', '_859', '_691', '_2712', '_3107', '_2561', '_1768', '_2154', '_2673', '_1401', 
'_1234', '_944', '_1920', '_2008', '_2948', '_2169', '_4003', '_3885', '_1567', '_1563', '_2579', '_3562', '_2030', '_3977', '_3981', '_2609', '_1494', '_1532', '_2426', '_3602'])\n", + "('4076_240_918_2524_4070_3900_625_2308_1114_2433_488_3671_2154_3199_1825_2610_1578_4008_1336_1562_3092_2561_428_2097_1959_210_1593_263_3745_1003_3384_1856_3662_1763_3251_3127_229_4061_977_2264_727_3498_2308_3541_456_609_3575_890_3521_109_3662_600_773_1566_2859_1872_1212_3798_2431_2632_117_846_3452_612_1336_977_727_3692_1046_1833_1825_2404_3092_1112_3712_4061_1448_3138_1560_447_1593_1326_4032_3008_387_799_608_1539_1543_3165_3250_3030_2280_3533_399_2952_3271_3240_4076_308_2930_1723_1604_1889_3203_2308_1490_3681_1889_933_399_1889_117_3437_3567_2604_1965_609_4077_4061_3124_1060_317_1032_2673_1143_7_379_2_3550_3093_3702_1593_1450_2015_1424_1140_261_3893_4031_3520_799_628_3849_2952_2796_4009_1810_1684_3100_1920_914_610_1153_4076_1958_503_3271_914_3353_2693_2712_944_1863_1728_1161_2081_3881_2712_805_3479_2782_1604_2398_1768_1225_2094_4093_3218_944_1219', ['4076', '_240', '_918', '_2524', '_4070', '_3900', '_625', '_2308', '_1114', '_2433', '_488', '_3671', '_2154', '_3199', '_1825', '_2610', '_1578', '_4008', '_1336', '_1562', '_3092', '_2561', '_428', '_2097', '_1959', '_210', '_1593', '_263', '_3745', '_1003', '_3384', '_1856', '_3662', '_1763', '_3251', '_3127', '_229', '_4061', '_977', '_2264', '_727', '_3498', '_2308', '_3541', '_456', '_609', '_3575', '_890', '_3521', '_109', '_3662', '_600', '_773', '_1566', '_2859', '_1872', '_1212', '_3798', '_2431', '_2632', '_117', '_846', '_3452', '_612', '_1336', '_977', '_727', '_3692', '_1046', '_1833', '_1825', '_2404', '_3092', '_1112', '_3712', '_4061', '_1448', '_3138', '_1560', '_447', '_1593', '_1326', '_4032', '_3008', '_387', '_799', '_608', '_1539', '_1543', '_3165', '_3250', '_3030', '_2280', '_3533', '_399', '_2952', '_3271', '_3240', '_4076', '_308', '_2930', '_1723', '_1604', '_1889', '_3203', '_2308', '_1490', '_3681', 
'_1889', '_933', '_399', '_1889', '_117', '_3437', '_3567', '_2604', '_1965', '_609', '_4077', '_4061', '_3124', '_1060', '_317', '_1032', '_2673', '_1143', '_7', '_379', '_2', '_3550', '_3093', '_3702', '_1593', '_1450', '_2015', '_1424', '_1140', '_261', '_3893', '_4031', '_3520', '_799', '_628', '_3849', '_2952', '_2796', '_4009', '_1810', '_1684', '_3100', '_1920', '_914', '_610', '_1153', '_4076', '_1958', '_503', '_3271', '_914', '_3353', '_2693', '_2712', '_944', '_1863', '_1728', '_1161', '_2081', '_3881', '_2712', '_805', '_3479', '_2782', '_1604', '_2398', '_1768', '_1225', '_2094', '_4093', '_3218', '_944', '_1219'])\n" + ] + } + ], + "source": [ + "for i, item in enumerate(splits.items()):\n", + " print(item)\n", + " if i == 7:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4435 4435\n" + ] + } + ], + "source": [ + "print(len(word_freqs), len(splits))" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_pair_freqs(splits, word_freqs=word_freqs):\n", + " pair_freqs = defaultdict(int)\n", + " for word, freq in word_freqs.items():\n", + " split = splits[word]\n", + " if len(split) == 1:\n", + " continue\n", + " for i in range(len(split) - 1):\n", + " pair = (split[i], split[i + 1])\n", + " pair_freqs[pair] += freq\n", + " return pair_freqs" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "pair_freqs = compute_pair_freqs(splits)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(('8', '9'), 131938)\n", + "(('9', '6'), 170831)\n", + "(('▁', '2'), 2060412)\n", + "(('2', '0'), 312772)\n", + "(('0', '2'), 107433)\n", + "(('2', '9'), 233446)\n", + "(('▁', '9'), 250512)\n", + "(('9', '3'), 
171191)\n" + ] + } + ], + "source": [ + "for i, (key, val) in enumerate(pair_freqs.items()):\n", + " print((key, val))\n", + " if i == 7:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('▁', '2') 2060412\n" + ] + } + ], + "source": [ + "best_pair = \"\"\n", + "max_freq = None\n", + "\n", + "for pair, freq in pair_freqs.items():\n", + " if max_freq is None or max_freq < freq:\n", + " best_pair = pair\n", + " max_freq = freq\n", + "\n", + "print(best_pair, max_freq)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, From 09844af4f608cc3b859d54ff78b1ba25d16d252f Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Thu, 4 Jul 2024 19:43:16 -0400 Subject: [PATCH 07/10] updated pair_freqs method for task --- research/two_byte_encoding.ipynb | 88 +++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 12 deletions(-) diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index 766fc13..9cbc603 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -580,14 +580,41 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "4435 4435\n" + "('896', 5)\n", + "('▁2029', 4261)\n", + "('▁935', 6058)\n", + "('▁679', 8699)\n", + "('▁1115', 
1982)\n", + "('▁3601', 639)\n", + "('▁3000', 635)\n", + "('▁222', 27691)\n" + ] + } + ], + "source": [ + "for i, item in enumerate(word_freqs.items()):\n", + " print(item)\n", + " if i == 7:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4435 18680\n" ] } ], @@ -597,31 +624,68 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ - "def compute_pair_freqs(splits, word_freqs=word_freqs):\n", + "def compute_pair_freqs(splits):\n", " pair_freqs = defaultdict(int)\n", - " for word, freq in word_freqs.items():\n", - " split = splits[word]\n", - " if len(split) == 1:\n", - " continue\n", - " for i in range(len(split) - 1):\n", - " pair = (split[i], split[i + 1])\n", - " pair_freqs[pair] += freq\n", + " for sent, word_list in splits.items():\n", + " for i in range(len(word_list) - 1):\n", + " pair = (word_list[i], word_list[i + 1])\n", + " pair_freqs[pair] += 1\n", " return pair_freqs" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "pair_freqs = compute_pair_freqs(splits)" ] }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(('896', '_2029'), 1)\n", + "(('_2029', '_935'), 8)\n", + "(('_935', '_679'), 6)\n", + "(('_679', '_1115'), 3)\n", + "(('_1115', '_3601'), 2)\n", + "(('_3601', '_3000'), 1)\n", + "(('_3000', '_222'), 4)\n", + "(('_222', '_3446'), 11)\n", + "(('_3446', '_2218'), 2)\n", + "(('_2218', '_3072'), 1)\n", + "(('_3072', '_550'), 1)\n", + "(('_550', '_3652'), 4)\n", + "(('_3652', '_665'), 5)\n", + "(('_665', '_2596'), 9)\n", + "(('_2596', '_2809'), 14)\n", + "(('_2809', '_3649'), 4)\n", + "(('_3649', '_251'), 27)\n", + "(('_251', '_2610'), 52)\n", + "(('_2610', '_2536'), 22)\n", + "(('_2536', 
'_47'), 1)\n", + "(('_47', '_2852'), 1)\n" + ] + } + ], + "source": [ + "for i, item in enumerate(pair_freqs.items()):\n", + " print(item)\n", + " if i == 20:\n", + " break" + ] + }, { "cell_type": "code", "execution_count": 70, From c0b15ed68e8116db9b431c113d8d943e0f1c3a4e Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Thu, 4 Jul 2024 20:12:57 -0400 Subject: [PATCH 08/10] added the algorith --- research/two_byte_encoding.ipynb | 182 +++++++++++++++++++++---------- 1 file changed, 124 insertions(+), 58 deletions(-) diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index 9cbc603..d75127b 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -648,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -665,64 +665,27 @@ "(('_222', '_3446'), 11)\n", "(('_3446', '_2218'), 2)\n", "(('_2218', '_3072'), 1)\n", - "(('_3072', '_550'), 1)\n", - "(('_550', '_3652'), 4)\n", - "(('_3652', '_665'), 5)\n", - "(('_665', '_2596'), 9)\n", - "(('_2596', '_2809'), 14)\n", - "(('_2809', '_3649'), 4)\n", - "(('_3649', '_251'), 27)\n", - "(('_251', '_2610'), 52)\n", - "(('_2610', '_2536'), 22)\n", - "(('_2536', '_47'), 1)\n", - "(('_47', '_2852'), 1)\n" + "(('_3072', '_550'), 1)\n" ] } ], "source": [ "for i, item in enumerate(pair_freqs.items()):\n", " print(item)\n", - " if i == 20:\n", + " if i == 10:\n", " break" ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(('8', '9'), 131938)\n", - "(('9', '6'), 170831)\n", - "(('▁', '2'), 2060412)\n", - "(('2', '0'), 312772)\n", - "(('0', '2'), 107433)\n", - "(('2', '9'), 233446)\n", - "(('▁', '9'), 250512)\n", - "(('9', '3'), 171191)\n" - ] - } - ], - "source": [ - "for i, (key, val) in enumerate(pair_freqs.items()):\n", - " print((key, val))\n", - " if i == 7:\n", - " break" - ] - }, - { - 
"cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('▁', '2') 2060412\n" + "('_2426', '_3602') 5548\n" ] } ], @@ -740,45 +703,148 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def merge_pair(a, b, splits):\n", + " for sent, word_list in splits.items():\n", + " split = splits[sent]\n", + "\n", + " i = 0\n", + " while i < len(split) - 1:\n", + " if split[i] == a and split[i + 1] == b:\n", + " split = split[:i] + [a + b] + split[i + 2 :]\n", + " else:\n", + " i += 1\n", + " splits[sent] = split\n", + " return splits" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602', ['896', '_2029', '_935', '_679', '_1115', '_3601', '_3000', '_222_3446', '_2218', '_3072', '_550', '_3652', '_665', '_2596', '_2809', '_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', 
'_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', '_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', '_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673', '_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386', '_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981', '_2426', '_1494', '_1532', '_2426_3602'])\n", + 
"('2712_1604_3458_4031_1604_1669_2008_2337_857_3123_3321_1887_846_2398_763_612_3846_1060_312_859_3638_763_2238_590_2458_3847_304_1888_1986_2238_3412_1885_375_2176_887_3051_2238_3374_3485_973_251_622_3051_2557_727_3750_240_2386_2452_2712_2913_2525_691_1114_1363_796_3731_1232_1332_3282_966_3883_2431_1774_2559_755_748_2975_2608_3345_7_868_731_2872_1336_2488_3706_2276_3739_434_2203_2019_873_1273_3627_2912_4046_120_2888_1707_1153_3731_2927_1188_1400_966_397_1153_2712_1140_792_3412_20_3452_2452_1247_3297_1669_326_2813_2365_3368_1774_1129_3260_186_1814_1445_438_1247_3323_3368_3744_2392_448_1953_1247_2204_2430_1094_1702_2688_1953_2712_966_2380_3714_3446_3391_2531_2738_2312_3866_1952_2238_4069_752_1861_73_3403_3825_685_1707_332_2738_685_305_4049_1004_186_1188_4076_3468_2885_740_1001_251_3367_2712_48_752_2673_2617_793_927_2712_2801_3229_3896_886_773_3229_3396_1160_1968_3272_381_3452_1307_3396_2240_1307_215_726_679_3195_2712_3026_903_2317_1114_901_1484_665_2182_3688_7_234_3009_370_2712_3871_3551_499_240_1742_2531_582_862_930_1097_688_2450_1658_2738_97_3698_2502_308_746_488_608_2948_1669_3919_2204_2431_3219_1774_1941_845_1398_2440_3765_1644_2206_3795_2076_1953_685_1953_1953_3879_3089_2331_2807_2238_1660_95_222_3671_2386_2918_1094_3008_152_117_1924_365_3893_3069_1725_499_3731_3258_1794_2718_2502_829_575_2326_294_4054_1349_1814_3177_1188_3872_3281_588_3750_2813_992_3687_3731_3087_3786_2453_450_2365_930_1788_831_1644_2984_3180_1010_206_1788_3808_100_2506_3230_399_204_1806_48_3603_1669_2779_289_3514_572_1032_1932_1060_990_3702_1046_3161_2085_1932_1932_3350_702_263_665_1032_2895_901_489_859_2434_2712_3693_2788_1838_1026_3251_1701_665_1477_204_2008_318_289_2788_1930_1325_1595_237_1054_3820_1669_665_931_1863_3218_2094_859_289_2712_3937_1920_1229_1408_153_1990_2712_1435_427_1838_961_901_2450_3030_1516_3775_3013_267_204_2626_222_477_1134_2083_1217_243_2070_695_550_2434_2525_3566_3253_3075_222_2167_616_3574_3375_1655_457_1131_3316_3702_2076_990_3498_1261_1369_2516_435_890_3969_951_3867_2
22_1443_1134_1838_2003_1648_447_2647_1299_1395_3324_3514_1046_1060_188_2593_3498_3514_1648_2813_3353_2647_1048_719_3353_2939_3592_3613_2788_3487_499_2786_2801_3318_3396_7_2206_231_1346_240_3828_1482_188_966_175_1017_868_99_2469_222_584_2001_2750_2573_784_2001_1524_2913_3593_1580_1793_2874_1574_2160_1316_3254_2655_1675_2750_1052_2147_2809_1351_3008_1443_527_3321_3030_26_3286_2228_50_1112_2342_761_2559_3702_3702_1093_670_527_222_2003_2750_1312_3044_3199_295_222_4086_2673_263_1443_251_2259_222_4086_3384_263_354_2943_2943_222_2952_3194_1231_354_1932_3288_222_354_1593_3384_3672_1539_3551_222_1161_3324_3458_2675_1307_2876_665_688_3979_1660_2444_3229_2013_665_3345_2224_2224_926_2224_580_1430_1129_1907_3177_1161_1059_4076_188_1544_2080_114_1130_2066_186_665_3671_1848_1726_1567_2673_833_3724_3910_289_568_1423_2008_1305_188_375_1631_3724_20_536_2174_1788_124_979_1688_484_679_3708_188_3875_1516_3236_1148_2700_3761_1524_1420_1761_2115_2419_457_2813_222_3808_175_1283_2310_2673_756_1806_1164_2083_499_146_92_2328_1788_1877_2750_1768_85_2662_4093_2147_2317_1134_204_2779_1768_3415_2147_2408_3269_1462_2435_2434_3205_2147_230_1594_3881_1114_1934_2676_2891_782_4071_3687_3195_2338_623_1733_757_3467_3706_3352_1806_2467_188_3161_1912_3724_1907_2750_3384_3030_775_2254_3514_427_3893_1848_7_216_3205_2736_3106_2331_4036_2160_2939_4080_2613_73_3412_2870_3731_3566_1970_3183_2782_1851_599_222_308_3028_1863_105_764_3450_1788_1905_1283_3866_462_365_3806_654_2913_2831_1343_3106_365_1813_3731_2415_2558_657_2610_1408_3507_1788_2940_1581_2013_366_2337_3005_1788_2124_1010_755_278_1446_2817_222_3808_802_2462_2448_2068_3507_3731_928_3547_2992_2204_3463_2100_1788_2909_1094_940_3106_2689_933_3731_2392_2473_3856_3662_935_2918_3731_2900_1669_1669_2662_2008_2444_1788_2019_1655_829_2939_208_2365_7_3671_1106_833_2513_3849_1655_981_3181_1818_2453_3352_2738_2470_680_77_2946_3149_926_2738_529_3731_162_1055_448_926_2151_731_7_2204_2867_1904_4071_105_2511_1788_2964_3619_2338_2019_1310_3570_1247_1751_410_2885_660_232
0_237_7_1924_79_3384_3297_2386_344_1788_3740_3704_1519_945_87_1462_2147_429_247_289_2029_196_3218_2147_3998_449_3218_721_196_3171_7_427_1578_455_3855_1678_3173_1733_2913_1400_3112_3323_177_2718_4033_637_697_3461_1130_1041_2398_3030_775_2788_2089_2968_2673_3205_188_366_685_991_3901_811_1527_1088_1875_399_3893_3212_829_1589_2160_449_497_691_782_4059_1269_2160_2939_946_595_684_2253_31_2147_2182_149_3877_1423_3218_317_1524_1336_351_2320_2991_1046_3102_7_2204_4087_2221_3227_304_1411_7_105_3849_2158_637_1833_3154_7_60_1764_2073_1565_3866_2610_7_902_1143_435_716_2525_3936_7_2909_646_3799_1238_2616_2688_3731_4026_1593_3102_1617_2491_550_7_1659_457_832_1425_3218_586_3731_2617_2673_3919_1042_3187_39_3731_73_397_1904_1818_175_1444_680_1161_2204_2480_2182_99_956_680_2204_1485_2410_3001_3901_3907_2326_2634_1757_3744_1751_1059_1445_3731_1966_829_2458_3062_3514_1747_3396_3468_2166_499_3969_2097_2005_1788_219_3560_1159_645_3867_3246_1246_2290_1669_3497_1316_1669_15_1806_479_196_12_2029_427_1390_1788_4070_2398_1747_3166_2008_2918_117_2485_1757_1599_3477_2203_304_169_777_1489_2735_2531_1882', ['2712', '_1604', '_3458', '_4031', '_1604', '_1669', '_2008', '_2337', '_857', '_3123', '_3321', '_1887', '_846', '_2398', '_763', '_612', '_3846', '_1060', '_312', '_859', '_3638', '_763', '_2238', '_590', '_2458', '_3847', '_304', '_1888', '_1986', '_2238', '_3412', '_1885', '_375', '_2176', '_887', '_3051', '_2238', '_3374', '_3485', '_973', '_251', '_622', '_3051', '_2557', '_727', '_3750', '_240', '_2386', '_2452', '_2712', '_2913', '_2525', '_691', '_1114', '_1363', '_796', '_3731', '_1232', '_1332', '_3282', '_966', '_3883', '_2431', '_1774', '_2559', '_755', '_748', '_2975', '_2608', '_3345', '_7', '_868', '_731', '_2872', '_1336', '_2488', '_3706', '_2276', '_3739', '_434', '_2203', '_2019', '_873', '_1273', '_3627', '_2912', '_4046', '_120', '_2888', '_1707', '_1153', '_3731', '_2927', '_1188', '_1400', '_966', '_397', '_1153', '_2712', '_1140', '_792', '_3412', '_20', '_3452', 
'_2452', '_1247', '_3297', '_1669', '_326', '_2813', '_2365', '_3368', '_1774', '_1129', '_3260', '_186', '_1814', '_1445', '_438', '_1247', '_3323', '_3368', '_3744', '_2392', '_448', '_1953', '_1247', '_2204', '_2430', '_1094', '_1702', '_2688', '_1953', '_2712', '_966', '_2380', '_3714', '_3446', '_3391', '_2531', '_2738', '_2312', '_3866', '_1952', '_2238', '_4069', '_752', '_1861', '_73', '_3403', '_3825', '_685', '_1707', '_332', '_2738', '_685', '_305', '_4049', '_1004', '_186', '_1188', '_4076', '_3468', '_2885', '_740', '_1001', '_251', '_3367', '_2712', '_48', '_752', '_2673', '_2617', '_793', '_927', '_2712', '_2801', '_3229', '_3896', '_886', '_773', '_3229', '_3396', '_1160', '_1968', '_3272', '_381', '_3452', '_1307', '_3396', '_2240', '_1307', '_215', '_726', '_679', '_3195', '_2712', '_3026', '_903', '_2317', '_1114', '_901', '_1484', '_665', '_2182', '_3688', '_7', '_234', '_3009', '_370', '_2712', '_3871', '_3551', '_499', '_240', '_1742', '_2531', '_582', '_862', '_930', '_1097', '_688', '_2450', '_1658', '_2738', '_97', '_3698', '_2502', '_308', '_746', '_488', '_608', '_2948', '_1669', '_3919', '_2204', '_2431', '_3219', '_1774', '_1941', '_845', '_1398', '_2440', '_3765', '_1644', '_2206', '_3795', '_2076', '_1953', '_685', '_1953', '_1953', '_3879', '_3089', '_2331', '_2807', '_2238', '_1660', '_95', '_222', '_3671', '_2386', '_2918', '_1094', '_3008', '_152', '_117', '_1924', '_365', '_3893', '_3069', '_1725', '_499', '_3731', '_3258', '_1794', '_2718', '_2502', '_829', '_575', '_2326', '_294', '_4054', '_1349', '_1814', '_3177', '_1188', '_3872', '_3281', '_588', '_3750', '_2813', '_992', '_3687', '_3731', '_3087', '_3786', '_2453', '_450', '_2365', '_930', '_1788', '_831', '_1644', '_2984', '_3180', '_1010', '_206', '_1788', '_3808', '_100', '_2506', '_3230', '_399', '_204', '_1806', '_48', '_3603', '_1669', '_2779', '_289', '_3514', '_572', '_1032', '_1932', '_1060', '_990', '_3702', '_1046', '_3161', '_2085', '_1932', '_1932', '_3350', 
'_702', '_263', '_665', '_1032', '_2895', '_901', '_489', '_859', '_2434', '_2712', '_3693', '_2788', '_1838', '_1026', '_3251', '_1701', '_665', '_1477', '_204', '_2008', '_318', '_289', '_2788', '_1930', '_1325', '_1595', '_237', '_1054', '_3820', '_1669', '_665', '_931', '_1863', '_3218', '_2094', '_859', '_289', '_2712', '_3937', '_1920', '_1229', '_1408', '_153', '_1990', '_2712', '_1435', '_427', '_1838', '_961', '_901', '_2450', '_3030', '_1516', '_3775', '_3013', '_267', '_204', '_2626', '_222', '_477', '_1134', '_2083', '_1217', '_243', '_2070', '_695', '_550', '_2434', '_2525', '_3566', '_3253', '_3075', '_222', '_2167', '_616', '_3574', '_3375', '_1655', '_457', '_1131', '_3316', '_3702', '_2076', '_990', '_3498', '_1261', '_1369', '_2516', '_435', '_890', '_3969', '_951', '_3867', '_222', '_1443', '_1134', '_1838', '_2003', '_1648', '_447', '_2647', '_1299', '_1395', '_3324', '_3514', '_1046', '_1060', '_188', '_2593', '_3498', '_3514', '_1648', '_2813', '_3353', '_2647', '_1048', '_719', '_3353', '_2939', '_3592', '_3613', '_2788', '_3487', '_499', '_2786', '_2801', '_3318', '_3396', '_7', '_2206', '_231', '_1346', '_240', '_3828', '_1482', '_188', '_966', '_175', '_1017', '_868', '_99', '_2469', '_222', '_584', '_2001', '_2750', '_2573', '_784', '_2001', '_1524', '_2913', '_3593', '_1580', '_1793', '_2874', '_1574', '_2160', '_1316', '_3254', '_2655', '_1675', '_2750', '_1052', '_2147', '_2809', '_1351', '_3008', '_1443', '_527', '_3321', '_3030', '_26', '_3286', '_2228', '_50', '_1112', '_2342', '_761', '_2559', '_3702', '_3702', '_1093', '_670', '_527', '_222', '_2003', '_2750', '_1312', '_3044', '_3199', '_295', '_222', '_4086', '_2673', '_263', '_1443', '_251', '_2259', '_222', '_4086', '_3384', '_263', '_354', '_2943', '_2943', '_222', '_2952', '_3194', '_1231', '_354', '_1932', '_3288', '_222', '_354', '_1593', '_3384', '_3672', '_1539', '_3551', '_222', '_1161', '_3324', '_3458', '_2675', '_1307', '_2876', '_665', '_688', '_3979', '_1660', 
'_2444', '_3229', '_2013', '_665', '_3345', '_2224', '_2224', '_926', '_2224', '_580', '_1430', '_1129', '_1907', '_3177', '_1161', '_1059', '_4076', '_188', '_1544', '_2080', '_114', '_1130', '_2066', '_186', '_665', '_3671', '_1848', '_1726', '_1567', '_2673', '_833', '_3724', '_3910', '_289', '_568', '_1423', '_2008', '_1305', '_188', '_375', '_1631', '_3724', '_20', '_536', '_2174', '_1788', '_124', '_979', '_1688', '_484', '_679', '_3708', '_188', '_3875', '_1516', '_3236', '_1148', '_2700', '_3761', '_1524', '_1420', '_1761', '_2115', '_2419', '_457', '_2813', '_222', '_3808', '_175', '_1283', '_2310', '_2673', '_756', '_1806', '_1164', '_2083', '_499', '_146', '_92', '_2328', '_1788', '_1877', '_2750', '_1768', '_85', '_2662', '_4093', '_2147', '_2317', '_1134', '_204', '_2779', '_1768', '_3415', '_2147', '_2408', '_3269', '_1462', '_2435', '_2434', '_3205', '_2147', '_230', '_1594', '_3881', '_1114', '_1934', '_2676', '_2891', '_782', '_4071', '_3687', '_3195', '_2338', '_623', '_1733', '_757', '_3467', '_3706', '_3352', '_1806', '_2467', '_188', '_3161', '_1912', '_3724', '_1907', '_2750', '_3384', '_3030', '_775', '_2254', '_3514', '_427', '_3893', '_1848', '_7', '_216', '_3205', '_2736', '_3106', '_2331', '_4036', '_2160', '_2939', '_4080', '_2613', '_73', '_3412', '_2870', '_3731', '_3566', '_1970', '_3183', '_2782', '_1851', '_599', '_222', '_308', '_3028', '_1863', '_105', '_764', '_3450', '_1788', '_1905', '_1283', '_3866', '_462', '_365', '_3806', '_654', '_2913', '_2831', '_1343', '_3106', '_365', '_1813', '_3731', '_2415', '_2558', '_657', '_2610', '_1408', '_3507', '_1788', '_2940', '_1581', '_2013', '_366', '_2337', '_3005', '_1788', '_2124', '_1010', '_755', '_278', '_1446', '_2817', '_222', '_3808', '_802', '_2462', '_2448', '_2068', '_3507', '_3731', '_928', '_3547', '_2992', '_2204', '_3463', '_2100', '_1788', '_2909', '_1094', '_940', '_3106', '_2689', '_933', '_3731', '_2392', '_2473', '_3856', '_3662', '_935', '_2918', '_3731', '_2900', 
'_1669', '_1669', '_2662', '_2008', '_2444', '_1788', '_2019', '_1655', '_829', '_2939', '_208', '_2365', '_7', '_3671', '_1106', '_833', '_2513', '_3849', '_1655', '_981', '_3181', '_1818', '_2453', '_3352', '_2738', '_2470', '_680', '_77', '_2946', '_3149', '_926', '_2738', '_529', '_3731', '_162', '_1055', '_448', '_926', '_2151', '_731', '_7', '_2204', '_2867', '_1904', '_4071', '_105', '_2511', '_1788', '_2964', '_3619', '_2338', '_2019', '_1310', '_3570', '_1247', '_1751', '_410', '_2885', '_660', '_2320', '_237', '_7', '_1924', '_79', '_3384', '_3297', '_2386', '_344', '_1788', '_3740', '_3704', '_1519', '_945', '_87', '_1462', '_2147', '_429', '_247', '_289', '_2029', '_196', '_3218', '_2147', '_3998', '_449', '_3218', '_721', '_196', '_3171', '_7', '_427', '_1578', '_455', '_3855', '_1678', '_3173', '_1733', '_2913', '_1400', '_3112', '_3323', '_177', '_2718', '_4033', '_637', '_697', '_3461', '_1130', '_1041', '_2398', '_3030', '_775', '_2788', '_2089', '_2968', '_2673', '_3205', '_188', '_366', '_685', '_991', '_3901', '_811', '_1527', '_1088', '_1875', '_399', '_3893', '_3212', '_829', '_1589', '_2160', '_449', '_497', '_691', '_782', '_4059', '_1269', '_2160', '_2939', '_946', '_595', '_684', '_2253', '_31', '_2147', '_2182', '_149', '_3877', '_1423', '_3218', '_317', '_1524', '_1336', '_351', '_2320', '_2991', '_1046', '_3102', '_7', '_2204', '_4087', '_2221', '_3227', '_304', '_1411', '_7', '_105', '_3849', '_2158', '_637', '_1833', '_3154', '_7', '_60', '_1764', '_2073', '_1565', '_3866', '_2610', '_7', '_902', '_1143', '_435', '_716', '_2525', '_3936', '_7', '_2909', '_646', '_3799', '_1238', '_2616', '_2688', '_3731', '_4026', '_1593', '_3102', '_1617', '_2491', '_550', '_7', '_1659', '_457', '_832', '_1425', '_3218', '_586', '_3731', '_2617', '_2673', '_3919', '_1042', '_3187', '_39', '_3731', '_73', '_397', '_1904', '_1818', '_175', '_1444', '_680', '_1161', '_2204', '_2480', '_2182', '_99', '_956', '_680', '_2204', '_1485', '_2410', '_3001', 
'_3901', '_3907', '_2326', '_2634', '_1757', '_3744', '_1751', '_1059', '_1445', '_3731', '_1966', '_829', '_2458', '_3062', '_3514', '_1747', '_3396', '_3468', '_2166', '_499', '_3969', '_2097', '_2005', '_1788', '_219', '_3560', '_1159', '_645', '_3867', '_3246', '_1246', '_2290', '_1669', '_3497', '_1316', '_1669', '_15', '_1806', '_479', '_196', '_12', '_2029', '_427', '_1390', '_1788', '_4070', '_2398', '_1747', '_3166', '_2008', '_2918', '_117', '_2485', '_1757', '_1599', '_3477', '_2203', '_304', '_169', '_777', '_1489', '_2735', '_2531', '_1882'])\n", + "('3446_2542_1488_3452_1810_499_2365_752_3437_2386_2782_1604_410_3480_2712_3220_2259_586_3345_98_1749_1930_1893_2688_781_1993_1991_746_222_3659_447_536_3047_1289_2182_1788_3180_2945_3352_2280_447_3123_222_2524_2917_4042_280_1375_134_2852_400_1481_499_463_3442_1932_4076_3910_2357_2947_1424_258_2123_3051_1861_571_2008_3762_3978_1574_2712_375_2453_3750_278_1670_651_2276_1336_2694_833_894_1669_1061_4076_3020_218_921_3487_2265_1560_4076_3593_1573_610_2862_2005_679_2712_1875_2694_1669_510_1739_1462_3232_950_4009_2782_2332_375_2462_2712_3437_442_2029_3062_2887_2151_853_584_3820_1865_3671_1484_2002_2712_1907_251_3324_2356_608_25_1507_375_3362_2507_3983_2558_3738_1806_2356_3452_1284_1604_2249_175_472_3717_1289_1757_3051_1056_2312_472_3390_1554_3040_2918_168_793_3232_2094_4040_2673_3342_3384_90_2322_1336_3165_606_1667_997_2520_3232_2094_685_1539_3356_973_2097_2712_1424_410_2073_3983_53_3825_2712_2342_62_1421_3356_1650_3765_2712_3069_3387_90_894_2244_3458_514_1095_2070_3684_3062_3410_3345_514_868_1818_2971_3069_210_700_514_3593_2073_955_3390_62_1769_2337_1919_1017_2169_3901_2312_2221_2337_3297_977_2053_3901_1134_3686_1234_1705_1909_295_1604_50_2782_2337_3437_2782_50_933_2259_2133_3933_3437_50_50_2503_441_2266_3981_2609_1494_1532_2426_3602', ['3446', '_2542', '_1488', '_3452', '_1810', '_499', '_2365', '_752', '_3437', '_2386', '_2782', '_1604', '_410', '_3480', '_2712', '_3220', '_2259', '_586', '_3345', '_98', 
'_1749', '_1930', '_1893', '_2688', '_781', '_1993', '_1991', '_746', '_222', '_3659', '_447', '_536', '_3047', '_1289', '_2182', '_1788', '_3180', '_2945', '_3352', '_2280', '_447', '_3123', '_222', '_2524', '_2917', '_4042', '_280', '_1375', '_134', '_2852', '_400', '_1481', '_499', '_463', '_3442', '_1932', '_4076', '_3910', '_2357', '_2947', '_1424', '_258', '_2123', '_3051', '_1861', '_571', '_2008', '_3762', '_3978', '_1574', '_2712', '_375', '_2453', '_3750', '_278', '_1670', '_651', '_2276', '_1336', '_2694', '_833', '_894', '_1669', '_1061', '_4076', '_3020', '_218', '_921', '_3487', '_2265', '_1560', '_4076', '_3593', '_1573', '_610', '_2862', '_2005', '_679', '_2712', '_1875', '_2694', '_1669', '_510', '_1739', '_1462', '_3232', '_950', '_4009', '_2782', '_2332', '_375', '_2462', '_2712', '_3437', '_442', '_2029', '_3062', '_2887', '_2151', '_853', '_584', '_3820', '_1865', '_3671', '_1484', '_2002', '_2712', '_1907', '_251', '_3324', '_2356', '_608', '_25', '_1507', '_375', '_3362', '_2507', '_3983', '_2558', '_3738', '_1806', '_2356', '_3452', '_1284', '_1604', '_2249', '_175', '_472', '_3717', '_1289', '_1757', '_3051', '_1056', '_2312', '_472', '_3390', '_1554', '_3040', '_2918', '_168', '_793', '_3232', '_2094', '_4040', '_2673', '_3342', '_3384', '_90', '_2322', '_1336', '_3165', '_606', '_1667', '_997', '_2520', '_3232', '_2094', '_685', '_1539', '_3356', '_973', '_2097', '_2712', '_1424', '_410', '_2073', '_3983', '_53', '_3825', '_2712', '_2342', '_62', '_1421', '_3356', '_1650', '_3765', '_2712', '_3069', '_3387', '_90', '_894', '_2244', '_3458', '_514', '_1095', '_2070', '_3684', '_3062', '_3410', '_3345', '_514', '_868', '_1818', '_2971', '_3069', '_210', '_700', '_514', '_3593', '_2073', '_955', '_3390', '_62', '_1769', '_2337', '_1919', '_1017', '_2169', '_3901', '_2312', '_2221', '_2337', '_3297', '_977', '_2053', '_3901', '_1134', '_3686', '_1234', '_1705', '_1909', '_295', '_1604', '_50', '_2782', '_2337', '_3437', '_2782', '_50', 
'_933', '_2259', '_2133', '_3933', '_3437', '_50', '_50', '_2503', '_441', '_2266', '_3981', '_2609', '_1494', '_1532', '_2426_3602'])\n", + "('2308_2094_1158_2030_2094_29_2782_2308_2094_1755_2169_519_3146_4040_4076_1684_2133_793_1130_443_3612_2692_3106_2025_1626_339_977_768_485_3351_363_2624_4031_1934_1217_536_1875_744_208_2839_3050_2601_3396_3106_2167_1421_745_3229_1767_3396_1962_3187_776_379_363_3650_3396_229_1097_413_1688_2282_1139_3396_1878_2689_2397_3220_2813_247_7_4041_3037_2892_3161_3374_1449_536_3274_1826_1794_3009_118_2442_3051_1922_3359_1723_3662_1527_1565_3396_343_2718_415_2280_204_2243_2712_928_2518_1700_3069_1562_1755_1810_2532_2604_2909_379_3864_1644_40_2944_39_118_933_2386_2750_2160_731_2693_555_2371_109_555_536_2919_2199_887_1962_2693_1924_53_3091_2786_1747_4003_2386_657_1861_3897_248_3253_1130_657_1169_514_1130_3253_903_731_294_2536_1234_379_3787_1728_2873_1644_2750_3232_731_3146_251_2094_2910_1857_2308_2094_2462_2462_1868_2462_2782_2644_857_263_3449_3562_548', ['2308', '_2094', '_1158', '_2030', '_2094', '_29', '_2782', '_2308', '_2094', '_1755', '_2169', '_519', '_3146', '_4040', '_4076', '_1684', '_2133', '_793', '_1130', '_443', '_3612', '_2692', '_3106', '_2025', '_1626', '_339', '_977', '_768', '_485', '_3351', '_363', '_2624', '_4031', '_1934', '_1217', '_536', '_1875', '_744', '_208', '_2839', '_3050', '_2601', '_3396', '_3106', '_2167', '_1421', '_745', '_3229', '_1767', '_3396', '_1962', '_3187', '_776', '_379', '_363', '_3650', '_3396', '_229', '_1097', '_413', '_1688', '_2282', '_1139', '_3396', '_1878', '_2689', '_2397', '_3220', '_2813', '_247', '_7', '_4041', '_3037', '_2892', '_3161', '_3374', '_1449', '_536', '_3274', '_1826', '_1794', '_3009', '_118', '_2442', '_3051', '_1922', '_3359', '_1723', '_3662', '_1527', '_1565', '_3396', '_343', '_2718', '_415', '_2280', '_204', '_2243', '_2712', '_928', '_2518', '_1700', '_3069', '_1562', '_1755', '_1810', '_2532', '_2604', '_2909', '_379', '_3864', '_1644', '_40', '_2944', '_39', 
'_118', '_933', '_2386', '_2750', '_2160', '_731', '_2693', '_555', '_2371', '_109', '_555', '_536', '_2919', '_2199', '_887', '_1962', '_2693', '_1924', '_53', '_3091', '_2786', '_1747', '_4003', '_2386', '_657', '_1861', '_3897', '_248', '_3253', '_1130', '_657', '_1169', '_514', '_1130', '_3253', '_903', '_731', '_294', '_2536', '_1234', '_379', '_3787', '_1728', '_2873', '_1644', '_2750', '_3232', '_731', '_3146', '_251', '_2094', '_2910', '_1857', '_2308', '_2094', '_2462', '_2462', '_1868', '_2462', '_2782', '_2644', '_857', '_263', '_3449', '_3562', '_548'])\n", + "('1083_2968_762_1669_3336_317_2008_3835_1072_3218_3218_3350_1046_104_2596_2944_2511_15_461_204_1326_4076_3278_718_3893_3481_208_2647_3920_2623_1644_1725_3473_2361_1229_4076_3845_3251_2550_2600_4061_890_4033_2511_1838_2662_2622_3374_2331_3920_3912_2001_2132_1579_2363_2700_986_2027_1991_331_3136_4061_503_1970_3452_3092_79_158_3093_1933_2786_3815_2556_2506_3072_2857_368_3137_3575_854_1669_1161_2008_2662_4033_3053_2662_3750_1866_1725_2726_3920_2714_1375_940_3276_940_727_1809_2740_1877_550_1244_2610_2648_2147_2471_2750_2525_1342_2712_2679_1282_2909_3610_2386_723_933_1800_572_2675_1048_3400_685_538_809_4033_2431_2833_1904_3713_3886_1644_4076_1781_683_3541_3129_93_2784_4076_2237_2945_2249_260_2308_2399_1826_262_4069_476_143_950_3374_1826_2975_42_792_3863_2536_2083_2077_3747_427_3462_3418_2107', ['1083', '_2968', '_762', '_1669', '_3336', '_317', '_2008', '_3835', '_1072', '_3218', '_3218', '_3350', '_1046', '_104', '_2596', '_2944', '_2511', '_15', '_461', '_204', '_1326', '_4076', '_3278', '_718', '_3893', '_3481', '_208', '_2647', '_3920', '_2623', '_1644', '_1725', '_3473', '_2361', '_1229', '_4076', '_3845', '_3251', '_2550', '_2600', '_4061', '_890', '_4033', '_2511', '_1838', '_2662', '_2622', '_3374', '_2331', '_3920', '_3912', '_2001', '_2132', '_1579', '_2363', '_2700', '_986', '_2027', '_1991', '_331', '_3136', '_4061', '_503', '_1970', '_3452', '_3092', '_79', '_158', '_3093', '_1933', 
'_2786', '_3815', '_2556', '_2506', '_3072', '_2857', '_368', '_3137', '_3575', '_854', '_1669', '_1161', '_2008', '_2662', '_4033', '_3053', '_2662', '_3750', '_1866', '_1725', '_2726', '_3920', '_2714', '_1375', '_940', '_3276', '_940', '_727', '_1809', '_2740', '_1877', '_550', '_1244', '_2610', '_2648', '_2147', '_2471', '_2750', '_2525', '_1342', '_2712', '_2679', '_1282', '_2909', '_3610', '_2386', '_723', '_933', '_1800', '_572', '_2675', '_1048', '_3400', '_685', '_538', '_809', '_4033', '_2431', '_2833', '_1904', '_3713', '_3886', '_1644', '_4076', '_1781', '_683', '_3541', '_3129', '_93', '_2784', '_4076', '_2237', '_2945', '_2249', '_260', '_2308', '_2399', '_1826', '_262', '_4069', '_476', '_143', '_950', '_3374', '_1826', '_2975', '_42', '_792', '_3863', '_2536', '_2083', '_2077', '_3747', '_427', '_3462', '_3418', '_2107'])\n", + "('1234_3901_1328_1351_3273_331_985_1176_1130_1964_96_1907_3725_3497_3745_3680_619_499_7_1872_1112_1131_2306_1872_2431_203_2453_398_536_2835_3538_410_2530_3232_184_2712_798_363_1222_2139_679_2504_3039_734_3498_3978_2918_2925_3702_682_1565_3508_2251_1566_104_1485_3110_3387_1815_1033_1114_1726_3673_1369_56_1966_951_1106_1506_1629_2859_3303_208_277_3376_773_3764_1083_1756_160_1514_3250_124_3102_1083_1667_1424_2859_1575_490_3551_1083_1667_4057_1924_3278_3109_3109_310_1667_3720_3832_2154_3750_700_1083_2796_412_2333_1684_2739_1406_1234_2542_2259_248_246_9_669_204_3901_2753_2608_2154_3975_3109_2859_175_20_1987_3505_802_2837_187_1106_829_2453_3984_1419_1310_2859_342_3418_3551_3381_1388_3039_3745_2532_792_3775_3265_1112_3178_1926_3549_1485_3232_2179_3787_681_2990_3297_436_1924_2702_1349_4082_754_1423_26_2786_308_1848_3652_3684_3801_3199_2480_3967_727_93_3091_3798_3311_3358_3798_152_1140_682_212_2945_4091_4041_1289_4032_1357_2392_3777_1818_278_1159_396_518_3575_3866_1920_3659_1261_1312_375_2168_740_2782_2356_701_3168_1083_805_1046_2782_1629_3560_535_3119_2094_527_1647_3335_525_3321_939_805_2722_2456_1216_3008_3410_3643_97_3134_313_285_1
229_4054_1369_3984_414_2361_3293_575_56_682_1914_398_3706_1031_3702_2013_3684_1150_3278_3968_3382_977_2575_4076_3369_3638_2920_1242_600_890_1083_2562_2693_3082_1907_890_1307_204_944_714_3990_286_1046_2453_3981_1186_658_424_2609_1513', ['1234', '_3901', '_1328', '_1351', '_3273', '_331', '_985', '_1176', '_1130', '_1964', '_96', '_1907', '_3725', '_3497', '_3745', '_3680', '_619', '_499', '_7', '_1872', '_1112', '_1131', '_2306', '_1872', '_2431', '_203', '_2453', '_398', '_536', '_2835', '_3538', '_410', '_2530', '_3232', '_184', '_2712', '_798', '_363', '_1222', '_2139', '_679', '_2504', '_3039', '_734', '_3498', '_3978', '_2918', '_2925', '_3702', '_682', '_1565', '_3508', '_2251', '_1566', '_104', '_1485', '_3110', '_3387', '_1815', '_1033', '_1114', '_1726', '_3673', '_1369', '_56', '_1966', '_951', '_1106', '_1506', '_1629', '_2859', '_3303', '_208', '_277', '_3376', '_773', '_3764', '_1083', '_1756', '_160', '_1514', '_3250', '_124', '_3102', '_1083', '_1667', '_1424', '_2859', '_1575', '_490', '_3551', '_1083', '_1667', '_4057', '_1924', '_3278', '_3109', '_3109', '_310', '_1667', '_3720', '_3832', '_2154', '_3750', '_700', '_1083', '_2796', '_412', '_2333', '_1684', '_2739', '_1406', '_1234', '_2542', '_2259', '_248', '_246', '_9', '_669', '_204', '_3901', '_2753', '_2608', '_2154', '_3975', '_3109', '_2859', '_175', '_20', '_1987', '_3505', '_802', '_2837', '_187', '_1106', '_829', '_2453', '_3984', '_1419', '_1310', '_2859', '_342', '_3418', '_3551', '_3381', '_1388', '_3039', '_3745', '_2532', '_792', '_3775', '_3265', '_1112', '_3178', '_1926', '_3549', '_1485', '_3232', '_2179', '_3787', '_681', '_2990', '_3297', '_436', '_1924', '_2702', '_1349', '_4082', '_754', '_1423', '_26', '_2786', '_308', '_1848', '_3652', '_3684', '_3801', '_3199', '_2480', '_3967', '_727', '_93', '_3091', '_3798', '_3311', '_3358', '_3798', '_152', '_1140', '_682', '_212', '_2945', '_4091', '_4041', '_1289', '_4032', '_1357', '_2392', '_3777', '_1818', '_278', '_1159', 
'_396', '_518', '_3575', '_3866', '_1920', '_3659', '_1261', '_1312', '_375', '_2168', '_740', '_2782', '_2356', '_701', '_3168', '_1083', '_805', '_1046', '_2782', '_1629', '_3560', '_535', '_3119', '_2094', '_527', '_1647', '_3335', '_525', '_3321', '_939', '_805', '_2722', '_2456', '_1216', '_3008', '_3410', '_3643', '_97', '_3134', '_313', '_285', '_1229', '_4054', '_1369', '_3984', '_414', '_2361', '_3293', '_575', '_56', '_682', '_1914', '_398', '_3706', '_1031', '_3702', '_2013', '_3684', '_1150', '_3278', '_3968', '_3382', '_977', '_2575', '_4076', '_3369', '_3638', '_2920', '_1242', '_600', '_890', '_1083', '_2562', '_2693', '_3082', '_1907', '_890', '_1307', '_204', '_944', '_714', '_3990', '_286', '_1046', '_2453', '_3981', '_1186', '_658', '_424', '_2609', '_1513'])\n", + "('2712_2424_1311_2693_1684_3721_2303_1234_1130_1487_2021_2435_3778_3410_3627_1491_3467_2076_2204_3721_331_40_3105_2673_2442_3345_1270_1138_3051_2134_3820_3230_1076_3368_3893_1986_1161_96_515_1051_3476_224_1733_1114_2669_81_3969_633_1968_117_1114_2984_2046_1565_599_2093_117_3167_2236_92_248_3822_1815_2214_2322_378_2225_249_1294_3082_2214_643_2424_289_3178_196_3999_654_1565_3165_3030_716_748_3737_2738_1114_1671_370_1114_1235_3508_456_1887_1794_1779_1734_629_2230_3731_2909_1266_1381_1719_3183_2431_2647_2675_802_3720_1170_1349_2858_2214_490_168_2626_105_2398_1578_3197_2448_1562_2536_3352_2531_1590_2910_2839_3743_2094_2939_3541_1303_925_1114_1833_950_2939_1220_3379_3648_2617_3665_112_3069_1105_1350_3051_3671_1351_1395_3808_290_9_3051_2310_1970_3623_3297_501_2531_1148_248_1099_1932_450_3744_1186_2910_1978_1424_1310_2617_1661_416_3051_1114_2773_2738_3969_3866_544_3051_1770_2713_2398_64_1669_1168_160_637_2365_910_1129_2018_3958_2439_1129_60_2025_2760_4069_1848_2738_1203_2589_3775_20_1815_200_3051_3297_1651_2673_2144_1462_399_3879_937_2398_2520_1170_3777_1262_3051_2205_1945_2918_2524_1312_586_1925_1170_2386_3856_1170_3092_1604_2712_62_2001_3990_2496_3404_3183_117_2205_1269_2342_658_43_620_3627_
1048_3262_1254_2310_712_1877_3051_3437_1595_874_1684_1453_1234_3051_3163_3764_939_1161_2304_2983_472_3167_1160_1061_2348_1669_2952_1861_248_55_3569_3969_962_661_2738_375_469_2203_2384_2673_1907_2738_3167_1308_3753_3297_2249_890_3197_2782_1351_1763_1170_2750_3495_3731_775_3732_4004_2939_3468_238_456_1491_1912_495_1491_3379_482_3051_3785_3556_442_3297_1229_933_2647_1076_268_1338_1170_3638_3109_3040_1076_1848_1669_3526_3778_1907_2439_1170_2642_3790_1170_9_2265_2738_2310_499_854_2310_2925_802_3040_248_1661_622_2782_571_2673_3051_308_1924_1607_2310_3092_3907_2738_3785_181_3951_2939_2892_2530_2803_3381_463_535_3297_3039_2249_2738_3297_784_3178_3468_132_4092_2214_944_2466_3877_1684_2516_215_766_308_2203_2303_1129_3470_2177_3051_1491_3026_669_3297_3259_3777_3051_1684_1312_2525_3468_372_2918_3051_1338_3721_3458_3297_2304_1872_3627_3468_893_2251_1696_15_1768_1234_3747_2434_2693_248_3426_3724_117_2782_3183_2057_248_861_74_3627_2809_456_1672_248_45_1742_3627_3297_100_2712_2809_1932_2365_3731_62_1742_2386_3686_1594_3329_3051_2496_1970_2750_933_1312_3942_3879_3297_2854_2491_64_1398_2166_2738_2347_755_2002_2573_3867_2554_2738_1219_2096_2843_308_2363_2415_2322_462_416_416_3069_2428_3407_2214_1575_2357_289_461_1845_4009_4016_3177_3618_1739_2492_859_691_2712_3107_2561_1768_2154_2673_1401_1234_944_1920_2008_2948_2169_4003_3885_1567_1563_2579_3562_2030_3977_3981_2609_1494_1532_2426_3602', ['2712', '_2424', '_1311', '_2693', '_1684', '_3721', '_2303', '_1234', '_1130', '_1487', '_2021', '_2435', '_3778', '_3410', '_3627', '_1491', '_3467', '_2076', '_2204', '_3721', '_331', '_40', '_3105', '_2673', '_2442', '_3345', '_1270', '_1138', '_3051', '_2134', '_3820', '_3230', '_1076', '_3368', '_3893', '_1986', '_1161', '_96', '_515', '_1051', '_3476', '_224', '_1733', '_1114', '_2669', '_81', '_3969', '_633', '_1968', '_117', '_1114', '_2984', '_2046', '_1565', '_599', '_2093', '_117', '_3167', '_2236', '_92', '_248', '_3822', '_1815', '_2214', '_2322', '_378', '_2225', '_249', '_1294', 
'_3082', '_2214', '_643', '_2424', '_289', '_3178', '_196', '_3999', '_654', '_1565', '_3165', '_3030', '_716', '_748', '_3737', '_2738', '_1114', '_1671', '_370', '_1114', '_1235', '_3508', '_456', '_1887', '_1794', '_1779', '_1734', '_629', '_2230', '_3731', '_2909', '_1266', '_1381', '_1719', '_3183', '_2431', '_2647', '_2675', '_802', '_3720', '_1170', '_1349', '_2858', '_2214', '_490', '_168', '_2626', '_105', '_2398', '_1578', '_3197', '_2448', '_1562', '_2536', '_3352', '_2531', '_1590', '_2910', '_2839', '_3743', '_2094', '_2939', '_3541', '_1303', '_925', '_1114', '_1833', '_950', '_2939', '_1220', '_3379', '_3648', '_2617', '_3665', '_112', '_3069', '_1105', '_1350', '_3051', '_3671', '_1351', '_1395', '_3808', '_290', '_9', '_3051', '_2310', '_1970', '_3623', '_3297', '_501', '_2531', '_1148', '_248', '_1099', '_1932', '_450', '_3744', '_1186', '_2910', '_1978', '_1424', '_1310', '_2617', '_1661', '_416', '_3051', '_1114', '_2773', '_2738', '_3969', '_3866', '_544', '_3051', '_1770', '_2713', '_2398', '_64', '_1669', '_1168', '_160', '_637', '_2365', '_910', '_1129', '_2018', '_3958', '_2439', '_1129', '_60', '_2025', '_2760', '_4069', '_1848', '_2738', '_1203', '_2589', '_3775', '_20', '_1815', '_200', '_3051', '_3297', '_1651', '_2673', '_2144', '_1462', '_399', '_3879', '_937', '_2398', '_2520', '_1170', '_3777', '_1262', '_3051', '_2205', '_1945', '_2918', '_2524', '_1312', '_586', '_1925', '_1170', '_2386', '_3856', '_1170', '_3092', '_1604', '_2712', '_62', '_2001', '_3990', '_2496', '_3404', '_3183', '_117', '_2205', '_1269', '_2342', '_658', '_43', '_620', '_3627', '_1048', '_3262', '_1254', '_2310', '_712', '_1877', '_3051', '_3437', '_1595', '_874', '_1684', '_1453', '_1234', '_3051', '_3163', '_3764', '_939', '_1161', '_2304', '_2983', '_472', '_3167', '_1160', '_1061', '_2348', '_1669', '_2952', '_1861', '_248', '_55', '_3569', '_3969', '_962', '_661', '_2738', '_375', '_469', '_2203', '_2384', '_2673', '_1907', '_2738', '_3167', '_1308', 
'_3753', '_3297', '_2249', '_890', '_3197', '_2782', '_1351', '_1763', '_1170', '_2750', '_3495', '_3731', '_775', '_3732', '_4004', '_2939', '_3468', '_238', '_456', '_1491', '_1912', '_495', '_1491', '_3379', '_482', '_3051', '_3785', '_3556', '_442', '_3297', '_1229', '_933', '_2647', '_1076', '_268', '_1338', '_1170', '_3638', '_3109', '_3040', '_1076', '_1848', '_1669', '_3526', '_3778', '_1907', '_2439', '_1170', '_2642', '_3790', '_1170', '_9', '_2265', '_2738', '_2310', '_499', '_854', '_2310', '_2925', '_802', '_3040', '_248', '_1661', '_622', '_2782', '_571', '_2673', '_3051', '_308', '_1924', '_1607', '_2310', '_3092', '_3907', '_2738', '_3785', '_181', '_3951', '_2939', '_2892', '_2530', '_2803', '_3381', '_463', '_535', '_3297', '_3039', '_2249', '_2738', '_3297', '_784', '_3178', '_3468', '_132', '_4092', '_2214', '_944', '_2466', '_3877', '_1684', '_2516', '_215', '_766', '_308', '_2203', '_2303', '_1129', '_3470', '_2177', '_3051', '_1491', '_3026', '_669', '_3297', '_3259', '_3777', '_3051', '_1684', '_1312', '_2525', '_3468', '_372', '_2918', '_3051', '_1338', '_3721', '_3458', '_3297', '_2304', '_1872', '_3627', '_3468', '_893', '_2251', '_1696', '_15', '_1768', '_1234', '_3747', '_2434', '_2693', '_248', '_3426', '_3724', '_117', '_2782', '_3183', '_2057', '_248', '_861', '_74', '_3627', '_2809', '_456', '_1672', '_248', '_45', '_1742', '_3627', '_3297', '_100', '_2712', '_2809', '_1932', '_2365', '_3731', '_62', '_1742', '_2386', '_3686', '_1594', '_3329', '_3051', '_2496', '_1970', '_2750', '_933', '_1312', '_3942', '_3879', '_3297', '_2854', '_2491', '_64', '_1398', '_2166', '_2738', '_2347', '_755', '_2002', '_2573', '_3867', '_2554', '_2738', '_1219', '_2096', '_2843', '_308', '_2363', '_2415', '_2322', '_462', '_416', '_416', '_3069', '_2428', '_3407', '_2214', '_1575', '_2357', '_289', '_461', '_1845', '_4009', '_4016', '_3177', '_3618', '_1739', '_2492', '_859', '_691', '_2712', '_3107', '_2561', '_1768', '_2154', '_2673', '_1401', 
'_1234', '_944', '_1920', '_2008', '_2948', '_2169', '_4003', '_3885', '_1567', '_1563', '_2579', '_3562', '_2030', '_3977', '_3981', '_2609', '_1494', '_1532', '_2426_3602'])\n", + "('4076_240_918_2524_4070_3900_625_2308_1114_2433_488_3671_2154_3199_1825_2610_1578_4008_1336_1562_3092_2561_428_2097_1959_210_1593_263_3745_1003_3384_1856_3662_1763_3251_3127_229_4061_977_2264_727_3498_2308_3541_456_609_3575_890_3521_109_3662_600_773_1566_2859_1872_1212_3798_2431_2632_117_846_3452_612_1336_977_727_3692_1046_1833_1825_2404_3092_1112_3712_4061_1448_3138_1560_447_1593_1326_4032_3008_387_799_608_1539_1543_3165_3250_3030_2280_3533_399_2952_3271_3240_4076_308_2930_1723_1604_1889_3203_2308_1490_3681_1889_933_399_1889_117_3437_3567_2604_1965_609_4077_4061_3124_1060_317_1032_2673_1143_7_379_2_3550_3093_3702_1593_1450_2015_1424_1140_261_3893_4031_3520_799_628_3849_2952_2796_4009_1810_1684_3100_1920_914_610_1153_4076_1958_503_3271_914_3353_2693_2712_944_1863_1728_1161_2081_3881_2712_805_3479_2782_1604_2398_1768_1225_2094_4093_3218_944_1219', ['4076', '_240', '_918', '_2524', '_4070', '_3900', '_625', '_2308', '_1114', '_2433', '_488', '_3671', '_2154', '_3199', '_1825', '_2610', '_1578', '_4008', '_1336', '_1562', '_3092', '_2561', '_428', '_2097', '_1959', '_210', '_1593', '_263', '_3745', '_1003', '_3384', '_1856', '_3662', '_1763', '_3251', '_3127', '_229', '_4061', '_977', '_2264', '_727', '_3498', '_2308', '_3541', '_456', '_609', '_3575', '_890', '_3521', '_109', '_3662', '_600', '_773', '_1566', '_2859', '_1872', '_1212', '_3798', '_2431', '_2632', '_117', '_846', '_3452', '_612', '_1336', '_977', '_727', '_3692', '_1046', '_1833', '_1825', '_2404', '_3092', '_1112', '_3712', '_4061', '_1448', '_3138', '_1560', '_447', '_1593', '_1326', '_4032', '_3008', '_387', '_799', '_608', '_1539', '_1543', '_3165', '_3250', '_3030', '_2280', '_3533', '_399', '_2952', '_3271', '_3240', '_4076', '_308', '_2930', '_1723', '_1604', '_1889', '_3203', '_2308', '_1490', '_3681', '_1889', 
'_933', '_399', '_1889', '_117', '_3437', '_3567', '_2604', '_1965', '_609', '_4077', '_4061', '_3124', '_1060', '_317', '_1032', '_2673', '_1143', '_7', '_379', '_2', '_3550', '_3093', '_3702', '_1593', '_1450', '_2015', '_1424', '_1140', '_261', '_3893', '_4031', '_3520', '_799', '_628', '_3849', '_2952', '_2796', '_4009', '_1810', '_1684', '_3100', '_1920', '_914', '_610', '_1153', '_4076', '_1958', '_503', '_3271', '_914', '_3353', '_2693', '_2712', '_944', '_1863', '_1728', '_1161', '_2081', '_3881', '_2712', '_805', '_3479', '_2782', '_1604', '_2398', '_1768', '_1225', '_2094', '_4093', '_3218', '_944', '_1219'])\n", + "('4076_1728_2531_2008_2604_890_340_222_1663_3727_719_2968_3092_2444_1930_2032_1264_3232_4093_646_669_222_119_1747_2386_519_1046_1838_222_537_891_3324_3347_3324_2525_1776_3664_1952_208_1114_9_1800_1788_3744_3417_1800_2440_846_1678_222_624_3236_1800_3164_1691_1414_2308_1934_3866_977_2298_571_2750_2712_2862_3750_918_1875_2434_1984_3030_893_2525_121_1962_3820_1172_222_3953_3086_1607_2139_1388_3123_2147_2229_157_3218_2492_3547_3649_3578_3478_3123_79_2568_1993_1606_222_2866_900_1763_3415_1180_27_4061_105_32_2544_3115_3404_748_214_3146_126_2123_3336_1551_358_3724_3478_573_2897_3336_2974_3757_101_427_3495_3329_2941_1311_3400_2147_2779_1506_2797_2175_3895_1326_2147_62_3619_3027_1176_1046_2254_1246_427_3251_1139_2263_3570_3725_3030_3274_3251_2531_944_3253_1848_3030_3415_586_4061_3336_345_2101_3030_3512_1967_1618_1962_2608_2608_1776_4093_3321_4033_48_1351_2220_222_2448_3649_1046_902_2466_1726_761_4070_1848_3610_479_2315_1671_2147_1883_1021_2525_2858_1462_3415_1282_775_1669_208_2617_3702_4064_2147_3990_2969_3318_2942_977_3849_2533_2866_1863_2008_3538_304_2673_2712_2498_2008_3039_3478_3452_2525_1282_3544_846_2366_3949_3649_2770_2712_3329_2822_571_3671_1602_2531_2276_3691_2089_363_3744_2673_1757_1282_2809_1538_3452_2809_3706_1338_222_915_3498_3901_3544_746_286_2712_3767_3684_1311_1960_1046_3404_2712_2562_1669_859_2618_2386_4092_1788_1871_1784_3379_914_3050_2
241_2803_3751_3244_482_1771_661_2102_2186_3751_3649_1310_1795_3218_1593_1806_1387_2817_3253_3415_3652_3008_2071_3415_427_2259_3192_1046_2154_2147_2497_410_3123_537_3324_2227_1788_3259_1650_3840_1905_208_951_4076_636_1248_3251_2476_399_901_2337_2801_1863_3404_261_3199_617_3981_3562_1494_3741_2609_1513', ['4076', '_1728', '_2531', '_2008', '_2604', '_890', '_340', '_222', '_1663', '_3727', '_719', '_2968', '_3092', '_2444', '_1930', '_2032', '_1264', '_3232', '_4093', '_646', '_669', '_222', '_119', '_1747', '_2386', '_519', '_1046', '_1838', '_222', '_537', '_891', '_3324', '_3347', '_3324', '_2525', '_1776', '_3664', '_1952', '_208', '_1114', '_9', '_1800', '_1788', '_3744', '_3417', '_1800', '_2440', '_846', '_1678', '_222', '_624', '_3236', '_1800', '_3164', '_1691', '_1414', '_2308', '_1934', '_3866', '_977', '_2298', '_571', '_2750', '_2712', '_2862', '_3750', '_918', '_1875', '_2434', '_1984', '_3030', '_893', '_2525', '_121', '_1962', '_3820', '_1172', '_222', '_3953', '_3086', '_1607', '_2139', '_1388', '_3123', '_2147', '_2229', '_157', '_3218', '_2492', '_3547', '_3649', '_3578', '_3478', '_3123', '_79', '_2568', '_1993', '_1606', '_222', '_2866', '_900', '_1763', '_3415', '_1180', '_27', '_4061', '_105', '_32', '_2544', '_3115', '_3404', '_748', '_214', '_3146', '_126', '_2123', '_3336', '_1551', '_358', '_3724', '_3478', '_573', '_2897', '_3336', '_2974', '_3757', '_101', '_427', '_3495', '_3329', '_2941', '_1311', '_3400', '_2147', '_2779', '_1506', '_2797', '_2175', '_3895', '_1326', '_2147', '_62', '_3619', '_3027', '_1176', '_1046', '_2254', '_1246', '_427', '_3251', '_1139', '_2263', '_3570', '_3725', '_3030', '_3274', '_3251', '_2531', '_944', '_3253', '_1848', '_3030', '_3415', '_586', '_4061', '_3336', '_345', '_2101', '_3030', '_3512', '_1967', '_1618', '_1962', '_2608', '_2608', '_1776', '_4093', '_3321', '_4033', '_48', '_1351', '_2220', '_222', '_2448', '_3649', '_1046', '_902', '_2466', '_1726', '_761', '_4070', '_1848', '_3610', '_479', 
'_2315', '_1671', '_2147', '_1883', '_1021', '_2525', '_2858', '_1462', '_3415', '_1282', '_775', '_1669', '_208', '_2617', '_3702', '_4064', '_2147', '_3990', '_2969', '_3318', '_2942', '_977', '_3849', '_2533', '_2866', '_1863', '_2008', '_3538', '_304', '_2673', '_2712', '_2498', '_2008', '_3039', '_3478', '_3452', '_2525', '_1282', '_3544', '_846', '_2366', '_3949', '_3649', '_2770', '_2712', '_3329', '_2822', '_571', '_3671', '_1602', '_2531', '_2276', '_3691', '_2089', '_363', '_3744', '_2673', '_1757', '_1282', '_2809', '_1538', '_3452', '_2809', '_3706', '_1338', '_222', '_915', '_3498', '_3901', '_3544', '_746', '_286', '_2712', '_3767', '_3684', '_1311', '_1960', '_1046', '_3404', '_2712', '_2562', '_1669', '_859', '_2618', '_2386', '_4092', '_1788', '_1871', '_1784', '_3379', '_914', '_3050', '_2241', '_2803', '_3751', '_3244', '_482', '_1771', '_661', '_2102', '_2186', '_3751', '_3649', '_1310', '_1795', '_3218', '_1593', '_1806', '_1387', '_2817', '_3253', '_3415', '_3652', '_3008', '_2071', '_3415', '_427', '_2259', '_3192', '_1046', '_2154', '_2147', '_2497', '_410', '_3123', '_537', '_3324', '_2227', '_1788', '_3259', '_1650', '_3840', '_1905', '_208', '_951', '_4076', '_636', '_1248', '_3251', '_2476', '_399', '_901', '_2337', '_2801', '_1863', '_3404', '_261', '_3199', '_617', '_3981', '_3562', '_1494', '_3741', '_2609', '_1513'])\n", + 
"('3835_2964_1275_1033_3649_2632_3452_3835_3146_1046_985_2779_3514_317_2596_1228_3404_1046_3182_2943_2466_1249_1495_1419_1163_3513_64_1934_3684_1623_1229_2673_1596_702_1838_2463_3613_3530_1_2940_1890_379_2635_1093_4065_19_1543_2589_1046_50_990_2076_3404_894_251_3253_2243_83_3379_679_1316_2750_204_50_3163_746_846_3351_2685_104_3004_4093_957_317_2501_2788_3253_3835_1013_251_3893_2964_3778_1932_878_2069_3514_2662_3457_363_1863_3004_2952_903_2885_3838_3893_2337_878_3659_2466_1434_308_3893_1314_3835_1543_1726_1326_2964_2433_3253_2596_2497_1134_2662_3415_1838_3272_3684_843_352_3039_212_1669_1056_3130_3488_3410_2531_240_1046_679_2243_3345_3379_4061_3336_1593_1800_3110_3851_940_1302_857_1375_1119_3110_3694_2021_3379_857_3288_439_2243_3969_237_951_1565_846_1848_2635_3415_3356_870_3978_2008_1920_1696_3177_1140_3253_857_859_4093_2243_2741_1147_1229_2003_2662_2362_4061_2601_237_124_2003_727_1112_1803_3456_3638_1729_2852_3514_1618_1696_957_3765_3404_484_1872_3452_1696_1543_1338_3544_3785_3324_1741_2635_2968_2817_2386_2448_1593_3379_3110_3712_3498_1848_880_1326_846_2786_2016_2745_1599_2059_571_2750_2243_2003_3893_4040_3182_2943_538_2243_4086_4061_3026_3182_1393_1726_3110_3851_3387_846_664_3404_1726_2243_1730_1388_1718_406_1872_3254_1369_2059_900_1_3207_1046_3026_2243_2359_2943_304_3737_3251_1393_3684_2968_3787_679_1460_1593_2343_550_3571_679_3253_3105_1275_3379_2596_3705_1060_1932_55_1326_3760_1861_52_2996_3253_3182_204_2097_1369_3764_2097_2943_3660_1046_903_1861_2820_498_1134_2026_2003_2786_2786_1648_4094_3649_1032_2466_3026_2786_654_3199_3218_143_901_247_536_2968_27_2003_82_3867_1261_2442_2125_1990_4006_308_3893_2431_2596_3182_1105_1275_880_2413_237_4076_189_1424_1618_3908_499_1046_3865_875_2398_304_3909_3880', ['3835', '_2964', '_1275', '_1033', '_3649', '_2632', '_3452', '_3835', '_3146', '_1046', '_985', '_2779', '_3514', '_317', '_2596', '_1228', '_3404', '_1046', '_3182', '_2943', '_2466', '_1249', '_1495', '_1419', '_1163', '_3513', '_64', '_1934', '_3684', '_1623', 
'_1229', '_2673', '_1596', '_702', '_1838', '_2463', '_3613', '_3530', '_1', '_2940', '_1890', '_379', '_2635', '_1093', '_4065', '_19', '_1543', '_2589', '_1046', '_50', '_990', '_2076', '_3404', '_894', '_251', '_3253', '_2243', '_83', '_3379', '_679', '_1316', '_2750', '_204', '_50', '_3163', '_746', '_846', '_3351', '_2685', '_104', '_3004', '_4093', '_957', '_317', '_2501', '_2788', '_3253', '_3835', '_1013', '_251', '_3893', '_2964', '_3778', '_1932', '_878', '_2069', '_3514', '_2662', '_3457', '_363', '_1863', '_3004', '_2952', '_903', '_2885', '_3838', '_3893', '_2337', '_878', '_3659', '_2466', '_1434', '_308', '_3893', '_1314', '_3835', '_1543', '_1726', '_1326', '_2964', '_2433', '_3253', '_2596', '_2497', '_1134', '_2662', '_3415', '_1838', '_3272', '_3684', '_843', '_352', '_3039', '_212', '_1669', '_1056', '_3130', '_3488', '_3410', '_2531', '_240', '_1046', '_679', '_2243', '_3345', '_3379', '_4061', '_3336', '_1593', '_1800', '_3110', '_3851', '_940', '_1302', '_857', '_1375', '_1119', '_3110', '_3694', '_2021', '_3379', '_857', '_3288', '_439', '_2243', '_3969', '_237', '_951', '_1565', '_846', '_1848', '_2635', '_3415', '_3356', '_870', '_3978', '_2008', '_1920', '_1696', '_3177', '_1140', '_3253', '_857', '_859', '_4093', '_2243', '_2741', '_1147', '_1229', '_2003', '_2662', '_2362', '_4061', '_2601', '_237', '_124', '_2003', '_727', '_1112', '_1803', '_3456', '_3638', '_1729', '_2852', '_3514', '_1618', '_1696', '_957', '_3765', '_3404', '_484', '_1872', '_3452', '_1696', '_1543', '_1338', '_3544', '_3785', '_3324', '_1741', '_2635', '_2968', '_2817', '_2386', '_2448', '_1593', '_3379', '_3110', '_3712', '_3498', '_1848', '_880', '_1326', '_846', '_2786', '_2016', '_2745', '_1599', '_2059', '_571', '_2750', '_2243', '_2003', '_3893', '_4040', '_3182', '_2943', '_538', '_2243', '_4086', '_4061', '_3026', '_3182', '_1393', '_1726', '_3110', '_3851', '_3387', '_846', '_664', '_3404', '_1726', '_2243', '_1730', '_1388', '_1718', '_406', '_1872', 
'_3254', '_1369', '_2059', '_900', '_1', '_3207', '_1046', '_3026', '_2243', '_2359', '_2943', '_304', '_3737', '_3251', '_1393', '_3684', '_2968', '_3787', '_679', '_1460', '_1593', '_2343', '_550', '_3571', '_679', '_3253', '_3105', '_1275', '_3379', '_2596', '_3705', '_1060', '_1932', '_55', '_1326', '_3760', '_1861', '_52', '_2996', '_3253', '_3182', '_204', '_2097', '_1369', '_3764', '_2097', '_2943', '_3660', '_1046', '_903', '_1861', '_2820', '_498', '_1134', '_2026', '_2003', '_2786', '_2786', '_1648', '_4094', '_3649', '_1032', '_2466', '_3026', '_2786', '_654', '_3199', '_3218', '_143', '_901', '_247', '_536', '_2968', '_27', '_2003', '_82', '_3867', '_1261', '_2442', '_2125', '_1990', '_4006', '_308', '_3893', '_2431', '_2596', '_3182', '_1105', '_1275', '_880', '_2413', '_237', '_4076', '_189', '_1424', '_1618', '_3908', '_499', '_1046', '_3865', '_875', '_2398', '_304', '_3909', '_3880'])\n", + "('2793_3017_2284_2520_1604_3111_953_752_2918_3585_2531_1843_2397_301_660_1404_1987_1934_918_3603_3147_472_3431_2626_3428_91_3428_772_3824_3273_554_3961_278_436_20_1788_1604_890_436_20_3609_2639_2712_2448_1637_2016_191_1578_3082_1861_2960_3668_2016_64_3658_2286_1733_4014_1709_675_1161_108_232_2910_381_3114_3114_3161_2415_836_3051_1734_2862_1813_3983_4073_3050_456_3686_985_237_2310_2433_790_2910_2533_2265_944_2310_1738_488_2337_248_499_2589_966_4092_2132_117_868_3384_3050_2524_2514_427_1234_1361_2952_1169_3001_1934_1751_3879_2939_3864_972_2939_983_3183_4078_248_1967_1618_2939_2365_488_2559_248_1310_829_375_118_792_2290_1424_3183_598_3779_598_1718_3627_3446_1235_2218_3352_944_4063_117_2371_3840_1077_637_4065_1025_1234_3729_2428_3546_1684_3251_1877_993_2419_3146_2433_1893_1012_4062_242_1960_3439_415_375_792_3308_3197_3779_396_3575_2634_3990_3394_3731_1612_3280_2616_62_2386_2083_2184_2435_3082_2169_432_1588_15_3232_1050_160_600_2610_3130_1708_2559_2610_1052_90_2782_1413', ['2793', '_3017', '_2284', '_2520', '_1604', '_3111', '_953', '_752', '_2918', '_3585', 
'_2531', '_1843', '_2397', '_301', '_660', '_1404', '_1987', '_1934', '_918', '_3603', '_3147', '_472', '_3431', '_2626', '_3428', '_91', '_3428', '_772', '_3824', '_3273', '_554', '_3961', '_278', '_436', '_20', '_1788', '_1604', '_890', '_436', '_20', '_3609', '_2639', '_2712', '_2448', '_1637', '_2016', '_191', '_1578', '_3082', '_1861', '_2960', '_3668', '_2016', '_64', '_3658', '_2286', '_1733', '_4014', '_1709', '_675', '_1161', '_108', '_232', '_2910', '_381', '_3114', '_3114', '_3161', '_2415', '_836', '_3051', '_1734', '_2862', '_1813', '_3983', '_4073', '_3050', '_456', '_3686', '_985', '_237', '_2310', '_2433', '_790', '_2910', '_2533', '_2265', '_944', '_2310', '_1738', '_488', '_2337', '_248', '_499', '_2589', '_966', '_4092', '_2132', '_117', '_868', '_3384', '_3050', '_2524', '_2514', '_427', '_1234', '_1361', '_2952', '_1169', '_3001', '_1934', '_1751', '_3879', '_2939', '_3864', '_972', '_2939', '_983', '_3183', '_4078', '_248', '_1967', '_1618', '_2939', '_2365', '_488', '_2559', '_248', '_1310', '_829', '_375', '_118', '_792', '_2290', '_1424', '_3183', '_598', '_3779', '_598', '_1718', '_3627', '_3446', '_1235', '_2218', '_3352', '_944', '_4063', '_117', '_2371', '_3840', '_1077', '_637', '_4065', '_1025', '_1234', '_3729', '_2428', '_3546', '_1684', '_3251', '_1877', '_993', '_2419', '_3146', '_2433', '_1893', '_1012', '_4062', '_242', '_1960', '_3439', '_415', '_375', '_792', '_3308', '_3197', '_3779', '_396', '_3575', '_2634', '_3990', '_3394', '_3731', '_1612', '_3280', '_2616', '_62', '_2386', '_2083', '_2184', '_2435', '_3082', '_2169', '_432', '_1588', '_15', '_3232', '_1050', '_160', '_600', '_2610', '_3130', '_1708', '_2559', '_2610', '_1052', '_90', '_2782', '_1413'])\n" + ] + } + ], + "source": [ + "output = merge_pair('_222', '_3446', splits)\n", + "for i, item in enumerate(output.items()):\n", + " print(item)\n", + " if i == 10:\n", + " break" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, 
"metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "4096" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merges = {}\n", + "vocab = set()\n", + "\n", + "for sent in dataset[\"train\"][\"txt\"]:\n", + " for word in sent.split():\n", + " vocab.add(word)\n", + "len(vocab)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Algorithm\n", + "\n", + "vocab_size = 4100\n", + "\n", + "while len(vocab) < vocab_size:\n", + " pair_freqs = compute_pair_freqs(splits)\n", + " best_pair = \"\"\n", + " max_freq = None\n", + " for pair, freq in pair_freqs.items():\n", + " if max_freq is None or max_freq < freq:\n", + " best_pair = pair\n", + " max_freq = freq\n", + " splits = merge_pair(*best_pair, splits)\n", + " merges[best_pair] = best_pair[0] + best_pair[1]\n", + " vocab.add(best_pair[0] + best_pair[1])" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "{('_2426', '_3602'): '_2426_3602',\n", + " ('_1532', '_2426_3602'): '_1532_2426_3602',\n", + " ('_2426', '_1568'): '_2426_1568',\n", + " ('_3981', '_2426_1568'): '_3981_2426_1568'}" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merges" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_2426_3602\n", + "_2426_1568\n", + "_3981_2426_1568\n", + "_1532_2426_3602\n" + ] + } + ], + "source": [ + "for i in vocab:\n", + " if i in merges.values():\n", + " print(i)" + ] }, { "cell_type": "code", From e5df127fa64bf067bf22277e7094d9258342c705 Mon Sep 17 00:00:00 2001 From: Nathan 
Gabriel Date: Fri, 5 Jul 2024 20:33:46 -0400 Subject: [PATCH 09/10] added tokenize method --- research/two_byte_encoding.ipynb | 70 ++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index d75127b..3e249da 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ @@ -173,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -191,7 +191,7 @@ " '▁2218']" ] }, - "execution_count": 8, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -848,31 +848,65 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def tokenize(text, merges=merges):\n", + " pre_tokenized_text = add_special_character([text])\n", + " print(type(pre_tokenized_text))\n", + " splits = pre_tokenized_text #[[l for l in word] for word in pre_tokenized_text]\n", + " for pair, merge in merges.items():\n", + " for idx, split in enumerate(splits):\n", + " i = 0\n", + " while i < len(split) - 1:\n", + " if split[i] == pair[0] and split[i + 1] == pair[1]:\n", + " split = split[:i] + [merge] + split[i + 2 :]\n", + " else:\n", + " i += 1\n", + " splits[idx] = split\n", + " print(splits)\n", + " return\n", + " return sum(splits, [])" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 84, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "['896', '▁2029', '▁935', '▁679', '▁1115', '▁3601', '▁3000', '▁222', '▁3446', '▁2218', '▁3072', '▁550', 
'▁3652', '▁665', '▁2596', '▁2809', '▁3649', '▁251', '▁2610', '▁2536', '▁47', '▁2852', '▁2940', '▁3353', '▁3400', '▁3336', '▁325', '▁2647', '▁4076', '▁3653', '▁3253', '▁58', '▁3664', '▁1424', '▁1388', '▁222', '▁278', '▁897', '▁447', '▁2355', '▁2453', '▁2531', '▁2712', '▁828', '▁2895', '▁2398', '▁2908', '▁901', '▁2536', '▁222', '▁3686', '▁2620', '▁3254', '▁3962', '▁0', '▁1448', '▁222', '▁863', '▁3593', '▁124', '▁124', '▁1048', '▁1593', '▁222', '▁4086', '▁2647', '▁3236', '▁1767', '▁2800', '▁697', '▁514', '▁3648', '▁2337', '▁1338', '▁1114', '▁340', '▁3514', '▁4076', '▁2658', '▁1954', '▁3867', '▁2300', '▁251', '▁317', '▁7', '▁1091', '▁1768', '▁1440', '▁3167', '▁672', '▁1253', '▁188', '▁3544', '▁2934', '▁1368', '▁479', '▁3951', '▁3387', '▁514', '▁2438', '▁1262', '▁3166', '▁462', '▁3530', '▁333', '▁2596', '▁3808', '▁2796', '▁1920', '▁794', '▁263', '▁2626', '▁2596', '▁1949', '▁57', '▁3990', '▁3785', '▁146', '▁404', '▁3731', '▁479', '▁3840', '▁3840', '▁3664', '▁940', '▁2550', '▁4076', '▁544', '▁3465', '▁3232', '▁269', '▁79', '▁2159', '▁3879', '▁1734', '▁3900', '▁755', '▁1756', '▁818', '▁800', '▁1249', '▁171', '▁319', '▁727', '▁171', '▁3698', '▁3683', '▁2596', '▁3969', '▁2431', '▁1838', '▁3969', '▁126', '▁2673', '▁2596', '▁4012', '▁1010', '▁2151', '▁3437', '▁417', '▁2386', '▁2712', '▁3705', '▁1838', '▁3428', '▁1168', '▁1838', '▁1527', '▁3885', '▁1952', '▁2443', '▁3997', '▁3562', '▁1667', '▁3651', '▁3981', '▁2426', '▁1494', '▁1532', '▁2426', '▁3602']\n" + ] + } + ], + "source": [ + "tokenize(txt[0])" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'896 2029 935 679 1115 3601 3000 222 3446 2218 3072 550 3652 665 2596 2809 3649 251 2610 2536 47 2852 2940 3353 3400 3336 325 2647 4076 3653 3253 58 3664 1424 1388 222 278 897 447 2355 2453 2531 2712 828 2895 2398 2908 901 2536 222 3686 2620 3254 3962 0 1448 222 863 3593 124 124 1048 1593 222 4086 
2647 3236 1767 2800 697 514 3648 2337 1338 1114 340 3514 4076 2658 1954 3867 2300 251 317 7 1091 1768 1440 3167 672 1253 188 3544 2934 1368 479 3951 3387 514 2438 1262 3166 462 3530 333 2596 3808 2796 1920 794 263 2626 2596 1949 57 3990 3785 146 404 3731 479 3840 3840 3664 940 2550 4076 544 3465 3232 269 79 2159 3879 1734 3900 755 1756 818 800 1249 171 319 727 171 3698 3683 2596 3969 2431 1838 3969 126 2673 2596 4012 1010 2151 3437 417 2386 2712 3705 1838 3428 1168 1838 1527 3885 1952 2443 3997 3562 1667 3651 3981 2426 1494 1532 2426 3602 1855'" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "txt[0]" + ] }, { "cell_type": "code", From b18c52d1f45fee6a2244c537c3cd3d9ce5a23066 Mon Sep 17 00:00:00 2001 From: Nathan Gabriel Date: Tue, 9 Jul 2024 20:36:04 -0400 Subject: [PATCH 10/10] fixing some bugs --- research/two_byte_encoding.ipynb | 983 +++++++++++++++++++++++-------- 1 file changed, 724 insertions(+), 259 deletions(-) diff --git a/research/two_byte_encoding.ipynb b/research/two_byte_encoding.ipynb index 3e249da..0caf238 100644 --- a/research/two_byte_encoding.ipynb +++ b/research/two_byte_encoding.ipynb @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -173,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -191,7 +191,7 @@ " '▁2218']" ] }, - "execution_count": 71, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -274,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -305,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -322,208 +322,208 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ - "('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602',\n", - " ['896',\n", - " '_2029',\n", - " '_935',\n", - " '_679',\n", - " '_1115',\n", - " '_3601',\n", - " '_3000',\n", - " '_222',\n", - " '_3446',\n", - " '_2218',\n", - " '_3072',\n", - " '_550',\n", - " '_3652',\n", - " '_665',\n", - " '_2596',\n", - " '_2809',\n", - " '_3649',\n", - " '_251',\n", - " '_2610',\n", - " '_2536',\n", - " '_47',\n", - " '_2852',\n", - " '_2940',\n", - " '_3353',\n", - " '_3400',\n", - " '_3336',\n", - " '_325',\n", - " '_2647',\n", + 
"('2308_2094_1158_2030_2094_29_2782_2308_2094_1755_2169_519_3146_4040_4076_1684_2133_793_1130_443_3612_2692_3106_2025_1626_339_977_768_485_3351_363_2624_4031_1934_1217_536_1875_744_208_2839_3050_2601_3396_3106_2167_1421_745_3229_1767_3396_1962_3187_776_379_363_3650_3396_229_1097_413_1688_2282_1139_3396_1878_2689_2397_3220_2813_247_7_4041_3037_2892_3161_3374_1449_536_3274_1826_1794_3009_118_2442_3051_1922_3359_1723_3662_1527_1565_3396_343_2718_415_2280_204_2243_2712_928_2518_1700_3069_1562_1755_1810_2532_2604_2909_379_3864_1644_40_2944_39_118_933_2386_2750_2160_731_2693_555_2371_109_555_536_2919_2199_887_1962_2693_1924_53_3091_2786_1747_4003_2386_657_1861_3897_248_3253_1130_657_1169_514_1130_3253_903_731_294_2536_1234_379_3787_1728_2873_1644_2750_3232_731_3146_251_2094_2910_1857_2308_2094_2462_2462_1868_2462_2782_2644_857_263_3449_3562_548',\n", + " ['2308',\n", + " '_2094',\n", + " '_1158',\n", + " '_2030',\n", + " '_2094',\n", + " '_29',\n", + " '_2782',\n", + " '_2308',\n", + " '_2094',\n", + " '_1755',\n", + " '_2169',\n", + " '_519',\n", + " '_3146',\n", + " '_4040',\n", " '_4076',\n", - " '_3653',\n", - " '_3253',\n", - " '_58',\n", - " '_3664',\n", - " '_1424',\n", - " '_1388',\n", - " '_222',\n", - " '_278',\n", - " '_897',\n", - " '_447',\n", - " '_2355',\n", - " '_2453',\n", - " '_2531',\n", - " '_2712',\n", - " '_828',\n", - " '_2895',\n", - " '_2398',\n", - " '_2908',\n", - " '_901',\n", - " '_2536',\n", - " '_222',\n", - " '_3686',\n", - " '_2620',\n", - " '_3254',\n", - " '_3962',\n", - " '_0',\n", - " '_1448',\n", - " '_222',\n", - " '_863',\n", - " '_3593',\n", - " '_124',\n", - " '_124',\n", - " '_1048',\n", - " '_1593',\n", - " '_222',\n", - " '_4086',\n", - " '_2647',\n", - " '_3236',\n", + " '_1684',\n", + " '_2133',\n", + " '_793',\n", + " '_1130',\n", + " '_443',\n", + " '_3612',\n", + " '_2692',\n", + " '_3106',\n", + " '_2025',\n", + " '_1626',\n", + " '_339',\n", + " '_977',\n", + " '_768',\n", + " '_485',\n", + " '_3351',\n", + " 
'_363',\n", + " '_2624',\n", + " '_4031',\n", + " '_1934',\n", + " '_1217',\n", + " '_536',\n", + " '_1875',\n", + " '_744',\n", + " '_208',\n", + " '_2839',\n", + " '_3050',\n", + " '_2601',\n", + " '_3396',\n", + " '_3106',\n", + " '_2167',\n", + " '_1421',\n", + " '_745',\n", + " '_3229',\n", " '_1767',\n", - " '_2800',\n", - " '_697',\n", - " '_514',\n", - " '_3648',\n", - " '_2337',\n", - " '_1338',\n", - " '_1114',\n", - " '_340',\n", - " '_3514',\n", - " '_4076',\n", - " '_2658',\n", - " '_1954',\n", - " '_3867',\n", - " '_2300',\n", - " '_251',\n", - " '_317',\n", + " '_3396',\n", + " '_1962',\n", + " '_3187',\n", + " '_776',\n", + " '_379',\n", + " '_363',\n", + " '_3650',\n", + " '_3396',\n", + " '_229',\n", + " '_1097',\n", + " '_413',\n", + " '_1688',\n", + " '_2282',\n", + " '_1139',\n", + " '_3396',\n", + " '_1878',\n", + " '_2689',\n", + " '_2397',\n", + " '_3220',\n", + " '_2813',\n", + " '_247',\n", " '_7',\n", - " '_1091',\n", - " '_1768',\n", - " '_1440',\n", - " '_3167',\n", - " '_672',\n", - " '_1253',\n", - " '_188',\n", - " '_3544',\n", - " '_2934',\n", - " '_1368',\n", - " '_479',\n", - " '_3951',\n", - " '_3387',\n", + " '_4041',\n", + " '_3037',\n", + " '_2892',\n", + " '_3161',\n", + " '_3374',\n", + " '_1449',\n", + " '_536',\n", + " '_3274',\n", + " '_1826',\n", + " '_1794',\n", + " '_3009',\n", + " '_118',\n", + " '_2442',\n", + " '_3051',\n", + " '_1922',\n", + " '_3359',\n", + " '_1723',\n", + " '_3662',\n", + " '_1527',\n", + " '_1565',\n", + " '_3396',\n", + " '_343',\n", + " '_2718',\n", + " '_415',\n", + " '_2280',\n", + " '_204',\n", + " '_2243',\n", + " '_2712',\n", + " '_928',\n", + " '_2518',\n", + " '_1700',\n", + " '_3069',\n", + " '_1562',\n", + " '_1755',\n", + " '_1810',\n", + " '_2532',\n", + " '_2604',\n", + " '_2909',\n", + " '_379',\n", + " '_3864',\n", + " '_1644',\n", + " '_40',\n", + " '_2944',\n", + " '_39',\n", + " '_118',\n", + " '_933',\n", + " '_2386',\n", + " '_2750',\n", + " '_2160',\n", + " '_731',\n", + " 
'_2693',\n", + " '_555',\n", + " '_2371',\n", + " '_109',\n", + " '_555',\n", + " '_536',\n", + " '_2919',\n", + " '_2199',\n", + " '_887',\n", + " '_1962',\n", + " '_2693',\n", + " '_1924',\n", + " '_53',\n", + " '_3091',\n", + " '_2786',\n", + " '_1747',\n", + " '_4003',\n", + " '_2386',\n", + " '_657',\n", + " '_1861',\n", + " '_3897',\n", + " '_248',\n", + " '_3253',\n", + " '_1130',\n", + " '_657',\n", + " '_1169',\n", " '_514',\n", - " '_2438',\n", - " '_1262',\n", - " '_3166',\n", - " '_462',\n", - " '_3530',\n", - " '_333',\n", - " '_2596',\n", - " '_3808',\n", - " '_2796',\n", - " '_1920',\n", - " '_794',\n", - " '_263',\n", - " '_2626',\n", - " '_2596',\n", - " '_1949',\n", - " '_57',\n", - " '_3990',\n", - " '_3785',\n", - " '_146',\n", - " '_404',\n", - " '_3731',\n", - " '_479',\n", - " '_3840',\n", - " '_3840',\n", - " '_3664',\n", - " '_940',\n", - " '_2550',\n", - " '_4076',\n", - " '_544',\n", - " '_3465',\n", + " '_1130',\n", + " '_3253',\n", + " '_903',\n", + " '_731',\n", + " '_294',\n", + " '_2536',\n", + " '_1234',\n", + " '_379',\n", + " '_3787',\n", + " '_1728',\n", + " '_2873',\n", + " '_1644',\n", + " '_2750',\n", " '_3232',\n", - " '_269',\n", - " '_79',\n", - " '_2159',\n", - " '_3879',\n", - " '_1734',\n", - " '_3900',\n", - " '_755',\n", - " '_1756',\n", - " '_818',\n", - " '_800',\n", - " '_1249',\n", - " '_171',\n", - " '_319',\n", - " '_727',\n", - " '_171',\n", - " '_3698',\n", - " '_3683',\n", - " '_2596',\n", - " '_3969',\n", - " '_2431',\n", - " '_1838',\n", - " '_3969',\n", - " '_126',\n", - " '_2673',\n", - " '_2596',\n", - " '_4012',\n", - " '_1010',\n", - " '_2151',\n", - " '_3437',\n", - " '_417',\n", - " '_2386',\n", - " '_2712',\n", - " '_3705',\n", - " '_1838',\n", - " '_3428',\n", - " '_1168',\n", - " '_1838',\n", - " '_1527',\n", - " '_3885',\n", - " '_1952',\n", - " '_2443',\n", - " '_3997',\n", + " '_731',\n", + " '_3146',\n", + " '_251',\n", + " '_2094',\n", + " '_2910',\n", + " '_1857',\n", + " '_2308',\n", + " 
'_2094',\n", + " '_2462',\n", + " '_2462',\n", + " '_1868',\n", + " '_2462',\n", + " '_2782',\n", + " '_2644',\n", + " '_857',\n", + " '_263',\n", + " '_3449',\n", " '_3562',\n", - " '_1667',\n", - " '_3651',\n", - " '_3981',\n", - " '_2426',\n", - " '_1494',\n", - " '_1532',\n", - " '_2426',\n", - " '_3602'])" + " '_548'])" ] }, - "execution_count": 30, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "preprocess_corpus(txt)[0]" + "preprocess_corpus(txt)[3]" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -544,7 +544,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -553,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -580,7 +580,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -607,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -624,7 +624,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -639,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -648,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -665,27 +665,47 @@ "(('_222', '_3446'), 11)\n", "(('_3446', '_2218'), 2)\n", "(('_2218', '_3072'), 1)\n", - "(('_3072', '_550'), 1)\n" + "(('_3072', '_550'), 1)\n", + "(('_550', '_3652'), 4)\n", + "(('_3652', '_665'), 5)\n", + "(('_665', '_2596'), 9)\n", + "(('_2596', '_2809'), 14)\n", + "(('_2809', '_3649'), 4)\n", + "(('_3649', '_251'), 27)\n", + "(('_251', '_2610'), 52)\n", + "(('_2610', '_2536'), 22)\n", + "(('_2536', '_47'), 1)\n", + "(('_47', 
'_2852'), 1)\n", + "(('_2852', '_2940'), 5)\n", + "(('_2940', '_3353'), 5)\n", + "(('_3353', '_3400'), 5)\n", + "(('_3400', '_3336'), 5)\n", + "(('_3336', '_325'), 1)\n", + "(('_325', '_2647'), 3)\n", + "(('_2647', '_4076'), 15)\n", + "(('_4076', '_3653'), 35)\n", + "(('_3653', '_3253'), 2)\n", + "(('_3253', '_58'), 5)\n" ] } ], "source": [ "for i, item in enumerate(pair_freqs.items()):\n", " print(item)\n", - " if i == 10:\n", + " if i == 30:\n", " break" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "('_2426', '_3602') 5548\n" + "('_195', '_1667') 151\n" ] } ], @@ -703,7 +723,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -723,20 +743,20 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602', ['896', '_2029', '_935', '_679', '_1115', '_3601', '_3000', '_222_3446', '_2218', '_3072', '_550', '_3652', '_665', '_2596', '_2809', 
'_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', '_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', '_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', '_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673', '_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386', '_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981', '_2426', '_1494', '_1532', '_2426_3602'])\n", + 
"('896_2029_935_679_1115_3601_3000_222_3446_2218_3072_550_3652_665_2596_2809_3649_251_2610_2536_47_2852_2940_3353_3400_3336_325_2647_4076_3653_3253_58_3664_1424_1388_222_278_897_447_2355_2453_2531_2712_828_2895_2398_2908_901_2536_222_3686_2620_3254_3962_0_1448_222_863_3593_124_124_1048_1593_222_4086_2647_3236_1767_2800_697_514_3648_2337_1338_1114_340_3514_4076_2658_1954_3867_2300_251_317_7_1091_1768_1440_3167_672_1253_188_3544_2934_1368_479_3951_3387_514_2438_1262_3166_462_3530_333_2596_3808_2796_1920_794_263_2626_2596_1949_57_3990_3785_146_404_3731_479_3840_3840_3664_940_2550_4076_544_3465_3232_269_79_2159_3879_1734_3900_755_1756_818_800_1249_171_319_727_171_3698_3683_2596_3969_2431_1838_3969_126_2673_2596_4012_1010_2151_3437_417_2386_2712_3705_1838_3428_1168_1838_1527_3885_1952_2443_3997_3562_1667_3651_3981_2426_1494_1532_2426_3602', ['896', '_2029', '_935', '_679', '_1115', '_3601', '_3000', '_222_3446', '_2218', '_3072', '_550', '_3652', '_665', '_2596', '_2809', '_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', '_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', '_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', 
'_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673', '_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386', '_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981', '_2426', '_1494', '_1532', '_2426', '_3602'])\n", "('2712_1604_3458_4031_1604_1669_2008_2337_857_3123_3321_1887_846_2398_763_612_3846_1060_312_859_3638_763_2238_590_2458_3847_304_1888_1986_2238_3412_1885_375_2176_887_3051_2238_3374_3485_973_251_622_3051_2557_727_3750_240_2386_2452_2712_2913_2525_691_1114_1363_796_3731_1232_1332_3282_966_3883_2431_1774_2559_755_748_2975_2608_3345_7_868_731_2872_1336_2488_3706_2276_3739_434_2203_2019_873_1273_3627_2912_4046_120_2888_1707_1153_3731_2927_1188_1400_966_397_1153_2712_1140_792_3412_20_3452_2452_1247_3297_1669_326_2813_2365_3368_1774_1129_3260_186_1814_1445_438_1247_3323_3368_3744_2392_448_1953_1247_2204_2430_1094_1702_2688_1953_2712_966_2380_3714_3446_3391_2531_2738_2312_3866_1952_2238_4069_752_1861_73_3403_3825_685_1707_332_2738_685_305_4049_1004_186_1188_4076_3468_2885_740_1001_251_3367_2712_48_752_2673_2617_793_927_2712_2801_3229_3896_886_773_3229_3396_1160_1968_3272_381_3452_1307_3396_2240_1307_215_726_679_3195_2712_3026_903_2317_1114_901_1484_665_2182_3688_7_234_3009_370_2712_3871_3551_499_240_1742_2531_582_862_930_1097_688_2450_1658_2738_97_3698_2502_308_746_488_608_2948_1669_3919_2204_2431_3219_1774_1941_845_1398_2440_3765_1644_2206_3795_2076_1953_685_1953_1953_3879_3089_2331_2807_2238_1660_95_222_3671_2386_2918_1094_3008_152_117_1924_365_3893_3069_1725_499_3731_3258_1794_2718_2502_829_575_2326_294_4054_1349_1814_3177_1188_3872_3281_588_3750_2813_992_3687_3731_3087_3786_2453_450_2365_930_1788_831_1644_2984_3180_1010_206_1788_3808_100_2506_3230_399_204_1806_48_3603_1669_2779_289_3514_572_1032_1932_1060_990_3702_1046_3161_2085_1932_1932_3350_702_263_665_10
32_2895_901_489_859_2434_2712_3693_2788_1838_1026_3251_1701_665_1477_204_2008_318_289_2788_1930_1325_1595_237_1054_3820_1669_665_931_1863_3218_2094_859_289_2712_3937_1920_1229_1408_153_1990_2712_1435_427_1838_961_901_2450_3030_1516_3775_3013_267_204_2626_222_477_1134_2083_1217_243_2070_695_550_2434_2525_3566_3253_3075_222_2167_616_3574_3375_1655_457_1131_3316_3702_2076_990_3498_1261_1369_2516_435_890_3969_951_3867_222_1443_1134_1838_2003_1648_447_2647_1299_1395_3324_3514_1046_1060_188_2593_3498_3514_1648_2813_3353_2647_1048_719_3353_2939_3592_3613_2788_3487_499_2786_2801_3318_3396_7_2206_231_1346_240_3828_1482_188_966_175_1017_868_99_2469_222_584_2001_2750_2573_784_2001_1524_2913_3593_1580_1793_2874_1574_2160_1316_3254_2655_1675_2750_1052_2147_2809_1351_3008_1443_527_3321_3030_26_3286_2228_50_1112_2342_761_2559_3702_3702_1093_670_527_222_2003_2750_1312_3044_3199_295_222_4086_2673_263_1443_251_2259_222_4086_3384_263_354_2943_2943_222_2952_3194_1231_354_1932_3288_222_354_1593_3384_3672_1539_3551_222_1161_3324_3458_2675_1307_2876_665_688_3979_1660_2444_3229_2013_665_3345_2224_2224_926_2224_580_1430_1129_1907_3177_1161_1059_4076_188_1544_2080_114_1130_2066_186_665_3671_1848_1726_1567_2673_833_3724_3910_289_568_1423_2008_1305_188_375_1631_3724_20_536_2174_1788_124_979_1688_484_679_3708_188_3875_1516_3236_1148_2700_3761_1524_1420_1761_2115_2419_457_2813_222_3808_175_1283_2310_2673_756_1806_1164_2083_499_146_92_2328_1788_1877_2750_1768_85_2662_4093_2147_2317_1134_204_2779_1768_3415_2147_2408_3269_1462_2435_2434_3205_2147_230_1594_3881_1114_1934_2676_2891_782_4071_3687_3195_2338_623_1733_757_3467_3706_3352_1806_2467_188_3161_1912_3724_1907_2750_3384_3030_775_2254_3514_427_3893_1848_7_216_3205_2736_3106_2331_4036_2160_2939_4080_2613_73_3412_2870_3731_3566_1970_3183_2782_1851_599_222_308_3028_1863_105_764_3450_1788_1905_1283_3866_462_365_3806_654_2913_2831_1343_3106_365_1813_3731_2415_2558_657_2610_1408_3507_1788_2940_1581_2013_366_2337_3005_1788_2124_1010_755_278_1446_2817_2
22_3808_802_2462_2448_2068_3507_3731_928_3547_2992_2204_3463_2100_1788_2909_1094_940_3106_2689_933_3731_2392_2473_3856_3662_935_2918_3731_2900_1669_1669_2662_2008_2444_1788_2019_1655_829_2939_208_2365_7_3671_1106_833_2513_3849_1655_981_3181_1818_2453_3352_2738_2470_680_77_2946_3149_926_2738_529_3731_162_1055_448_926_2151_731_7_2204_2867_1904_4071_105_2511_1788_2964_3619_2338_2019_1310_3570_1247_1751_410_2885_660_2320_237_7_1924_79_3384_3297_2386_344_1788_3740_3704_1519_945_87_1462_2147_429_247_289_2029_196_3218_2147_3998_449_3218_721_196_3171_7_427_1578_455_3855_1678_3173_1733_2913_1400_3112_3323_177_2718_4033_637_697_3461_1130_1041_2398_3030_775_2788_2089_2968_2673_3205_188_366_685_991_3901_811_1527_1088_1875_399_3893_3212_829_1589_2160_449_497_691_782_4059_1269_2160_2939_946_595_684_2253_31_2147_2182_149_3877_1423_3218_317_1524_1336_351_2320_2991_1046_3102_7_2204_4087_2221_3227_304_1411_7_105_3849_2158_637_1833_3154_7_60_1764_2073_1565_3866_2610_7_902_1143_435_716_2525_3936_7_2909_646_3799_1238_2616_2688_3731_4026_1593_3102_1617_2491_550_7_1659_457_832_1425_3218_586_3731_2617_2673_3919_1042_3187_39_3731_73_397_1904_1818_175_1444_680_1161_2204_2480_2182_99_956_680_2204_1485_2410_3001_3901_3907_2326_2634_1757_3744_1751_1059_1445_3731_1966_829_2458_3062_3514_1747_3396_3468_2166_499_3969_2097_2005_1788_219_3560_1159_645_3867_3246_1246_2290_1669_3497_1316_1669_15_1806_479_196_12_2029_427_1390_1788_4070_2398_1747_3166_2008_2918_117_2485_1757_1599_3477_2203_304_169_777_1489_2735_2531_1882', ['2712', '_1604', '_3458', '_4031', '_1604', '_1669', '_2008', '_2337', '_857', '_3123', '_3321', '_1887', '_846', '_2398', '_763', '_612', '_3846', '_1060', '_312', '_859', '_3638', '_763', '_2238', '_590', '_2458', '_3847', '_304', '_1888', '_1986', '_2238', '_3412', '_1885', '_375', '_2176', '_887', '_3051', '_2238', '_3374', '_3485', '_973', '_251', '_622', '_3051', '_2557', '_727', '_3750', '_240', '_2386', '_2452', '_2712', '_2913', '_2525', '_691', '_1114', '_1363', '_796', 
'_3731', '_1232', '_1332', '_3282', '_966', '_3883', '_2431', '_1774', '_2559', '_755', '_748', '_2975', '_2608', '_3345', '_7', '_868', '_731', '_2872', '_1336', '_2488', '_3706', '_2276', '_3739', '_434', '_2203', '_2019', '_873', '_1273', '_3627', '_2912', '_4046', '_120', '_2888', '_1707', '_1153', '_3731', '_2927', '_1188', '_1400', '_966', '_397', '_1153', '_2712', '_1140', '_792', '_3412', '_20', '_3452', '_2452', '_1247', '_3297', '_1669', '_326', '_2813', '_2365', '_3368', '_1774', '_1129', '_3260', '_186', '_1814', '_1445', '_438', '_1247', '_3323', '_3368', '_3744', '_2392', '_448', '_1953', '_1247', '_2204', '_2430', '_1094', '_1702', '_2688', '_1953', '_2712', '_966', '_2380', '_3714', '_3446', '_3391', '_2531', '_2738', '_2312', '_3866', '_1952', '_2238', '_4069', '_752', '_1861', '_73', '_3403', '_3825', '_685', '_1707', '_332', '_2738', '_685', '_305', '_4049', '_1004', '_186', '_1188', '_4076', '_3468', '_2885', '_740', '_1001', '_251', '_3367', '_2712', '_48', '_752', '_2673', '_2617', '_793', '_927', '_2712', '_2801', '_3229', '_3896', '_886', '_773', '_3229', '_3396', '_1160', '_1968', '_3272', '_381', '_3452', '_1307', '_3396', '_2240', '_1307', '_215', '_726', '_679', '_3195', '_2712', '_3026', '_903', '_2317', '_1114', '_901', '_1484', '_665', '_2182', '_3688', '_7', '_234', '_3009', '_370', '_2712', '_3871', '_3551', '_499', '_240', '_1742', '_2531', '_582', '_862', '_930', '_1097', '_688', '_2450', '_1658', '_2738', '_97', '_3698', '_2502', '_308', '_746', '_488', '_608', '_2948', '_1669', '_3919', '_2204', '_2431', '_3219', '_1774', '_1941', '_845', '_1398', '_2440', '_3765', '_1644', '_2206', '_3795', '_2076', '_1953', '_685', '_1953', '_1953', '_3879', '_3089', '_2331', '_2807', '_2238', '_1660', '_95', '_222', '_3671', '_2386', '_2918', '_1094', '_3008', '_152', '_117', '_1924', '_365', '_3893', '_3069', '_1725', '_499', '_3731', '_3258', '_1794', '_2718', '_2502', '_829', '_575', '_2326', '_294', '_4054', '_1349', '_1814', '_3177', 
'_1188', '_3872', '_3281', '_588', '_3750', '_2813', '_992', '_3687', '_3731', '_3087', '_3786', '_2453', '_450', '_2365', '_930', '_1788', '_831', '_1644', '_2984', '_3180', '_1010', '_206', '_1788', '_3808', '_100', '_2506', '_3230', '_399', '_204', '_1806', '_48', '_3603', '_1669', '_2779', '_289', '_3514', '_572', '_1032', '_1932', '_1060', '_990', '_3702', '_1046', '_3161', '_2085', '_1932', '_1932', '_3350', '_702', '_263', '_665', '_1032', '_2895', '_901', '_489', '_859', '_2434', '_2712', '_3693', '_2788', '_1838', '_1026', '_3251', '_1701', '_665', '_1477', '_204', '_2008', '_318', '_289', '_2788', '_1930', '_1325', '_1595', '_237', '_1054', '_3820', '_1669', '_665', '_931', '_1863', '_3218', '_2094', '_859', '_289', '_2712', '_3937', '_1920', '_1229', '_1408', '_153', '_1990', '_2712', '_1435', '_427', '_1838', '_961', '_901', '_2450', '_3030', '_1516', '_3775', '_3013', '_267', '_204', '_2626', '_222', '_477', '_1134', '_2083', '_1217', '_243', '_2070', '_695', '_550', '_2434', '_2525', '_3566', '_3253', '_3075', '_222', '_2167', '_616', '_3574', '_3375', '_1655', '_457', '_1131', '_3316', '_3702', '_2076', '_990', '_3498', '_1261', '_1369', '_2516', '_435', '_890', '_3969', '_951', '_3867', '_222', '_1443', '_1134', '_1838', '_2003', '_1648', '_447', '_2647', '_1299', '_1395', '_3324', '_3514', '_1046', '_1060', '_188', '_2593', '_3498', '_3514', '_1648', '_2813', '_3353', '_2647', '_1048', '_719', '_3353', '_2939', '_3592', '_3613', '_2788', '_3487', '_499', '_2786', '_2801', '_3318', '_3396', '_7', '_2206', '_231', '_1346', '_240', '_3828', '_1482', '_188', '_966', '_175', '_1017', '_868', '_99', '_2469', '_222', '_584', '_2001', '_2750', '_2573', '_784', '_2001', '_1524', '_2913', '_3593', '_1580', '_1793', '_2874', '_1574', '_2160', '_1316', '_3254', '_2655', '_1675', '_2750', '_1052', '_2147', '_2809', '_1351', '_3008', '_1443', '_527', '_3321', '_3030', '_26', '_3286', '_2228', '_50', '_1112', '_2342', '_761', '_2559', '_3702', '_3702', '_1093', 
'_670', '_527', '_222', '_2003', '_2750', '_1312', '_3044', '_3199', '_295', '_222', '_4086', '_2673', '_263', '_1443', '_251', '_2259', '_222', '_4086', '_3384', '_263', '_354', '_2943', '_2943', '_222', '_2952', '_3194', '_1231', '_354', '_1932', '_3288', '_222', '_354', '_1593', '_3384', '_3672', '_1539', '_3551', '_222', '_1161', '_3324', '_3458', '_2675', '_1307', '_2876', '_665', '_688', '_3979', '_1660', '_2444', '_3229', '_2013', '_665', '_3345', '_2224', '_2224', '_926', '_2224', '_580', '_1430', '_1129', '_1907', '_3177', '_1161', '_1059', '_4076', '_188', '_1544', '_2080', '_114', '_1130', '_2066', '_186', '_665', '_3671', '_1848', '_1726', '_1567', '_2673', '_833', '_3724', '_3910', '_289', '_568', '_1423', '_2008', '_1305', '_188', '_375', '_1631', '_3724', '_20', '_536', '_2174', '_1788', '_124', '_979', '_1688', '_484', '_679', '_3708', '_188', '_3875', '_1516', '_3236', '_1148', '_2700', '_3761', '_1524', '_1420', '_1761', '_2115', '_2419', '_457', '_2813', '_222', '_3808', '_175', '_1283', '_2310', '_2673', '_756', '_1806', '_1164', '_2083', '_499', '_146', '_92', '_2328', '_1788', '_1877', '_2750', '_1768', '_85', '_2662', '_4093', '_2147', '_2317', '_1134', '_204', '_2779', '_1768', '_3415', '_2147', '_2408', '_3269', '_1462', '_2435', '_2434', '_3205', '_2147', '_230', '_1594', '_3881', '_1114', '_1934', '_2676', '_2891', '_782', '_4071', '_3687', '_3195', '_2338', '_623', '_1733', '_757', '_3467', '_3706', '_3352', '_1806', '_2467', '_188', '_3161', '_1912', '_3724', '_1907', '_2750', '_3384', '_3030', '_775', '_2254', '_3514', '_427', '_3893', '_1848', '_7', '_216', '_3205', '_2736', '_3106', '_2331', '_4036', '_2160', '_2939', '_4080', '_2613', '_73', '_3412', '_2870', '_3731', '_3566', '_1970', '_3183', '_2782', '_1851', '_599', '_222', '_308', '_3028', '_1863', '_105', '_764', '_3450', '_1788', '_1905', '_1283', '_3866', '_462', '_365', '_3806', '_654', '_2913', '_2831', '_1343', '_3106', '_365', '_1813', '_3731', '_2415', '_2558', '_657', 
'_2610', '_1408', '_3507', '_1788', '_2940', '_1581', '_2013', '_366', '_2337', '_3005', '_1788', '_2124', '_1010', '_755', '_278', '_1446', '_2817', '_222', '_3808', '_802', '_2462', '_2448', '_2068', '_3507', '_3731', '_928', '_3547', '_2992', '_2204', '_3463', '_2100', '_1788', '_2909', '_1094', '_940', '_3106', '_2689', '_933', '_3731', '_2392', '_2473', '_3856', '_3662', '_935', '_2918', '_3731', '_2900', '_1669', '_1669', '_2662', '_2008', '_2444', '_1788', '_2019', '_1655', '_829', '_2939', '_208', '_2365', '_7', '_3671', '_1106', '_833', '_2513', '_3849', '_1655', '_981', '_3181', '_1818', '_2453', '_3352', '_2738', '_2470', '_680', '_77', '_2946', '_3149', '_926', '_2738', '_529', '_3731', '_162', '_1055', '_448', '_926', '_2151', '_731', '_7', '_2204', '_2867', '_1904', '_4071', '_105', '_2511', '_1788', '_2964', '_3619', '_2338', '_2019', '_1310', '_3570', '_1247', '_1751', '_410', '_2885', '_660', '_2320', '_237', '_7', '_1924', '_79', '_3384', '_3297', '_2386', '_344', '_1788', '_3740', '_3704', '_1519', '_945', '_87', '_1462', '_2147', '_429', '_247', '_289', '_2029', '_196', '_3218', '_2147', '_3998', '_449', '_3218', '_721', '_196', '_3171', '_7', '_427', '_1578', '_455', '_3855', '_1678', '_3173', '_1733', '_2913', '_1400', '_3112', '_3323', '_177', '_2718', '_4033', '_637', '_697', '_3461', '_1130', '_1041', '_2398', '_3030', '_775', '_2788', '_2089', '_2968', '_2673', '_3205', '_188', '_366', '_685', '_991', '_3901', '_811', '_1527', '_1088', '_1875', '_399', '_3893', '_3212', '_829', '_1589', '_2160', '_449', '_497', '_691', '_782', '_4059', '_1269', '_2160', '_2939', '_946', '_595', '_684', '_2253', '_31', '_2147', '_2182', '_149', '_3877', '_1423', '_3218', '_317', '_1524', '_1336', '_351', '_2320', '_2991', '_1046', '_3102', '_7', '_2204', '_4087', '_2221', '_3227', '_304', '_1411', '_7', '_105', '_3849', '_2158', '_637', '_1833', '_3154', '_7', '_60', '_1764', '_2073', '_1565', '_3866', '_2610', '_7', '_902', '_1143', '_435', '_716', 
'_2525', '_3936', '_7', '_2909', '_646', '_3799', '_1238', '_2616', '_2688', '_3731', '_4026', '_1593', '_3102', '_1617', '_2491', '_550', '_7', '_1659', '_457', '_832', '_1425', '_3218', '_586', '_3731', '_2617', '_2673', '_3919', '_1042', '_3187', '_39', '_3731', '_73', '_397', '_1904', '_1818', '_175', '_1444', '_680', '_1161', '_2204', '_2480', '_2182', '_99', '_956', '_680', '_2204', '_1485', '_2410', '_3001', '_3901', '_3907', '_2326', '_2634', '_1757', '_3744', '_1751', '_1059', '_1445', '_3731', '_1966', '_829', '_2458', '_3062', '_3514', '_1747', '_3396', '_3468', '_2166', '_499', '_3969', '_2097', '_2005', '_1788', '_219', '_3560', '_1159', '_645', '_3867', '_3246', '_1246', '_2290', '_1669', '_3497', '_1316', '_1669', '_15', '_1806', '_479', '_196', '_12', '_2029', '_427', '_1390', '_1788', '_4070', '_2398', '_1747', '_3166', '_2008', '_2918', '_117', '_2485', '_1757', '_1599', '_3477', '_2203', '_304', '_169', '_777', '_1489', '_2735', '_2531', '_1882'])\n", - "('3446_2542_1488_3452_1810_499_2365_752_3437_2386_2782_1604_410_3480_2712_3220_2259_586_3345_98_1749_1930_1893_2688_781_1993_1991_746_222_3659_447_536_3047_1289_2182_1788_3180_2945_3352_2280_447_3123_222_2524_2917_4042_280_1375_134_2852_400_1481_499_463_3442_1932_4076_3910_2357_2947_1424_258_2123_3051_1861_571_2008_3762_3978_1574_2712_375_2453_3750_278_1670_651_2276_1336_2694_833_894_1669_1061_4076_3020_218_921_3487_2265_1560_4076_3593_1573_610_2862_2005_679_2712_1875_2694_1669_510_1739_1462_3232_950_4009_2782_2332_375_2462_2712_3437_442_2029_3062_2887_2151_853_584_3820_1865_3671_1484_2002_2712_1907_251_3324_2356_608_25_1507_375_3362_2507_3983_2558_3738_1806_2356_3452_1284_1604_2249_175_472_3717_1289_1757_3051_1056_2312_472_3390_1554_3040_2918_168_793_3232_2094_4040_2673_3342_3384_90_2322_1336_3165_606_1667_997_2520_3232_2094_685_1539_3356_973_2097_2712_1424_410_2073_3983_53_3825_2712_2342_62_1421_3356_1650_3765_2712_3069_3387_90_894_2244_3458_514_1095_2070_3684_3062_3410_3345_514_868_1818_2971_30
69_210_700_514_3593_2073_955_3390_62_1769_2337_1919_1017_2169_3901_2312_2221_2337_3297_977_2053_3901_1134_3686_1234_1705_1909_295_1604_50_2782_2337_3437_2782_50_933_2259_2133_3933_3437_50_50_2503_441_2266_3981_2609_1494_1532_2426_3602', ['3446', '_2542', '_1488', '_3452', '_1810', '_499', '_2365', '_752', '_3437', '_2386', '_2782', '_1604', '_410', '_3480', '_2712', '_3220', '_2259', '_586', '_3345', '_98', '_1749', '_1930', '_1893', '_2688', '_781', '_1993', '_1991', '_746', '_222', '_3659', '_447', '_536', '_3047', '_1289', '_2182', '_1788', '_3180', '_2945', '_3352', '_2280', '_447', '_3123', '_222', '_2524', '_2917', '_4042', '_280', '_1375', '_134', '_2852', '_400', '_1481', '_499', '_463', '_3442', '_1932', '_4076', '_3910', '_2357', '_2947', '_1424', '_258', '_2123', '_3051', '_1861', '_571', '_2008', '_3762', '_3978', '_1574', '_2712', '_375', '_2453', '_3750', '_278', '_1670', '_651', '_2276', '_1336', '_2694', '_833', '_894', '_1669', '_1061', '_4076', '_3020', '_218', '_921', '_3487', '_2265', '_1560', '_4076', '_3593', '_1573', '_610', '_2862', '_2005', '_679', '_2712', '_1875', '_2694', '_1669', '_510', '_1739', '_1462', '_3232', '_950', '_4009', '_2782', '_2332', '_375', '_2462', '_2712', '_3437', '_442', '_2029', '_3062', '_2887', '_2151', '_853', '_584', '_3820', '_1865', '_3671', '_1484', '_2002', '_2712', '_1907', '_251', '_3324', '_2356', '_608', '_25', '_1507', '_375', '_3362', '_2507', '_3983', '_2558', '_3738', '_1806', '_2356', '_3452', '_1284', '_1604', '_2249', '_175', '_472', '_3717', '_1289', '_1757', '_3051', '_1056', '_2312', '_472', '_3390', '_1554', '_3040', '_2918', '_168', '_793', '_3232', '_2094', '_4040', '_2673', '_3342', '_3384', '_90', '_2322', '_1336', '_3165', '_606', '_1667', '_997', '_2520', '_3232', '_2094', '_685', '_1539', '_3356', '_973', '_2097', '_2712', '_1424', '_410', '_2073', '_3983', '_53', '_3825', '_2712', '_2342', '_62', '_1421', '_3356', '_1650', '_3765', '_2712', '_3069', '_3387', '_90', '_894', '_2244', 
'_3458', '_514', '_1095', '_2070', '_3684', '_3062', '_3410', '_3345', '_514', '_868', '_1818', '_2971', '_3069', '_210', '_700', '_514', '_3593', '_2073', '_955', '_3390', '_62', '_1769', '_2337', '_1919', '_1017', '_2169', '_3901', '_2312', '_2221', '_2337', '_3297', '_977', '_2053', '_3901', '_1134', '_3686', '_1234', '_1705', '_1909', '_295', '_1604', '_50', '_2782', '_2337', '_3437', '_2782', '_50', '_933', '_2259', '_2133', '_3933', '_3437', '_50', '_50', '_2503', '_441', '_2266', '_3981', '_2609', '_1494', '_1532', '_2426_3602'])\n", + "('3446_2542_1488_3452_1810_499_2365_752_3437_2386_2782_1604_410_3480_2712_3220_2259_586_3345_98_1749_1930_1893_2688_781_1993_1991_746_222_3659_447_536_3047_1289_2182_1788_3180_2945_3352_2280_447_3123_222_2524_2917_4042_280_1375_134_2852_400_1481_499_463_3442_1932_4076_3910_2357_2947_1424_258_2123_3051_1861_571_2008_3762_3978_1574_2712_375_2453_3750_278_1670_651_2276_1336_2694_833_894_1669_1061_4076_3020_218_921_3487_2265_1560_4076_3593_1573_610_2862_2005_679_2712_1875_2694_1669_510_1739_1462_3232_950_4009_2782_2332_375_2462_2712_3437_442_2029_3062_2887_2151_853_584_3820_1865_3671_1484_2002_2712_1907_251_3324_2356_608_25_1507_375_3362_2507_3983_2558_3738_1806_2356_3452_1284_1604_2249_175_472_3717_1289_1757_3051_1056_2312_472_3390_1554_3040_2918_168_793_3232_2094_4040_2673_3342_3384_90_2322_1336_3165_606_1667_997_2520_3232_2094_685_1539_3356_973_2097_2712_1424_410_2073_3983_53_3825_2712_2342_62_1421_3356_1650_3765_2712_3069_3387_90_894_2244_3458_514_1095_2070_3684_3062_3410_3345_514_868_1818_2971_3069_210_700_514_3593_2073_955_3390_62_1769_2337_1919_1017_2169_3901_2312_2221_2337_3297_977_2053_3901_1134_3686_1234_1705_1909_295_1604_50_2782_2337_3437_2782_50_933_2259_2133_3933_3437_50_50_2503_441_2266_3981_2609_1494_1532_2426_3602', ['3446', '_2542', '_1488', '_3452', '_1810', '_499', '_2365', '_752', '_3437', '_2386', '_2782', '_1604', '_410', '_3480', '_2712', '_3220', '_2259', '_586', '_3345', '_98', '_1749', '_1930', '_1893', 
'_2688', '_781', '_1993', '_1991', '_746', '_222', '_3659', '_447', '_536', '_3047', '_1289', '_2182', '_1788', '_3180', '_2945', '_3352', '_2280', '_447', '_3123', '_222', '_2524', '_2917', '_4042', '_280', '_1375', '_134', '_2852', '_400', '_1481', '_499', '_463', '_3442', '_1932', '_4076', '_3910', '_2357', '_2947', '_1424', '_258', '_2123', '_3051', '_1861', '_571', '_2008', '_3762', '_3978', '_1574', '_2712', '_375', '_2453', '_3750', '_278', '_1670', '_651', '_2276', '_1336', '_2694', '_833', '_894', '_1669', '_1061', '_4076', '_3020', '_218', '_921', '_3487', '_2265', '_1560', '_4076', '_3593', '_1573', '_610', '_2862', '_2005', '_679', '_2712', '_1875', '_2694', '_1669', '_510', '_1739', '_1462', '_3232', '_950', '_4009', '_2782', '_2332', '_375', '_2462', '_2712', '_3437', '_442', '_2029', '_3062', '_2887', '_2151', '_853', '_584', '_3820', '_1865', '_3671', '_1484', '_2002', '_2712', '_1907', '_251', '_3324', '_2356', '_608', '_25', '_1507', '_375', '_3362', '_2507', '_3983', '_2558', '_3738', '_1806', '_2356', '_3452', '_1284', '_1604', '_2249', '_175', '_472', '_3717', '_1289', '_1757', '_3051', '_1056', '_2312', '_472', '_3390', '_1554', '_3040', '_2918', '_168', '_793', '_3232', '_2094', '_4040', '_2673', '_3342', '_3384', '_90', '_2322', '_1336', '_3165', '_606', '_1667', '_997', '_2520', '_3232', '_2094', '_685', '_1539', '_3356', '_973', '_2097', '_2712', '_1424', '_410', '_2073', '_3983', '_53', '_3825', '_2712', '_2342', '_62', '_1421', '_3356', '_1650', '_3765', '_2712', '_3069', '_3387', '_90', '_894', '_2244', '_3458', '_514', '_1095', '_2070', '_3684', '_3062', '_3410', '_3345', '_514', '_868', '_1818', '_2971', '_3069', '_210', '_700', '_514', '_3593', '_2073', '_955', '_3390', '_62', '_1769', '_2337', '_1919', '_1017', '_2169', '_3901', '_2312', '_2221', '_2337', '_3297', '_977', '_2053', '_3901', '_1134', '_3686', '_1234', '_1705', '_1909', '_295', '_1604', '_50', '_2782', '_2337', '_3437', '_2782', '_50', '_933', '_2259', '_2133', 
'_3933', '_3437', '_50', '_50', '_2503', '_441', '_2266', '_3981', '_2609', '_1494', '_1532', '_2426', '_3602'])\n", "('2308_2094_1158_2030_2094_29_2782_2308_2094_1755_2169_519_3146_4040_4076_1684_2133_793_1130_443_3612_2692_3106_2025_1626_339_977_768_485_3351_363_2624_4031_1934_1217_536_1875_744_208_2839_3050_2601_3396_3106_2167_1421_745_3229_1767_3396_1962_3187_776_379_363_3650_3396_229_1097_413_1688_2282_1139_3396_1878_2689_2397_3220_2813_247_7_4041_3037_2892_3161_3374_1449_536_3274_1826_1794_3009_118_2442_3051_1922_3359_1723_3662_1527_1565_3396_343_2718_415_2280_204_2243_2712_928_2518_1700_3069_1562_1755_1810_2532_2604_2909_379_3864_1644_40_2944_39_118_933_2386_2750_2160_731_2693_555_2371_109_555_536_2919_2199_887_1962_2693_1924_53_3091_2786_1747_4003_2386_657_1861_3897_248_3253_1130_657_1169_514_1130_3253_903_731_294_2536_1234_379_3787_1728_2873_1644_2750_3232_731_3146_251_2094_2910_1857_2308_2094_2462_2462_1868_2462_2782_2644_857_263_3449_3562_548', ['2308', '_2094', '_1158', '_2030', '_2094', '_29', '_2782', '_2308', '_2094', '_1755', '_2169', '_519', '_3146', '_4040', '_4076', '_1684', '_2133', '_793', '_1130', '_443', '_3612', '_2692', '_3106', '_2025', '_1626', '_339', '_977', '_768', '_485', '_3351', '_363', '_2624', '_4031', '_1934', '_1217', '_536', '_1875', '_744', '_208', '_2839', '_3050', '_2601', '_3396', '_3106', '_2167', '_1421', '_745', '_3229', '_1767', '_3396', '_1962', '_3187', '_776', '_379', '_363', '_3650', '_3396', '_229', '_1097', '_413', '_1688', '_2282', '_1139', '_3396', '_1878', '_2689', '_2397', '_3220', '_2813', '_247', '_7', '_4041', '_3037', '_2892', '_3161', '_3374', '_1449', '_536', '_3274', '_1826', '_1794', '_3009', '_118', '_2442', '_3051', '_1922', '_3359', '_1723', '_3662', '_1527', '_1565', '_3396', '_343', '_2718', '_415', '_2280', '_204', '_2243', '_2712', '_928', '_2518', '_1700', '_3069', '_1562', '_1755', '_1810', '_2532', '_2604', '_2909', '_379', '_3864', '_1644', '_40', '_2944', '_39', '_118', '_933', '_2386', 
'_2750', '_2160', '_731', '_2693', '_555', '_2371', '_109', '_555', '_536', '_2919', '_2199', '_887', '_1962', '_2693', '_1924', '_53', '_3091', '_2786', '_1747', '_4003', '_2386', '_657', '_1861', '_3897', '_248', '_3253', '_1130', '_657', '_1169', '_514', '_1130', '_3253', '_903', '_731', '_294', '_2536', '_1234', '_379', '_3787', '_1728', '_2873', '_1644', '_2750', '_3232', '_731', '_3146', '_251', '_2094', '_2910', '_1857', '_2308', '_2094', '_2462', '_2462', '_1868', '_2462', '_2782', '_2644', '_857', '_263', '_3449', '_3562', '_548'])\n", "('1083_2968_762_1669_3336_317_2008_3835_1072_3218_3218_3350_1046_104_2596_2944_2511_15_461_204_1326_4076_3278_718_3893_3481_208_2647_3920_2623_1644_1725_3473_2361_1229_4076_3845_3251_2550_2600_4061_890_4033_2511_1838_2662_2622_3374_2331_3920_3912_2001_2132_1579_2363_2700_986_2027_1991_331_3136_4061_503_1970_3452_3092_79_158_3093_1933_2786_3815_2556_2506_3072_2857_368_3137_3575_854_1669_1161_2008_2662_4033_3053_2662_3750_1866_1725_2726_3920_2714_1375_940_3276_940_727_1809_2740_1877_550_1244_2610_2648_2147_2471_2750_2525_1342_2712_2679_1282_2909_3610_2386_723_933_1800_572_2675_1048_3400_685_538_809_4033_2431_2833_1904_3713_3886_1644_4076_1781_683_3541_3129_93_2784_4076_2237_2945_2249_260_2308_2399_1826_262_4069_476_143_950_3374_1826_2975_42_792_3863_2536_2083_2077_3747_427_3462_3418_2107', ['1083', '_2968', '_762', '_1669', '_3336', '_317', '_2008', '_3835', '_1072', '_3218', '_3218', '_3350', '_1046', '_104', '_2596', '_2944', '_2511', '_15', '_461', '_204', '_1326', '_4076', '_3278', '_718', '_3893', '_3481', '_208', '_2647', '_3920', '_2623', '_1644', '_1725', '_3473', '_2361', '_1229', '_4076', '_3845', '_3251', '_2550', '_2600', '_4061', '_890', '_4033', '_2511', '_1838', '_2662', '_2622', '_3374', '_2331', '_3920', '_3912', '_2001', '_2132', '_1579', '_2363', '_2700', '_986', '_2027', '_1991', '_331', '_3136', '_4061', '_503', '_1970', '_3452', '_3092', '_79', '_158', '_3093', '_1933', '_2786', '_3815', '_2556', 
'_2506', '_3072', '_2857', '_368', '_3137', '_3575', '_854', '_1669', '_1161', '_2008', '_2662', '_4033', '_3053', '_2662', '_3750', '_1866', '_1725', '_2726', '_3920', '_2714', '_1375', '_940', '_3276', '_940', '_727', '_1809', '_2740', '_1877', '_550', '_1244', '_2610', '_2648', '_2147', '_2471', '_2750', '_2525', '_1342', '_2712', '_2679', '_1282', '_2909', '_3610', '_2386', '_723', '_933', '_1800', '_572', '_2675', '_1048', '_3400', '_685', '_538', '_809', '_4033', '_2431', '_2833', '_1904', '_3713', '_3886', '_1644', '_4076', '_1781', '_683', '_3541', '_3129', '_93', '_2784', '_4076', '_2237', '_2945', '_2249', '_260', '_2308', '_2399', '_1826', '_262', '_4069', '_476', '_143', '_950', '_3374', '_1826', '_2975', '_42', '_792', '_3863', '_2536', '_2083', '_2077', '_3747', '_427', '_3462', '_3418', '_2107'])\n", "('1234_3901_1328_1351_3273_331_985_1176_1130_1964_96_1907_3725_3497_3745_3680_619_499_7_1872_1112_1131_2306_1872_2431_203_2453_398_536_2835_3538_410_2530_3232_184_2712_798_363_1222_2139_679_2504_3039_734_3498_3978_2918_2925_3702_682_1565_3508_2251_1566_104_1485_3110_3387_1815_1033_1114_1726_3673_1369_56_1966_951_1106_1506_1629_2859_3303_208_277_3376_773_3764_1083_1756_160_1514_3250_124_3102_1083_1667_1424_2859_1575_490_3551_1083_1667_4057_1924_3278_3109_3109_310_1667_3720_3832_2154_3750_700_1083_2796_412_2333_1684_2739_1406_1234_2542_2259_248_246_9_669_204_3901_2753_2608_2154_3975_3109_2859_175_20_1987_3505_802_2837_187_1106_829_2453_3984_1419_1310_2859_342_3418_3551_3381_1388_3039_3745_2532_792_3775_3265_1112_3178_1926_3549_1485_3232_2179_3787_681_2990_3297_436_1924_2702_1349_4082_754_1423_26_2786_308_1848_3652_3684_3801_3199_2480_3967_727_93_3091_3798_3311_3358_3798_152_1140_682_212_2945_4091_4041_1289_4032_1357_2392_3777_1818_278_1159_396_518_3575_3866_1920_3659_1261_1312_375_2168_740_2782_2356_701_3168_1083_805_1046_2782_1629_3560_535_3119_2094_527_1647_3335_525_3321_939_805_2722_2456_1216_3008_3410_3643_97_3134_313_285_1229_4054_1369_3984_414_2361_3
293_575_56_682_1914_398_3706_1031_3702_2013_3684_1150_3278_3968_3382_977_2575_4076_3369_3638_2920_1242_600_890_1083_2562_2693_3082_1907_890_1307_204_944_714_3990_286_1046_2453_3981_1186_658_424_2609_1513', ['1234', '_3901', '_1328', '_1351', '_3273', '_331', '_985', '_1176', '_1130', '_1964', '_96', '_1907', '_3725', '_3497', '_3745', '_3680', '_619', '_499', '_7', '_1872', '_1112', '_1131', '_2306', '_1872', '_2431', '_203', '_2453', '_398', '_536', '_2835', '_3538', '_410', '_2530', '_3232', '_184', '_2712', '_798', '_363', '_1222', '_2139', '_679', '_2504', '_3039', '_734', '_3498', '_3978', '_2918', '_2925', '_3702', '_682', '_1565', '_3508', '_2251', '_1566', '_104', '_1485', '_3110', '_3387', '_1815', '_1033', '_1114', '_1726', '_3673', '_1369', '_56', '_1966', '_951', '_1106', '_1506', '_1629', '_2859', '_3303', '_208', '_277', '_3376', '_773', '_3764', '_1083', '_1756', '_160', '_1514', '_3250', '_124', '_3102', '_1083', '_1667', '_1424', '_2859', '_1575', '_490', '_3551', '_1083', '_1667', '_4057', '_1924', '_3278', '_3109', '_3109', '_310', '_1667', '_3720', '_3832', '_2154', '_3750', '_700', '_1083', '_2796', '_412', '_2333', '_1684', '_2739', '_1406', '_1234', '_2542', '_2259', '_248', '_246', '_9', '_669', '_204', '_3901', '_2753', '_2608', '_2154', '_3975', '_3109', '_2859', '_175', '_20', '_1987', '_3505', '_802', '_2837', '_187', '_1106', '_829', '_2453', '_3984', '_1419', '_1310', '_2859', '_342', '_3418', '_3551', '_3381', '_1388', '_3039', '_3745', '_2532', '_792', '_3775', '_3265', '_1112', '_3178', '_1926', '_3549', '_1485', '_3232', '_2179', '_3787', '_681', '_2990', '_3297', '_436', '_1924', '_2702', '_1349', '_4082', '_754', '_1423', '_26', '_2786', '_308', '_1848', '_3652', '_3684', '_3801', '_3199', '_2480', '_3967', '_727', '_93', '_3091', '_3798', '_3311', '_3358', '_3798', '_152', '_1140', '_682', '_212', '_2945', '_4091', '_4041', '_1289', '_4032', '_1357', '_2392', '_3777', '_1818', '_278', '_1159', '_396', '_518', '_3575', '_3866', 
'_1920', '_3659', '_1261', '_1312', '_375', '_2168', '_740', '_2782', '_2356', '_701', '_3168', '_1083', '_805', '_1046', '_2782', '_1629', '_3560', '_535', '_3119', '_2094', '_527', '_1647', '_3335', '_525', '_3321', '_939', '_805', '_2722', '_2456', '_1216', '_3008', '_3410', '_3643', '_97', '_3134', '_313', '_285', '_1229', '_4054', '_1369', '_3984', '_414', '_2361', '_3293', '_575', '_56', '_682', '_1914', '_398', '_3706', '_1031', '_3702', '_2013', '_3684', '_1150', '_3278', '_3968', '_3382', '_977', '_2575', '_4076', '_3369', '_3638', '_2920', '_1242', '_600', '_890', '_1083', '_2562', '_2693', '_3082', '_1907', '_890', '_1307', '_204', '_944', '_714', '_3990', '_286', '_1046', '_2453', '_3981', '_1186', '_658', '_424', '_2609', '_1513'])\n", - "('2712_2424_1311_2693_1684_3721_2303_1234_1130_1487_2021_2435_3778_3410_3627_1491_3467_2076_2204_3721_331_40_3105_2673_2442_3345_1270_1138_3051_2134_3820_3230_1076_3368_3893_1986_1161_96_515_1051_3476_224_1733_1114_2669_81_3969_633_1968_117_1114_2984_2046_1565_599_2093_117_3167_2236_92_248_3822_1815_2214_2322_378_2225_249_1294_3082_2214_643_2424_289_3178_196_3999_654_1565_3165_3030_716_748_3737_2738_1114_1671_370_1114_1235_3508_456_1887_1794_1779_1734_629_2230_3731_2909_1266_1381_1719_3183_2431_2647_2675_802_3720_1170_1349_2858_2214_490_168_2626_105_2398_1578_3197_2448_1562_2536_3352_2531_1590_2910_2839_3743_2094_2939_3541_1303_925_1114_1833_950_2939_1220_3379_3648_2617_3665_112_3069_1105_1350_3051_3671_1351_1395_3808_290_9_3051_2310_1970_3623_3297_501_2531_1148_248_1099_1932_450_3744_1186_2910_1978_1424_1310_2617_1661_416_3051_1114_2773_2738_3969_3866_544_3051_1770_2713_2398_64_1669_1168_160_637_2365_910_1129_2018_3958_2439_1129_60_2025_2760_4069_1848_2738_1203_2589_3775_20_1815_200_3051_3297_1651_2673_2144_1462_399_3879_937_2398_2520_1170_3777_1262_3051_2205_1945_2918_2524_1312_586_1925_1170_2386_3856_1170_3092_1604_2712_62_2001_3990_2496_3404_3183_117_2205_1269_2342_658_43_620_3627_1048_3262_1254_2310_712_1877_3051_
3437_1595_874_1684_1453_1234_3051_3163_3764_939_1161_2304_2983_472_3167_1160_1061_2348_1669_2952_1861_248_55_3569_3969_962_661_2738_375_469_2203_2384_2673_1907_2738_3167_1308_3753_3297_2249_890_3197_2782_1351_1763_1170_2750_3495_3731_775_3732_4004_2939_3468_238_456_1491_1912_495_1491_3379_482_3051_3785_3556_442_3297_1229_933_2647_1076_268_1338_1170_3638_3109_3040_1076_1848_1669_3526_3778_1907_2439_1170_2642_3790_1170_9_2265_2738_2310_499_854_2310_2925_802_3040_248_1661_622_2782_571_2673_3051_308_1924_1607_2310_3092_3907_2738_3785_181_3951_2939_2892_2530_2803_3381_463_535_3297_3039_2249_2738_3297_784_3178_3468_132_4092_2214_944_2466_3877_1684_2516_215_766_308_2203_2303_1129_3470_2177_3051_1491_3026_669_3297_3259_3777_3051_1684_1312_2525_3468_372_2918_3051_1338_3721_3458_3297_2304_1872_3627_3468_893_2251_1696_15_1768_1234_3747_2434_2693_248_3426_3724_117_2782_3183_2057_248_861_74_3627_2809_456_1672_248_45_1742_3627_3297_100_2712_2809_1932_2365_3731_62_1742_2386_3686_1594_3329_3051_2496_1970_2750_933_1312_3942_3879_3297_2854_2491_64_1398_2166_2738_2347_755_2002_2573_3867_2554_2738_1219_2096_2843_308_2363_2415_2322_462_416_416_3069_2428_3407_2214_1575_2357_289_461_1845_4009_4016_3177_3618_1739_2492_859_691_2712_3107_2561_1768_2154_2673_1401_1234_944_1920_2008_2948_2169_4003_3885_1567_1563_2579_3562_2030_3977_3981_2609_1494_1532_2426_3602', ['2712', '_2424', '_1311', '_2693', '_1684', '_3721', '_2303', '_1234', '_1130', '_1487', '_2021', '_2435', '_3778', '_3410', '_3627', '_1491', '_3467', '_2076', '_2204', '_3721', '_331', '_40', '_3105', '_2673', '_2442', '_3345', '_1270', '_1138', '_3051', '_2134', '_3820', '_3230', '_1076', '_3368', '_3893', '_1986', '_1161', '_96', '_515', '_1051', '_3476', '_224', '_1733', '_1114', '_2669', '_81', '_3969', '_633', '_1968', '_117', '_1114', '_2984', '_2046', '_1565', '_599', '_2093', '_117', '_3167', '_2236', '_92', '_248', '_3822', '_1815', '_2214', '_2322', '_378', '_2225', '_249', '_1294', '_3082', '_2214', '_643', '_2424', 
'_289', '_3178', '_196', '_3999', '_654', '_1565', '_3165', '_3030', '_716', '_748', '_3737', '_2738', '_1114', '_1671', '_370', '_1114', '_1235', '_3508', '_456', '_1887', '_1794', '_1779', '_1734', '_629', '_2230', '_3731', '_2909', '_1266', '_1381', '_1719', '_3183', '_2431', '_2647', '_2675', '_802', '_3720', '_1170', '_1349', '_2858', '_2214', '_490', '_168', '_2626', '_105', '_2398', '_1578', '_3197', '_2448', '_1562', '_2536', '_3352', '_2531', '_1590', '_2910', '_2839', '_3743', '_2094', '_2939', '_3541', '_1303', '_925', '_1114', '_1833', '_950', '_2939', '_1220', '_3379', '_3648', '_2617', '_3665', '_112', '_3069', '_1105', '_1350', '_3051', '_3671', '_1351', '_1395', '_3808', '_290', '_9', '_3051', '_2310', '_1970', '_3623', '_3297', '_501', '_2531', '_1148', '_248', '_1099', '_1932', '_450', '_3744', '_1186', '_2910', '_1978', '_1424', '_1310', '_2617', '_1661', '_416', '_3051', '_1114', '_2773', '_2738', '_3969', '_3866', '_544', '_3051', '_1770', '_2713', '_2398', '_64', '_1669', '_1168', '_160', '_637', '_2365', '_910', '_1129', '_2018', '_3958', '_2439', '_1129', '_60', '_2025', '_2760', '_4069', '_1848', '_2738', '_1203', '_2589', '_3775', '_20', '_1815', '_200', '_3051', '_3297', '_1651', '_2673', '_2144', '_1462', '_399', '_3879', '_937', '_2398', '_2520', '_1170', '_3777', '_1262', '_3051', '_2205', '_1945', '_2918', '_2524', '_1312', '_586', '_1925', '_1170', '_2386', '_3856', '_1170', '_3092', '_1604', '_2712', '_62', '_2001', '_3990', '_2496', '_3404', '_3183', '_117', '_2205', '_1269', '_2342', '_658', '_43', '_620', '_3627', '_1048', '_3262', '_1254', '_2310', '_712', '_1877', '_3051', '_3437', '_1595', '_874', '_1684', '_1453', '_1234', '_3051', '_3163', '_3764', '_939', '_1161', '_2304', '_2983', '_472', '_3167', '_1160', '_1061', '_2348', '_1669', '_2952', '_1861', '_248', '_55', '_3569', '_3969', '_962', '_661', '_2738', '_375', '_469', '_2203', '_2384', '_2673', '_1907', '_2738', '_3167', '_1308', '_3753', '_3297', '_2249', '_890', 
'_3197', '_2782', '_1351', '_1763', '_1170', '_2750', '_3495', '_3731', '_775', '_3732', '_4004', '_2939', '_3468', '_238', '_456', '_1491', '_1912', '_495', '_1491', '_3379', '_482', '_3051', '_3785', '_3556', '_442', '_3297', '_1229', '_933', '_2647', '_1076', '_268', '_1338', '_1170', '_3638', '_3109', '_3040', '_1076', '_1848', '_1669', '_3526', '_3778', '_1907', '_2439', '_1170', '_2642', '_3790', '_1170', '_9', '_2265', '_2738', '_2310', '_499', '_854', '_2310', '_2925', '_802', '_3040', '_248', '_1661', '_622', '_2782', '_571', '_2673', '_3051', '_308', '_1924', '_1607', '_2310', '_3092', '_3907', '_2738', '_3785', '_181', '_3951', '_2939', '_2892', '_2530', '_2803', '_3381', '_463', '_535', '_3297', '_3039', '_2249', '_2738', '_3297', '_784', '_3178', '_3468', '_132', '_4092', '_2214', '_944', '_2466', '_3877', '_1684', '_2516', '_215', '_766', '_308', '_2203', '_2303', '_1129', '_3470', '_2177', '_3051', '_1491', '_3026', '_669', '_3297', '_3259', '_3777', '_3051', '_1684', '_1312', '_2525', '_3468', '_372', '_2918', '_3051', '_1338', '_3721', '_3458', '_3297', '_2304', '_1872', '_3627', '_3468', '_893', '_2251', '_1696', '_15', '_1768', '_1234', '_3747', '_2434', '_2693', '_248', '_3426', '_3724', '_117', '_2782', '_3183', '_2057', '_248', '_861', '_74', '_3627', '_2809', '_456', '_1672', '_248', '_45', '_1742', '_3627', '_3297', '_100', '_2712', '_2809', '_1932', '_2365', '_3731', '_62', '_1742', '_2386', '_3686', '_1594', '_3329', '_3051', '_2496', '_1970', '_2750', '_933', '_1312', '_3942', '_3879', '_3297', '_2854', '_2491', '_64', '_1398', '_2166', '_2738', '_2347', '_755', '_2002', '_2573', '_3867', '_2554', '_2738', '_1219', '_2096', '_2843', '_308', '_2363', '_2415', '_2322', '_462', '_416', '_416', '_3069', '_2428', '_3407', '_2214', '_1575', '_2357', '_289', '_461', '_1845', '_4009', '_4016', '_3177', '_3618', '_1739', '_2492', '_859', '_691', '_2712', '_3107', '_2561', '_1768', '_2154', '_2673', '_1401', '_1234', '_944', '_1920', '_2008', 
'_2948', '_2169', '_4003', '_3885', '_1567', '_1563', '_2579', '_3562', '_2030', '_3977', '_3981', '_2609', '_1494', '_1532', '_2426_3602'])\n", + "('2712_2424_1311_2693_1684_3721_2303_1234_1130_1487_2021_2435_3778_3410_3627_1491_3467_2076_2204_3721_331_40_3105_2673_2442_3345_1270_1138_3051_2134_3820_3230_1076_3368_3893_1986_1161_96_515_1051_3476_224_1733_1114_2669_81_3969_633_1968_117_1114_2984_2046_1565_599_2093_117_3167_2236_92_248_3822_1815_2214_2322_378_2225_249_1294_3082_2214_643_2424_289_3178_196_3999_654_1565_3165_3030_716_748_3737_2738_1114_1671_370_1114_1235_3508_456_1887_1794_1779_1734_629_2230_3731_2909_1266_1381_1719_3183_2431_2647_2675_802_3720_1170_1349_2858_2214_490_168_2626_105_2398_1578_3197_2448_1562_2536_3352_2531_1590_2910_2839_3743_2094_2939_3541_1303_925_1114_1833_950_2939_1220_3379_3648_2617_3665_112_3069_1105_1350_3051_3671_1351_1395_3808_290_9_3051_2310_1970_3623_3297_501_2531_1148_248_1099_1932_450_3744_1186_2910_1978_1424_1310_2617_1661_416_3051_1114_2773_2738_3969_3866_544_3051_1770_2713_2398_64_1669_1168_160_637_2365_910_1129_2018_3958_2439_1129_60_2025_2760_4069_1848_2738_1203_2589_3775_20_1815_200_3051_3297_1651_2673_2144_1462_399_3879_937_2398_2520_1170_3777_1262_3051_2205_1945_2918_2524_1312_586_1925_1170_2386_3856_1170_3092_1604_2712_62_2001_3990_2496_3404_3183_117_2205_1269_2342_658_43_620_3627_1048_3262_1254_2310_712_1877_3051_3437_1595_874_1684_1453_1234_3051_3163_3764_939_1161_2304_2983_472_3167_1160_1061_2348_1669_2952_1861_248_55_3569_3969_962_661_2738_375_469_2203_2384_2673_1907_2738_3167_1308_3753_3297_2249_890_3197_2782_1351_1763_1170_2750_3495_3731_775_3732_4004_2939_3468_238_456_1491_1912_495_1491_3379_482_3051_3785_3556_442_3297_1229_933_2647_1076_268_1338_1170_3638_3109_3040_1076_1848_1669_3526_3778_1907_2439_1170_2642_3790_1170_9_2265_2738_2310_499_854_2310_2925_802_3040_248_1661_622_2782_571_2673_3051_308_1924_1607_2310_3092_3907_2738_3785_181_3951_2939_2892_2530_2803_3381_463_535_3297_3039_2249_2738_3297_784_3178_34
68_132_4092_2214_944_2466_3877_1684_2516_215_766_308_2203_2303_1129_3470_2177_3051_1491_3026_669_3297_3259_3777_3051_1684_1312_2525_3468_372_2918_3051_1338_3721_3458_3297_2304_1872_3627_3468_893_2251_1696_15_1768_1234_3747_2434_2693_248_3426_3724_117_2782_3183_2057_248_861_74_3627_2809_456_1672_248_45_1742_3627_3297_100_2712_2809_1932_2365_3731_62_1742_2386_3686_1594_3329_3051_2496_1970_2750_933_1312_3942_3879_3297_2854_2491_64_1398_2166_2738_2347_755_2002_2573_3867_2554_2738_1219_2096_2843_308_2363_2415_2322_462_416_416_3069_2428_3407_2214_1575_2357_289_461_1845_4009_4016_3177_3618_1739_2492_859_691_2712_3107_2561_1768_2154_2673_1401_1234_944_1920_2008_2948_2169_4003_3885_1567_1563_2579_3562_2030_3977_3981_2609_1494_1532_2426_3602', ['2712', '_2424', '_1311', '_2693', '_1684', '_3721', '_2303', '_1234', '_1130', '_1487', '_2021', '_2435', '_3778', '_3410', '_3627', '_1491', '_3467', '_2076', '_2204', '_3721', '_331', '_40', '_3105', '_2673', '_2442', '_3345', '_1270', '_1138', '_3051', '_2134', '_3820', '_3230', '_1076', '_3368', '_3893', '_1986', '_1161', '_96', '_515', '_1051', '_3476', '_224', '_1733', '_1114', '_2669', '_81', '_3969', '_633', '_1968', '_117', '_1114', '_2984', '_2046', '_1565', '_599', '_2093', '_117', '_3167', '_2236', '_92', '_248', '_3822', '_1815', '_2214', '_2322', '_378', '_2225', '_249', '_1294', '_3082', '_2214', '_643', '_2424', '_289', '_3178', '_196', '_3999', '_654', '_1565', '_3165', '_3030', '_716', '_748', '_3737', '_2738', '_1114', '_1671', '_370', '_1114', '_1235', '_3508', '_456', '_1887', '_1794', '_1779', '_1734', '_629', '_2230', '_3731', '_2909', '_1266', '_1381', '_1719', '_3183', '_2431', '_2647', '_2675', '_802', '_3720', '_1170', '_1349', '_2858', '_2214', '_490', '_168', '_2626', '_105', '_2398', '_1578', '_3197', '_2448', '_1562', '_2536', '_3352', '_2531', '_1590', '_2910', '_2839', '_3743', '_2094', '_2939', '_3541', '_1303', '_925', '_1114', '_1833', '_950', '_2939', '_1220', '_3379', '_3648', '_2617', '_3665', 
'_112', '_3069', '_1105', '_1350', '_3051', '_3671', '_1351', '_1395', '_3808', '_290', '_9', '_3051', '_2310', '_1970', '_3623', '_3297', '_501', '_2531', '_1148', '_248', '_1099', '_1932', '_450', '_3744', '_1186', '_2910', '_1978', '_1424', '_1310', '_2617', '_1661', '_416', '_3051', '_1114', '_2773', '_2738', '_3969', '_3866', '_544', '_3051', '_1770', '_2713', '_2398', '_64', '_1669', '_1168', '_160', '_637', '_2365', '_910', '_1129', '_2018', '_3958', '_2439', '_1129', '_60', '_2025', '_2760', '_4069', '_1848', '_2738', '_1203', '_2589', '_3775', '_20', '_1815', '_200', '_3051', '_3297', '_1651', '_2673', '_2144', '_1462', '_399', '_3879', '_937', '_2398', '_2520', '_1170', '_3777', '_1262', '_3051', '_2205', '_1945', '_2918', '_2524', '_1312', '_586', '_1925', '_1170', '_2386', '_3856', '_1170', '_3092', '_1604', '_2712', '_62', '_2001', '_3990', '_2496', '_3404', '_3183', '_117', '_2205', '_1269', '_2342', '_658', '_43', '_620', '_3627', '_1048', '_3262', '_1254', '_2310', '_712', '_1877', '_3051', '_3437', '_1595', '_874', '_1684', '_1453', '_1234', '_3051', '_3163', '_3764', '_939', '_1161', '_2304', '_2983', '_472', '_3167', '_1160', '_1061', '_2348', '_1669', '_2952', '_1861', '_248', '_55', '_3569', '_3969', '_962', '_661', '_2738', '_375', '_469', '_2203', '_2384', '_2673', '_1907', '_2738', '_3167', '_1308', '_3753', '_3297', '_2249', '_890', '_3197', '_2782', '_1351', '_1763', '_1170', '_2750', '_3495', '_3731', '_775', '_3732', '_4004', '_2939', '_3468', '_238', '_456', '_1491', '_1912', '_495', '_1491', '_3379', '_482', '_3051', '_3785', '_3556', '_442', '_3297', '_1229', '_933', '_2647', '_1076', '_268', '_1338', '_1170', '_3638', '_3109', '_3040', '_1076', '_1848', '_1669', '_3526', '_3778', '_1907', '_2439', '_1170', '_2642', '_3790', '_1170', '_9', '_2265', '_2738', '_2310', '_499', '_854', '_2310', '_2925', '_802', '_3040', '_248', '_1661', '_622', '_2782', '_571', '_2673', '_3051', '_308', '_1924', '_1607', '_2310', '_3092', '_3907', 
'_2738', '_3785', '_181', '_3951', '_2939', '_2892', '_2530', '_2803', '_3381', '_463', '_535', '_3297', '_3039', '_2249', '_2738', '_3297', '_784', '_3178', '_3468', '_132', '_4092', '_2214', '_944', '_2466', '_3877', '_1684', '_2516', '_215', '_766', '_308', '_2203', '_2303', '_1129', '_3470', '_2177', '_3051', '_1491', '_3026', '_669', '_3297', '_3259', '_3777', '_3051', '_1684', '_1312', '_2525', '_3468', '_372', '_2918', '_3051', '_1338', '_3721', '_3458', '_3297', '_2304', '_1872', '_3627', '_3468', '_893', '_2251', '_1696', '_15', '_1768', '_1234', '_3747', '_2434', '_2693', '_248', '_3426', '_3724', '_117', '_2782', '_3183', '_2057', '_248', '_861', '_74', '_3627', '_2809', '_456', '_1672', '_248', '_45', '_1742', '_3627', '_3297', '_100', '_2712', '_2809', '_1932', '_2365', '_3731', '_62', '_1742', '_2386', '_3686', '_1594', '_3329', '_3051', '_2496', '_1970', '_2750', '_933', '_1312', '_3942', '_3879', '_3297', '_2854', '_2491', '_64', '_1398', '_2166', '_2738', '_2347', '_755', '_2002', '_2573', '_3867', '_2554', '_2738', '_1219', '_2096', '_2843', '_308', '_2363', '_2415', '_2322', '_462', '_416', '_416', '_3069', '_2428', '_3407', '_2214', '_1575', '_2357', '_289', '_461', '_1845', '_4009', '_4016', '_3177', '_3618', '_1739', '_2492', '_859', '_691', '_2712', '_3107', '_2561', '_1768', '_2154', '_2673', '_1401', '_1234', '_944', '_1920', '_2008', '_2948', '_2169', '_4003', '_3885', '_1567', '_1563', '_2579', '_3562', '_2030', '_3977', '_3981', '_2609', '_1494', '_1532', '_2426', '_3602'])\n", 
"('4076_240_918_2524_4070_3900_625_2308_1114_2433_488_3671_2154_3199_1825_2610_1578_4008_1336_1562_3092_2561_428_2097_1959_210_1593_263_3745_1003_3384_1856_3662_1763_3251_3127_229_4061_977_2264_727_3498_2308_3541_456_609_3575_890_3521_109_3662_600_773_1566_2859_1872_1212_3798_2431_2632_117_846_3452_612_1336_977_727_3692_1046_1833_1825_2404_3092_1112_3712_4061_1448_3138_1560_447_1593_1326_4032_3008_387_799_608_1539_1543_3165_3250_3030_2280_3533_399_2952_3271_3240_4076_308_2930_1723_1604_1889_3203_2308_1490_3681_1889_933_399_1889_117_3437_3567_2604_1965_609_4077_4061_3124_1060_317_1032_2673_1143_7_379_2_3550_3093_3702_1593_1450_2015_1424_1140_261_3893_4031_3520_799_628_3849_2952_2796_4009_1810_1684_3100_1920_914_610_1153_4076_1958_503_3271_914_3353_2693_2712_944_1863_1728_1161_2081_3881_2712_805_3479_2782_1604_2398_1768_1225_2094_4093_3218_944_1219', ['4076', '_240', '_918', '_2524', '_4070', '_3900', '_625', '_2308', '_1114', '_2433', '_488', '_3671', '_2154', '_3199', '_1825', '_2610', '_1578', '_4008', '_1336', '_1562', '_3092', '_2561', '_428', '_2097', '_1959', '_210', '_1593', '_263', '_3745', '_1003', '_3384', '_1856', '_3662', '_1763', '_3251', '_3127', '_229', '_4061', '_977', '_2264', '_727', '_3498', '_2308', '_3541', '_456', '_609', '_3575', '_890', '_3521', '_109', '_3662', '_600', '_773', '_1566', '_2859', '_1872', '_1212', '_3798', '_2431', '_2632', '_117', '_846', '_3452', '_612', '_1336', '_977', '_727', '_3692', '_1046', '_1833', '_1825', '_2404', '_3092', '_1112', '_3712', '_4061', '_1448', '_3138', '_1560', '_447', '_1593', '_1326', '_4032', '_3008', '_387', '_799', '_608', '_1539', '_1543', '_3165', '_3250', '_3030', '_2280', '_3533', '_399', '_2952', '_3271', '_3240', '_4076', '_308', '_2930', '_1723', '_1604', '_1889', '_3203', '_2308', '_1490', '_3681', '_1889', '_933', '_399', '_1889', '_117', '_3437', '_3567', '_2604', '_1965', '_609', '_4077', '_4061', '_3124', '_1060', '_317', '_1032', '_2673', '_1143', '_7', '_379', '_2', '_3550', 
'_3093', '_3702', '_1593', '_1450', '_2015', '_1424', '_1140', '_261', '_3893', '_4031', '_3520', '_799', '_628', '_3849', '_2952', '_2796', '_4009', '_1810', '_1684', '_3100', '_1920', '_914', '_610', '_1153', '_4076', '_1958', '_503', '_3271', '_914', '_3353', '_2693', '_2712', '_944', '_1863', '_1728', '_1161', '_2081', '_3881', '_2712', '_805', '_3479', '_2782', '_1604', '_2398', '_1768', '_1225', '_2094', '_4093', '_3218', '_944', '_1219'])\n", "('4076_1728_2531_2008_2604_890_340_222_1663_3727_719_2968_3092_2444_1930_2032_1264_3232_4093_646_669_222_119_1747_2386_519_1046_1838_222_537_891_3324_3347_3324_2525_1776_3664_1952_208_1114_9_1800_1788_3744_3417_1800_2440_846_1678_222_624_3236_1800_3164_1691_1414_2308_1934_3866_977_2298_571_2750_2712_2862_3750_918_1875_2434_1984_3030_893_2525_121_1962_3820_1172_222_3953_3086_1607_2139_1388_3123_2147_2229_157_3218_2492_3547_3649_3578_3478_3123_79_2568_1993_1606_222_2866_900_1763_3415_1180_27_4061_105_32_2544_3115_3404_748_214_3146_126_2123_3336_1551_358_3724_3478_573_2897_3336_2974_3757_101_427_3495_3329_2941_1311_3400_2147_2779_1506_2797_2175_3895_1326_2147_62_3619_3027_1176_1046_2254_1246_427_3251_1139_2263_3570_3725_3030_3274_3251_2531_944_3253_1848_3030_3415_586_4061_3336_345_2101_3030_3512_1967_1618_1962_2608_2608_1776_4093_3321_4033_48_1351_2220_222_2448_3649_1046_902_2466_1726_761_4070_1848_3610_479_2315_1671_2147_1883_1021_2525_2858_1462_3415_1282_775_1669_208_2617_3702_4064_2147_3990_2969_3318_2942_977_3849_2533_2866_1863_2008_3538_304_2673_2712_2498_2008_3039_3478_3452_2525_1282_3544_846_2366_3949_3649_2770_2712_3329_2822_571_3671_1602_2531_2276_3691_2089_363_3744_2673_1757_1282_2809_1538_3452_2809_3706_1338_222_915_3498_3901_3544_746_286_2712_3767_3684_1311_1960_1046_3404_2712_2562_1669_859_2618_2386_4092_1788_1871_1784_3379_914_3050_2241_2803_3751_3244_482_1771_661_2102_2186_3751_3649_1310_1795_3218_1593_1806_1387_2817_3253_3415_3652_3008_2071_3415_427_2259_3192_1046_2154_2147_2497_410_3123_537_3324_2227_1788_
3259_1650_3840_1905_208_951_4076_636_1248_3251_2476_399_901_2337_2801_1863_3404_261_3199_617_3981_3562_1494_3741_2609_1513', ['4076', '_1728', '_2531', '_2008', '_2604', '_890', '_340', '_222', '_1663', '_3727', '_719', '_2968', '_3092', '_2444', '_1930', '_2032', '_1264', '_3232', '_4093', '_646', '_669', '_222', '_119', '_1747', '_2386', '_519', '_1046', '_1838', '_222', '_537', '_891', '_3324', '_3347', '_3324', '_2525', '_1776', '_3664', '_1952', '_208', '_1114', '_9', '_1800', '_1788', '_3744', '_3417', '_1800', '_2440', '_846', '_1678', '_222', '_624', '_3236', '_1800', '_3164', '_1691', '_1414', '_2308', '_1934', '_3866', '_977', '_2298', '_571', '_2750', '_2712', '_2862', '_3750', '_918', '_1875', '_2434', '_1984', '_3030', '_893', '_2525', '_121', '_1962', '_3820', '_1172', '_222', '_3953', '_3086', '_1607', '_2139', '_1388', '_3123', '_2147', '_2229', '_157', '_3218', '_2492', '_3547', '_3649', '_3578', '_3478', '_3123', '_79', '_2568', '_1993', '_1606', '_222', '_2866', '_900', '_1763', '_3415', '_1180', '_27', '_4061', '_105', '_32', '_2544', '_3115', '_3404', '_748', '_214', '_3146', '_126', '_2123', '_3336', '_1551', '_358', '_3724', '_3478', '_573', '_2897', '_3336', '_2974', '_3757', '_101', '_427', '_3495', '_3329', '_2941', '_1311', '_3400', '_2147', '_2779', '_1506', '_2797', '_2175', '_3895', '_1326', '_2147', '_62', '_3619', '_3027', '_1176', '_1046', '_2254', '_1246', '_427', '_3251', '_1139', '_2263', '_3570', '_3725', '_3030', '_3274', '_3251', '_2531', '_944', '_3253', '_1848', '_3030', '_3415', '_586', '_4061', '_3336', '_345', '_2101', '_3030', '_3512', '_1967', '_1618', '_1962', '_2608', '_2608', '_1776', '_4093', '_3321', '_4033', '_48', '_1351', '_2220', '_222', '_2448', '_3649', '_1046', '_902', '_2466', '_1726', '_761', '_4070', '_1848', '_3610', '_479', '_2315', '_1671', '_2147', '_1883', '_1021', '_2525', '_2858', '_1462', '_3415', '_1282', '_775', '_1669', '_208', '_2617', '_3702', '_4064', '_2147', '_3990', '_2969', '_3318', 
'_2942', '_977', '_3849', '_2533', '_2866', '_1863', '_2008', '_3538', '_304', '_2673', '_2712', '_2498', '_2008', '_3039', '_3478', '_3452', '_2525', '_1282', '_3544', '_846', '_2366', '_3949', '_3649', '_2770', '_2712', '_3329', '_2822', '_571', '_3671', '_1602', '_2531', '_2276', '_3691', '_2089', '_363', '_3744', '_2673', '_1757', '_1282', '_2809', '_1538', '_3452', '_2809', '_3706', '_1338', '_222', '_915', '_3498', '_3901', '_3544', '_746', '_286', '_2712', '_3767', '_3684', '_1311', '_1960', '_1046', '_3404', '_2712', '_2562', '_1669', '_859', '_2618', '_2386', '_4092', '_1788', '_1871', '_1784', '_3379', '_914', '_3050', '_2241', '_2803', '_3751', '_3244', '_482', '_1771', '_661', '_2102', '_2186', '_3751', '_3649', '_1310', '_1795', '_3218', '_1593', '_1806', '_1387', '_2817', '_3253', '_3415', '_3652', '_3008', '_2071', '_3415', '_427', '_2259', '_3192', '_1046', '_2154', '_2147', '_2497', '_410', '_3123', '_537', '_3324', '_2227', '_1788', '_3259', '_1650', '_3840', '_1905', '_208', '_951', '_4076', '_636', '_1248', '_3251', '_2476', '_399', '_901', '_2337', '_2801', '_1863', '_3404', '_261', '_3199', '_617', '_3981', '_3562', '_1494', '_3741', '_2609', '_1513'])\n", 
"('3835_2964_1275_1033_3649_2632_3452_3835_3146_1046_985_2779_3514_317_2596_1228_3404_1046_3182_2943_2466_1249_1495_1419_1163_3513_64_1934_3684_1623_1229_2673_1596_702_1838_2463_3613_3530_1_2940_1890_379_2635_1093_4065_19_1543_2589_1046_50_990_2076_3404_894_251_3253_2243_83_3379_679_1316_2750_204_50_3163_746_846_3351_2685_104_3004_4093_957_317_2501_2788_3253_3835_1013_251_3893_2964_3778_1932_878_2069_3514_2662_3457_363_1863_3004_2952_903_2885_3838_3893_2337_878_3659_2466_1434_308_3893_1314_3835_1543_1726_1326_2964_2433_3253_2596_2497_1134_2662_3415_1838_3272_3684_843_352_3039_212_1669_1056_3130_3488_3410_2531_240_1046_679_2243_3345_3379_4061_3336_1593_1800_3110_3851_940_1302_857_1375_1119_3110_3694_2021_3379_857_3288_439_2243_3969_237_951_1565_846_1848_2635_3415_3356_870_3978_2008_1920_1696_3177_1140_3253_857_859_4093_2243_2741_1147_1229_2003_2662_2362_4061_2601_237_124_2003_727_1112_1803_3456_3638_1729_2852_3514_1618_1696_957_3765_3404_484_1872_3452_1696_1543_1338_3544_3785_3324_1741_2635_2968_2817_2386_2448_1593_3379_3110_3712_3498_1848_880_1326_846_2786_2016_2745_1599_2059_571_2750_2243_2003_3893_4040_3182_2943_538_2243_4086_4061_3026_3182_1393_1726_3110_3851_3387_846_664_3404_1726_2243_1730_1388_1718_406_1872_3254_1369_2059_900_1_3207_1046_3026_2243_2359_2943_304_3737_3251_1393_3684_2968_3787_679_1460_1593_2343_550_3571_679_3253_3105_1275_3379_2596_3705_1060_1932_55_1326_3760_1861_52_2996_3253_3182_204_2097_1369_3764_2097_2943_3660_1046_903_1861_2820_498_1134_2026_2003_2786_2786_1648_4094_3649_1032_2466_3026_2786_654_3199_3218_143_901_247_536_2968_27_2003_82_3867_1261_2442_2125_1990_4006_308_3893_2431_2596_3182_1105_1275_880_2413_237_4076_189_1424_1618_3908_499_1046_3865_875_2398_304_3909_3880', ['3835', '_2964', '_1275', '_1033', '_3649', '_2632', '_3452', '_3835', '_3146', '_1046', '_985', '_2779', '_3514', '_317', '_2596', '_1228', '_3404', '_1046', '_3182', '_2943', '_2466', '_1249', '_1495', '_1419', '_1163', '_3513', '_64', '_1934', '_3684', '_1623', 
'_1229', '_2673', '_1596', '_702', '_1838', '_2463', '_3613', '_3530', '_1', '_2940', '_1890', '_379', '_2635', '_1093', '_4065', '_19', '_1543', '_2589', '_1046', '_50', '_990', '_2076', '_3404', '_894', '_251', '_3253', '_2243', '_83', '_3379', '_679', '_1316', '_2750', '_204', '_50', '_3163', '_746', '_846', '_3351', '_2685', '_104', '_3004', '_4093', '_957', '_317', '_2501', '_2788', '_3253', '_3835', '_1013', '_251', '_3893', '_2964', '_3778', '_1932', '_878', '_2069', '_3514', '_2662', '_3457', '_363', '_1863', '_3004', '_2952', '_903', '_2885', '_3838', '_3893', '_2337', '_878', '_3659', '_2466', '_1434', '_308', '_3893', '_1314', '_3835', '_1543', '_1726', '_1326', '_2964', '_2433', '_3253', '_2596', '_2497', '_1134', '_2662', '_3415', '_1838', '_3272', '_3684', '_843', '_352', '_3039', '_212', '_1669', '_1056', '_3130', '_3488', '_3410', '_2531', '_240', '_1046', '_679', '_2243', '_3345', '_3379', '_4061', '_3336', '_1593', '_1800', '_3110', '_3851', '_940', '_1302', '_857', '_1375', '_1119', '_3110', '_3694', '_2021', '_3379', '_857', '_3288', '_439', '_2243', '_3969', '_237', '_951', '_1565', '_846', '_1848', '_2635', '_3415', '_3356', '_870', '_3978', '_2008', '_1920', '_1696', '_3177', '_1140', '_3253', '_857', '_859', '_4093', '_2243', '_2741', '_1147', '_1229', '_2003', '_2662', '_2362', '_4061', '_2601', '_237', '_124', '_2003', '_727', '_1112', '_1803', '_3456', '_3638', '_1729', '_2852', '_3514', '_1618', '_1696', '_957', '_3765', '_3404', '_484', '_1872', '_3452', '_1696', '_1543', '_1338', '_3544', '_3785', '_3324', '_1741', '_2635', '_2968', '_2817', '_2386', '_2448', '_1593', '_3379', '_3110', '_3712', '_3498', '_1848', '_880', '_1326', '_846', '_2786', '_2016', '_2745', '_1599', '_2059', '_571', '_2750', '_2243', '_2003', '_3893', '_4040', '_3182', '_2943', '_538', '_2243', '_4086', '_4061', '_3026', '_3182', '_1393', '_1726', '_3110', '_3851', '_3387', '_846', '_664', '_3404', '_1726', '_2243', '_1730', '_1388', '_1718', '_406', '_1872', 
'_3254', '_1369', '_2059', '_900', '_1', '_3207', '_1046', '_3026', '_2243', '_2359', '_2943', '_304', '_3737', '_3251', '_1393', '_3684', '_2968', '_3787', '_679', '_1460', '_1593', '_2343', '_550', '_3571', '_679', '_3253', '_3105', '_1275', '_3379', '_2596', '_3705', '_1060', '_1932', '_55', '_1326', '_3760', '_1861', '_52', '_2996', '_3253', '_3182', '_204', '_2097', '_1369', '_3764', '_2097', '_2943', '_3660', '_1046', '_903', '_1861', '_2820', '_498', '_1134', '_2026', '_2003', '_2786', '_2786', '_1648', '_4094', '_3649', '_1032', '_2466', '_3026', '_2786', '_654', '_3199', '_3218', '_143', '_901', '_247', '_536', '_2968', '_27', '_2003', '_82', '_3867', '_1261', '_2442', '_2125', '_1990', '_4006', '_308', '_3893', '_2431', '_2596', '_3182', '_1105', '_1275', '_880', '_2413', '_237', '_4076', '_189', '_1424', '_1618', '_3908', '_499', '_1046', '_3865', '_875', '_2398', '_304', '_3909', '_3880'])\n", @@ -754,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -763,7 +783,7 @@ "4096" ] }, - "execution_count": 59, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -780,18 +800,20 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Algorithm\n", "\n", - "vocab_size = 4100\n", + "vocab_size = 4500\n", "\n", "while len(vocab) < vocab_size:\n", " pair_freqs = compute_pair_freqs(splits)\n", " best_pair = \"\"\n", " max_freq = None\n", + "\n", + " # Find pair that occurs the most often\n", " for pair, freq in pair_freqs.items():\n", " if max_freq is None or max_freq < freq:\n", " best_pair = pair\n", @@ -803,7 +825,34 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['896', '_2029', '_935', '_679', '_1115', '_3601', '_3000', '_222_3446', '_2218', '_3072', '_550', '_3652', 
'_665', '_2596', '_2809', '_3649', '_251', '_2610', '_2536', '_47', '_2852', '_2940', '_3353', '_3400', '_3336', '_325', '_2647', '_4076', '_3653', '_3253', '_58', '_3664', '_1424', '_1388', '_222', '_278', '_897', '_447', '_2355', '_2453', '_2531', '_2712', '_828', '_2895', '_2398', '_2908', '_901', '_2536', '_222', '_3686', '_2620', '_3254', '_3962', '_0', '_1448', '_222', '_863', '_3593', '_124', '_124', '_1048', '_1593', '_222', '_4086', '_2647', '_3236', '_1767', '_2800', '_697', '_514', '_3648', '_2337', '_1338', '_1114', '_340', '_3514', '_4076', '_2658', '_1954', '_3867', '_2300', '_251', '_317', '_7', '_1091', '_1768', '_1440', '_3167', '_672', '_1253', '_188', '_3544', '_2934', '_1368', '_479', '_3951', '_3387', '_514', '_2438', '_1262', '_3166', '_462', '_3530', '_333', '_2596', '_3808', '_2796', '_1920', '_794', '_263', '_2626', '_2596', '_1949', '_57', '_3990', '_3785', '_146', '_404', '_3731', '_479', '_3840', '_3840', '_3664', '_940', '_2550', '_4076', '_544', '_3465', '_3232', '_269', '_79', '_2159', '_3879', '_1734', '_3900', '_755', '_1756', '_818', '_800', '_1249', '_171', '_319', '_727', '_171', '_3698', '_3683', '_2596', '_3969', '_2431', '_1838', '_3969', '_126', '_2673_2596', '_4012', '_1010', '_2151', '_3437', '_417', '_2386_2712', '_3705', '_1838', '_3428', '_1168', '_1838', '_1527', '_3885', '_1952', '_2443', '_3997', '_3562', '_1667', '_3651', '_3981_2426_1494_1532_2426_3602']\n", + "['2712', '_1604', '_3458', '_4031', '_1604_1669', '_2008', '_2337_857', '_3123', '_3321', '_1887', '_846', '_2398', '_763', '_612', '_3846', '_1060', '_312', '_859', '_3638', '_763', '_2238', '_590', '_2458', '_3847', '_304', '_1888', '_1986', '_2238', '_3412', '_1885', '_375', '_2176', '_887', '_3051', '_2238', '_3374', '_3485', '_973', '_251', '_622', '_3051', '_2557', '_727', '_3750', '_240', '_2386', '_2452', '_2712', '_2913', '_2525', '_691', '_1114', '_1363', '_796', '_3731', '_1232', '_1332', '_3282', '_966', '_3883', '_2431', '_1774', '_2559', '_755', 
'_748', '_2975', '_2608', '_3345', '_7', '_868', '_731', '_2872', '_1336', '_2488', '_3706', '_2276', '_3739', '_434', '_2203', '_2019', '_873', '_1273', '_3627', '_2912', '_4046', '_120', '_2888', '_1707', '_1153', '_3731', '_2927', '_1188', '_1400', '_966', '_397', '_1153', '_2712', '_1140', '_792', '_3412', '_20', '_3452', '_2452', '_1247', '_3297', '_1669', '_326', '_2813', '_2365', '_3368', '_1774', '_1129', '_3260', '_186', '_1814', '_1445', '_438', '_1247', '_3323', '_3368', '_3744', '_2392', '_448', '_1953', '_1247', '_2204', '_2430', '_1094', '_1702', '_2688', '_1953', '_2712', '_966', '_2380', '_3714', '_3446', '_3391', '_2531', '_2738', '_2312', '_3866', '_1952', '_2238', '_4069', '_752', '_1861', '_73', '_3403', '_3825', '_685', '_1707', '_332', '_2738', '_685', '_305', '_4049', '_1004', '_186', '_1188', '_4076', '_3468', '_2885', '_740', '_1001', '_251', '_3367', '_2712', '_48', '_752', '_2673', '_2617', '_793', '_927', '_2712_2801', '_3229', '_3896', '_886', '_773', '_3229', '_3396', '_1160', '_1968', '_3272', '_381', '_3452', '_1307', '_3396', '_2240', '_1307', '_215', '_726', '_679', '_3195', '_2712', '_3026', '_903', '_2317', '_1114', '_901', '_1484', '_665', '_2182', '_3688', '_7', '_234', '_3009', '_370', '_2712', '_3871', '_3551', '_499', '_240', '_1742', '_2531', '_582', '_862', '_930', '_1097', '_688', '_2450', '_1658', '_2738', '_97', '_3698', '_2502', '_308', '_746', '_488', '_608', '_2948', '_1669', '_3919', '_2204', '_2431', '_3219', '_1774', '_1941', '_845', '_1398', '_2440', '_3765', '_1644', '_2206', '_3795', '_2076', '_1953', '_685', '_1953', '_1953', '_3879', '_3089', '_2331', '_2807', '_2238', '_1660', '_95', '_222', '_3671', '_2386', '_2918', '_1094', '_3008', '_152', '_117', '_1924', '_365', '_3893', '_3069', '_1725', '_499', '_3731', '_3258', '_1794', '_2718', '_2502', '_829', '_575', '_2326', '_294', '_4054', '_1349', '_1814', '_3177', '_1188', '_3872', '_3281', '_588', '_3750', '_2813', '_992', '_3687', '_3731', '_3087', 
'_3786', '_2453', '_450', '_2365', '_930', '_1788', '_831', '_1644', '_2984', '_3180', '_1010', '_206', '_1788', '_3808', '_100', '_2506', '_3230', '_399', '_204', '_1806', '_48', '_3603', '_1669', '_2779', '_289', '_3514', '_572', '_1032', '_1932', '_1060', '_990', '_3702', '_1046', '_3161', '_2085', '_1932', '_1932', '_3350', '_702', '_263', '_665', '_1032', '_2895', '_901', '_489', '_859', '_2434', '_2712', '_3693', '_2788', '_1838', '_1026', '_3251', '_1701', '_665', '_1477', '_204', '_2008', '_318', '_289', '_2788', '_1930', '_1325', '_1595', '_237', '_1054', '_3820', '_1669', '_665', '_931', '_1863', '_3218', '_2094', '_859', '_289', '_2712', '_3937', '_1920', '_1229', '_1408', '_153', '_1990', '_2712', '_1435', '_427', '_1838', '_961', '_901', '_2450', '_3030', '_1516', '_3775', '_3013', '_267', '_204', '_2626', '_222', '_477', '_1134', '_2083', '_1217', '_243', '_2070', '_695', '_550', '_2434', '_2525', '_3566', '_3253', '_3075', '_222', '_2167', '_616', '_3574', '_3375', '_1655', '_457', '_1131', '_3316', '_3702', '_2076', '_990', '_3498', '_1261', '_1369', '_2516', '_435', '_890', '_3969', '_951', '_3867', '_222', '_1443', '_1134', '_1838', '_2003', '_1648', '_447', '_2647', '_1299', '_1395', '_3324', '_3514', '_1046', '_1060', '_188', '_2593', '_3498', '_3514', '_1648', '_2813', '_3353', '_2647', '_1048', '_719', '_3353', '_2939', '_3592', '_3613', '_2788', '_3487', '_499', '_2786', '_2801', '_3318', '_3396', '_7', '_2206', '_231', '_1346', '_240', '_3828', '_1482', '_188', '_966', '_175', '_1017', '_868', '_99', '_2469', '_222', '_584', '_2001', '_2750', '_2573', '_784', '_2001', '_1524', '_2913', '_3593', '_1580', '_1793', '_2874', '_1574', '_2160', '_1316', '_3254', '_2655', '_1675', '_2750', '_1052', '_2147', '_2809', '_1351', '_3008', '_1443', '_527', '_3321', '_3030', '_26', '_3286', '_2228', '_50', '_1112', '_2342', '_761', '_2559', '_3702', '_3702', '_1093', '_670', '_527', '_222', '_2003', '_2750', '_1312', '_3044', '_3199', '_295', '_222', 
'_4086', '_2673', '_263', '_1443', '_251', '_2259', '_222', '_4086', '_3384', '_263', '_354', '_2943', '_2943', '_222', '_2952', '_3194', '_1231', '_354', '_1932', '_3288', '_222', '_354', '_1593', '_3384', '_3672', '_1539', '_3551', '_222', '_1161', '_3324', '_3458', '_2675', '_1307', '_2876', '_665', '_688', '_3979', '_1660', '_2444', '_3229', '_2013', '_665', '_3345', '_2224', '_2224', '_926', '_2224', '_580', '_1430', '_1129', '_1907', '_3177', '_1161', '_1059', '_4076', '_188', '_1544', '_2080', '_114', '_1130', '_2066', '_186', '_665', '_3671', '_1848', '_1726', '_1567', '_2673', '_833', '_3724', '_3910', '_289', '_568', '_1423', '_2008', '_1305', '_188', '_375', '_1631', '_3724', '_20', '_536', '_2174', '_1788', '_124', '_979', '_1688', '_484', '_679', '_3708', '_188', '_3875', '_1516', '_3236', '_1148', '_2700', '_3761', '_1524', '_1420', '_1761', '_2115', '_2419', '_457', '_2813', '_222', '_3808', '_175', '_1283', '_2310', '_2673', '_756', '_1806', '_1164', '_2083', '_499', '_146', '_92', '_2328', '_1788', '_1877', '_2750', '_1768', '_85', '_2662', '_4093', '_2147', '_2317', '_1134', '_204', '_2779', '_1768', '_3415', '_2147', '_2408', '_3269', '_1462', '_2435', '_2434', '_3205', '_2147', '_230', '_1594', '_3881', '_1114', '_1934', '_2676', '_2891', '_782', '_4071', '_3687', '_3195', '_2338', '_623', '_1733', '_757', '_3467', '_3706', '_3352', '_1806', '_2467', '_188', '_3161', '_1912', '_3724', '_1907', '_2750', '_3384', '_3030', '_775', '_2254', '_3514', '_427', '_3893', '_1848', '_7', '_216', '_3205', '_2736', '_3106', '_2331', '_4036', '_2160', '_2939', '_4080', '_2613', '_73', '_3412', '_2870', '_3731', '_3566', '_1970', '_3183', '_2782', '_1851', '_599', '_222', '_308', '_3028', '_1863', '_105', '_764', '_3450', '_1788', '_1905', '_1283', '_3866', '_462', '_365', '_3806', '_654', '_2913', '_2831', '_1343', '_3106', '_365', '_1813', '_3731', '_2415', '_2558', '_657', '_2610', '_1408', '_3507', '_1788', '_2940', '_1581', '_2013', '_366', '_2337', 
'_3005', '_1788', '_2124', '_1010', '_755', '_278', '_1446', '_2817', '_222', '_3808', '_802', '_2462', '_2448', '_2068', '_3507', '_3731', '_928', '_3547', '_2992', '_2204', '_3463', '_2100', '_1788', '_2909', '_1094', '_940', '_3106', '_2689', '_933', '_3731', '_2392', '_2473', '_3856', '_3662', '_935', '_2918', '_3731', '_2900', '_1669_1669', '_2662', '_2008', '_2444', '_1788', '_2019', '_1655', '_829', '_2939', '_208', '_2365', '_7', '_3671', '_1106', '_833', '_2513', '_3849', '_1655', '_981', '_3181', '_1818', '_2453', '_3352', '_2738', '_2470', '_680', '_77', '_2946', '_3149', '_926', '_2738', '_529', '_3731', '_162', '_1055', '_448', '_926', '_2151', '_731', '_7', '_2204', '_2867', '_1904', '_4071', '_105', '_2511', '_1788', '_2964', '_3619', '_2338', '_2019', '_1310', '_3570', '_1247', '_1751', '_410', '_2885', '_660', '_2320', '_237', '_7', '_1924', '_79', '_3384', '_3297', '_2386', '_344', '_1788', '_3740', '_3704', '_1519', '_945', '_87', '_1462', '_2147', '_429', '_247', '_289', '_2029', '_196', '_3218', '_2147', '_3998', '_449', '_3218', '_721', '_196', '_3171', '_7', '_427', '_1578', '_455', '_3855', '_1678', '_3173', '_1733', '_2913', '_1400', '_3112', '_3323', '_177', '_2718', '_4033', '_637', '_697', '_3461', '_1130', '_1041', '_2398', '_3030', '_775', '_2788', '_2089', '_2968', '_2673', '_3205', '_188', '_366', '_685', '_991', '_3901', '_811', '_1527', '_1088', '_1875', '_399', '_3893', '_3212', '_829', '_1589', '_2160', '_449', '_497', '_691', '_782', '_4059', '_1269', '_2160', '_2939', '_946', '_595', '_684', '_2253', '_31', '_2147', '_2182', '_149', '_3877', '_1423', '_3218', '_317', '_1524', '_1336', '_351', '_2320', '_2991', '_1046', '_3102', '_7', '_2204', '_4087', '_2221', '_3227', '_304', '_1411', '_7', '_105', '_3849', '_2158', '_637', '_1833', '_3154', '_7', '_60', '_1764', '_2073', '_1565', '_3866', '_2610', '_7', '_902', '_1143', '_435', '_716', '_2525', '_3936', '_7', '_2909', '_646', '_3799', '_1238', '_2616', '_2688', '_3731', 
'_4026', '_1593', '_3102', '_1617', '_2491', '_550', '_7', '_1659', '_457', '_832', '_1425', '_3218', '_586', '_3731', '_2617', '_2673', '_3919', '_1042', '_3187', '_39', '_3731', '_73', '_397', '_1904', '_1818', '_175', '_1444', '_680', '_1161', '_2204', '_2480', '_2182', '_99', '_956', '_680', '_2204', '_1485', '_2410', '_3001', '_3901', '_3907', '_2326', '_2634', '_1757', '_3744', '_1751', '_1059', '_1445', '_3731', '_1966', '_829', '_2458', '_3062', '_3514', '_1747', '_3396', '_3468', '_2166', '_499', '_3969', '_2097', '_2005', '_1788', '_219', '_3560', '_1159', '_645', '_3867', '_3246', '_1246', '_2290', '_1669', '_3497', '_1316', '_1669', '_15', '_1806', '_479', '_196', '_12', '_2029', '_427', '_1390', '_1788', '_4070', '_2398', '_1747', '_3166', '_2008', '_2918', '_117', '_2485', '_1757', '_1599', '_3477', '_2203', '_304', '_169', '_777', '_1489', '_2735', '_2531', '_1882']\n", + "['3446', '_2542', '_1488', '_3452', '_1810', '_499', '_2365', '_752_3437', '_2386', '_2782_1604', '_410', '_3480', '_2712', '_3220', '_2259', '_586', '_3345', '_98', '_1749', '_1930', '_1893', '_2688', '_781', '_1993', '_1991', '_746', '_222', '_3659', '_447', '_536', '_3047', '_1289', '_2182', '_1788', '_3180', '_2945', '_3352', '_2280', '_447', '_3123', '_222', '_2524', '_2917', '_4042', '_280', '_1375', '_134', '_2852', '_400', '_1481', '_499', '_463', '_3442', '_1932', '_4076', '_3910', '_2357', '_2947', '_1424', '_258', '_2123', '_3051', '_1861', '_571', '_2008', '_3762', '_3978', '_1574', '_2712', '_375', '_2453', '_3750', '_278', '_1670', '_651', '_2276', '_1336', '_2694', '_833', '_894', '_1669', '_1061', '_4076', '_3020', '_218', '_921', '_3487', '_2265', '_1560', '_4076', '_3593', '_1573', '_610', '_2862', '_2005', '_679', '_2712_1875', '_2694', '_1669', '_510', '_1739', '_1462', '_3232', '_950', '_4009', '_2782', '_2332', '_375', '_2462', '_2712_3437', '_442', '_2029', '_3062', '_2887', '_2151', '_853', '_584', '_3820', '_1865', '_3671', '_1484', '_2002', '_2712_1907', 
'_251', '_3324', '_2356', '_608', '_25', '_1507', '_375', '_3362', '_2507', '_3983', '_2558', '_3738', '_1806', '_2356', '_3452', '_1284', '_1604', '_2249', '_175', '_472', '_3717', '_1289', '_1757', '_3051', '_1056', '_2312', '_472', '_3390', '_1554', '_3040', '_2918', '_168', '_793', '_3232', '_2094', '_4040', '_2673', '_3342', '_3384', '_90', '_2322', '_1336', '_3165', '_606', '_1667', '_997', '_2520', '_3232', '_2094', '_685', '_1539', '_3356', '_973', '_2097', '_2712', '_1424', '_410', '_2073', '_3983', '_53', '_3825', '_2712', '_2342', '_62', '_1421', '_3356', '_1650', '_3765', '_2712_3069', '_3387', '_90', '_894', '_2244', '_3458', '_514', '_1095', '_2070', '_3684', '_3062', '_3410', '_3345', '_514', '_868', '_1818', '_2971', '_3069', '_210', '_700', '_514', '_3593', '_2073', '_955', '_3390', '_62', '_1769', '_2337', '_1919', '_1017', '_2169', '_3901', '_2312', '_2221', '_2337_3297', '_977', '_2053', '_3901', '_1134', '_3686', '_1234', '_1705', '_1909', '_295', '_1604', '_50_2782', '_2337_3437', '_2782', '_50', '_933', '_2259', '_2133', '_3933', '_3437', '_50_50', '_2503', '_441', '_2266', '_3981_2609_1494_1532_2426_3602']\n", + "['2308', '_2094', '_1158', '_2030', '_2094', '_29', '_2782', '_2308', '_2094', '_1755', '_2169', '_519', '_3146', '_4040', '_4076_1684', '_2133', '_793', '_1130', '_443', '_3612', '_2692', '_3106', '_2025', '_1626', '_339', '_977', '_768', '_485', '_3351', '_363', '_2624', '_4031', '_1934', '_1217', '_536', '_1875', '_744', '_208', '_2839', '_3050', '_2601', '_3396', '_3106', '_2167', '_1421', '_745', '_3229', '_1767', '_3396_1962', '_3187', '_776', '_379', '_363', '_3650', '_3396', '_229', '_1097', '_413', '_1688', '_2282', '_1139', '_3396', '_1878', '_2689', '_2397', '_3220', '_2813', '_247', '_7', '_4041', '_3037', '_2892', '_3161', '_3374', '_1449', '_536', '_3274', '_1826', '_1794', '_3009', '_118', '_2442', '_3051', '_1922', '_3359', '_1723', '_3662', '_1527', '_1565', '_3396', '_343', '_2718', '_415', '_2280', '_204', 
'_2243', '_2712', '_928', '_2518', '_1700', '_3069', '_1562', '_1755', '_1810', '_2532', '_2604', '_2909', '_379', '_3864', '_1644', '_40', '_2944', '_39', '_118', '_933', '_2386', '_2750', '_2160', '_731', '_2693', '_555', '_2371', '_109', '_555', '_536', '_2919', '_2199', '_887', '_1962', '_2693', '_1924', '_53', '_3091', '_2786', '_1747', '_4003', '_2386', '_657', '_1861', '_3897', '_248', '_3253', '_1130', '_657', '_1169', '_514', '_1130', '_3253', '_903', '_731', '_294', '_2536', '_1234', '_379', '_3787', '_1728', '_2873', '_1644', '_2750', '_3232', '_731', '_3146', '_251', '_2094', '_2910', '_1857', '_2308', '_2094', '_2462_2462', '_1868', '_2462', '_2782', '_2644', '_857', '_263', '_3449', '_3562', '_548']\n", + "['1083', '_2968', '_762', '_1669', '_3336', '_317', '_2008', '_3835', '_1072', '_3218_3218', '_3350', '_1046', '_104', '_2596', '_2944', '_2511', '_15', '_461', '_204', '_1326', '_4076', '_3278', '_718', '_3893', '_3481', '_208', '_2647', '_3920', '_2623', '_1644', '_1725', '_3473', '_2361', '_1229', '_4076', '_3845', '_3251', '_2550', '_2600', '_4061', '_890', '_4033', '_2511', '_1838', '_2662', '_2622', '_3374', '_2331', '_3920', '_3912', '_2001', '_2132', '_1579', '_2363', '_2700', '_986', '_2027', '_1991', '_331', '_3136', '_4061', '_503', '_1970', '_3452', '_3092', '_79', '_158', '_3093', '_1933', '_2786', '_3815', '_2556', '_2506', '_3072', '_2857', '_368', '_3137', '_3575', '_854', '_1669', '_1161', '_2008', '_2662', '_4033', '_3053', '_2662', '_3750', '_1866', '_1725', '_2726', '_3920', '_2714', '_1375', '_940', '_3276', '_940', '_727', '_1809', '_2740', '_1877', '_550', '_1244', '_2610', '_2648', '_2147', '_2471', '_2750', '_2525', '_1342', '_2712', '_2679', '_1282', '_2909', '_3610', '_2386', '_723', '_933', '_1800', '_572', '_2675', '_1048', '_3400', '_685', '_538', '_809', '_4033', '_2431', '_2833', '_1904', '_3713', '_3886', '_1644', '_4076', '_1781', '_683', '_3541', '_3129', '_93', '_2784', '_4076', '_2237', '_2945', '_2249', '_260', 
'_2308', '_2399', '_1826', '_262', '_4069', '_476', '_143', '_950', '_3374', '_1826', '_2975', '_42', '_792', '_3863', '_2536', '_2083', '_2077', '_3747', '_427', '_3462', '_3418', '_2107']\n", + "['1234', '_3901', '_1328', '_1351', '_3273', '_331', '_985', '_1176', '_1130', '_1964', '_96', '_1907', '_3725', '_3497', '_3745', '_3680', '_619', '_499', '_7', '_1872', '_1112', '_1131', '_2306', '_1872', '_2431', '_203', '_2453', '_398', '_536', '_2835', '_3538', '_410', '_2530', '_3232', '_184', '_2712', '_798', '_363', '_1222', '_2139', '_679', '_2504', '_3039', '_734', '_3498', '_3978', '_2918', '_2925', '_3702', '_682', '_1565', '_3508', '_2251', '_1566', '_104', '_1485', '_3110', '_3387', '_1815', '_1033', '_1114', '_1726', '_3673', '_1369', '_56', '_1966', '_951', '_1106', '_1506', '_1629', '_2859', '_3303', '_208', '_277', '_3376', '_773', '_3764', '_1083', '_1756', '_160', '_1514', '_3250', '_124', '_3102', '_1083_1667', '_1424', '_2859', '_1575', '_490', '_3551', '_1083_1667', '_4057', '_1924', '_3278', '_3109', '_3109', '_310', '_1667', '_3720', '_3832', '_2154', '_3750', '_700', '_1083', '_2796', '_412', '_2333', '_1684', '_2739', '_1406', '_1234', '_2542', '_2259', '_248', '_246', '_9', '_669', '_204', '_3901', '_2753', '_2608', '_2154', '_3975', '_3109', '_2859', '_175', '_20', '_1987', '_3505', '_802', '_2837', '_187', '_1106', '_829', '_2453', '_3984', '_1419', '_1310', '_2859', '_342', '_3418', '_3551', '_3381', '_1388', '_3039', '_3745', '_2532', '_792', '_3775', '_3265', '_1112', '_3178', '_1926', '_3549', '_1485', '_3232', '_2179', '_3787', '_681', '_2990', '_3297', '_436', '_1924', '_2702', '_1349', '_4082', '_754', '_1423', '_26', '_2786', '_308', '_1848', '_3652', '_3684', '_3801', '_3199', '_2480', '_3967', '_727', '_93', '_3091', '_3798', '_3311', '_3358', '_3798', '_152', '_1140', '_682', '_212', '_2945', '_4091', '_4041', '_1289', '_4032', '_1357', '_2392', '_3777', '_1818', '_278', '_1159', '_396', '_518', '_3575', '_3866', '_1920', '_3659', 
'_1261', '_1312', '_375', '_2168', '_740', '_2782', '_2356', '_701', '_3168', '_1083', '_805', '_1046', '_2782', '_1629', '_3560', '_535', '_3119', '_2094', '_527', '_1647', '_3335', '_525', '_3321', '_939', '_805', '_2722', '_2456', '_1216', '_3008', '_3410', '_3643', '_97', '_3134', '_313', '_285', '_1229', '_4054', '_1369', '_3984', '_414', '_2361', '_3293', '_575', '_56', '_682', '_1914', '_398', '_3706', '_1031', '_3702', '_2013', '_3684', '_1150', '_3278', '_3968', '_3382', '_977', '_2575', '_4076', '_3369', '_3638', '_2920', '_1242', '_600', '_890', '_1083', '_2562', '_2693', '_3082', '_1907', '_890', '_1307', '_204', '_944', '_714', '_3990', '_286', '_1046', '_2453', '_3981', '_1186', '_658', '_424', '_2609_1513']\n", + "['2712', '_2424', '_1311', '_2693', '_1684', '_3721', '_2303', '_1234_1130', '_1487', '_2021', '_2435', '_3778', '_3410', '_3627', '_1491', '_3467', '_2076', '_2204', '_3721', '_331', '_40', '_3105', '_2673', '_2442', '_3345', '_1270', '_1138', '_3051', '_2134', '_3820', '_3230', '_1076', '_3368', '_3893', '_1986', '_1161', '_96', '_515', '_1051', '_3476', '_224', '_1733', '_1114', '_2669', '_81', '_3969', '_633', '_1968', '_117', '_1114', '_2984', '_2046', '_1565', '_599', '_2093', '_117', '_3167', '_2236', '_92', '_248', '_3822', '_1815', '_2214', '_2322', '_378', '_2225', '_249', '_1294', '_3082', '_2214', '_643', '_2424', '_289', '_3178', '_196', '_3999', '_654', '_1565', '_3165', '_3030', '_716', '_748', '_3737', '_2738', '_1114', '_1671', '_370', '_1114', '_1235', '_3508', '_456', '_1887', '_1794', '_1779', '_1734', '_629', '_2230', '_3731', '_2909', '_1266', '_1381', '_1719', '_3183', '_2431', '_2647', '_2675', '_802', '_3720', '_1170', '_1349', '_2858', '_2214', '_490', '_168', '_2626', '_105', '_2398', '_1578', '_3197', '_2448', '_1562', '_2536', '_3352', '_2531', '_1590', '_2910', '_2839', '_3743', '_2094', '_2939', '_3541', '_1303', '_925', '_1114', '_1833', '_950', '_2939', '_1220', '_3379', '_3648', '_2617', '_3665', '_112', 
'_3069', '_1105', '_1350', '_3051_3671', '_1351', '_1395', '_3808', '_290', '_9', '_3051', '_2310', '_1970', '_3623', '_3297', '_501', '_2531', '_1148', '_248', '_1099', '_1932', '_450', '_3744', '_1186', '_2910', '_1978', '_1424', '_1310', '_2617', '_1661', '_416', '_3051', '_1114', '_2773', '_2738', '_3969', '_3866', '_544', '_3051', '_1770', '_2713', '_2398', '_64', '_1669', '_1168', '_160', '_637', '_2365', '_910', '_1129', '_2018', '_3958', '_2439', '_1129', '_60', '_2025', '_2760', '_4069', '_1848', '_2738', '_1203', '_2589', '_3775', '_20', '_1815', '_200', '_3051', '_3297', '_1651', '_2673', '_2144', '_1462', '_399', '_3879', '_937', '_2398', '_2520', '_1170', '_3777', '_1262', '_3051', '_2205', '_1945', '_2918', '_2524', '_1312', '_586', '_1925', '_1170', '_2386', '_3856', '_1170', '_3092', '_1604', '_2712', '_62', '_2001', '_3990', '_2496', '_3404', '_3183', '_117', '_2205', '_1269', '_2342', '_658', '_43', '_620', '_3627', '_1048', '_3262', '_1254', '_2310', '_712', '_1877', '_3051_3437', '_1595', '_874', '_1684', '_1453', '_1234', '_3051', '_3163', '_3764', '_939', '_1161', '_2304', '_2983', '_472', '_3167', '_1160', '_1061', '_2348', '_1669', '_2952', '_1861', '_248', '_55', '_3569', '_3969', '_962', '_661', '_2738', '_375', '_469', '_2203', '_2384', '_2673', '_1907', '_2738', '_3167', '_1308', '_3753', '_3297', '_2249', '_890', '_3197', '_2782', '_1351', '_1763', '_1170', '_2750', '_3495', '_3731', '_775', '_3732', '_4004', '_2939', '_3468', '_238', '_456', '_1491', '_1912', '_495', '_1491', '_3379', '_482', '_3051', '_3785', '_3556', '_442', '_3297', '_1229', '_933', '_2647', '_1076', '_268', '_1338', '_1170', '_3638', '_3109', '_3040', '_1076', '_1848', '_1669', '_3526', '_3778', '_1907', '_2439', '_1170', '_2642', '_3790', '_1170', '_9', '_2265', '_2738', '_2310', '_499', '_854', '_2310', '_2925', '_802', '_3040', '_248', '_1661', '_622', '_2782', '_571', '_2673_3051', '_308', '_1924', '_1607', '_2310', '_3092', '_3907', '_2738', '_3785', '_181', 
'_3951', '_2939', '_2892', '_2530', '_2803', '_3381', '_463', '_535', '_3297', '_3039', '_2249', '_2738', '_3297', '_784', '_3178', '_3468', '_132', '_4092', '_2214', '_944', '_2466', '_3877', '_1684', '_2516', '_215', '_766', '_308', '_2203', '_2303', '_1129', '_3470', '_2177', '_3051', '_1491', '_3026', '_669', '_3297', '_3259', '_3777', '_3051_1684', '_1312', '_2525', '_3468', '_372', '_2918', '_3051', '_1338', '_3721', '_3458', '_3297', '_2304', '_1872', '_3627', '_3468', '_893', '_2251', '_1696', '_15', '_1768', '_1234', '_3747', '_2434', '_2693', '_248', '_3426', '_3724', '_117', '_2782', '_3183', '_2057', '_248', '_861', '_74', '_3627', '_2809', '_456', '_1672', '_248', '_45', '_1742', '_3627', '_3297', '_100', '_2712', '_2809', '_1932', '_2365', '_3731', '_62', '_1742', '_2386', '_3686', '_1594', '_3329', '_3051', '_2496', '_1970', '_2750', '_933', '_1312', '_3942', '_3879', '_3297', '_2854', '_2491', '_64', '_1398', '_2166', '_2738', '_2347', '_755', '_2002', '_2573', '_3867', '_2554', '_2738', '_1219', '_2096', '_2843', '_308', '_2363', '_2415', '_2322', '_462', '_416', '_416', '_3069', '_2428', '_3407', '_2214', '_1575', '_2357', '_289', '_461', '_1845', '_4009', '_4016', '_3177', '_3618', '_1739', '_2492', '_859', '_691', '_2712', '_3107', '_2561', '_1768', '_2154', '_2673', '_1401', '_1234_944', '_1920', '_2008', '_2948', '_2169', '_4003', '_3885', '_1567', '_1563', '_2579', '_3562_2030', '_3977', '_3981_2609_1494_1532_2426_3602']\n", + "['4076', '_240', '_918', '_2524', '_4070', '_3900', '_625', '_2308', '_1114', '_2433', '_488', '_3671', '_2154', '_3199', '_1825', '_2610', '_1578', '_4008', '_1336', '_1562', '_3092', '_2561', '_428', '_2097', '_1959', '_210', '_1593', '_263', '_3745', '_1003', '_3384', '_1856', '_3662', '_1763', '_3251', '_3127', '_229', '_4061', '_977', '_2264', '_727', '_3498', '_2308', '_3541', '_456', '_609', '_3575', '_890', '_3521', '_109', '_3662', '_600', '_773', '_1566', '_2859', '_1872', '_1212', '_3798', '_2431', '_2632', 
'_117', '_846', '_3452', '_612', '_1336', '_977', '_727', '_3692', '_1046', '_1833', '_1825', '_2404', '_3092', '_1112', '_3712', '_4061', '_1448', '_3138', '_1560', '_447', '_1593', '_1326', '_4032', '_3008', '_387', '_799', '_608', '_1539', '_1543', '_3165', '_3250', '_3030', '_2280', '_3533', '_399', '_2952', '_3271', '_3240', '_4076', '_308', '_2930', '_1723', '_1604_1889', '_3203', '_2308', '_1490', '_3681', '_1889', '_933', '_399', '_1889', '_117', '_3437', '_3567', '_2604', '_1965', '_609', '_4077', '_4061', '_3124', '_1060', '_317', '_1032', '_2673', '_1143', '_7', '_379', '_2', '_3550', '_3093', '_3702', '_1593', '_1450', '_2015', '_1424', '_1140', '_261', '_3893', '_4031', '_3520', '_799', '_628', '_3849', '_2952', '_2796', '_4009', '_1810', '_1684', '_3100', '_1920', '_914', '_610', '_1153', '_4076', '_1958', '_503', '_3271', '_914', '_3353', '_2693', '_2712_944', '_1863', '_1728', '_1161', '_2081', '_3881', '_2712_805', '_3479', '_2782', '_1604_2398', '_1768', '_1225', '_2094', '_4093', '_3218', '_944', '_1219']\n" + ] + } + ], + "source": [ + "for i, item in enumerate(splits.items()):\n", + " print(item[-1])\n", + " if i == 7:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -812,10 +861,417 @@ "{('_2426', '_3602'): '_2426_3602',\n", " ('_1532', '_2426_3602'): '_1532_2426_3602',\n", " ('_2426', '_1568'): '_2426_1568',\n", - " ('_3981', '_2426_1568'): '_3981_2426_1568'}" + " ('_3981', '_2426_1568'): '_3981_2426_1568',\n", + " ('_3981_2426_1568', '_1532_2426_3602'): '_3981_2426_1568_1532_2426_3602',\n", + " ('_3981', '_2609'): '_3981_2609',\n", + " ('_3981', '_1736'): '_3981_1736',\n", + " ('_2712', '_1604'): '_2712_1604',\n", + " ('_3981', '_426'): '_3981_426',\n", + " ('_3981_2609', '_1494'): '_3981_2609_1494',\n", + " ('_2426', '_1710'): '_2426_1710',\n", + " ('_3909', '_3977'): '_3909_3977',\n", + " ('_3422',\n", + " '_3981_2426_1568_1532_2426_3602'): 
'_3422_3981_2426_1568_1532_2426_3602',\n", + " ('_2712', '_1667'): '_2712_1667',\n", + " ('_1736', '_3602'): '_1736_3602',\n", + " ('_2308', '_1604'): '_2308_1604',\n", + " ('_2712', '_139'): '_2712_139',\n", + " ('_2609', '_1295'): '_2609_1295',\n", + " ('_71', '_3602'): '_71_3602',\n", + " ('_2673', '_2673'): '_2673_2673',\n", + " ('_2712', '_933'): '_2712_933',\n", + " ('_2712', '_3671'): '_2712_3671',\n", + " ('_2712', '_1907'): '_2712_1907',\n", + " ('_1234', '_1604'): '_1234_1604',\n", + " ('_2609', '_3602'): '_2609_3602',\n", + " ('_3909_3977', '_2426_1568'): '_3909_3977_2426_1568',\n", + " ('_2609', '_1513'): '_2609_1513',\n", + " ('_1234', '_944'): '_1234_944',\n", + " ('_1568',\n", + " '_3981_2426_1568_1532_2426_3602'): '_1568_3981_2426_1568_1532_2426_3602',\n", + " ('_3981_2609', '_1295'): '_3981_2609_1295',\n", + " ('_3232', '_1604'): '_3232_1604',\n", + " ('_1736', '_2597'): '_1736_2597',\n", + " ('_2712', '_775'): '_2712_775',\n", + " ('_3981_2609_1494', '_1532_2426_3602'): '_3981_2609_1494_1532_2426_3602',\n", + " ('_195', '_1604'): '_195_1604',\n", + " ('_2712', '_944'): '_2712_944',\n", + " ('_2673', '_1604'): '_2673_1604',\n", + " ('_3981_1736', '_3422'): '_3981_1736_3422',\n", + " ('_1046', '_1046'): '_1046_1046',\n", + " ('_752', '_1604'): '_752_1604',\n", + " ('_1532', '_2426_1710'): '_1532_2426_1710',\n", + " ('_2712', '_1575'): '_2712_1575',\n", + " ('_3981', '_2426'): '_3981_2426',\n", + " ('_2673', '_2712'): '_2673_2712',\n", + " ('_2008', '_2008'): '_2008_2008',\n", + " ('_3981_426', '_3909_3977_2426_1568'): '_3981_426_3909_3977_2426_1568',\n", + " ('_1604', '_2673'): '_1604_2673',\n", + " ('_1604', '_399'): '_1604_399',\n", + " ('_1604', '_1669'): '_1604_1669',\n", + " ('_1604', '_2398'): '_1604_2398',\n", + " ('_3651',\n", + " '_3981_2426_1568_1532_2426_3602'): '_3651_3981_2426_1568_1532_2426_3602',\n", + " ('_3981_1736_3422', '_3422'): '_3981_1736_3422_3422',\n", + " ('_3981', '_3562'): '_3981_3562',\n", + " ('_2068', '_1736'): 
'_2068_1736',\n", + " ('_2712', '_3593'): '_2712_3593',\n", + " ('_2337', '_1604'): '_2337_1604',\n", + " ('_1532', '_71_3602'): '_1532_71_3602',\n", + " ('_3981_2609_1295', '_1532_2426_3602'): '_3981_2609_1295_1532_2426_3602',\n", + " ('_1604', '_2782'): '_1604_2782',\n", + " ('_3051', '_1604'): '_3051_1604',\n", + " ('_2609_1295',\n", + " '_1568_3981_2426_1568_1532_2426_3602'): '_2609_1295_1568_3981_2426_1568_1532_2426_3602',\n", + " ('_1669', '_1669'): '_1669_1669',\n", + " ('_4076', '_1604'): '_4076_1604',\n", + " ('_2712', '_3437'): '_2712_3437',\n", + " ('_2337', '_3047'): '_2337_3047',\n", + " ('_2712', '_2862'): '_2712_2862',\n", + " ('_1295', '_2426_3602'): '_1295_2426_3602',\n", + " ('3981', '_426'): '3981_426',\n", + " ('_2609', '_3197'): '_2609_3197',\n", + " ('_2712', '_2610'): '_2712_2610',\n", + " ('_2337', '_308'): '_2337_308',\n", + " ('_2782', '_2782'): '_2782_2782',\n", + " ('_3933', '_2918'): '_3933_2918',\n", + " ('_3981_426_3909_3977_2426_1568',\n", + " '_3422_3981_2426_1568_1532_2426_3602'): '_3981_426_3909_3977_2426_1568_3422_3981_2426_1568_1532_2426_3602',\n", + " ('_2398', '_2398'): '_2398_2398',\n", + " ('_1295', '_2609_3602'): '_1295_2609_3602',\n", + " ('_1234', '_3047'): '_1234_3047',\n", + " ('_3040', '_139'): '_3040_139',\n", + " ('_2308', '_2610'): '_2308_2610',\n", + " ('_1234', '_308'): '_1234_308',\n", + " ('_1083', '_1604'): '_1083_1604',\n", + " ('_3051', '_1667'): '_3051_1667',\n", + " ('_2596', '_1604'): '_2596_1604',\n", + " ('_1604', '_2008'): '_1604_2008',\n", + " ('_2337', '_857'): '_2337_857',\n", + " ('_752', '_2918'): '_752_2918',\n", + " ('_805', '_2462'): '_805_2462',\n", + " ('_2712', '_2562'): '_2712_2562',\n", + " ('_1083', '_3671'): '_1083_3671',\n", + " ('_3981', '_2503'): '_3981_2503',\n", + " ('_660', '_805'): '_660_805',\n", + " ('_3671', '_2673'): '_3671_2673',\n", + " ('_2308', '_944'): '_2308_944',\n", + " ('_2531', '_1604'): '_2531_1604',\n", + " ('_3051', '_933'): '_3051_933',\n", + " 
('_3981_1736_3422_3422', '_2426_1710'): '_3981_1736_3422_3422_2426_1710',\n", + " ('_752', '_2154'): '_752_2154',\n", + " ('_2308', '_775'): '_2308_775',\n", + " ('_3040', '_3671'): '_3040_3671',\n", + " ('_1669', '_2712'): '_1669_2712',\n", + " ('_2398', '_1604'): '_2398_1604',\n", + " ('_399', '_2712'): '_399_2712',\n", + " ('_1083', '_1667'): '_1083_1667',\n", + " ('_1234', '_1130'): '_1234_1130',\n", + " ('_2750', '_2750'): '_2750_2750',\n", + " ('_2596', '_944'): '_2596_944',\n", + " ('_2337', '_944'): '_2337_944',\n", + " ('_2712', '_1130'): '_2712_1130',\n", + " ('_3051', '_805'): '_3051_805',\n", + " ('_2712', '_805'): '_2712_805',\n", + " ('_586', '_1604'): '_586_1604',\n", + " ('_1083', '_1907'): '_1083_1907',\n", + " ('_3040', '_805'): '_3040_805',\n", + " ('_4076', '_3901'): '_4076_3901',\n", + " ('_3396', '_1565'): '_3396_1565',\n", + " ('_4076', '_1684'): '_4076_1684',\n", + " ('_2337', '_3671'): '_2337_3671',\n", + " ('_117', '_1604'): '_117_1604',\n", + " ('_3040', '_339'): '_3040_339',\n", + " ('_2214', '_1604'): '_2214_1604',\n", + " ('_2712', '_2154'): '_2712_2154',\n", + " ('_3981_2609_1494', '_1532_2426_1710'): '_3981_2609_1494_1532_2426_1710',\n", + " ('_2673', '_1234'): '_2673_1234',\n", + " ('_2462', '_2462'): '_2462_2462',\n", + " ('_805', '_2673'): '_805_2673',\n", + " ('_805', '_442'): '_805_442',\n", + " ('_660', '_3671'): '_660_3671',\n", + " ('_204', '_1604'): '_204_1604',\n", + " ('_2337', '_2610'): '_2337_2610',\n", + " ('_1046', '_2673'): '_1046_2673',\n", + " ('_2008', '_1669'): '_2008_1669',\n", + " ('_2531', '_2531'): '_2531_2531',\n", + " ('_2337', '_1684'): '_2337_1684',\n", + " ('_4076', '_933'): '_4076_933',\n", + " ('_2068', '_426'): '_2068_426',\n", + " ('_901', '_2712'): '_901_2712',\n", + " ('_50', '_1604'): '_50_1604',\n", + " ('_4076', '_944'): '_4076_944',\n", + " ('_399', '_399'): '_399_399',\n", + " ('_2308', '_1130'): '_2308_1130',\n", + " ('_3562', '_542'): '_3562_542',\n", + " ('_2918', '_2462'): '_2918_2462',\n", 
+ " ('_3040', '_278'): '_3040_278',\n", + " ('_2712', '_3901'): '_2712_3901',\n", + " ('3981_426', '_3909_3977_2426_1568'): '3981_426_3909_3977_2426_1568',\n", + " ('_3040', '_1604'): '_3040_1604',\n", + " ('_2712', '_1684'): '_2712_1684',\n", + " ('_752', '_3671'): '_752_3671',\n", + " ('_1234', '_1565'): '_1234_1565',\n", + " ('_2308', '_3671'): '_2308_3671',\n", + " ('_3051', '_944'): '_3051_944',\n", + " ('_2386', '_2386'): '_2386_2386',\n", + " ('_2782', '_1604'): '_2782_1604',\n", + " ('_3051', '_1684'): '_3051_1684',\n", + " ('_481', '_3671'): '_481_3671',\n", + " ('_2712', '_461'): '_2712_461',\n", + " ('_1604', '_1046'): '_1604_1046',\n", + " ('_2308', '_278'): '_2308_278',\n", + " ('_1113', '_2426_3602'): '_1113_2426_3602',\n", + " ('_1234', '_775'): '_1234_775',\n", + " ('_2337', '_933'): '_2337_933',\n", + " ('_2337', '_3437'): '_2337_3437',\n", + " ('_1768', '_2712'): '_1768_2712',\n", + " ('_977', '_977'): '_977_977',\n", + " ('_571', '_2712'): '_571_2712',\n", + " ('_2596', '_420'): '_2596_420',\n", + " ('_195', '_3671'): '_195_3671',\n", + " ('_3562', '_1486'): '_3562_1486',\n", + " ('_3396', '_1130'): '_3396_1130',\n", + " ('_3981', '_1430'): '_3981_1430',\n", + " ('_1046', '_2008'): '_1046_2008',\n", + " ('_890', '_2712'): '_890_2712',\n", + " ('_195', '_3593'): '_195_3593',\n", + " ('_1234', '_1684'): '_1234_1684',\n", + " ('_3051', '_139'): '_3051_139',\n", + " ('_3981_2426', '_1494'): '_3981_2426_1494',\n", + " ('_1234', '_3901'): '_1234_3901',\n", + " ('_1604', '_1889'): '_1604_1889',\n", + " ('_2712', '_1161'): '_2712_1161',\n", + " ('_2337', '_1130'): '_2337_1130',\n", + " ('_933', '_2673'): '_933_2673',\n", + " ('_195', '_2610'): '_195_2610',\n", + " ('_2398', '_2712'): '_2398_2712',\n", + " ('_752', '_3437'): '_752_3437',\n", + " ('_752', '_805'): '_752_805',\n", + " ('_3051', '_3593'): '_3051_3593',\n", + " ('_3051', '_3671'): '_3051_3671',\n", + " ('_1046', '_2750'): '_1046_2750',\n", + " ('_2673', '_2596'): '_2673_2596',\n", + " 
('_222', '_1604'): '_222_1604',\n", + " ('_4076', '_775'): '_4076_775',\n", + " ('_1234', '_2154'): '_1234_2154',\n", + " ('_944', '_2673'): '_944_2673',\n", + " ('_2712', '_1960'): '_2712_1960',\n", + " ('_2712', '_278'): '_2712_278',\n", + " ('_2337', '_1161'): '_2337_1161',\n", + " ('_2337', '_775'): '_2337_775',\n", + " ('_1046', '_2386'): '_1046_2386',\n", + " ('_2712', '_857'): '_2712_857',\n", + " ('_2462', '_1604'): '_2462_1604',\n", + " ('_1083', '_1575'): '_1083_1575',\n", + " ('_2712', '_902'): '_2712_902',\n", + " ('_1234', '_3437'): '_1234_3437',\n", + " ('_3040', '_2610'): '_3040_2610',\n", + " ('_1604', '_2386'): '_1604_2386',\n", + " ('_2398', '_399'): '_2398_399',\n", + " ('_2673', '_4076'): '_2673_4076',\n", + " ('_3981_2609_1494', '_1532_71_3602'): '_3981_2609_1494_1532_71_3602',\n", + " ('_3218', '_3218'): '_3218_3218',\n", + " ('_1604', '_2531'): '_1604_2531',\n", + " ('_1838', '_1838'): '_1838_1838',\n", + " ('_2712', '_420'): '_2712_420',\n", + " ('_3199', '_2673'): '_3199_2673',\n", + " ('_1532', '_1736_3602'): '_1532_1736_3602',\n", + " ('_4093', '_2008'): '_4093_2008',\n", + " ('_4076', '_2154'): '_4076_2154',\n", + " ('_3051', '_1907'): '_3051_1907',\n", + " ('_3051', '_2154'): '_3051_2154',\n", + " ('_2712', '_1962'): '_2712_1962',\n", + " ('_2712', '_105'): '_2712_105',\n", + " ('_3981_2426_1494', '_1532_2426_3602'): '_3981_2426_1494_1532_2426_3602',\n", + " ('_263', '_263'): '_263_263',\n", + " ('_3562', '_3449'): '_3562_3449',\n", + " ('_1234', '_2918'): '_1234_2918',\n", + " ('_1768', '_1768'): '_1768_1768',\n", + " ('_1225', '_1604'): '_1225_1604',\n", + " ('_2673', '_3051'): '_2673_3051',\n", + " ('_3562', '_2030'): '_3562_2030',\n", + " ('_1046', '_2596'): '_1046_2596',\n", + " ('_2712', '_3047'): '_2712_3047',\n", + " ('_3232', '_2610'): '_3232_2610',\n", + " ('_2308', '_1161'): '_2308_1161',\n", + " ('_3040', '_3069'): '_3040_3069',\n", + " ('_2596', '_2777'): '_2596_2777',\n", + " ('_2531', '_944'): '_2531_944',\n", + " 
('_2386', '_1604'): '_2386_1604',\n", + " ('_3981', '_1205'): '_3981_1205',\n", + " ('_2386', '_2712'): '_2386_2712',\n", + " ('_3051', '_3798'): '_3051_3798',\n", + " ('_3981_2426', '_3422'): '_3981_2426_3422',\n", + " ('_1234', '_857'): '_1234_857',\n", + " ('_3051', '_1130'): '_3051_1130',\n", + " ('_1046', '_2712'): '_1046_2712',\n", + " ('_1669', '_2008'): '_1669_2008',\n", + " ('_263', '_2673'): '_263_2673',\n", + " ('_2609_1295', '_1295'): '_2609_1295_1295',\n", + " ('_2712', '_3069'): '_2712_3069',\n", + " ('_2308', '_3047'): '_2308_3047',\n", + " ('_752', '_1810'): '_752_1810',\n", + " ('_1234', '_3297'): '_1234_3297',\n", + " ('_2712', '_308'): '_2712_308',\n", + " ('_3051', '_1354'): '_3051_1354',\n", + " ('_1234', '_1161'): '_1234_1161',\n", + " ('_2398', '_2673'): '_2398_2673',\n", + " ('_117', '_2918'): '_117_2918',\n", + " ('_2712', '_1093'): '_2712_1093',\n", + " ('_2712', '_2801'): '_2712_2801',\n", + " ('_2337', '_278'): '_2337_278',\n", + " ('_2462', '_2918'): '_2462_2918',\n", + " ('_2750', '_1046'): '_2750_1046',\n", + " ('_1234', '_933'): '_1234_933',\n", + " ('_775', '_2673'): '_775_2673',\n", + " ('_4076', '_139'): '_4076_139',\n", + " ('_3396', '_2144'): '_3396_2144',\n", + " ('_1855',\n", + " '_3981_426_3909_3977_2426_1568_3422_3981_2426_1568_1532_2426_3602'): '_1855_3981_426_3909_3977_2426_1568_3422_3981_2426_1568_1532_2426_3602',\n", + " ('_3051', '_339'): '_3051_339',\n", + " ('_3452', '_3452'): '_3452_3452',\n", + " ('_263', '_1604'): '_263_1604',\n", + " ('_1604', '_2462'): '_1604_2462',\n", + " ('_427', '_1046'): '_427_1046',\n", + " ('_2596', '_933'): '_2596_933',\n", + " ('_2596', '_3437'): '_2596_3437',\n", + " ('2712', '_3671'): '2712_3671',\n", + " ('_3396', '_339'): '_3396_339',\n", + " ('_2712', '_2133'): '_2712_2133',\n", + " ('_50', '_2782'): '_50_2782',\n", + " ('_3396', '_139'): '_3396_139',\n", + " ('_890', '_890'): '_890_890',\n", + " ('_2308', '_805'): '_2308_805',\n", + " ('_399', '_1604'): '_399_1604',\n", + " 
('_2308', '_3297'): '_2308_3297',\n", + " ('_1527', '_1527'): '_1527_1527',\n", + " ('_2008', '_1046'): '_2008_1046',\n", + " ('_3051', '_3492'): '_3051_3492',\n", + " ('_308', '_2673'): '_308_2673',\n", + " ('_1872', '_1872'): '_1872_1872',\n", + " ('_3396', '_1604'): '_3396_1604',\n", + " ('_2337', '_3297'): '_2337_3297',\n", + " ('_2008', '_2712'): '_2008_2712',\n", + " ('_3197', '_2610'): '_3197_2610',\n", + " ('_2782', '_2673'): '_2782_2673',\n", + " ('_3051', '_3437'): '_3051_3437',\n", + " ('_2068', '_71'): '_2068_71',\n", + " ('_1234', '_805'): '_1234_805',\n", + " ('_727', '_727'): '_727_727',\n", + " ('_1083', '_933'): '_1083_933',\n", + " ('_2386', '_2596'): '_2386_2596',\n", + " ('_196', '_196'): '_196_196',\n", + " ('_2712', '_1565'): '_2712_1565',\n", + " ('_2531', '_2673'): '_2531_2673',\n", + " ('_3981_1736', '_3977'): '_3981_1736_3977',\n", + " ('_3981_1736_3422_3422', '_2426_3602'): '_3981_1736_3422_3422_2426_3602',\n", + " ('_50', '_50'): '_50_50',\n", + " ('_752', '_2419'): '_752_2419',\n", + " ('_1046', '_2531'): '_1046_2531',\n", + " ('_2184', '_1604'): '_2184_1604',\n", + " ('_1669', '_2673'): '_1669_2673',\n", + " ('_3232', '_278'): '_3232_278',\n", + " ('_1046', '_1669'): '_1046_1669',\n", + " ('_3040', '_3593'): '_3040_3593',\n", + " ('_2750', '_2596'): '_2750_2596',\n", + " ('_3051', '_1565'): '_3051_1565',\n", + " ('_3562', '_883'): '_3562_883',\n", + " ('_3039', '_1604'): '_3039_1604',\n", + " ('_3933', '_1685'): '_3933_1685',\n", + " ('_3047', '_2673'): '_3047_2673',\n", + " ('_2750', '_2386'): '_2750_2386',\n", + " ('_2308', '_308'): '_2308_308',\n", + " ('_3437', '_2673'): '_3437_2673',\n", + " ('_2817', '_2462'): '_2817_2462',\n", + " ('_3040', '_3492'): '_3040_3492',\n", + " ('_1234', '_913'): '_1234_913',\n", + " ('_2308', '_3901'): '_2308_3901',\n", + " ('_4076', '_420'): '_4076_420',\n", + " ('_2596', '_1684'): '_2596_1684',\n", + " ('_571', '_571'): '_571_571',\n", + " ('_2308', '_3593'): '_2308_3593',\n", + " ('_3218', 
'_2712'): '_3218_2712',\n", + " ('_2782', '_2712'): '_2782_2712',\n", + " ('_3981_426', '_3124'): '_3981_426_3124',\n", + " ('_660', '_2610'): '_660_2610',\n", + " ('_3199', '_3199'): '_3199_3199',\n", + " ('_752', '_1354'): '_752_1354',\n", + " ('_1810', '_1604'): '_1810_1604',\n", + " ('_3253', '_3253'): '_3253_3253',\n", + " ('_1046', '_222'): '_1046_222',\n", + " ('_1234', '_3671'): '_1234_3671',\n", + " ('_2610', '_2673'): '_2610_2673',\n", + " ('_1507', '_1604'): '_1507_1604',\n", + " ('_1046', '_204'): '_1046_204',\n", + " ('_2070', '_2276'): '_2070_2276',\n", + " ('_2308', '_1565'): '_2308_1565',\n", + " ('_752', '_1130'): '_752_1130',\n", + " ('_2712', '_1875'): '_2712_1875',\n", + " ('_1424', '_1424'): '_1424_1424',\n", + " ('_2712', '_198'): '_2712_198',\n", + " ('_2398', '_1669'): '_2398_1669',\n", + " ('_2596', '_3047'): '_2596_3047',\n", + " ('_399', '_2673'): '_399_2673',\n", + " ('_903', '_903'): '_903_903',\n", + " ('_195', '_139'): '_195_139',\n", + " ('_1046', '_1604'): '_1046_1604',\n", + " ('_2596', '_1565'): '_2596_1565',\n", + " ('_3396', '_1962'): '_3396_1962',\n", + " ('_1046', '_571'): '_1046_571',\n", + " ('_2308', '_3437'): '_2308_3437',\n", + " ('_3981', '_1185'): '_3981_1185',\n", + " ('_2596', '_775'): '_2596_775',\n", + " ('_2609', '_1811'): '_2609_1811',\n", + " ('_2712', '_240'): '_2712_240',\n", + " ('_3452', '_3396'): '_3452_3396',\n", + " ('_2712', '_2094'): '_2712_2094',\n", + " ('_2712', '_1170'): '_2712_1170',\n", + " ('_2712', '_4070'): '_2712_4070',\n", + " ('_2596', '_857'): '_2596_857',\n", + " ('_2712', '_1810'): '_2712_1810',\n", + " ('_3051', '_278'): '_3051_278',\n", + " ('_3981', '_2115'): '_3981_2115',\n", + " ('_2308', '_1962'): '_2308_1962',\n", + " ('_251', '_251'): '_251_251',\n", + " ('_2596', '_3220'): '_2596_3220',\n", + " ('_3051', '_420'): '_3051_420',\n", + " ('_3232', '_3396'): '_3232_3396',\n", + " ('_1046', '_3721'): '_1046_3721',\n", + " ('_1684', '_2673'): '_1684_2673',\n", + " ('_805', '_2782'): 
'_805_2782',\n", + " ('_3324', '_3324'): '_3324_3324',\n", + " ('_1604', '_3039'): '_1604_3039',\n", + " ('_3396', '_3694'): '_3396_3694',\n", + " ('_793', '_2712'): '_793_2712',\n", + " ('_1046', '_1838'): '_1046_1838',\n", + " ('_1604', '_586'): '_1604_586',\n", + " ('_2596', '_2094'): '_2596_2094',\n", + " ('_2712', '_2355'): '_2712_2355',\n", + " ('_3232', '_3437'): '_3232_3437',\n", + " ('_1604', '_890'): '_1604_890',\n", + " ('_2918', '_2782'): '_2918_2782',\n", + " ('_3051', '_3069'): '_3051_3069',\n", + " ('_2610', '_1669'): '_2610_1669',\n", + " ('_1604', '_2750'): '_1604_2750',\n", + " ('_3396', '_1336'): '_3396_1336',\n", + " ('_222', '_3047'): '_222_3047',\n", + " ('_3396', '_3901'): '_3396_3901',\n", + " ('_3396', '_2912'): '_3396_2912',\n", + " ('_2712', '_1354'): '_2712_1354',\n", + " ('_3232', '_944'): '_3232_944',\n", + " ('_3040', '_1907'): '_3040_1907',\n", + " ('_2673', '_399'): '_2673_399',\n", + " ('_3778', '_3778'): '_3778_3778',\n", + " ('_3051', '_3210'): '_3051_3210',\n", + " ('_427', '_427'): '_427_427',\n", + " ('_3396', '_775'): '_3396_775',\n", + " ('_1234', '_2610'): '_1234_2610',\n", + " ('_2609_1811',\n", + " '_3422_3981_2426_1568_1532_2426_3602'): '_2609_1811_3422_3981_2426_1568_1532_2426_3602'}" ] }, - "execution_count": 61, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -826,86 +1282,95 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "for j in range(len(txt)):\n", + " val = add_special_character([txt[j]])\n", + " for pair, merge in merges.items():\n", + " i = 0\n", + " while i < len(val) - 1:\n", + " if val[i] == pair[0] and val[i+1] == pair[1]:\n", + " print(pair)\n", + " print(j)\n", + " print(i)\n", + " break\n", + " i += 1\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 51, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "_2426_3602\n", - 
"_2426_1568\n", - "_3981_2426_1568\n", - "_1532_2426_3602\n" - ] + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for i in vocab:\n", - " if i in merges.values():\n", - " print(i)" + "txt[j][i]" ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "def tokenize(text, merges=merges):\n", " pre_tokenized_text = add_special_character([text])\n", - " print(type(pre_tokenized_text))\n", + " changed = False\n", " splits = pre_tokenized_text #[[l for l in word] for word in pre_tokenized_text]\n", " for pair, merge in merges.items():\n", - " for idx, split in enumerate(splits):\n", - " i = 0\n", - " while i < len(split) - 1:\n", - " if split[i] == pair[0] and split[i + 1] == pair[1]:\n", - " split = split[:i] + [merge] + split[i + 2 :]\n", - " else:\n", - " i += 1\n", - " splits[idx] = split\n", - " print(splits)\n", - " return\n", + " tmp = splits\n", + " i = 0\n", + " \n", + " while i < len(tmp) - 1:\n", + " if tmp[i] == pair[0] and tmp[i + 1] == pair[1]:\n", + " tmp = tmp[:i] + [merge] + tmp[i + 2 :]\n", + " changed = True\n", + " else:\n", + " i += 1\n", + " splits = tmp\n", + " return changed, splits\n", " return sum(splits, [])" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 43, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "['896', '▁2029', '▁935', '▁679', '▁1115', '▁3601', '▁3000', '▁222', '▁3446', '▁2218', '▁3072', '▁550', '▁3652', '▁665', '▁2596', '▁2809', '▁3649', '▁251', '▁2610', '▁2536', '▁47', '▁2852', '▁2940', '▁3353', '▁3400', '▁3336', '▁325', '▁2647', '▁4076', '▁3653', '▁3253', '▁58', '▁3664', '▁1424', '▁1388', '▁222', '▁278', '▁897', '▁447', '▁2355', '▁2453', '▁2531', '▁2712', '▁828', '▁2895', '▁2398', '▁2908', '▁901', '▁2536', '▁222', '▁3686', '▁2620', '▁3254', '▁3962', '▁0', '▁1448', '▁222', '▁863', '▁3593', 
'▁124', '▁124', '▁1048', '▁1593', '▁222', '▁4086', '▁2647', '▁3236', '▁1767', '▁2800', '▁697', '▁514', '▁3648', '▁2337', '▁1338', '▁1114', '▁340', '▁3514', '▁4076', '▁2658', '▁1954', '▁3867', '▁2300', '▁251', '▁317', '▁7', '▁1091', '▁1768', '▁1440', '▁3167', '▁672', '▁1253', '▁188', '▁3544', '▁2934', '▁1368', '▁479', '▁3951', '▁3387', '▁514', '▁2438', '▁1262', '▁3166', '▁462', '▁3530', '▁333', '▁2596', '▁3808', '▁2796', '▁1920', '▁794', '▁263', '▁2626', '▁2596', '▁1949', '▁57', '▁3990', '▁3785', '▁146', '▁404', '▁3731', '▁479', '▁3840', '▁3840', '▁3664', '▁940', '▁2550', '▁4076', '▁544', '▁3465', '▁3232', '▁269', '▁79', '▁2159', '▁3879', '▁1734', '▁3900', '▁755', '▁1756', '▁818', '▁800', '▁1249', '▁171', '▁319', '▁727', '▁171', '▁3698', '▁3683', '▁2596', '▁3969', '▁2431', '▁1838', '▁3969', '▁126', '▁2673', '▁2596', '▁4012', '▁1010', '▁2151', '▁3437', '▁417', '▁2386', '▁2712', '▁3705', '▁1838', '▁3428', '▁1168', '▁1838', '▁1527', '▁3885', '▁1952', '▁2443', '▁3997', '▁3562', '▁1667', '▁3651', '▁3981', '▁2426', '▁1494', '▁1532', '▁2426', '▁3602']\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[43], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfor\u001b[39;00m idx, i \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(txt):\n\u001b[0;32m----> 2\u001b[0m change, out \u001b[39m=\u001b[39m tokenize(i)\n\u001b[1;32m 3\u001b[0m \u001b[39mif\u001b[39;00m change:\n\u001b[1;32m 4\u001b[0m \u001b[39mprint\u001b[39m(out)\n", + "Cell \u001b[0;32mIn[41], line 9\u001b[0m, in \u001b[0;36mtokenize\u001b[0;34m(text, merges)\u001b[0m\n\u001b[1;32m 7\u001b[0m i \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[39mwhile\u001b[39;00m i \u001b[39m<\u001b[39m \u001b[39mlen\u001b[39m(tmp) \u001b[39m-\u001b[39m 
\u001b[39m1\u001b[39m:\n\u001b[0;32m----> 9\u001b[0m \u001b[39mif\u001b[39;00m tmp[i] \u001b[39m==\u001b[39m pair[\u001b[39m0\u001b[39;49m] \u001b[39mand\u001b[39;00m tmp[i \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m] \u001b[39m==\u001b[39m pair[\u001b[39m1\u001b[39m]:\n\u001b[1;32m 10\u001b[0m tmp \u001b[39m=\u001b[39m tmp[:i] \u001b[39m+\u001b[39m [merge] \u001b[39m+\u001b[39m tmp[i \u001b[39m+\u001b[39m \u001b[39m2\u001b[39m :]\n\u001b[1;32m 11\u001b[0m changed \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ - "tokenize(txt[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'896 2029 935 679 1115 3601 3000 222 3446 2218 3072 550 3652 665 2596 2809 3649 251 2610 2536 47 2852 2940 3353 3400 3336 325 2647 4076 3653 3253 58 3664 1424 1388 222 278 897 447 2355 2453 2531 2712 828 2895 2398 2908 901 2536 222 3686 2620 3254 3962 0 1448 222 863 3593 124 124 1048 1593 222 4086 2647 3236 1767 2800 697 514 3648 2337 1338 1114 340 3514 4076 2658 1954 3867 2300 251 317 7 1091 1768 1440 3167 672 1253 188 3544 2934 1368 479 3951 3387 514 2438 1262 3166 462 3530 333 2596 3808 2796 1920 794 263 2626 2596 1949 57 3990 3785 146 404 3731 479 3840 3840 3664 940 2550 4076 544 3465 3232 269 79 2159 3879 1734 3900 755 1756 818 800 1249 171 319 727 171 3698 3683 2596 3969 2431 1838 3969 126 2673 2596 4012 1010 2151 3437 417 2386 2712 3705 1838 3428 1168 1838 1527 3885 1952 2443 3997 3562 1667 3651 3981 2426 1494 1532 2426 3602 1855'" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "txt[0]" + "for idx, i in enumerate(txt):\n", + " change, out = tokenize(i)\n", + " if change:\n", + " print(out)\n", + " print(idx)\n", + " break\n", + " " ] }, {