From ae2935eee6651ce032d88f45d0ee18dac75558f3 Mon Sep 17 00:00:00 2001
From: Mikko Kortelainen
Date: Thu, 12 Oct 2023 10:23:26 +0300
Subject: [PATCH] make maxMinorTokens constructor parametrized (#13)

* make maxMinorTokens constructor parametrized

* constructor args are now final again
---
 .../scala/com/teragrep/functions/dpf_03/TokenAggregator.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/com/teragrep/functions/dpf_03/TokenAggregator.scala b/src/main/scala/com/teragrep/functions/dpf_03/TokenAggregator.scala
index a73207f..5eefa29 100644
--- a/src/main/scala/com/teragrep/functions/dpf_03/TokenAggregator.scala
+++ b/src/main/scala/com/teragrep/functions/dpf_03/TokenAggregator.scala
@@ -56,13 +56,13 @@ import org.apache.spark.unsafe.types.UTF8String
 import java.nio.charset.StandardCharsets
 import scala.reflect.ClassTag
 
-class TokenAggregator(private final val columnName: String) extends Aggregator[Row, TokenBuffer, Set[String]]
+class TokenAggregator(final val columnName: String, final val maxMinorTokens: Long) extends Aggregator[Row, TokenBuffer, Set[String]]
   with Serializable {
 
   var tokenizer: Option[Tokenizer] = None
 
   override def zero(): TokenBuffer = {
-    tokenizer = Some(new Tokenizer(32))
+    tokenizer = Some(new Tokenizer(maxMinorTokens))
     new TokenBuffer()
   }
 