Skip to content

Commit

Permalink
documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
sbordt committed Apr 4, 2024
1 parent e01e467 commit 18c345d
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 69 deletions.
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import sys

sys.path.insert(0, os.path.abspath("../tabmemcheck/"))
sys.path.insert(0, os.path.abspath("../tabmemcheck/datasets"))

import tabmemcheck

Expand Down
27 changes: 17 additions & 10 deletions docs/tabmemcheck.rst
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
Documentation
=============

Main Package Documentation
------------------------------------------
This is the documentation for the tabmemcheck package.

Tests for tabular datasets (based on csv files)
-----------------------------------------------

.. automodule:: tabmemcheck
:members:
:undoc-members:
:members: run_all_tests, header_test, feature_names_test, row_completion_test, feature_completion_test, first_token_test, sample
:show-inheritance:

Datasets
---------------
Dataset loading (original, perturbed, task, statistical)
--------------------------------------------------------

.. automodule:: tabmemcheck.datasets
:members:
:undoc-members:
:members: load_dataset, load_iris, load_wine, load_adult, load_housing, load_openml_diabetes
:show-inheritance:


LLM Interface
----------------------

.. automodule:: tabmemcheck
:members: LLM_Interface, openai_setup, send_chat_completion, send_completion, set_logging_task, read_chatlog
:show-inheritance:

Analysis
------------------------

.. automodule:: tabmemcheck.analysis
:members:
:undoc-members:
:show-inheritance:


Utilities
------------------------

.. automodule:: tabmemcheck.utils
:members:
:undoc-members:
:show-inheritance:


8 changes: 4 additions & 4 deletions tabmemcheck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from .version import __version__


def load_default_system_prompts():
def __load_default_system_prompts():
"""Load the default system prompts from the resources folder."""
import importlib.resources as resources
import yaml
Expand All @@ -40,7 +40,7 @@ def load_default_system_prompts():


# global config object for the module
class DotDict(dict):
class __DotDict(dict):
def __getattr__(self, attr):
return self.get(attr)

Expand All @@ -52,10 +52,10 @@ def __delattr__(self, key):
del self[key]


config = DotDict({})
config = __DotDict({})

# default system prompts from yaml file
config.system_prompts = load_default_system_prompts()
config.system_prompts = __load_default_system_prompts()

# default llm options
config.temperature = 0
Expand Down
2 changes: 1 addition & 1 deletion tabmemcheck/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def conditional_completion_analysis(csv_file, completions_df):


def levenshtein_distance_t_test(x, y, z, alternative="two-sided", return_dist=False):
"""Test whether |x-y| < |x-z| in Levenshtein distance using a t-test.
"""Test whether x is closer to y than z in Levenshtein distance using a t-test.
x must be a list of strings.
y and z can be either a list of strings or a list of lists of strings.
Expand Down
4 changes: 2 additions & 2 deletions tabmemcheck/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from .load import (
load_dataset,
load_iris,
# load_titanic,
load_openml_diabetes,
load_wine,
load_adult,
load_housing,
load_openml_diabetes,
)
28 changes: 20 additions & 8 deletions tabmemcheck/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,24 @@ def report_feature_variation(df_original, df_variation):
def load_dataset(
csv_file: str,
yaml_config: str = None,
transform=DATASET_PLAIN,
transform: str = DATASET_PLAIN,
permute_columns=False, # for perturbed transform
print_stats=False,
seed=None,
):
"""Generic dataset loading function. Dataset transformations are specified in a yaml configuration file."""
"""Load a dataset from a CSV file and apply transformations as specified in a YAML configuration file.
Args:
csv_file (str): The path to the CSV file.
yaml_config (str, optional): The path to the YAML configuration file. Defaults to None.
transform (str, optional): The type of transformation to apply ('original', 'perturbed', 'task', 'statistical').
permute_columns (bool, optional): Whether to permute the columns in the perturbed version. Defaults to False.
print_stats (bool, optional): Whether to print statistics about the transformation. Defaults to False.
seed (optional): The seed for the numpy random number generator. Defaults to None.
Returns:
pandas.DataFrame: The transformed dataset.
"""
__validate_inputs(transform)
rng = np.random.default_rng(seed=seed)

Expand Down Expand Up @@ -311,25 +323,25 @@ def load_dataset(


def load_iris(csv_file: str = "iris.csv", *args, **kwargs):
    """Load the Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris).

    Thin wrapper around ``load_dataset`` that pairs ``csv_file`` with the
    ``iris.yaml`` configuration. Any additional positional and keyword
    arguments are forwarded to ``load_dataset`` unchanged.
    """
    yaml_config = "iris.yaml"
    return load_dataset(csv_file, yaml_config, *args, **kwargs)


def load_wine(csv_file: str = "iris.csv", *args, **kwargs):
    """Load the UCI Wine dataset (https://archive.ics.uci.edu/dataset/109/wine).

    Thin wrapper around ``load_dataset`` that pairs ``csv_file`` with the
    ``wine.yaml`` configuration. Any additional positional and keyword
    arguments are forwarded to ``load_dataset`` unchanged.
    """
    # NOTE(review): the default csv_file is "iris.csv", the same default as
    # load_iris. This looks like a copy-paste slip; confirm the intended wine
    # CSV file name before relying on the default.
    yaml_config = "wine.yaml"
    return load_dataset(csv_file, yaml_config, *args, **kwargs)


def load_adult(csv_file: str = "adult-train.csv", *args, **kwargs):
    """Load the Adult Income dataset (http://www.cs.toronto.edu/~delve/data/adult/adultDetail.html).

    Thin wrapper around ``load_dataset`` that pairs ``csv_file`` with the
    ``adult.yaml`` configuration. Any additional positional and keyword
    arguments are forwarded to ``load_dataset`` unchanged.
    """
    yaml_config = "adult.yaml"
    return load_dataset(csv_file, yaml_config, *args, **kwargs)


def load_housing(csv_file: str = "california-housing.csv", *args, **kwargs):
    """Load the California Housing dataset (https://inria.github.io/scikit-learn-mooc/python_scripts/datasets_california_housing.html).

    Thin wrapper around ``load_dataset`` that pairs ``csv_file`` with the
    ``housing.yaml`` configuration. Any additional positional and keyword
    arguments are forwarded to ``load_dataset`` unchanged.
    """
    yaml_config = "housing.yaml"
    return load_dataset(csv_file, yaml_config, *args, **kwargs)


def load_openml_diabetes(csv_file: str = "openml-diabetes.csv", *args, **kwargs):
    """Load the OpenML Diabetes dataset (https://www.openml.org/d/37).

    Thin wrapper around ``load_dataset`` that pairs ``csv_file`` with the
    ``openml-diabetes.yaml`` configuration. Any additional positional and
    keyword arguments are forwarded to ``load_dataset`` unchanged.
    """
    # Forward the caller's csv_file instead of a hard-coded file name, so that
    # a non-default path is actually honoured (the default is unchanged).
    return load_dataset(csv_file, "openml-diabetes.yaml", *args, **kwargs)
111 changes: 70 additions & 41 deletions tabmemcheck/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,15 @@ def run_all_tests(
csv_file: str,
llm: Union[LLM_Interface, str],
few_shot_csv_files=DEFAULT_FEW_SHOT_CSV_FILES,
feature_name=None,
unique_feature: str = None,
):
"""Run different tests for memorization and prior experience with the content of the csv file.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param few_shot_csv_files: A list of other csv files to be used as few-shot examples.
:param unique_feature: The name of the feature to be used for the feature completion test.
"""
llm = __llm_setup(llm)
few_shot_csv_files = __validate_few_shot_files(csv_file, few_shot_csv_files)
__print_info(csv_file, llm, few_shot_csv_files)
Expand Down Expand Up @@ -138,7 +145,7 @@ def run_all_tests(
tabmem.config.temperature = temp

row_completion_test(csv_file, llm, num_queries=25)
feature_completion_test(csv_file, llm, num_queries=25, feature_name=feature_name)
feature_completion_test(csv_file, llm, num_queries=25, feature_name=unique_feature)
first_token_test(csv_file, llm, num_queries=25)


Expand All @@ -154,17 +161,15 @@ def feature_names_test(
few_shot_csv_files=DEFAULT_FEW_SHOT_CSV_FILES,
system_prompt: str = "default",
):
"""Test if the model knows the names of the features.
"""Test if the model knows the names of the features in a csv file.
The prompt format is:
System: <system_prompt>
User: Dataset: <dataset_name>
Feature 1, Feature 2, ..., Feature n
Response: Feature n+1, Feature n+2, ..., Feature m
This can be modified in the following ways:
- Include few-shot examples from other csv files.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param num_prefix_features: The number of features given to the model as part of the prompt (defaults to 1/4 of the features).
:param few_shot_csv_files: A list of other csv files to be used as few-shot examples.
:param system_prompt: The system prompt to be used.
"""

llm = __llm_setup(llm)
few_shot_csv_files = __validate_few_shot_files(csv_file, few_shot_csv_files)

Expand Down Expand Up @@ -260,10 +265,6 @@ def feature_names_test(
+ response
)

# TODO do some sort of evaluation
# for example, return true if it completes all but X of the feature names, correcting for upper/lower case
# at least do formatted printing of the results


####################################################################################
# Feature Values
Expand All @@ -284,12 +285,16 @@ def header_test(
system_prompt: str = "default",
verbose: bool = True,
):
"""Header test, using other csv files as few-shot examples.
"""Header test for memorization.
Splits the csv file at random positions in rows 2, 4, 6, and 8. Performs 1 query for each split. Reports the best completion.
We split the csv file at random positions in rows split_rows and perform 1 query for each split. Then we compare the best completion with the actual header.
NOTE: This test might fail if the header and rows of the csv file are very long, and the model has a small context window.
NOTE: in the end, this is the case for all of our tests :)
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param split_rows: The rows at which the csv file is split for the test.
:param completion_length: The length of the completions in the few-shot examples (reduce for LLMs with small context windows).
:param few_shot_csv_files: A list of other csv files to be used as few-shot examples.
:param system_prompt: The system prompt to be used.
"""
llm = __llm_setup(llm)
few_shot_csv_files = __validate_few_shot_files(csv_file, few_shot_csv_files)
Expand Down Expand Up @@ -372,9 +377,6 @@ def header_test(

return header_prompt, header_completion, llm_completion

# TODO return true if it completes the given row, as well as the next row.
# TODO count the number of correctly completed rows and print this number


####################################################################################
# Row Completion
Expand All @@ -385,12 +387,21 @@ def row_completion_test(
csv_file: str,
llm: Union[LLM_Interface, str],
num_prefix_rows=10,
num_queries=50,
num_queries=25,
few_shot=7,
out_file=None,
system_prompt: str = "default",
):
"""Row completion test: Complete the next row of the csv file, given the previous rows."""
"""Row completion test for memorization. The test reports the number of correctly completed rows.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param num_prefix_rows: The number of rows given to the model as part of the prompt.
:param num_queries: The number of rows that we test the model on.
:param few_shot: The number of few-shot examples to be used.
:param out_file: Optionally save all queries and responses to a csv file.
:param system_prompt: The system prompt to be used.
"""
llm = __llm_setup(llm)

if system_prompt == "default": # default system prompt?
Expand Down Expand Up @@ -437,7 +448,7 @@ def row_completion_test(
if test_suffix.strip() in response.strip():
num_exact_matches += 1

# the statistical test using the levenshtein distance TODO taken out of current version although it works
# the statistical test using the levenshtein distance. taken out of current version although it seems to work in practice.
# test_prefix_rows = [prefix.split("\n") for prefix in test_prefixes]
# test_result = analysis.levenshtein_distance_t_test(
# responses, test_suffixes, test_prefix_rows
Expand Down Expand Up @@ -467,21 +478,20 @@ def feature_completion_test(
csv_file: str,
llm: Union[LLM_Interface, str],
feature_name: str = None,
num_queries=100,
num_queries=25,
few_shot=5,
out_file=None,
system_prompt: str = "default",
):
"""Feature completion test where we attempt to predict a single rare feature & count the number of exact matches.
The basic prompt format is the following:
System: <system_prompt>
User: Feature 1 = value 1, Feature 2 = value 2, ..., Feature n = value n
Response: Feature {feature_name} = value
This can be modified in the following ways:
- Include few-shot examples from other csv files.
- Don't use the feature names, but only the values.
"""Feature completion test for memorization. The test reports the number of correctly completed features.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param feature_name: The name of the feature to be used for the test.
:param num_queries: The number of feature values that we test the model on.
:param few_shot: The number of few-shot examples to be used.
:param out_file: Optionally save all queries and responses to a csv file.
:param system_prompt: The system prompt to be used.
"""
llm = __llm_setup(llm)

Expand Down Expand Up @@ -558,12 +568,23 @@ def first_token_test(
csv_file: str,
llm: Union[LLM_Interface, str],
num_prefix_rows=10,
num_queries=100,
num_queries=25,
few_shot=7,
out_file=None,
system_prompt: str = "default",
):
"""First token test: Complete the first token of the next row of the csv file, given the previous rows."""
"""First token test for memorization. We ask the model to complete the first token of the next row of the csv file, given the previous rows. The test reports the number of correctly completed tokens.
Note that the ''first token'' is not actually the first token produced by the llm, but consists of the first n digits of the row. The number of digits is determined by the function build_first_token.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param num_prefix_rows: The number of rows given to the model as part of the prompt.
:param num_queries: The number of rows that we test the model on.
:param few_shot: The number of few-shot examples to be used.
:param out_file: Optionally save all queries and responses to a csv file.
:param system_prompt: The system prompt to be used.
"""
llm = __llm_setup(llm)

if (
Expand Down Expand Up @@ -654,7 +675,7 @@ def first_token_test(


####################################################################################
# Zero-Knowledge Sampling
# Sampling
####################################################################################


Expand All @@ -680,7 +701,15 @@ def sample(
out_file=None,
system_prompt: str = "default",
):
"""zero-shot sampling from the csv file, using few-shot examples from other csv files."""
"""Ask the model to provide random samples from the csv file.
:param csv_file: The path to the csv file.
:param llm: The language model to be tested.
:param num_queries: The desired number of samples.
:param few_shot_csv_files: A list of other csv files to be used as few-shot examples.
:param out_file: Optionally save all queries and responses to a csv file.
:param system_prompt: The system prompt to be used.
"""
llm = __llm_setup(llm)
few_shot_csv_files = __validate_few_shot_files(csv_file, few_shot_csv_files)

Expand All @@ -703,7 +732,7 @@ def sample(
)

if len(cond_feature_names) > 0:
pass
raise NotImplementedError("Conditional sampling not yet supported.")
# TODO handle the conditional case!

# parse the model responses in a dataframe
Expand Down
2 changes: 1 addition & 1 deletion tabmemcheck/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

@dataclass
class LLM_Interface:
"""The interface to the language model."""
"""Generic interface to a language model."""

# if true, the tests use the chat_completion function, otherwise the completion function
chat_mode = False
Expand Down
Loading

0 comments on commit 18c345d

Please sign in to comment.