# CITATION.cff

# Citation metadata for the dataset, in Citation File Format (CFF) 1.2.0.
cff-version: 1.2.0
message: "If you use this dataset, please cite it using the following metadata."
title: "The SemEval 2025 LLMs4Subjects Shared Task Dataset"
version: "1.0.0"
# TODO: add the dataset DOI once it is available, e.g.:
# doi: "10.1234/dataset-doi"
type: dataset
authors:
  - family-names: "D'Souza"
    given-names: "Jennifer"
    orcid: "https://orcid.org/0000-0002-6616-9509"
  - family-names: "Sadruddin"
    given-names: "Sameer"
  - family-names: "Israel"
    given-names: "Holger"
  - family-names: "Begoin"
    given-names: "Mathias"
date-released: "2024-10-03"
url: "https://github.com/jd-coderepos/llms4subjects/"  # GitHub repository URL
keywords:
  - "subject indexing"
  - "large language models"
  - "digital libraries"
license: "CC-BY-4.0"
repository-code: "https://github.com/jd-coderepos/llms4subjects/"
# TODO: add the archived dataset location if it is stored in a repository
# like Zenodo, e.g.:
# repository-artifact: "https://doi.org/10.5281/zenodo.1234567"
# Folded scalar (>-): the lines below are joined with single spaces into one
# string, with no trailing newline.
abstract: >-
  To support the development of systems for the LLMs4Subjects shared task,
  we provide participants with two types of datasets: 1) a curated,
  human-readable form of the GND subjects taxonomy, and 2) a large-scale
  dataset of technical records from TIB’s open-access collection, annotated
  with GND subjects, available in both English and German.