Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt

Use this file to discover all available pages before exploring further.

Dataset

Object-centric interface for Galileo datasets. This class provides an intuitive way to work with Galileo datasets, encapsulating dataset management operations and providing seamless integration with dataset content management.
Examples:
# Create a new dataset locally, then persist
dataset = Dataset(
    name="ml-knowledge-evaluation_3",
    content=[
        {"input": "What is machine learning?", "output": "Machine learning ..."},
        {"input": "How does deep learning work?", "output": "Deep learning uses ..."}
    ]
).create()

# Get an existing dataset
dataset = Dataset.get(name="geography-questions")

# List all datasets
datasets = Dataset.list(limit=50)

# Get dataset content
content = dataset.get_content()

# Add rows to dataset
dataset.add_rows([
    {"input": "Australia", "output": "Oceania"},
    {"input": "Egypt", "output": "Africa"},
])

# Get dataset versions
versions = dataset.get_versions()

# Delete dataset
dataset.delete()

add_rows

def add_rows(self, rows: list[dict[str, Any]]) -> Dataset
Add rows to this dataset (active mutation). This method performs an API call and atomically updates the state.
Arguments:
  • rows (list[dict[str, Any]]): The rows to add to the dataset.

create

def create(self) -> Dataset
Persist this dataset to the API.
Examples:
dataset = Dataset(name="test", content=[...]).create()
assert dataset.is_synced()

delete

def delete(self) -> None
Delete this dataset.
Examples:
dataset = Dataset.get(name="my-dataset")
dataset.delete()

extend

def extend(self,
           *,
           prompt: str | None=None,
           instructions: str | None=None,
           examples: list[str] | None=None,
           count: int=10,
           data_types: list[str] | None=None,
           prompt_settings: dict[str, Any] | None=None) -> list[DatasetRow]
Extend this dataset with synthetically generated data.
Arguments:
  • prompt (Optional[str]): A description of the assistant’s role.
  • instructions (Optional[str]): Instructions for the assistant.
  • examples (Optional[list[str]]): Examples of user prompts.
  • count (int): The number of synthetic examples to generate.
  • data_types (Optional[list[str]]): The types of data to generate.
  • prompt_settings (Optional[dict[str, Any]]): Settings for the prompt generation.

generate

def generate(cls,
             *,
             prompt: str | None=None,
             instructions: str | None=None,
             examples: list[str] | None=None,
             count: int=10,
             data_types: list[str] | None=None,
             prompt_settings: dict[str, Any] | None=None) -> list[DatasetRow]
Generate synthetic dataset rows.
Arguments:
  • prompt (Optional[str]): A description of the assistant’s role.
  • instructions (Optional[str]): Instructions for the assistant.
  • examples (Optional[list[str]]): Examples of user prompts.
  • count (int): The number of synthetic examples to generate.
  • data_types (Optional[list[str]]): The types of data to generate.
  • prompt_settings (Optional[dict[str, Any]]): Settings for the prompt generation.

get

def get(cls, *, id: str | None=None, name: str | None=None) -> Dataset | None
Get an existing dataset by ID or name.
Arguments:
  • id (Optional[str]): The dataset ID.
  • name (Optional[str]): The dataset name.

get_content

def get_content(self) -> DatasetContent | None
Get the content of this dataset.
Examples:
dataset = Dataset.get(name="my-dataset")
content = dataset.get_content()

get_version_content

def get_version_content(self, *, index: int) -> DatasetVersionContent
Get the content of a specific version of this dataset.
Arguments:
  • index (int): The 1-based version index to retrieve. Must be >= 1.

get_versions

def get_versions(self) -> ListDatasetVersionResponse
Get a list of versions for this dataset.
Examples:
dataset = Dataset.get(name="my-dataset")
versions = dataset.get_versions()
for v in versions.versions:
    print(v.version_index, v.num_rows)

list

def list(cls,
         *,
         limit: Unset | int=100,
         project_id: str | None=None,
         project_name: str | None=None) -> list[Dataset]
List all available datasets, optionally filtered by project.
Arguments:
  • limit (Union[Unset, int]): Maximum number of datasets to return.
  • project_id (Optional[str]): Only return datasets used in the project with this ID.
  • project_name (Optional[str]): Only return datasets used in the project with this name.

refresh

def refresh(self) -> None
Refresh this dataset’s state from the API. Updates all attributes with the latest values from the remote API and sets the state to SYNCED.
Examples:
dataset.refresh()
assert dataset.is_synced()

save

def save(self) -> Dataset
Save changes to this dataset. Persists any local changes (name) to the remote API. If the dataset is LOCAL_ONLY, delegates to create(). If SYNCED, returns immediately as a no-op. Raises ValueError for DELETED or FAILED_SYNC states.
Examples:
dataset = Dataset.get(name="my-dataset")
dataset.name = "renamed-dataset"
dataset.save()
assert dataset.is_synced()