Documentation Index
Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
Use this file to discover all available pages before exploring further.
Dataset
Object-centric interface for Galileo datasets.
This class provides an intuitive way to work with Galileo datasets,
encapsulating dataset management operations and providing seamless
integration with dataset content management.
Examples
# Create a new dataset locally, then persist
dataset = Dataset(
    name="ml-knowledge-evaluation_3",
    content=[
        {"input": "What is machine learning?", "output": "Machine learning ..."},
        {"input": "How does deep learning work?", "output": "Deep learning uses ..."}
    ]
).create()
# Get an existing dataset
dataset = Dataset.get(name="geography-questions")
# List all datasets
datasets = Dataset.list(limit=50)
# Get dataset content
content = dataset.get_content()
# Add rows to dataset
dataset.add_rows([
    {"input": "Australia", "output": "Oceania"},
    {"input": "Egypt", "output": "Africa"},
])
# Get dataset versions
versions = dataset.get_versions()
# Delete dataset
dataset.delete()
add_rows
def add_rows(self, rows: list[dict[str, Any]]) -> Dataset
Add rows to this dataset (active mutation).
This method performs an API call and atomically updates the state.
Arguments
rows (list[dict[str, Any]]): The rows to add to the dataset.
create
def create(self) -> Dataset
Persist this dataset to the API.
Examples
dataset = Dataset(name="test", content=[...]).create()
assert dataset.is_synced()
delete
Delete this dataset.
Examples
dataset = Dataset.get(name="my-dataset")
dataset.delete()
extend
def extend(self,
*,
prompt: str | None=None,
instructions: str | None=None,
examples: list[str] | None=None,
count: int=10,
data_types: list[str] | None=None,
prompt_settings: dict[str, Any] | None=None) -> list[DatasetRow]
Extend this dataset with synthetically generated data.
Arguments
prompt (Optional[str]): A description of the assistant’s role.
instructions (Optional[str]): Instructions for the assistant.
examples (Optional[list[str]]): Examples of user prompts.
count (int): The number of synthetic examples to generate.
data_types (Optional[list[str]]): The types of data to generate.
prompt_settings (Optional[dict[str, Any]]): Settings for the prompt generation.
generate
def generate(cls,
*,
prompt: str | None=None,
instructions: str | None=None,
examples: list[str] | None=None,
count: int=10,
data_types: list[str] | None=None,
prompt_settings: dict[str, Any] | None=None) -> list[DatasetRow]
Generate synthetic dataset rows.
Arguments
prompt (Optional[str]): A description of the assistant’s role.
instructions (Optional[str]): Instructions for the assistant.
examples (Optional[list[str]]): Examples of user prompts.
count (int): The number of synthetic examples to generate.
data_types (Optional[list[str]]): The types of data to generate.
prompt_settings (Optional[dict[str, Any]]): Settings for the prompt generation.
get
def get(cls, *, id: str | None=None, name: str | None=None) -> Dataset | None
Get an existing dataset by ID or name.
Arguments
id (Optional[str]): The dataset ID.
name (Optional[str]): The dataset name.
get_content
def get_content(self) -> DatasetContent | None
Get the content of this dataset.
Examples
dataset = Dataset.get(name="my-dataset")
content = dataset.get_content()
get_version_content
def get_version_content(self, *, index: int) -> DatasetVersionContent
Get the content of a specific version of this dataset.
Arguments
index (int): The 1-based version index to retrieve. Must be >= 1.
get_versions
def get_versions(self) -> ListDatasetVersionResponse
Get a list of versions for this dataset.
Examples
dataset = Dataset.get(name="my-dataset")
versions = dataset.get_versions()
for v in versions.versions:
    print(v.version_index, v.num_rows)
list
def list(cls,
*,
limit: Unset | int=100,
project_id: str | None=None,
project_name: str | None=None) -> list[Dataset]
List all available datasets, optionally filtered by project.
Arguments
limit (Union[Unset, int]): Maximum number of datasets to return.
project_id (Optional[str]): Return only datasets used in the project with this ID.
project_name (Optional[str]): Return only datasets used in the project with this name.
refresh
def refresh(self) -> None
Refresh this dataset’s state from the API.
Updates all attributes with the latest values from the remote API
and sets the state to SYNCED.
Examples
dataset.refresh()
assert dataset.is_synced()
save
def save(self) -> Dataset
Save changes to this dataset.
Persists any local changes (name) to the remote API. If the dataset is
LOCAL_ONLY, delegates to create(). If SYNCED, returns immediately as a
no-op. Raises ValueError for DELETED or FAILED_SYNC states.
Examples
dataset = Dataset.get(name="my-dataset")
dataset.name = "renamed-dataset"
dataset.save()
assert dataset.is_synced()