Documentation Index
Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
Use this file to discover all available pages before exploring further.
BuiltInMetrics
Provides convenient access to built-in Galileo metrics (formerly “scorers”).
Examples
from galileo.metric import Metric
# Access built-in metrics
Metric.metrics.correctness
Metric.metrics.completeness
Metric.metrics.toxicity
Metric
Base class for all Galileo metrics.
This is an abstract base class that defines common attributes and methods
for all metric types. Use one of the concrete metric classes instead:
- GalileoMetric: Built-in Galileo scorers (access via Metric.metrics)
- LlmMetric: Custom LLM-based metrics with prompt templates
- LocalMetric: Local function-based metrics
- CodeMetric: Code-based metrics (future support)
Common Attributes
id (str | None): The unique metric identifier (UUID).
name (str): The metric name.
scorer_type (ScorerTypes | None): The type of scorer.
description (str): Description of the metric.
tags (list[str]): Tags associated with the metric.
created_at (datetime | None): When the metric was created.
updated_at (datetime | None): When the metric was last updated.
version (int | None): Metric version number.
Class Attributes
metrics (BuiltInMetrics): Access built-in Galileo metrics.
Examples
# 1. Use built-in Galileo scorers
from galileo import Metric, GalileoMetric, LlmMetric, LocalMetric, LogStream
log_stream = LogStream.get(name="my-stream", project_name="my-project")
log_stream.set_metrics([
Metric.metrics.correctness,
Metric.metrics.completeness,
])
# 2. Create custom LLM metric
llm_metric = LlmMetric(
name="response_quality",
prompt="Rate the quality...",
model="gpt-4o-mini",
judges=3,
).create()
# 3. Create local function-based metric
def my_scorer(trace_or_span):
return 0.5
local_metric = LocalMetric(
name="response_length",
scorer_fn=my_scorer,
)
delete
Delete this metric.
Only works for server-side metrics. Local metrics don’t need deletion.
Examples
metric = Metric.get(name="factuality-checker")
metric.delete()
delete_by_name
def delete_by_name(cls, name: str) -> None
Delete a metric by name without retrieving it first.
This is more efficient than calling Metric.get(name=...).delete()
when you only need to delete and don’t need the metric object.
Arguments
name: The name of the metric to delete.
get
def get(cls, *, id: str | None=None, name: str | None=None) -> Metric | None
Get an existing metric by ID or name.
Returns the appropriate subclass instance based on scorer_type.
Arguments
id: The metric ID (UUID).
name: The metric name.
list
def list(cls,
*,
name_filter: str | None=None,
scorer_types: list[ScorerTypes] | None=None) -> builtins.list[Metric]
List metrics with optional filtering.
Returns appropriate subclass instances based on scorer_type.
Arguments
name_filter: Filter metrics by exact name match.
scorer_types: Filter by scorer types.
refresh
def refresh(self) -> None
Refresh this metric’s state from the API.
Updates all attributes with the latest values from the remote API.
Examples
metric.refresh()
assert metric.is_synced()
to_legacy_metric
def to_legacy_metric(self) -> LegacyMetric
Convert to legacy galileo.schema.metrics.Metric format.
This enables backward compatibility with existing code that uses
the legacy Metric class.
Examples
metric = Metric.get(name="my-metric")
legacy = metric.to_legacy_metric()
# Use with existing APIs
update
def update(self, **kwargs: Any) -> Metric
Update this metric’s properties on the API.
Only name, description, and tags can be updated via this method.
On success the instance is updated with the API response and returned in SYNCED state.
Arguments
**kwargs (Any): Fields to update. Supported keys: name, description, tags.
Examples
metric = Metric.get(name="factuality-checker")
metric.update(name="new-name", description="Updated description")
assert metric.is_synced()
LlmMetric
LLM-based metric with custom prompt templates.
This metric type allows you to create custom metrics evaluated by an LLM
judge using a prompt template.
Arguments
Configuration
-------------
Default values for model and judges can be configured via:
- Configuration.default_scorer_model (env: GALILEO_DEFAULT_SCORER_MODEL)
- Configuration.default_scorer_judges (env: GALILEO_DEFAULT_SCORER_JUDGES)
Examples
# Create custom LLM metric with string model name
metric = LlmMetric(
name="response_quality",
prompt='''
Rate the quality of this response on a scale of 1-10.
Question: {input}
Answer: {output}
Return only the numerical score (1-10).
''',
model="gpt-4o-mini", # String model name
judges=3,
node_level=StepType.llm,
description="Rates response quality",
tags=["quality", "custom"],
output_type=OutputTypeEnum.PERCENTAGE,
cot_enabled=True,
).create()
# Or use a Model object from Integration
from galileo.integration import Integration
gpt_model = Integration.openai.get_model(alias="gpt-4o-mini")
metric = LlmMetric(
name="response_quality",
prompt="Rate quality 1-10: {input} -> {output}",
model=gpt_model, # Model object
judges=3,
).create()
create
def create(self) -> LlmMetric
Persist this LLM metric to the API.
Examples
metric = LlmMetric(
name="quality_check",
prompt="Rate the quality...",
model="gpt-4o-mini"
).create()
assert metric.is_synced()
CodeMetric
Code-based metric.
This metric type is for code-based scorers that execute custom code
to evaluate traces/spans.
Examples
# Get existing code metric
metric = Metric.get(name="my-code-metric")
assert isinstance(metric, CodeMetric)
# Create code metric with inline code
metric = CodeMetric(
name="custom_code_scorer",
code="def scorer_fn(step_object):\n    return 1.0",
description="Custom code-based scorer",
tags=["custom", "code"],
node_level=StepType.llm,
).create()
# Load code from file
metric = CodeMetric(
name="custom_code_scorer",
node_level=StepType.llm,
).load_code("./scorers/my_scorer.py").create()
create
def create(self) -> CodeMetric
Persist this Code metric to the API.
This method validates the code first by submitting it to the validation
endpoint, polling for the result, and then creating the scorer with the
validated result.
Examples
# Create with inline code
metric = CodeMetric(
name="custom_code_scorer",
code="def scorer_fn(step_object):\n    return 1.0",
node_level=StepType.llm,
).create()
assert metric.is_synced()
# Create by loading from file
metric = CodeMetric(
name="custom_code_scorer",
node_level=StepType.llm,
).load_code("./scorers/my_scorer.py").create()
assert metric.is_synced()
load_code
def load_code(self, code_file_path: str) -> CodeMetric
Load code from a file into this metric instance.
Arguments
code_file_path: Path to the Python file containing the scorer code.
GalileoMetric
Built-in Galileo scorer metric.
This metric type represents Galileo’s built-in scorers like correctness,
completeness, toxicity, etc. Access these via Metric.metrics.
Examples
# Access built-in scorers
from galileo import Metric, LogStream
log_stream = LogStream.get(name="my-stream", project_name="my-project")
log_stream.set_metrics([
Metric.metrics.correctness,
Metric.metrics.completeness,
Metric.metrics.toxicity,
])
# Or get by name
metric = Metric.get(name="correctness")
assert isinstance(metric, GalileoMetric)
LocalMetric
Local function-based metric.
This metric type uses a Python function to score traces/spans locally
without making API calls. Useful for simple, deterministic metrics.
Examples
# Create local function-based metric
def response_length_scorer(trace_or_span):
if hasattr(trace_or_span, "output") and trace_or_span.output:
return min(len(trace_or_span.output) / 100.0, 1.0)
return 0.0
local_metric = LocalMetric(
name="response_length",
scorer_fn=response_length_scorer,
scorable_types=[StepType.llm],
aggregatable_types=[StepType.trace],
)
# Use with log stream
log_stream.set_metrics([local_metric])
to_local_metric_config
def to_local_metric_config(self) -> LocalMetricConfig
Convert to LocalMetricConfig format.
Examples
def my_scorer(trace):
return 0.5
metric = LocalMetric(name="test", scorer_fn=my_scorer)
config = metric.to_local_metric_config()