> ## Documentation Index
> Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Action Advancement

> Understand how to measure and optimize the effectiveness of your AI agent's actions

export const BooleanClassificationReport = ({report, negativeLabel = "Not Advanced", positiveLabel = "Advanced", negativeClass = "False", positiveClass = "True", maxWidth = 520}) => {
  const parseReport = reportStr => {
    const lines = reportStr.trim().split('\n').filter(line => line.trim());
    const result = {
      classes: [],
      accuracy: null,
      macroAvg: null,
      weightedAvg: null,
      totalSupport: null
    };
    for (const line of lines) {
      const parts = line.trim().split(/\s+/);
      if (parts[0] === 'precision') continue;
      if (parts.length >= 5 && !['accuracy', 'macro', 'weighted'].includes(parts[0])) {
        result.classes.push({
          name: parts[0],
          precision: parseFloat(parts[1]),
          recall: parseFloat(parts[2]),
          f1: parseFloat(parts[3]),
          support: parseInt(parts[4], 10)
        });
      }
      if (parts[0] === 'accuracy') {
        result.accuracy = parseFloat(parts[1]);
        result.totalSupport = parseInt(parts[2], 10);
      }
      if (parts[0] === 'macro' && parts[1] === 'avg') {
        result.macroAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
      if (parts[0] === 'weighted' && parts[1] === 'avg') {
        result.weightedAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
    }
    return result;
  };
  const parsed = parseReport(report);
  if (parsed.classes.length < 2) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanClassificationReport: Could not parse report. Expected at least 2 classes.</div>;
  }
  const negClass = parsed.classes.find(c => c.name === negativeClass) || parsed.classes[0];
  const posClass = parsed.classes.find(c => c.name === positiveClass) || parsed.classes[1];
  const tnPlusFp = negClass.support;
  const tpPlusFn = posClass.support;
  const tn = Math.round(negClass.recall * tnPlusFp);
  const fp = tnPlusFp - tn;
  const tp = Math.round(posClass.recall * tpPlusFn);
  const fn = tpPlusFn - tp;
  const tnPct = tn / tnPlusFp * 100;
  const fpPct = fp / tnPlusFp * 100;
  const fnPct = fn / tpPlusFn * 100;
  const tpPct = tp / tpPlusFn * 100;
  const rowStyle = {
    borderBottom: "1px solid rgba(148, 163, 184, 0.3)"
  };
  const cellStyle = {
    padding: "0.5rem 0.125rem"
  };
  const centerCellStyle = {
    textAlign: "center",
    padding: "0.5rem 0.125rem"
  };
  return <div>
      {}
      <table style={{
    width: "auto",
    borderCollapse: "collapse",
    marginBottom: "1.5rem",
    fontSize: "0.875rem"
  }}>
        <thead>
          <tr style={{
    borderBottom: "2px solid rgba(148, 163, 184, 0.5)"
  }}>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}></th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Precision</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Recall</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>F1-Score</th>
          </tr>
        </thead>
        <tbody>
          {}
          <tr style={rowStyle}>
            <td style={cellStyle}>{negativeLabel}</td>
            <td style={centerCellStyle}>{negClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.f1.toFixed(2)}</td>
          </tr>
          <tr style={rowStyle}>
            <td style={cellStyle}>{positiveLabel}</td>
            <td style={centerCellStyle}>{posClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.f1.toFixed(2)}</td>
          </tr>
          
        </tbody>
      </table>

      {}
      <BooleanConfusionMatrix actualNegativeLabel={negativeLabel} actualPositiveLabel={positiveLabel} predictedNegativeLabel={negativeLabel} predictedPositiveLabel={positiveLabel} tnPct={tnPct.toString()} fpPct={fpPct.toString()} fnPct={fnPct.toString()} tpPct={tpPct.toString()} displayFormat="fraction" maxWidth={maxWidth} />
    </div>;
};

export const BooleanConfusionMatrix = ({actualNegativeLabel = "Not Advanced", actualPositiveLabel = "Advanced", predictedNegativeLabel = "Not Advanced", predictedPositiveLabel = "Advanced", tnCount, tnPct, fpCount, fpPct, fnCount, fnPct, tpCount, tpPct, matrix, maxWidth = 520, displayFormat = "percentage", fractionDigits = 3, percentageDigits = 1, titlePrefix = ""}) => {
  const parseNum = val => val !== undefined && val !== null ? Number(val) : undefined;
  const clampPct = pct => Math.max(0, Math.min(100, Number(pct) || 0));
  const formatValue = pct => {
    const p = clampPct(pct);
    if (displayFormat === "fraction") {
      const digits = Number.isFinite(Number(fractionDigits)) ? Number(fractionDigits) : 3;
      return (p / 100).toFixed(digits);
    }
    const digits = Number.isFinite(Number(percentageDigits)) ? Number(percentageDigits) : 1;
    return `${p.toFixed(digits)}%`;
  };
  const palette = ["#f8fafc", "#eff6ff", "#dbeafe", "#bfdbfe", "#93c5fd", "#60a5fa", "#3b82f6", "#2563eb", "#1d4ed8", "#1e40af"];
  const getBg = pct => {
    const p = clampPct(pct);
    const idx = p === 100 ? 9 : Math.floor(p / 10);
    return palette[idx];
  };
  const getColor = pct => clampPct(pct) >= 60 ? "#ffffff" : "#1e3a8a";
  const rawTn = parseNum(tnCount);
  const rawFp = parseNum(fpCount);
  const rawFn = parseNum(fnCount);
  const rawTp = parseNum(tpCount);
  const rawTnPct = parseNum(tnPct);
  const rawFpPct = parseNum(fpPct);
  const rawFnPct = parseNum(fnPct);
  const rawTpPct = parseNum(tpPct);
  const hasCounts = rawTn !== undefined && rawFp !== undefined && rawFn !== undefined && rawTp !== undefined;
  const hasPcts = rawTnPct !== undefined && rawFpPct !== undefined && rawFnPct !== undefined && rawTpPct !== undefined;
  let resolvedMatrix;
  let showCounts;
  if (matrix) {
    resolvedMatrix = matrix;
    showCounts = matrix.tn?.count !== undefined;
  } else if (hasCounts) {
    const actualNegTotal = rawTn + rawFp;
    const actualPosTotal = rawFn + rawTp;
    resolvedMatrix = {
      tn: {
        count: rawTn,
        pct: actualNegTotal > 0 ? rawTn / actualNegTotal * 100 : 0
      },
      fp: {
        count: rawFp,
        pct: actualNegTotal > 0 ? rawFp / actualNegTotal * 100 : 0
      },
      fn: {
        count: rawFn,
        pct: actualPosTotal > 0 ? rawFn / actualPosTotal * 100 : 0
      },
      tp: {
        count: rawTp,
        pct: actualPosTotal > 0 ? rawTp / actualPosTotal * 100 : 0
      }
    };
    showCounts = true;
  } else if (hasPcts) {
    resolvedMatrix = {
      tn: {
        pct: rawTnPct
      },
      fp: {
        pct: rawFpPct
      },
      fn: {
        pct: rawFnPct
      },
      tp: {
        pct: rawTpPct
      }
    };
    showCounts = false;
  } else {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanConfusionMatrix: Provide either all counts or all percentages</div>;
  }
  const cellStyle = pct => ({
    background: getBg(pct),
    color: getColor(pct),
    padding: "1rem",
    textAlign: "center",
    borderRadius: "8px",
    aspectRatio: "1 / 1",
    width: "100%",
    display: "flex",
    flexDirection: "column",
    alignItems: "center",
    justifyContent: "center",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  });
  const displayPredictedLabels = {
    left: predictedPositiveLabel,
    right: predictedNegativeLabel
  };
  const displayActualLabels = {
    top: actualPositiveLabel,
    bottom: actualNegativeLabel
  };
  const displayMatrix = {
    tl: resolvedMatrix.tp,
    tr: resolvedMatrix.fn,
    bl: resolvedMatrix.fp,
    br: resolvedMatrix.tn
  };
  return <div style={{
    maxWidth: maxWidth + "px",
    margin: "1rem 0"
  }}>
      <div style={{
    display: "grid",
    gridTemplateColumns: "auto auto 1fr 1fr",
    gridTemplateRows: "auto auto auto 1fr 1fr auto",
    gap: "2px"
  }}>
        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "1rem"
  }}>
          {titlePrefix}Confusion Matrix (Normalized)
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "0.875rem"
  }}>
          Predicted
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.left}</div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.right}</div>

        {}
        <div style={{
    gridRow: "4 / 6",
    writingMode: "vertical-rl",
    transform: "rotate(180deg)",
    textAlign: "center",
    fontWeight: "600",
    fontSize: "0.875rem",
    padding: "0 0.5rem",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>
          Actual
        </div>
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.top}</div>
        <div style={cellStyle(displayMatrix.tl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.tr.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tr.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tr.pct)}</div>
        </div>

        {}
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.bottom}</div>
        <div style={cellStyle(displayMatrix.bl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.bl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.bl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.br.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.br.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.br.pct)}</div>
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    marginTop: "0.5rem",
    display: "flex",
    alignItems: "center",
    gap: "0.5rem"
  }}>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "0.0" : "0%"}</span>
          <div style={{
    display: "flex",
    flex: 1,
    height: "12px",
    borderRadius: "4px",
    overflow: "hidden",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  }}>
            {palette.map((color, idx) => <div key={idx} style={{
    flex: 1,
    height: "100%",
    background: color
  }} />)}
          </div>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "1.0" : "100%"}</span>
        </div>
      </div>
    </div>;
};

export const DefinitionCard = ({children}) => {
  return <Card variant="secondary">
    <div style={{
    padding: '0.5rem',
    border: '5px solid var(--primary-light)',
    borderRadius: '0.5rem',
    fontSize: '1.3rem',
    lineHeight: '1.4',
    boxShadow: '0 0 10px 10px var(--primary-light)'
  }}>
        {children}
      </div>

</Card>;
};

export const Scale = ({low, mid, high, lowLabel = "Low", midLabel = "Mid", highLabel = "High", lowDescription, midDescription, highDescription, midColor = "yellow", inverted = false}) => {
  const lowColor = inverted ? "green" : "red";
  const highColor = inverted ? "red" : "green";
  const gradientId = inverted ? "greenToRed" : "redToGreen";
  return <div style={{
    display: 'flex',
    flexDirection: 'column',
    width: '100%'
  }}>
      <svg width="100%" height="30" style={{
    marginBottom: '8px'
  }}>
        <defs>
          <linearGradient id={gradientId} x1="0%" y1="0%" x2="100%" y2="0%">
            <stop offset="0%" stopColor={lowColor} />
            <stop offset="100%" stopColor={highColor} />
          </linearGradient>
        </defs>
        <rect width="100%" height="100%" fill={`url(#${gradientId})`} rx="4" ry="4" />
      </svg>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%',
    marginBottom: '16px'
  }}>
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{low}</p>
        {mid && <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{mid}</p>}
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{high}</p>
      </div>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%'
  }}>
        <div style={{
    maxWidth: '40%'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    marginBottom: '4px'
  }}>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: lowColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{lowLabel}</p>
          </div>
          {lowDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    lineHeight: '1.4'
  }}>{lowDescription}</p>}
        </div>
        {mid && <div style={{
    maxWidth: '40%',
    textAlign: 'center'
  }}>
            <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'center',
    marginBottom: '4px'
  }}>
              <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: midColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
              <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{midLabel}</p>
            </div>
            {midDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    textAlign: 'center',
    lineHeight: '1.4'
  }}>{midDescription}</p>}
          </div>}


        <div style={{
    maxWidth: '40%',
    textAlign: 'right'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'flex-end',
    marginBottom: '4px'
  }}>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{highLabel}</p>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: highColor,
    borderRadius: '50%',
    marginLeft: '8px'
  }}></div>
          </div>
          {highDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    marginLeft: 'auto',
    lineHeight: '1.4'
  }}>{highDescription}</p>}
        </div>
      </div>
    </div>;
};

## Overview

<DefinitionCard>
  <strong>Action Advancement</strong> measures whether an assistant successfully accomplishes or makes progress toward at least one user goal in a conversation.
</DefinitionCard>

Action Advancement addresses the common pain points of unclear agent performance by measuring whether AI agents are actually helping users achieve their objectives rather than just providing responses.

An assistant successfully advances a user's goal when it:

1. Provides a complete or partial answer to the user's question
2. Requests clarification or additional information to better understand the user's needs
3. Confirms that a requested action has been successfully completed

For an interaction to count as advancing the user's goal, the assistant's response must be:

* Factually accurate
* Directly addressing the user's request
* Consistent with any tool outputs used

### Action Advancement at a glance

| Property                       | Description                                                                   |
| :----------------------------- | :---------------------------------------------------------------------------- |
| **Name of Metric**             | Action Advancement                                                            |
| **Metric Category**            | Agentic Metrics                                                               |
| **Use this metric for**        | Evaluating whether AI agents make progress toward user goals in conversations |
| **Can be applied to**          | session, trace, all span types (agent, workflow, retriever, LLM and tool)     |
| **LLM/Luna Support**           | Supported with both LLM + Luna models                                         |
| **Protect Runtime Protection** | No - Not applicable for this metric                                           |
| **Constants**                  | None - Uses dynamic evaluation                                                |
| **Usage Context**              | Agentic workflows, multi-step tasks, tool-using assistants                    |
| **Value Type**                 | Confidence score (0.0 to 1.0) - Confidence that any user goal was advanced    |
| **Input/Output Requirements**  | Requires conversation context, user goals, and assistant responses            |

## When to Use This Metric

<Card>
  <div style={{display: 'flex', alignItems: 'center', gap: '0.5rem', marginBottom: '0.75rem'}}>
    <div style={{fontSize: '1.25rem', color: 'var(--primary-color)'}}>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
        <path d="M12 22c5.523 0 10-4.477 10-10S17.523 2 12 2 2 6.477 2 12s4.477 10 10 10z" />

        <path d="m9 12 2 2 4-4" />
      </svg>
    </div>

    <h3 style={{margin: 0, fontSize: '1.25rem', fontWeight: '600'}}>When to Use This Metric</h3>
  </div>

  This metric shines when simple response quality metrics fall short, particularly for complex, multi-step interactions where progress toward goals matters more than individual response quality.

  <div style={{ marginTop: "1rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Agentic Workflows:</strong> When an AI agent must decide on actions and select appropriate tools.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Multi-step Tasks:</strong> When completing a user's request requires multiple steps or decisions.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Tool-using Assistants:</strong> When evaluating if the assistant used available tools effectively.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Customer Service Agents:</strong> Resolving user issues through multi-step problem-solving.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Task-Oriented Assistants:</strong> Completing specific actions like booking flights or processing orders.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Research Assistants:</strong> Gathering and synthesizing information across multiple sources.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Creative Assistants:</strong> Understanding and building upon user requests iteratively.
  </div>
</Card>

### Calculation method

If the Action Advancement score is less than 1.0 (100%), at least one evaluator determined that the assistant failed to make progress on any user goal.

Action Advancement is calculated by:

<Steps>
  <Step title="Model Request">
    Multiple evaluation requests are sent to an LLM evaluator to analyze the assistant's progress toward user goals.
  </Step>

  <Step title="Prompt Engineering">
    A specialized chain-of-thought prompt guides the model to evaluate whether the assistant made progress on user goals based on the metric's definition.
  </Step>

  <Step title="Evaluation Process">
    Each evaluation analyzes the interaction and produces both a detailed explanation and a binary judgment (yes/no) on goal advancement.
  </Step>

  <Step title="Score Calculation">
    The final Action Advancement score is computed as the confidence (probability) that at least one user request was advanced.
  </Step>
</Steps>

We display one of the generated explanations alongside the score, choosing one that aligns with the majority judgment.

<Note>
  This metric requires multiple LLM calls to compute, which may impact usage and billing.
</Note>

### Score Interpretation

**Expected Score:** 1.0 (Excellent) - For example, in a booking conversation, the assistant makes clear progress toward the booking goal by gathering the necessary information and providing options.

<Scale low="0.0" mid="0.5" high="1.0" lowLabel="Poor" midLabel="Fair" highLabel="Excellent" lowDescription="Assistant failed to make any progress toward user goals" midDescription="Assistant made some progress but didn't fully address the user's needs" highDescription="Assistant successfully advanced user goals with clear progress" />

### What different scores mean

* **0.0 - 0.3 (Poor):** The assistant completely failed to address the user's request or made no meaningful progress. Common causes include ignoring the user's question, providing irrelevant information, or failing to use available tools when needed.

* **0.4 - 0.7 (Fair):** The assistant made some progress but didn't fully accomplish the user's goal. This might include partial answers, requesting clarification when not needed, or missing key aspects of the request.

* **0.8 - 1.0 (Excellent):** The assistant successfully advanced the user's goal by providing complete answers, making appropriate requests for clarification, or confirming successful task completion.

## How to improve Action Advancement scores

To improve Action Advancement scores, focus on ensuring your AI agents make meaningful progress toward user goals in every interaction.

### Common issues and solutions

| Issue                               | Cause                                            | Solution                                                                                                     |
| :---------------------------------- | :----------------------------------------------- | :----------------------------------------------------------------------------------------------------------- |
| **Assistant ignores user requests** | Poor prompt engineering or context understanding | Improve system prompts to emphasize goal-oriented responses and ensure the assistant understands user intent |
| **Incomplete responses**            | Insufficient context or tool usage               | Provide better context and ensure the assistant uses available tools effectively                             |
| **Irrelevant information**          | Lack of focus on user goals                      | Train the assistant to stay focused on the specific user request and avoid tangential information            |
| **No progress on multi-step tasks** | Poor task breakdown                              | Implement better task decomposition and ensure the assistant can handle complex, multi-step processes        |

### Best practices for optimization

* **Clear goal identification:** Ensure your assistant can identify and prioritize user goals
* **Progressive disclosure:** Break complex tasks into manageable steps
* **Tool integration:** Make sure the assistant effectively uses available tools and APIs
* **Context awareness:** Maintain conversation context to build on previous interactions

## Comparison to other metrics

| Property                       | Action Advancement                        | Instruction Adherence                            | Completeness                     |
| :----------------------------- | :---------------------------------------- | :----------------------------------------------- | :------------------------------- |
| **Metric Category**            | Agentic Metrics                           | Response Quality                                 | Response Quality                 |
| **Use this metric for**        | Evaluating goal progress in conversations | Measuring how well responses follow instructions | Assessing response completeness  |
| **Best for**                   | Multi-step tasks and agentic workflows    | Single-turn instruction following                | Ensuring comprehensive responses |
| **LLM/Luna Support**           | Yes                                       | Yes                                              | Yes                              |
| **Protect Runtime Protection** | No                                        | No                                               | No                               |
| **Value Type**                 | Confidence score (0.0-1.0)                | Percentage (0.0-1.0)                             | Percentage (0.0-1.0)             |
| **Limitations**                | Requires conversation context             | May not capture goal progress                    | Doesn't measure goal advancement |

## Best practices

To effectively implement and optimize Action Advancement in your AI systems, consider these key practices:

### Track progress over time

Monitor Action Advancement scores across different versions of your agent to ensure improvements in task completion capabilities. This helps you identify whether your optimizations are actually improving goal advancement.

### Analyze failure patterns

When Action Advancement scores are low, examine the specific steps where agents fail to make progress to identify systematic issues. Look for patterns in where agents get stuck or fail to advance user goals.

### Combine with other metrics

Use Action Advancement alongside other agentic metrics to get a comprehensive view of your assistant's effectiveness. This provides a more complete picture of your agent's performance beyond just goal advancement.

### Test edge cases

Create evaluation datasets that include complex, multi-step tasks to thoroughly assess your agent's ability to advance user goals. This ensures your agent can handle challenging scenarios that require multiple steps.

<Note>
  When optimizing for Action Advancement, ensure you're not sacrificing other important aspects like safety, factual accuracy, or user experience in pursuit of task completion.
</Note>

## Performance Benchmarks

We evaluated Action Advancement against human expert labels on an internal dataset of agentic conversation samples using top frontier models.

| Model                   | F1 (True) |
| :---------------------- | :-------: |
| GPT-4.1                 |    0.87   |
| GPT-4.1-mini (judges=3) |    0.78   |
| Claude Sonnet 4.5       |    0.89   |
| Gemini 3 Flash          |    0.85   |

### GPT-4.1 Classification Report

<BooleanClassificationReport
  report={`            precision    recall  f1-score   support

False       0.9583    0.7302    0.8288        63
True       0.7848    0.9688    0.8671        64

accuracy                          0.8504       127
macro avg     0.8716    0.8495    0.8480       127
weighted avg  0.8709    0.8504    0.8481       127`}
  negativeLabel="False"
  positiveLabel="True"
  negativeClass="False"
  positiveClass="True"
/>

<Note>
  Benchmarks based on internal evaluation dataset. Performance may vary by use case.
</Note>

## Related Resources

If you would like to dive deeper or start implementing Action Advancement, check out the following resources:

### Examples

* [Action Advancement Examples](https://app.galileo.ai) - Log in and explore the "Action Advancement" Log Stream in the "Preset Metric Examples" Project to see this metric in action.

### How-to guides

* [Agentic AI Basic Example](/how-to-guides/agentic-ai/basic-example)
* [Creating Custom Metrics](/how-to-guides/metrics/create-local-metric/create-local-metric)

### Related Concepts

* [Agentic Metrics Overview](/concepts/metrics/agentic/agentic-overview)
* [Action Completion](/concepts/metrics/agentic/action-completion)
* [Agent Efficiency](/concepts/metrics/agentic/agent-efficiency)
