> ## Documentation Index
> Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Action Completion

> Understand how to measure whether your agent actually accomplished a user's goals

export const BooleanClassificationReport = ({report, negativeLabel = "Not Advanced", positiveLabel = "Advanced", negativeClass = "False", positiveClass = "True", maxWidth = 520}) => {
  const parseReport = reportStr => {
    const lines = reportStr.trim().split('\n').filter(line => line.trim());
    const result = {
      classes: [],
      accuracy: null,
      macroAvg: null,
      weightedAvg: null,
      totalSupport: null
    };
    for (const line of lines) {
      const parts = line.trim().split(/\s+/);
      if (parts[0] === 'precision') continue;
      if (parts.length >= 5 && !['accuracy', 'macro', 'weighted'].includes(parts[0])) {
        result.classes.push({
          name: parts[0],
          precision: parseFloat(parts[1]),
          recall: parseFloat(parts[2]),
          f1: parseFloat(parts[3]),
          support: parseInt(parts[4], 10)
        });
      }
      if (parts[0] === 'accuracy') {
        result.accuracy = parseFloat(parts[1]);
        result.totalSupport = parseInt(parts[2], 10);
      }
      if (parts[0] === 'macro' && parts[1] === 'avg') {
        result.macroAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
      if (parts[0] === 'weighted' && parts[1] === 'avg') {
        result.weightedAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
    }
    return result;
  };
  const parsed = parseReport(report);
  if (parsed.classes.length < 2) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanClassificationReport: Could not parse report. Expected at least 2 classes.</div>;
  }
  const negClass = parsed.classes.find(c => c.name === negativeClass) || parsed.classes[0];
  const posClass = parsed.classes.find(c => c.name === positiveClass) || parsed.classes[1];
  const tnPlusFp = negClass.support;
  const tpPlusFn = posClass.support;
  const tn = Math.round(negClass.recall * tnPlusFp);
  const fp = tnPlusFp - tn;
  const tp = Math.round(posClass.recall * tpPlusFn);
  const fn = tpPlusFn - tp;
  const tnPct = tn / tnPlusFp * 100;
  const fpPct = fp / tnPlusFp * 100;
  const fnPct = fn / tpPlusFn * 100;
  const tpPct = tp / tpPlusFn * 100;
  const rowStyle = {
    borderBottom: "1px solid rgba(148, 163, 184, 0.3)"
  };
  const cellStyle = {
    padding: "0.5rem 0.125rem"
  };
  const centerCellStyle = {
    textAlign: "center",
    padding: "0.5rem 0.125rem"
  };
  return <div>
      {}
      <table style={{
    width: "auto",
    borderCollapse: "collapse",
    marginBottom: "1.5rem",
    fontSize: "0.875rem"
  }}>
        <thead>
          <tr style={{
    borderBottom: "2px solid rgba(148, 163, 184, 0.5)"
  }}>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}></th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Precision</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Recall</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>F1-Score</th>
          </tr>
        </thead>
        <tbody>
          {}
          <tr style={rowStyle}>
            <td style={cellStyle}>{negativeLabel}</td>
            <td style={centerCellStyle}>{negClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.f1.toFixed(2)}</td>
          </tr>
          <tr style={rowStyle}>
            <td style={cellStyle}>{positiveLabel}</td>
            <td style={centerCellStyle}>{posClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.f1.toFixed(2)}</td>
          </tr>
          
        </tbody>
      </table>

      {}
      <BooleanConfusionMatrix actualNegativeLabel={negativeLabel} actualPositiveLabel={positiveLabel} predictedNegativeLabel={negativeLabel} predictedPositiveLabel={positiveLabel} tnPct={tnPct.toString()} fpPct={fpPct.toString()} fnPct={fnPct.toString()} tpPct={tpPct.toString()} displayFormat="fraction" maxWidth={maxWidth} />
    </div>;
};

export const BooleanConfusionMatrix = ({actualNegativeLabel = "Not Advanced", actualPositiveLabel = "Advanced", predictedNegativeLabel = "Not Advanced", predictedPositiveLabel = "Advanced", tnCount, tnPct, fpCount, fpPct, fnCount, fnPct, tpCount, tpPct, matrix, maxWidth = 520, displayFormat = "percentage", fractionDigits = 3, percentageDigits = 1, titlePrefix = ""}) => {
  const parseNum = val => val !== undefined && val !== null ? Number(val) : undefined;
  const clampPct = pct => Math.max(0, Math.min(100, Number(pct) || 0));
  const formatValue = pct => {
    const p = clampPct(pct);
    if (displayFormat === "fraction") {
      const digits = Number.isFinite(Number(fractionDigits)) ? Number(fractionDigits) : 3;
      return (p / 100).toFixed(digits);
    }
    const digits = Number.isFinite(Number(percentageDigits)) ? Number(percentageDigits) : 1;
    return `${p.toFixed(digits)}%`;
  };
  const palette = ["#f8fafc", "#eff6ff", "#dbeafe", "#bfdbfe", "#93c5fd", "#60a5fa", "#3b82f6", "#2563eb", "#1d4ed8", "#1e40af"];
  const getBg = pct => {
    const p = clampPct(pct);
    const idx = p === 100 ? 9 : Math.floor(p / 10);
    return palette[idx];
  };
  const getColor = pct => clampPct(pct) >= 60 ? "#ffffff" : "#1e3a8a";
  const rawTn = parseNum(tnCount);
  const rawFp = parseNum(fpCount);
  const rawFn = parseNum(fnCount);
  const rawTp = parseNum(tpCount);
  const rawTnPct = parseNum(tnPct);
  const rawFpPct = parseNum(fpPct);
  const rawFnPct = parseNum(fnPct);
  const rawTpPct = parseNum(tpPct);
  const hasCounts = rawTn !== undefined && rawFp !== undefined && rawFn !== undefined && rawTp !== undefined;
  const hasPcts = rawTnPct !== undefined && rawFpPct !== undefined && rawFnPct !== undefined && rawTpPct !== undefined;
  let resolvedMatrix;
  let showCounts;
  if (matrix) {
    resolvedMatrix = matrix;
    showCounts = matrix.tn?.count !== undefined;
  } else if (hasCounts) {
    const actualNegTotal = rawTn + rawFp;
    const actualPosTotal = rawFn + rawTp;
    resolvedMatrix = {
      tn: {
        count: rawTn,
        pct: actualNegTotal > 0 ? rawTn / actualNegTotal * 100 : 0
      },
      fp: {
        count: rawFp,
        pct: actualNegTotal > 0 ? rawFp / actualNegTotal * 100 : 0
      },
      fn: {
        count: rawFn,
        pct: actualPosTotal > 0 ? rawFn / actualPosTotal * 100 : 0
      },
      tp: {
        count: rawTp,
        pct: actualPosTotal > 0 ? rawTp / actualPosTotal * 100 : 0
      }
    };
    showCounts = true;
  } else if (hasPcts) {
    resolvedMatrix = {
      tn: {
        pct: rawTnPct
      },
      fp: {
        pct: rawFpPct
      },
      fn: {
        pct: rawFnPct
      },
      tp: {
        pct: rawTpPct
      }
    };
    showCounts = false;
  } else {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanConfusionMatrix: Provide either all counts or all percentages</div>;
  }
  const cellStyle = pct => ({
    background: getBg(pct),
    color: getColor(pct),
    padding: "1rem",
    textAlign: "center",
    borderRadius: "8px",
    aspectRatio: "1 / 1",
    width: "100%",
    display: "flex",
    flexDirection: "column",
    alignItems: "center",
    justifyContent: "center",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  });
  const displayPredictedLabels = {
    left: predictedPositiveLabel,
    right: predictedNegativeLabel
  };
  const displayActualLabels = {
    top: actualPositiveLabel,
    bottom: actualNegativeLabel
  };
  const displayMatrix = {
    tl: resolvedMatrix.tp,
    tr: resolvedMatrix.fn,
    bl: resolvedMatrix.fp,
    br: resolvedMatrix.tn
  };
  return <div style={{
    maxWidth: maxWidth + "px",
    margin: "1rem 0"
  }}>
      <div style={{
    display: "grid",
    gridTemplateColumns: "auto auto 1fr 1fr",
    gridTemplateRows: "auto auto auto 1fr 1fr auto",
    gap: "2px"
  }}>
        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "1rem"
  }}>
          {titlePrefix}Confusion Matrix (Normalized)
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "0.875rem"
  }}>
          Predicted
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.left}</div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.right}</div>

        {}
        <div style={{
    gridRow: "4 / 6",
    writingMode: "vertical-rl",
    transform: "rotate(180deg)",
    textAlign: "center",
    fontWeight: "600",
    fontSize: "0.875rem",
    padding: "0 0.5rem",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>
          Actual
        </div>
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.top}</div>
        <div style={cellStyle(displayMatrix.tl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.tr.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tr.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tr.pct)}</div>
        </div>

        {}
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.bottom}</div>
        <div style={cellStyle(displayMatrix.bl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.bl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.bl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.br.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.br.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.br.pct)}</div>
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    marginTop: "0.5rem",
    display: "flex",
    alignItems: "center",
    gap: "0.5rem"
  }}>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "0.0" : "0%"}</span>
          <div style={{
    display: "flex",
    flex: 1,
    height: "12px",
    borderRadius: "4px",
    overflow: "hidden",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  }}>
            {palette.map((color, idx) => <div key={idx} style={{
    flex: 1,
    height: "100%",
    background: color
  }} />)}
          </div>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "1.0" : "100%"}</span>
        </div>
      </div>
    </div>;
};

export const MetricWhenToUse = ({description, useCases}) => {
  return <Card>
      <div style={{
    display: 'flex',
    alignItems: 'center',
    gap: '0.5rem',
    marginBottom: '0.75rem'
  }}>
        <div style={{
    fontSize: '1.25rem',
    color: 'var(--primary-color)'
  }}>
          <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
            <path d="M12 22c5.523 0 10-4.477 10-10S17.523 2 12 2 2 6.477 2 12s4.477 10 10 10z" />
            <path d="m9 12 2 2 4-4" />
          </svg>
        </div>
        <h3 style={{
    margin: 0,
    fontSize: '1.25rem',
    fontWeight: '600'
  }}>When to Use This Metric</h3>
      </div>

      {description}

      {useCases != null && useCases.map((useCase, index) => <div key={index} style={{
    marginTop: "1rem",
    paddingTop: "0.75rem",
    borderTop: "1px solid rgba(209, 213, 219, 0.33)"
  }}>
          <strong>{useCase.title}</strong>{useCase.description ? `: ${useCase.description}` : ''}
        </div>)}
    </Card>;
};

export const DefinitionCard = ({children}) => {
  return <Card variant="secondary">
    <div style={{
    padding: '0.5rem',
    border: '5px solid var(--primary-light)',
    borderRadius: '0.5rem',
    fontSize: '1.3rem',
    lineHeight: '1.4',
    boxShadow: '0 0 10px 10px var(--primary-light)'
  }}>
        {children}
      </div>

</Card>;
};

export const Scale = ({low, mid, high, lowLabel = "Low", midLabel = "Mid", highLabel = "High", lowDescription, midDescription, highDescription, midColor = "yellow", inverted = false}) => {
  const lowColor = inverted ? "green" : "red";
  const highColor = inverted ? "red" : "green";
  const gradientId = inverted ? "greenToRed" : "redToGreen";
  return <div style={{
    display: 'flex',
    flexDirection: 'column',
    width: '100%'
  }}>
      <svg width="100%" height="30" style={{
    marginBottom: '8px'
  }}>
        <defs>
          <linearGradient id={gradientId} x1="0%" y1="0%" x2="100%" y2="0%">
            <stop offset="0%" stopColor={lowColor} />
            <stop offset="100%" stopColor={highColor} />
          </linearGradient>
        </defs>
        <rect width="100%" height="100%" fill={`url(#${gradientId})`} rx="4" ry="4" />
      </svg>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%',
    marginBottom: '16px'
  }}>
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{low}</p>
        {mid && <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{mid}</p>}
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{high}</p>
      </div>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%'
  }}>
        <div style={{
    maxWidth: '40%'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    marginBottom: '4px'
  }}>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: lowColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{lowLabel}</p>
          </div>
          {lowDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    lineHeight: '1.4'
  }}>{lowDescription}</p>}
        </div>
        {mid && <div style={{
    maxWidth: '40%',
    textAlign: 'center'
  }}>
            <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'center',
    marginBottom: '4px'
  }}>
              <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: midColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
              <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{midLabel}</p>
            </div>
            {midDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    textAlign: 'center',
    lineHeight: '1.4'
  }}>{midDescription}</p>}
          </div>}


        <div style={{
    maxWidth: '40%',
    textAlign: 'right'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'flex-end',
    marginBottom: '4px'
  }}>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{highLabel}</p>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: highColor,
    borderRadius: '50%',
    marginLeft: '8px'
  }}></div>
          </div>
          {highDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    marginLeft: 'auto',
    lineHeight: '1.4'
  }}>{highDescription}</p>}
        </div>
      </div>
    </div>;
};

## Overview

<DefinitionCard>
  <strong>Action Completion</strong> determines whether the agent successfully accomplished all of the user’s goals.
</DefinitionCard>

Action Completion addresses the common pain points of agent performance by measuring whether AI agents are actually helping users achieve their end goal rather than just providing responses.

Action Completion is successful when all of the following are true:

* The agent provides a complete answer in the case of a question
* The agent provides a confirmation of successful action in the case of a request
* The response is coherent and factually accurate
* The response comprehensively addresses every aspect of the user's request
* The response avoids contradicting tool outputs
* The response summarizes all relevant parts returned by tools

### Action Completion at a glance

| Property                       | Description                                                           |
| :----------------------------- | :-------------------------------------------------------------------- |
| **Name of Metric**             | Action Completion                                                     |
| **Metric Category**            | Agentic Metrics                                                       |
| **Use this metric for**        | Measuring whether the agent successfully accomplished the user's goal |
| **Can be applied to**          | Session                                                               |
| **LLM/Luna Support**           | Supported with both LLM + Luna models                                 |
| **Protect Runtime Protection** | No                                                                    |
| **Constants**                  | None - Uses dynamic evaluation                                        |
| **Usage Context**              | Agentic workflows, multi-step tasks, tool-using assistants            |
| **Value Type**                 | Confidence score denoted as a percentage.                             |
| **Input/Output Requirements**  | Requires agent responses and user goals for evaluation                |

<MetricWhenToUse
  description="Action Completion is the single best measure of whether an agent is truly useful, particularly valuable for agentic workflows and multi-step tasks."
  useCases={[
{
  title: "Agentic Workflows",
  description: "When an AI agent must decide on a course of action and select tools to accomplish tasks."
},
{
  title: "Multi-step Tasks",
  description: "When completing a user's request requires multiple steps or decisions."
},
{
  title: "Tool-using Assistants",
  description: "When evaluating if the assistant successfully used the right tools and accomplished the intended goals."
}
]}
/>

## Calculation method

If the response does not achieve an Action Completion score of 100%, at least one judge concluded that the agent did not accomplish all of the user's goals.

<Steps>
  <Step title="Additional Requests">
    Multiple requests are sent to an LLM using a carefully designed chain-of-thought prompt that adheres to the definition above.
  </Step>

  <Step title="Judgment Responses">
    The LLM generates multiple distinct responses, each containing:

    * An explanation
    * A final judgment: "Yes" (goal accomplished) or "No" (goal not accomplished)
  </Step>

  <Step title="Score Computation">
    Action Completion Score = (Number of "Yes" Responses) / (Total Number of Responses)
  </Step>

  <Step title="Explanation Surfacing">
    One explanation is surfaced, chosen to align with the majority judgment among the responses.
  </Step>
</Steps>

Galileo displays a generated explanation alongside the score, choosing the one that aligns with the majority judgment, to help with troubleshooting.

<Note>
  This metric requires multiple LLM calls to compute, which may impact usage and billing.
</Note>

## Score interpretation

**Expected Score:** 100% - A perfect score indicates the agent successfully accomplished all user goals with complete, accurate, and comprehensive responses.

<Scale low="0%" mid="50%" high="100%" lowLabel="Poor" midLabel="Fair" highLabel="Excellent" lowDescription="Agent failed to accomplish any user goals" midDescription="Agent made some progress but didn't fully address user needs" highDescription="Agent successfully accomplished all user goals completely" />

### What different scores mean

* **0.0 - 0.3 (Poor):** Agent completely failed to accomplish user goals, provided incomplete answers, or contradicted tool outputs. Common causes include insufficient tool usage, incomplete responses, or factual inaccuracies.
* **0.4 - 0.7 (Fair):** Agent made progress toward user goals but didn't fully address all aspects of the request. Areas for improvement include ensuring comprehensive coverage of all user requirements and better tool utilization.
* **0.8 - 1.0 (Excellent):** Agent successfully accomplished all user goals with complete, accurate, and comprehensive responses. Best practices include thorough tool usage, complete answer provision, and proper confirmation of successful actions.

## How to improve Action Completion scores

To optimize your agent's performance and ensure high Action Completion scores, focus on comprehensive goal accomplishment and complete response generation.

### Common issues and solutions

| Issue                      | Cause                                               | Solution                                                                                          |
| :------------------------- | :-------------------------------------------------- | :------------------------------------------------------------------------------------------------ |
| Incomplete responses       | Agent stops before addressing all user requirements | Implement comprehensive response generation and ensure all user goals are explicitly addressed    |
| Tool output contradictions | Agent ignores or contradicts information from tools | Ensure agent properly summarizes and incorporates all relevant tool outputs without contradiction |
| Missing confirmations      | Agent doesn't confirm successful actions            | Add explicit confirmation steps for action-based requests                                         |
| Factual inaccuracies       | Agent provides incorrect information                | Implement fact-checking mechanisms and ensure responses align with tool outputs                   |

### Best practices for optimization

* **Track Progress Over Time**: Monitor Action Completion scores across different versions of your agent to identify trends and ensure continuous improvements in task completion capabilities.
* **Analyze Failure Patterns**: When Action Completion scores are low, examine specific steps or scenarios where agents fail to meet user goals. Use this analysis to identify and address systematic issues.
* **Combine with Other Metrics**: Use Action Completion alongside other agentic metrics, such as Action Advancement, to get a comprehensive view of your assistant's effectiveness and identify areas for improvement.
* **Test Edge Cases**: Create evaluation datasets that include complex, multi-step tasks to thoroughly assess your agent's ability to handle challenging scenarios and advance user goals effectively.

<Note>
  When optimizing for Action Completion, ensure you're not sacrificing other important aspects like safety, factual accuracy, or user experience in pursuit of task completion.
</Note>

## Comparison to other metrics

| Property                       | Action Completion             | Action Advancement              | Tool Selection                    |
| :----------------------------- | :---------------------------- | :------------------------------ | :-------------------------------- |
| **Metric Category**            | Agentic Performance           | Agentic Performance             | Agentic Performance               |
| **Use this metric for**        | Measuring goal accomplishment | Measuring progress toward goals | Measuring tool choice quality     |
| **Best for**                   | Final outcome evaluation      | Progress tracking               | Tool usage optimization           |
| **LLM/Luna Support**           | Yes                           | Yes                             | Yes                               |
| **Protect Runtime Protection** | No                            | No                              | No                                |
| **Value Type**                 | Percentage (0%-100%)          | Percentage (0%-100%)            | Percentage (0%-100%)              |
| **Limitations**                | Requires multiple LLM calls   | May not capture final success   | Doesn't measure execution quality |

## Performance Benchmarks

We evaluated Action Completion against human expert labels on an internal dataset of agentic conversation samples using top frontier models.

| Model                   | F1 (True) |
| :---------------------- | :-------: |
| GPT-4.1                 |    0.92   |
| GPT-4.1-mini (judges=3) |    0.79   |
| Claude Sonnet 4.5       |    0.87   |
| Gemini 3 Flash          |    0.92   |

### GPT-4.1 Classification Report

<BooleanClassificationReport
  report={`            precision    recall  f1-score   support

False       0.9286    0.8667    0.8966        45
True       0.8966    0.9455    0.9204        55

accuracy                          0.9100       100
macro avg     0.9126    0.9061    0.9085       100
weighted avg  0.9110    0.9100    0.9096       100`}
  negativeLabel="False"
  positiveLabel="True"
  negativeClass="False"
  positiveClass="True"
/>

<Note>
  Benchmarks based on internal evaluation dataset. Performance may vary by use case.
</Note>

## Related Resources

If you would like to dive deeper or start implementing Action Completion, check out the following resources:

### Examples

* [Action Completion Examples](https://app.galileo.ai) - Log in and explore the "Action Completion" Log Stream in the "Preset Metric Examples" Project to see this metric in action.

### How-to guides

* [Agentic AI Examples](/how-to-guides/agentic-ai/basic-example)

### Related Concepts

* [Action Advancement](/concepts/metrics/agentic/action-advancement)
* [Tool Selection](/concepts/metrics/agentic/tool-selection-quality)
* [Agentic AI Overview](/concepts/metrics/agentic/agentic-overview)
