> ## Documentation Index
> Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Visual Quality

> Evaluate whether the quality of an input / PDF in an LLM span is sufficient for the associated text prompt task to be reliably performed

export const BooleanClassificationReport = ({report, negativeLabel = "Not Advanced", positiveLabel = "Advanced", negativeClass = "False", positiveClass = "True", maxWidth = 520}) => {
  const parseReport = reportStr => {
    const lines = reportStr.trim().split('\n').filter(line => line.trim());
    const result = {
      classes: [],
      accuracy: null,
      macroAvg: null,
      weightedAvg: null,
      totalSupport: null
    };
    for (const line of lines) {
      const parts = line.trim().split(/\s+/);
      if (parts[0] === 'precision') continue;
      if (parts.length >= 5 && !['accuracy', 'macro', 'weighted'].includes(parts[0])) {
        result.classes.push({
          name: parts[0],
          precision: parseFloat(parts[1]),
          recall: parseFloat(parts[2]),
          f1: parseFloat(parts[3]),
          support: parseInt(parts[4], 10)
        });
      }
      if (parts[0] === 'accuracy') {
        result.accuracy = parseFloat(parts[1]);
        result.totalSupport = parseInt(parts[2], 10);
      }
      if (parts[0] === 'macro' && parts[1] === 'avg') {
        result.macroAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
      if (parts[0] === 'weighted' && parts[1] === 'avg') {
        result.weightedAvg = {
          precision: parseFloat(parts[2]),
          recall: parseFloat(parts[3]),
          f1: parseFloat(parts[4]),
          support: parseInt(parts[5], 10)
        };
      }
    }
    return result;
  };
  const parsed = parseReport(report);
  if (parsed.classes.length < 2) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanClassificationReport: Could not parse report. Expected at least 2 classes.</div>;
  }
  const negClass = parsed.classes.find(c => c.name === negativeClass) || parsed.classes[0];
  const posClass = parsed.classes.find(c => c.name === positiveClass) || parsed.classes[1];
  const tnPlusFp = negClass.support;
  const tpPlusFn = posClass.support;
  const tn = Math.round(negClass.recall * tnPlusFp);
  const fp = tnPlusFp - tn;
  const tp = Math.round(posClass.recall * tpPlusFn);
  const fn = tpPlusFn - tp;
  const tnPct = tn / tnPlusFp * 100;
  const fpPct = fp / tnPlusFp * 100;
  const fnPct = fn / tpPlusFn * 100;
  const tpPct = tp / tpPlusFn * 100;
  const rowStyle = {
    borderBottom: "1px solid rgba(148, 163, 184, 0.3)"
  };
  const cellStyle = {
    padding: "0.5rem 0.125rem"
  };
  const centerCellStyle = {
    textAlign: "center",
    padding: "0.5rem 0.125rem"
  };
  return <div>
      {}
      <table style={{
    width: "auto",
    borderCollapse: "collapse",
    marginBottom: "1.5rem",
    fontSize: "0.875rem"
  }}>
        <thead>
          <tr style={{
    borderBottom: "2px solid rgba(148, 163, 184, 0.5)"
  }}>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}></th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Precision</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Recall</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>F1-Score</th>
          </tr>
        </thead>
        <tbody>
          {}
          <tr style={rowStyle}>
            <td style={cellStyle}>{negativeLabel}</td>
            <td style={centerCellStyle}>{negClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{negClass.f1.toFixed(2)}</td>
          </tr>
          <tr style={rowStyle}>
            <td style={cellStyle}>{positiveLabel}</td>
            <td style={centerCellStyle}>{posClass.precision.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.recall.toFixed(2)}</td>
            <td style={centerCellStyle}>{posClass.f1.toFixed(2)}</td>
          </tr>
          
        </tbody>
      </table>

      {}
      <BooleanConfusionMatrix actualNegativeLabel={negativeLabel} actualPositiveLabel={positiveLabel} predictedNegativeLabel={negativeLabel} predictedPositiveLabel={positiveLabel} tnPct={tnPct.toString()} fpPct={fpPct.toString()} fnPct={fnPct.toString()} tpPct={tpPct.toString()} displayFormat="fraction" maxWidth={maxWidth} />
    </div>;
};

export const BooleanConfusionMatrix = ({actualNegativeLabel = "Not Advanced", actualPositiveLabel = "Advanced", predictedNegativeLabel = "Not Advanced", predictedPositiveLabel = "Advanced", tnCount, tnPct, fpCount, fpPct, fnCount, fnPct, tpCount, tpPct, matrix, maxWidth = 520, displayFormat = "percentage", fractionDigits = 3, percentageDigits = 1, titlePrefix = ""}) => {
  const parseNum = val => val !== undefined && val !== null ? Number(val) : undefined;
  const clampPct = pct => Math.max(0, Math.min(100, Number(pct) || 0));
  const formatValue = pct => {
    const p = clampPct(pct);
    if (displayFormat === "fraction") {
      const digits = Number.isFinite(Number(fractionDigits)) ? Number(fractionDigits) : 3;
      return (p / 100).toFixed(digits);
    }
    const digits = Number.isFinite(Number(percentageDigits)) ? Number(percentageDigits) : 1;
    return `${p.toFixed(digits)}%`;
  };
  const palette = ["#f8fafc", "#eff6ff", "#dbeafe", "#bfdbfe", "#93c5fd", "#60a5fa", "#3b82f6", "#2563eb", "#1d4ed8", "#1e40af"];
  const getBg = pct => {
    const p = clampPct(pct);
    const idx = p === 100 ? 9 : Math.floor(p / 10);
    return palette[idx];
  };
  const getColor = pct => clampPct(pct) >= 60 ? "#ffffff" : "#1e3a8a";
  const rawTn = parseNum(tnCount);
  const rawFp = parseNum(fpCount);
  const rawFn = parseNum(fnCount);
  const rawTp = parseNum(tpCount);
  const rawTnPct = parseNum(tnPct);
  const rawFpPct = parseNum(fpPct);
  const rawFnPct = parseNum(fnPct);
  const rawTpPct = parseNum(tpPct);
  const hasCounts = rawTn !== undefined && rawFp !== undefined && rawFn !== undefined && rawTp !== undefined;
  const hasPcts = rawTnPct !== undefined && rawFpPct !== undefined && rawFnPct !== undefined && rawTpPct !== undefined;
  let resolvedMatrix;
  let showCounts;
  if (matrix) {
    resolvedMatrix = matrix;
    showCounts = matrix.tn?.count !== undefined;
  } else if (hasCounts) {
    const actualNegTotal = rawTn + rawFp;
    const actualPosTotal = rawFn + rawTp;
    resolvedMatrix = {
      tn: {
        count: rawTn,
        pct: actualNegTotal > 0 ? rawTn / actualNegTotal * 100 : 0
      },
      fp: {
        count: rawFp,
        pct: actualNegTotal > 0 ? rawFp / actualNegTotal * 100 : 0
      },
      fn: {
        count: rawFn,
        pct: actualPosTotal > 0 ? rawFn / actualPosTotal * 100 : 0
      },
      tp: {
        count: rawTp,
        pct: actualPosTotal > 0 ? rawTp / actualPosTotal * 100 : 0
      }
    };
    showCounts = true;
  } else if (hasPcts) {
    resolvedMatrix = {
      tn: {
        pct: rawTnPct
      },
      fp: {
        pct: rawFpPct
      },
      fn: {
        pct: rawFnPct
      },
      tp: {
        pct: rawTpPct
      }
    };
    showCounts = false;
  } else {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanConfusionMatrix: Provide either all counts or all percentages</div>;
  }
  const cellStyle = pct => ({
    background: getBg(pct),
    color: getColor(pct),
    padding: "1rem",
    textAlign: "center",
    borderRadius: "8px",
    aspectRatio: "1 / 1",
    width: "100%",
    display: "flex",
    flexDirection: "column",
    alignItems: "center",
    justifyContent: "center",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  });
  const displayPredictedLabels = {
    left: predictedPositiveLabel,
    right: predictedNegativeLabel
  };
  const displayActualLabels = {
    top: actualPositiveLabel,
    bottom: actualNegativeLabel
  };
  const displayMatrix = {
    tl: resolvedMatrix.tp,
    tr: resolvedMatrix.fn,
    bl: resolvedMatrix.fp,
    br: resolvedMatrix.tn
  };
  return <div style={{
    maxWidth: maxWidth + "px",
    margin: "1rem 0"
  }}>
      <div style={{
    display: "grid",
    gridTemplateColumns: "auto auto 1fr 1fr",
    gridTemplateRows: "auto auto auto 1fr 1fr auto",
    gap: "2px"
  }}>
        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "1rem"
  }}>
          {titlePrefix}Confusion Matrix (Normalized)
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "0.875rem"
  }}>
          Predicted
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.left}</div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.right}</div>

        {}
        <div style={{
    gridRow: "4 / 6",
    writingMode: "vertical-rl",
    transform: "rotate(180deg)",
    textAlign: "center",
    fontWeight: "600",
    fontSize: "0.875rem",
    padding: "0 0.5rem",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>
          Actual
        </div>
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.top}</div>
        <div style={cellStyle(displayMatrix.tl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.tr.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tr.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tr.pct)}</div>
        </div>

        {}
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.bottom}</div>
        <div style={cellStyle(displayMatrix.bl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.bl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.bl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.br.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.br.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.br.pct)}</div>
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    marginTop: "0.5rem",
    display: "flex",
    alignItems: "center",
    gap: "0.5rem"
  }}>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "0.0" : "0%"}</span>
          <div style={{
    display: "flex",
    flex: 1,
    height: "12px",
    borderRadius: "4px",
    overflow: "hidden",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  }}>
            {palette.map((color, idx) => <div key={idx} style={{
    flex: 1,
    height: "100%",
    background: color
  }} />)}
          </div>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "1.0" : "100%"}</span>
        </div>
      </div>
    </div>;
};

export const MetricWhenToUse = ({description, useCases}) => {
  return <Card>
      <div style={{
    display: 'flex',
    alignItems: 'center',
    gap: '0.5rem',
    marginBottom: '0.75rem'
  }}>
        <div style={{
    fontSize: '1.25rem',
    color: 'var(--primary-color)'
  }}>
          <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
            <path d="M12 22c5.523 0 10-4.477 10-10S17.523 2 12 2 2 6.477 2 12s4.477 10 10 10z" />
            <path d="m9 12 2 2 4-4" />
          </svg>
        </div>
        <h3 style={{
    margin: 0,
    fontSize: '1.25rem',
    fontWeight: '600'
  }}>When to Use This Metric</h3>
      </div>

      {description}

      {useCases != null && useCases.map((useCase, index) => <div key={index} style={{
    marginTop: "1rem",
    paddingTop: "0.75rem",
    borderTop: "1px solid rgba(209, 213, 219, 0.33)"
  }}>
          <strong>{useCase.title}</strong>{useCase.description ? `: ${useCase.description}` : ''}
        </div>)}
    </Card>;
};

export const DefinitionCard = ({children}) => {
  return <Card variant="secondary">
    <div style={{
    padding: '0.5rem',
    border: '5px solid var(--primary-light)',
    borderRadius: '0.5rem',
    fontSize: '1.3rem',
    lineHeight: '1.4',
    boxShadow: '0 0 10px 10px var(--primary-light)'
  }}>
        {children}
      </div>

</Card>;
};

export const Pill = ({label, color, backgroundColor}) => <span style={{
  display: "inline-block",
  backgroundColor: backgroundColor ?? "#C0C0C0",
  color: color ?? "#333",
  padding: "2px 8px",
  borderRadius: "12px",
  fontSize: "12px",
  fontWeight: "500",
  lineHeight: "1"
}}>
    {label}
  </span>;

<DefinitionCard>
  <strong>Visual Quality</strong> is a binary metric that judges whether the quality of an input image / PDF in an LLM span is sufficient for the task described in the adjoining text prompt to be reliably performed.
</DefinitionCard>

The Visual Quality metric is task-grounded rather than purely aesthetic: an image / PDF is considered high quality only if the specific task can be completed reliably from the visual evidence. An image / PDF is considered low quality when blur, glare, compression artifacts, darkness, occlusion, crop, distortion, or other degradation makes task-critical information unprocessable.

## Visual Quality at a glance

| Property                       | Description        |
| :----------------------------- | :----------------- |
| **Name**                       | Visual Quality     |
| **Category**                   | Multimodal Quality |
| **Metric Level**               | LLM Span           |
| **LLM-as-a-judge Support**     | ✅                  |
| **Luna Support**               | ❌                  |
| **Protect Runtime Protection** | ❌                  |
| **Value Type**                 | boolean            |

## Score interpretation

| Score     | Label        | Meaning                                                                                                 |
| :-------- | :----------- | :------------------------------------------------------------------------------------------------------ |
| **False** | Low Quality  | The quality of the image / PDF is low and information required for performing the task is unprocessable |
| **True**  | High Quality | The quality of the image / PDF is high and performing the task is feasible                              |

## When to use this metric

<MetricWhenToUse
  description="Visual Quality is useful when your pipeline passes user-supplied or externally-sourced images into an LLM span alongside a text task prompt"
  useCases={[
{
  title: "Document and form processing",
  description: "Detect scanned or photographed documents where blur, skew, or low resolution would prevent reliable OCR or data extraction.",
},
{
  title: "Visual question answering",
  description: "Ensure input images are clear enough for the model to answer questions about specific details such as labels, faces, or charts.",
},
{
  title: "Product or asset inspection",
  description: "Flag images where lighting, occlusion, or compression artifacts prevent reliable defect detection or classification.",
},
{
  title: "Pre-processing quality gate",
  description: "Use as an upstream filter to skip or re-request images before passing them to downstream analysis steps.",
},
]}
/>

## Example scenario

<Card>
  <div style={{display: 'flex', alignItems: 'center', gap: '0.5rem', marginBottom: '0.75rem'}}>
    <div style={{fontSize: '1.25rem', color: 'var(--primary-color)'}}>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
        <path d="M12 22c5.523 0 10-4.477 10-10S17.523 2 12 2 2 6.477 2 12s4.477 10 10 10z" />

        <path d="m9 12 2 2 4-4" />
      </svg>
    </div>

    <h3 style={{margin: 0, fontSize: '1.25rem', fontWeight: '600'}}>Reading a serial number from a photo</h3>
  </div>

  <div style={{ marginTop: "1rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Task prompt:</strong> “Read the device serial number and return it exactly.”
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>
      High Quality (<Pill label="High Quality" />
      ):
    </strong>

    {" "}

    The serial number region is in focus, not occluded, and the characters are legible.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>
      Low Quality (<Pill label="Low Quality" />
      ):
    </strong>

    {" "}

    Motion blur or glare obscures the serial number region, making characters unreadable and the task infeasible.
  </div>
</Card>

## Inputs considered

The evaluator examines the following when available:

* The raw input image provided to the LLM span
* The text prompt that defines the task the model is expected to perform on that image

Quality is evaluated strictly with respect to whether the task described in the prompt can be completed; a low-resolution image that is perfectly adequate for one task may be insufficient for another.

## Calculation method

Visual Quality is computed through a multi-step process:

<Steps>
  <Step title="Task grounding">The evaluator reads the adjoining text prompt to identify what information in the image is required to complete the task.</Step>
  <Step title="Visual evidence review">The evaluator inspects the image for quality issues (blur, glare, compression, darkness, occlusion, crop, distortion) that affect task-critical regions.</Step>

  <Step title="Binary decision">
    The evaluator returns a binary label: <Pill label="True" /> if the task is feasible from the visual evidence, otherwise <Pill label="False" />.
  </Step>
</Steps>

<Note>This metric is typically computed by prompting an LLM with access to the image and the text prompt, which may require additional LLM calls to compute and can impact usage and billing.</Note>

## Best practices

<CardGroup cols={2}>
  <Card title="Duplicate and create custom metric" icon="link">
    We recommend you to duplicate this metric, modify the prompt and define your task in the prompt for better performance.
  </Card>

  <Card title="Use metric as a diagnostic tool" icon="chart-line">
    Visual Quality helps you diagnose why a certain action wasn't completed by the agent, by telling if image quality was the reason.
  </Card>

  <Card title="Combine with Action Completion" icon="layer-group">
    Use Action Completion to check if your agent is completing the user asks, then use Visual Quality to diagnose where action completion is low.
  </Card>
</CardGroup>

## Performance Benchmarks

We evaluated Visual Quality against human expert labels on an internal dataset of varied samples using top frontier models.

| Model             | F1 (True) |
| :---------------- | :-------: |
| GPT-4.1           |    0.84   |
| Claude Sonnet 4.6 |    0.81   |
| Gemini 3.1 Flash  |    0.85   |

## Related Resources

If you would like to dive deeper or start implementing Visual Quality, check out the following resources:

### Examples

* [Visual Quality Examples](https://app.galileo.ai) - Log in and explore the "Visual Quality" Log Stream in the "Preset Metric Examples" Project to see this metric in action.

### Related Concepts

* [Action Completion](/concepts/metrics/agentic/action-advancement)
