> ## Documentation Index
> Fetch the complete documentation index at: https://docs.galileo.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# PII

> Detect and protect personally identifiable information in AI systems using Galileo's PII Metric to identify sensitive data and implement appropriate safeguards

export const MultiLabelConfusionMatrix = ({report, labelOrder, labelDisplayNames = {}, decimals = 4, maxWidth = 520, microNegativeLabel = "False", microPositiveLabel = "True", showPerLabelMatrices = true}) => {
  const toNum = v => {
    if (v == null) return undefined;
    const n = Number(v);
    return Number.isFinite(n) ? n : undefined;
  };
  const clamp01 = v => Math.max(0, Math.min(1, v));
  const sumVals = (obj, keys) => (keys || Object.keys(obj || ({}))).reduce((a, k) => a + (toNum(obj?.[k]) ?? 0), 0);
  const getLabels = (lo, pcs) => {
    if (Array.isArray(lo) && lo.length) return lo;
    if (pcs && typeof pcs === "object") return Object.keys(pcs);
    return [];
  };
  const deriveCM = ({precision, recall, positiveSupport, negativeSupport}) => {
    const P = toNum(positiveSupport), N = toNum(negativeSupport), prec = toNum(precision), rec = toNum(recall);
    if (P === undefined || N === undefined || prec === undefined || rec === undefined || P < 0 || N < 0) return null;
    const tp = clamp01(rec) * P, fn = P - tp;
    let fp = clamp01(prec) > 0 ? tp / clamp01(prec) - tp : 0;
    if (!Number.isFinite(fp) || fp < 0) fp = 0;
    if (fp > N) fp = N;
    const tn = N - fp;
    return {
      tnPct: N > 0 ? tn / N * 100 : 0,
      fpPct: N > 0 ? fp / N * 100 : 0,
      fnPct: P > 0 ? fn / P * 100 : 0,
      tpPct: P > 0 ? tp / P * 100 : 0
    };
  };
  const labels = getLabels(labelOrder, report?.per_class_support);
  const perSupport = report?.per_class_support || ({});
  const perNegSupport = report?.per_class_negative_support || ({});
  if (!report || labels.length === 0) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>
        MultiLabelConfusionMatrix: Missing or invalid report/labels.
      </div>;
  }
  const totalPositiveSupport = sumVals(perSupport, labels);
  const totalNegativeSupport = sumVals(perNegSupport, labels);
  const microMatrix = deriveCM({
    precision: report.micro_precision,
    recall: report.micro_recall,
    positiveSupport: totalPositiveSupport,
    negativeSupport: totalNegativeSupport
  });
  return <div>
      {microMatrix ? <BooleanConfusionMatrix actualPositiveLabel={microPositiveLabel} actualNegativeLabel={microNegativeLabel} predictedPositiveLabel={microPositiveLabel} predictedNegativeLabel={microNegativeLabel} matrix={{
    tp: {
      pct: microMatrix.tpPct
    },
    fn: {
      pct: microMatrix.fnPct
    },
    fp: {
      pct: microMatrix.fpPct
    },
    tn: {
      pct: microMatrix.tnPct
    }
  }} displayFormat="fraction" fractionDigits={decimals} maxWidth={maxWidth} titlePrefix="Micro-Averaged " /> : <div style={{
    color: "red",
    padding: "1rem",
    border: "1px solid red"
  }}>
          MultiLabelConfusionMatrix: Could not derive micro confusion matrix from report.
        </div>}

      {showPerLabelMatrices && <>
          <div style={{
    fontWeight: "600",
    fontSize: "0.95rem",
    margin: "1.25rem 0 0.5rem"
  }}>Per-label confusion matrices</div>
          <div style={{
    display: "grid",
    gridTemplateColumns: "repeat(auto-fit, minmax(280px, 1fr))",
    gap: "1.25rem"
  }}>
            {labels.map(label => {
    const labelName = labelDisplayNames?.[label] ?? label;
    const matrix = deriveCM({
      precision: report?.per_class_precision?.[label],
      recall: report?.per_class_recall?.[label],
      positiveSupport: perSupport?.[label],
      negativeSupport: perNegSupport?.[label]
    });
    const negativeLabel = `Not ${labelName}`;
    return <div key={label}>
                  <div style={{
      fontWeight: "600",
      fontSize: "0.875rem",
      marginBottom: "0.25rem"
    }}>{labelName}</div>
                  {matrix ? <BooleanConfusionMatrix actualPositiveLabel={labelName} actualNegativeLabel={negativeLabel} predictedPositiveLabel={labelName} predictedNegativeLabel={negativeLabel} matrix={{
      tp: {
        pct: matrix.tpPct
      },
      fn: {
        pct: matrix.fnPct
      },
      fp: {
        pct: matrix.fpPct
      },
      tn: {
        pct: matrix.tnPct
      }
    }} displayFormat="fraction" fractionDigits={decimals} maxWidth={maxWidth} /> : <div style={{
      color: "red",
      padding: "0.75rem",
      border: "1px solid red"
    }}>
                      Could not derive confusion matrix for label: <code>{label}</code>
                    </div>}
                </div>;
  })}
          </div>
        </>}
    </div>;
};

export const MultiLabelClassificationReport = ({report: reportProp, labelOrder: labelOrderProp, labelDisplayNames: labelDisplayNamesProp = {}, decimals = 4, maxWidth = 520, showConfusionMatrices = true, showPerLabelMatrices = true, showAverageRows = true}) => {
  const toNum = v => {
    if (v == null) return undefined;
    const n = Number(v);
    return Number.isFinite(n) ? n : undefined;
  };
  const getLabels = (lo, pcs) => {
    if (Array.isArray(lo) && lo.length) return lo;
    if (pcs && typeof pcs === "object") return Object.keys(pcs);
    return [];
  };
  const fmtMetric = (v, d) => {
    const n = toNum(v);
    if (n === undefined) return "—";
    return n.toFixed(Number.isFinite(Number(d)) ? Number(d) : 4);
  };
  let report, labelOrder, labelDisplayNames;
  try {
    report = typeof reportProp === 'string' ? JSON.parse(reportProp) : reportProp;
    labelOrder = typeof labelOrderProp === 'string' ? JSON.parse(labelOrderProp) : labelOrderProp;
    labelDisplayNames = typeof labelDisplayNamesProp === 'string' ? JSON.parse(labelDisplayNamesProp) : labelDisplayNamesProp;
  } catch (e) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>
        MultiLabelClassificationReport: JSON parse error - {e.message}
      </div>;
  }
  const labels = getLabels(labelOrder, report?.per_class_support);
  if (!report || labels.length === 0) {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>
        MultiLabelClassificationReport: Missing or invalid report/labels.
      </div>;
  }
  const rowStyle = {
    borderBottom: "1px solid rgba(148, 163, 184, 0.3)"
  };
  const cellStyle = {
    padding: "0.5rem 0.125rem"
  };
  const centerCellStyle = {
    textAlign: "center",
    padding: "0.5rem 0.125rem"
  };
  const avgRowStyle = {
    ...rowStyle,
    background: "rgba(148, 163, 184, 0.08)",
    fontWeight: 600
  };
  return <div>
      <table style={{
    width: "auto",
    borderCollapse: "collapse",
    marginBottom: "1.25rem",
    fontSize: "0.875rem"
  }}>
        <thead>
          <tr style={{
    borderBottom: "2px solid rgba(148, 163, 184, 0.5)"
  }}>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}></th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Precision</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>Recall</th>
            <th style={{
    textAlign: "center",
    padding: "0.5rem 0.125rem",
    fontWeight: "600"
  }}>F1-Score</th>
          </tr>
        </thead>
        <tbody>
          {labels.map(label => {
    const labelName = labelDisplayNames?.[label] ?? label;
    return <tr key={label} style={rowStyle}>
                <td style={cellStyle}>{labelName}</td>
                <td style={centerCellStyle}>{fmtMetric(report?.per_class_precision?.[label], decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report?.per_class_recall?.[label], decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report?.per_class_f1?.[label], decimals)}</td>
              </tr>;
  })}

          {showAverageRows && <>
              <tr style={avgRowStyle}>
                <td style={cellStyle}>Micro avg</td>
                <td style={centerCellStyle}>{fmtMetric(report.micro_precision, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.micro_recall, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.micro_f1, decimals)}</td>
              </tr>
              <tr style={avgRowStyle}>
                <td style={cellStyle}>Macro avg</td>
                <td style={centerCellStyle}>{fmtMetric(report.macro_precision, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.macro_recall, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.macro_f1, decimals)}</td>
              </tr>
              <tr style={avgRowStyle}>
                <td style={cellStyle}>Weighted avg</td>
                <td style={centerCellStyle}>{fmtMetric(report.weighted_precision, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.weighted_recall, decimals)}</td>
                <td style={centerCellStyle}>{fmtMetric(report.weighted_f1, decimals)}</td>
              </tr>
            </>}
        </tbody>
      </table>

      {showConfusionMatrices && <MultiLabelConfusionMatrix report={report} labelOrder={labels} labelDisplayNames={labelDisplayNames} decimals={decimals} maxWidth={maxWidth} showPerLabelMatrices={showPerLabelMatrices} />}
    </div>;
};

export const BooleanConfusionMatrix = ({actualNegativeLabel = "Not Advanced", actualPositiveLabel = "Advanced", predictedNegativeLabel = "Not Advanced", predictedPositiveLabel = "Advanced", tnCount, tnPct, fpCount, fpPct, fnCount, fnPct, tpCount, tpPct, matrix, maxWidth = 520, displayFormat = "percentage", fractionDigits = 3, percentageDigits = 1, titlePrefix = ""}) => {
  const parseNum = val => val !== undefined && val !== null ? Number(val) : undefined;
  const clampPct = pct => Math.max(0, Math.min(100, Number(pct) || 0));
  const formatValue = pct => {
    const p = clampPct(pct);
    if (displayFormat === "fraction") {
      const digits = Number.isFinite(Number(fractionDigits)) ? Number(fractionDigits) : 3;
      return (p / 100).toFixed(digits);
    }
    const digits = Number.isFinite(Number(percentageDigits)) ? Number(percentageDigits) : 1;
    return `${p.toFixed(digits)}%`;
  };
  const palette = ["#f8fafc", "#eff6ff", "#dbeafe", "#bfdbfe", "#93c5fd", "#60a5fa", "#3b82f6", "#2563eb", "#1d4ed8", "#1e40af"];
  const getBg = pct => {
    const p = clampPct(pct);
    const idx = p === 100 ? 9 : Math.floor(p / 10);
    return palette[idx];
  };
  const getColor = pct => clampPct(pct) >= 60 ? "#ffffff" : "#1e3a8a";
  const rawTn = parseNum(tnCount);
  const rawFp = parseNum(fpCount);
  const rawFn = parseNum(fnCount);
  const rawTp = parseNum(tpCount);
  const rawTnPct = parseNum(tnPct);
  const rawFpPct = parseNum(fpPct);
  const rawFnPct = parseNum(fnPct);
  const rawTpPct = parseNum(tpPct);
  const hasCounts = rawTn !== undefined && rawFp !== undefined && rawFn !== undefined && rawTp !== undefined;
  const hasPcts = rawTnPct !== undefined && rawFpPct !== undefined && rawFnPct !== undefined && rawTpPct !== undefined;
  let resolvedMatrix;
  let showCounts;
  if (matrix) {
    resolvedMatrix = matrix;
    showCounts = matrix.tn?.count !== undefined;
  } else if (hasCounts) {
    const actualNegTotal = rawTn + rawFp;
    const actualPosTotal = rawFn + rawTp;
    resolvedMatrix = {
      tn: {
        count: rawTn,
        pct: actualNegTotal > 0 ? rawTn / actualNegTotal * 100 : 0
      },
      fp: {
        count: rawFp,
        pct: actualNegTotal > 0 ? rawFp / actualNegTotal * 100 : 0
      },
      fn: {
        count: rawFn,
        pct: actualPosTotal > 0 ? rawFn / actualPosTotal * 100 : 0
      },
      tp: {
        count: rawTp,
        pct: actualPosTotal > 0 ? rawTp / actualPosTotal * 100 : 0
      }
    };
    showCounts = true;
  } else if (hasPcts) {
    resolvedMatrix = {
      tn: {
        pct: rawTnPct
      },
      fp: {
        pct: rawFpPct
      },
      fn: {
        pct: rawFnPct
      },
      tp: {
        pct: rawTpPct
      }
    };
    showCounts = false;
  } else {
    return <div style={{
      color: "red",
      padding: "1rem",
      border: "1px solid red"
    }}>BooleanConfusionMatrix: Provide either all counts or all percentages</div>;
  }
  const cellStyle = pct => ({
    background: getBg(pct),
    color: getColor(pct),
    padding: "1rem",
    textAlign: "center",
    borderRadius: "8px",
    aspectRatio: "1 / 1",
    width: "100%",
    display: "flex",
    flexDirection: "column",
    alignItems: "center",
    justifyContent: "center",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  });
  const displayPredictedLabels = {
    left: predictedPositiveLabel,
    right: predictedNegativeLabel
  };
  const displayActualLabels = {
    top: actualPositiveLabel,
    bottom: actualNegativeLabel
  };
  const displayMatrix = {
    tl: resolvedMatrix.tp,
    tr: resolvedMatrix.fn,
    bl: resolvedMatrix.fp,
    br: resolvedMatrix.tn
  };
  return <div style={{
    maxWidth: maxWidth + "px",
    margin: "1rem 0"
  }}>
      <div style={{
    display: "grid",
    gridTemplateColumns: "auto auto 1fr 1fr",
    gridTemplateRows: "auto auto auto 1fr 1fr auto",
    gap: "2px"
  }}>
        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "1rem"
  }}>
          {titlePrefix}Confusion Matrix (Normalized)
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    textAlign: "center",
    padding: "0.5rem",
    fontWeight: "600",
    fontSize: "0.875rem"
  }}>
          Predicted
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.left}</div>
        <div style={{
    textAlign: "center",
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>{displayPredictedLabels.right}</div>

        {}
        <div style={{
    gridRow: "4 / 6",
    writingMode: "vertical-rl",
    transform: "rotate(180deg)",
    textAlign: "center",
    fontWeight: "600",
    fontSize: "0.875rem",
    padding: "0 0.5rem",
    display: "flex",
    alignItems: "center",
    justifyContent: "center"
  }}>
          Actual
        </div>
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.top}</div>
        <div style={cellStyle(displayMatrix.tl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.tr.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.tr.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.tr.pct)}</div>
        </div>

        {}
        <div style={{
    padding: "0.5rem",
    fontSize: "0.75rem",
    fontWeight: "500",
    display: "flex",
    alignItems: "center",
    justifyContent: "flex-end"
  }}>{displayActualLabels.bottom}</div>
        <div style={cellStyle(displayMatrix.bl.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.bl.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.bl.pct)}</div>
        </div>
        <div style={cellStyle(displayMatrix.br.pct)}>
          {showCounts && <div style={{
    fontSize: "1.5rem",
    fontWeight: "700",
    lineHeight: 1
  }}>{displayMatrix.br.count}</div>}
          <div style={{
    fontSize: showCounts ? "0.75rem" : "1rem",
    fontWeight: showCounts ? "400" : "700",
    opacity: showCounts ? 0.8 : 1
  }}>{formatValue(displayMatrix.br.pct)}</div>
        </div>

        {}
        <div></div>
        <div></div>
        <div style={{
    gridColumn: "3 / 5",
    marginTop: "0.5rem",
    display: "flex",
    alignItems: "center",
    gap: "0.5rem"
  }}>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "0.0" : "0%"}</span>
          <div style={{
    display: "flex",
    flex: 1,
    height: "12px",
    borderRadius: "4px",
    overflow: "hidden",
    border: "1px solid rgba(148, 163, 184, 0.35)"
  }}>
            {palette.map((color, idx) => <div key={idx} style={{
    flex: 1,
    height: "100%",
    background: color
  }} />)}
          </div>
          <span style={{
    fontSize: "0.75rem",
    fontWeight: "500"
  }}>{displayFormat === "fraction" ? "1.0" : "100%"}</span>
        </div>
      </div>
    </div>;
};

export const DefinitionCard = ({children}) => {
  return <Card variant="secondary">
    <div style={{
    padding: '0.5rem',
    border: '5px solid var(--primary-light)',
    borderRadius: '0.5rem',
    fontSize: '1.3rem',
    lineHeight: '1.4',
    boxShadow: '0 0 10px 10px var(--primary-light)'
  }}>
        {children}
      </div>

</Card>;
};

export const Scale = ({low, mid, high, lowLabel = "Low", midLabel = "Mid", highLabel = "High", lowDescription, midDescription, highDescription, midColor = "yellow", inverted = false}) => {
  const lowColor = inverted ? "green" : "red";
  const highColor = inverted ? "red" : "green";
  const gradientId = inverted ? "greenToRed" : "redToGreen";
  return <div style={{
    display: 'flex',
    flexDirection: 'column',
    width: '100%'
  }}>
      <svg width="100%" height="30" style={{
    marginBottom: '8px'
  }}>
        <defs>
          <linearGradient id={gradientId} x1="0%" y1="0%" x2="100%" y2="0%">
            <stop offset="0%" stopColor={lowColor} />
            <stop offset="100%" stopColor={highColor} />
          </linearGradient>
        </defs>
        <rect width="100%" height="100%" fill={`url(#${gradientId})`} rx="4" ry="4" />
      </svg>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%',
    marginBottom: '16px'
  }}>
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{low}</p>
        {mid && <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{mid}</p>}
        <p style={{
    margin: 0,
    fontSize: '12px'
  }}>{high}</p>
      </div>

      <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    width: '100%'
  }}>
        <div style={{
    maxWidth: '40%'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    marginBottom: '4px'
  }}>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: lowColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{lowLabel}</p>
          </div>
          {lowDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    lineHeight: '1.4'
  }}>{lowDescription}</p>}
        </div>
        {mid && <div style={{
    maxWidth: '40%',
    textAlign: 'center'
  }}>
            <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'center',
    marginBottom: '4px'
  }}>
              <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: midColor,
    borderRadius: '50%',
    marginRight: '8px'
  }}></div>
              <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{midLabel}</p>
            </div>
            {midDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    textAlign: 'center',
    lineHeight: '1.4'
  }}>{midDescription}</p>}
          </div>}


        <div style={{
    maxWidth: '40%',
    textAlign: 'right'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'flex-end',
    marginBottom: '4px'
  }}>
            <p style={{
    margin: 0,
    fontWeight: 'bold',
    fontSize: '14px'
  }}>{highLabel}</p>
            <div style={{
    width: '12px',
    height: '12px',
    backgroundColor: highColor,
    borderRadius: '50%',
    marginLeft: '8px'
  }}></div>
          </div>
          {highDescription && <p style={{
    margin: 0,
    fontSize: '14px',
    color: '#666',
    maxWidth: '250px',
    marginLeft: 'auto',
    lineHeight: '1.4'
  }}>{highDescription}</p>}
        </div>
      </div>
    </div>;
};

<DefinitionCard>
  <strong>PII Detection</strong> identifies personally identifiable information spans within a sample (both input and output).
</DefinitionCard>

This metric is particularly valuable for identifying sensitive personal data that may require special handling or protection. Detecting PII is essential for compliance with privacy regulations and protecting user information.

## Calculation method

PII detection is computed through a specialized process:

<Steps>
  <Step title="Model Foundation">
    A specialized Small Language Model (SLM) trained on proprietary datasets forms the core of the detection system, enabling accurate identification of various PII types.
  </Step>

  <Step title="Content Analysis">
    The system performs comprehensive scanning of both input and output text, utilizing pattern recognition and contextual analysis to identify potential PII occurrences.
  </Step>

  <Step title="Classification Process">
    Each detected PII instance is systematically categorized by its specific type (e.g., SSN, email, address) and assigned a confidence score based on the detection certainty.
  </Step>

  <Step title="Visual Reporting">
    Results are displayed through an interactive interface that highlights PII instances directly in the text, making it easy to identify and review sensitive information locations.
  </Step>
</Steps>

To highlight which parts of the text were detected as PII, click on the icon next to the PII metric value. The type of PII detected, along with the model's confidence, will be shown on the input or output text.

<Card>
  <div style={{display: 'flex', alignItems: 'center', gap: '0.5rem', marginBottom: '0.75rem'}}>
    <div style={{fontSize: '1.25rem', color: 'var(--primary-color)'}}>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
        <path d="M12 22c5.523 0 10-4.477 10-10S17.523 2 12 2 2 6.477 2 12s4.477 10 10 10z" />

        <path d="m9 12 2 2 4-4" />
      </svg>
    </div>

    <h3 style={{margin: 0, fontSize: '1.25rem', fontWeight: '600'}}>PII Categories Detected</h3>
  </div>

  The current model detects the following precisely defined categories:

  <div style={{ marginTop: "1rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Account Information:</strong> Bank account numbers, Bank Identification Code (BIC) and International Bank Account Number (IBAN).
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Address:</strong> A physical address. Must contain at least a street name and number, and may contain extra elements such as city, zip code, state, etc.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Credit Card:</strong> Credit card number (can be full or last 4 digits), Card Verification Value (CVV) and expiration date.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Date of Birth:</strong> This represents the day, month and year a person was born. The context should make it clear that it's someone's birthdate.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Email:</strong> An email address.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Name:</strong> A person's full name. It must consist of at least a first and last name to be considered PII.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Network Information:</strong> IPv4, IPv6 and MAC addresses.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Password:</strong> A password.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Phone Number:</strong> A phone number.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Social Security Number (SSN):</strong> A US Social Security Number.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Username:</strong> A username.
  </div>
</Card>

## Optimizing your AI system

<Card>
  <div style={{display: 'flex', alignItems: 'center', gap: '0.5rem', marginBottom: '0.75rem'}}>
    <div style={{fontSize: '1.25rem', color: 'var(--primary-color)'}}>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
        <path d="M12 20h9" />

        <path d="M16.5 3.5a2.121 2.121 0 0 1 3 3L7 19l-4 1 1-4L16.5 3.5z" />
      </svg>
    </div>

    <h3 style={{margin: 0, fontSize: '1.25rem', fontWeight: '600'}}>Addressing PII in Your System</h3>
  </div>

  When PII is detected in your system, consider these approaches:

  <div style={{ marginTop: "1rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Implement data redaction:</strong> Automatically mask or remove PII before processing or storing data.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Create PII handling policies:</strong> Develop clear guidelines for how different types of PII should be processed.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Set up user consent flows:</strong> Ensure users understand when and how their PII might be used.
  </div>

  <div style={{ marginTop: "0.75rem", paddingTop: "0.75rem", borderTop: "1px solid rgba(209, 213, 219, 0.33)" }}>
    <strong>Establish data retention policies:</strong> Define how long different types of PII should be stored.
  </div>
</Card>

## Performance Benchmarks

We evaluated PII Detection against gold labels on the "test" split of the [rungalileo/pii](https://huggingface.co/datasets/rungalileo/pii) dataset using top frontier models. This dataset was created from the open-source [gretelai/synthetic\_pii\_finance\_multilingual](https://huggingface.co/datasets/gretelai/synthetic_pii_finance_multilingual) dataset by remapping its labels.

| Model                  | Macro F1 |
| :--------------------- | :------: |
| GPT-4.1                |   0.84   |
| GPT-4.1 Mini           |   0.83   |
| Gemini 3 Flash Preview |   0.85   |
| Claude Sonnet 4.5      |   0.83   |

### Classification Report

<MultiLabelClassificationReport report={`{"micro_f1":0.8995,"micro_precision":0.8842,"micro_recall":0.9153,"macro_f1":0.8402,"macro_precision":0.8129,"macro_recall":0.8857,"weighted_f1":0.9068,"weighted_precision":0.9054,"weighted_recall":0.9153,"per_class_f1":{"account_info":0.6048,"address":0.9067,"credit_card_info":0.7961,"date_of_birth":0.8296,"email":0.9706,"name":0.9571,"network_info":0.9589,"password":0.8369,"phone_number":0.9179,"ssn":0.8923,"username":0.5714},"per_class_precision":{"account_info":0.4765,"address":0.9104,"credit_card_info":0.7658,"date_of_birth":0.7414,"email":0.9763,"name":0.9844,"network_info":0.9722,"password":0.8447,"phone_number":0.8729,"ssn":0.9355,"username":0.4615},"per_class_recall":{"account_info":0.8274,"address":0.9030,"credit_card_info":0.8288,"date_of_birth":0.9416,"email":0.9649,"name":0.9312,"network_info":0.9459,"password":0.8293,"phone_number":0.9677,"ssn":0.8529,"username":0.7500},"per_class_support":{"account_info":307,"address":1711,"credit_card_info":146,"date_of_birth":137,"email":427,"name":2036,"network_info":111,"password":164,"phone_number":433,"ssn":68,"username":24},"per_class_negative_support":{"account_info":2169,"address":765,"credit_card_info":2330,"date_of_birth":2339,"email":2049,"name":440,"network_info":2365,"password":2312,"phone_number":2043,"ssn":2408,"username":2452}}`} labelOrder={`["name","address","email","phone_number","account_info","credit_card_info","date_of_birth","network_info","password","ssn","username"]`} decimals={4} showConfusionMatrices={true} showPerLabelMatrices={false} showAverageRows={false} />

<Note>
  Benchmarks are based on the [rungalileo/pii](https://huggingface.co/datasets/rungalileo/pii) evaluation dataset. Performance may vary by use case.
</Note>

## Best practices

<CardGroup cols={2}>
  <Card title="Real-time PII Detection" icon="shield-halved">
    Implement PII detection as part of your input validation pipeline to catch sensitive information before processing.
  </Card>

  <Card title="Data Minimization" icon="filter">
    Only collect and process the minimum amount of PII necessary for your application's functionality.
  </Card>

  <Card title="Secure Storage" icon="lock">
    When PII must be stored, ensure it's properly encrypted and access is strictly controlled.
  </Card>

  <Card title="Regular Audits" icon="clipboard-check">
    Periodically review your system for unintended PII exposure or collection.
  </Card>
</CardGroup>

<Note>
  Automatically identify PII occurrences in any part of the workflow (user input, chains, model output, etc.), and respond accordingly by implementing guardrails or other preventative measures. This helps ensure compliance with privacy regulations like GDPR, CCPA, and others.
</Note>
