Skip to content

IDD#

Instruction quality scoring using the Intent-Driven Development rubric.

inspect_coco.idd #

IDD (Intent-Driven Development) instruction scoring.

IDDScore(total, goal, requirements, constraints, output, ambiguity_count, specificity) dataclass #

Composite IDD score with per-criterion breakdown.

score_instruction(text) #

Score an instruction.md against IDD criteria.

Performs heuristic analysis (no LLM call) checking for the presence of Goal, Requirements, Constraints, and Output/Success criteria.

Parameters:

Name Type Description Default
text str

The instruction markdown content.

required

Returns:

Type Description
IDDScore

IDDScore with 0.0-1.0 total and per-criterion breakdown.

Source code in src/inspect_coco/idd/scorer.py
def score_instruction(text: str) -> IDDScore:
    """Evaluate instruction markdown against the four IDD criteria.

    The analysis is purely heuristic (no LLM call): each of Goal,
    Requirements, Constraints, and Output/Success criteria is scored by
    its own helper, and the total is their unweighted mean. Ambiguity is
    measured separately and reported as ``ambiguity_count`` and
    ``specificity``; it does not feed into ``total``.

    Args:
        text: The instruction markdown content.

    Returns:
        IDDScore whose ``total`` and ``specificity`` are rounded to two
        decimals, along with the per-criterion results.
    """
    lowered = text.lower()
    text_lines = text.splitlines()

    # Run the criterion scorers in a fixed order: goal, requirements,
    # constraints, output.
    goal, requirements, constraints, output = (
        scorer(lowered, text_lines)
        for scorer in (
            _score_goal,
            _score_requirements,
            _score_constraints,
            _score_output,
        )
    )

    # Ambiguity density = vague-word hits per word; the `or 1` guards the
    # division when the text contains no words at all.
    vague_hits = _count_ambiguity(lowered)
    n_words = len(text.split()) or 1
    density = vague_hits / n_words
    # The x20 multiplier penalizes heavily: a density of 5% or more
    # drives specificity down to the 0.0 floor.
    specificity = max(0.0, 1.0 - (density * 20))

    # Equal-weight average over the four criteria.
    criteria = (goal, requirements, constraints, output)
    mean_score = sum(c.score for c in criteria) / 4.0

    return IDDScore(
        total=round(mean_score, 2),
        goal=goal,
        requirements=requirements,
        constraints=constraints,
        output=output,
        ambiguity_count=vague_hits,
        specificity=round(specificity, 2),
    )

explain_score(score, threshold=0.6) #

Generate explanatory teaching feedback for an IDD score.

Produces per-criterion feedback showing what's present, what's missing, and concrete suggestions for improvement. Designed to teach users how to write better instructions through use.

Parameters:

Name Type Description Default
score IDDScore

The IDDScore to explain.

required
threshold float

The passing threshold for display purposes.

0.6

Returns:

Type Description
str

Formatted string with explanatory feedback.

Source code in src/inspect_coco/idd/explainer.py
def explain_score(score: IDDScore, threshold: float = 0.6) -> str:
    """Render an IDD score as human-readable teaching feedback.

    The report starts with a header line giving the total, pass/fail
    status, and threshold. It then lists each criterion with a ``+``
    (found) or ``-`` (missing) marker and its explanation; missing
    criteria are followed by their suggestion. An ambiguity warning is
    appended when any vague words were counted, and the IDD template
    plus rewrite tip are appended when the total is below the threshold.

    Args:
        score: The IDDScore to explain.
        threshold: The passing threshold for display purposes.

    Returns:
        Formatted string with explanatory feedback, lines joined by
        newlines.
    """
    verdict = "BELOW THRESHOLD"
    if score.total >= threshold:
        verdict = "PASS"

    report = [
        f"[IDD Pre-Check] Score: {score.total:.2f} / 1.0 ({verdict}, threshold: {threshold})",
        "",
    ]

    # One line per criterion; missing ones get an extra suggestion line.
    for item in (score.goal, score.requirements, score.constraints, score.output):
        if item.found:
            report.append(f"  + {item.name}: {item.explanation}")
            continue
        report.append(f"  - {item.name}: {item.explanation}")
        report.append(f"    -> {item.suggestion}")

    # Only mention ambiguity when at least one vague word was counted.
    if score.ambiguity_count > 0:
        report += [
            "",
            f"  ! Ambiguity: {score.ambiguity_count} vague word(s) detected "
            f"(specificity: {score.specificity:.2f})",
            "    -> Replace vague words (appropriate, properly, handle) with concrete terms",
        ]

    # Failing scores additionally get the template and the rewrite tip.
    if score.total < threshold:
        report.extend(["", IDD_TEMPLATE, "", REWRITE_TIP])

    return "\n".join(report)