Source code for fermilink.implement.source_analysis

from __future__ import annotations

import json
import re
from typing import Any


# Tag names used to delimit structured sections in agent responses.
SOURCE_ANALYSIS_TAG = "source_analysis"
IMPLEMENTATION_CONTRACT_TAG = "implementation_contract"
VALIDATION_RUNNER_TAG = "validation_runner"
ANALYSIS_SUMMARY_TAG = "analysis_summary"
REVIEW_NOTES_TAG = "review_notes"

# Tags match case-insensitively and their bodies may span multiple lines.
_TAG_RE_FLAGS = re.IGNORECASE | re.DOTALL


def _compile_tag_re(tag: str) -> re.Pattern[str]:
    """Compile the pattern capturing the body of a ``<tag>...</tag>`` span.

    Whitespace next to the opening and closing tags is excluded from the
    capture group, and the body is matched lazily so the span ends at the
    first closing tag.
    """
    return re.compile(rf"<{tag}>\s*(.*?)\s*</{tag}>", _TAG_RE_FLAGS)


# One compiled pattern per tag; group 1 holds the tag body.
SOURCE_ANALYSIS_RE = _compile_tag_re(SOURCE_ANALYSIS_TAG)
IMPLEMENTATION_CONTRACT_RE = _compile_tag_re(IMPLEMENTATION_CONTRACT_TAG)
VALIDATION_RUNNER_RE = _compile_tag_re(VALIDATION_RUNNER_TAG)
ANALYSIS_SUMMARY_RE = _compile_tag_re(ANALYSIS_SUMMARY_TAG)
REVIEW_NOTES_RE = _compile_tag_re(REVIEW_NOTES_TAG)



[docs]
def extract_source_analysis(text: str) -> dict[str, Any] | None:
    """Extract the JSON object wrapped in ``<source_analysis>`` tags.

    Only the first tagged span found in ``text`` is considered. If the tag
    body is wrapped in a Markdown code fence (triple backticks, optionally
    followed by an info string such as ``json``), the fence is removed before
    decoding.

    Args:
        text: Text to scan. Falsy values are treated as an empty string and
            any other value is coerced with ``str``.

    Returns:
        The decoded payload when it is a JSON object (``dict``); ``None`` when
        no tagged span exists, the body cannot be decoded, or the decoded
        value is not an object.
    """
    match = SOURCE_ANALYSIS_RE.search(str(text or ""))
    if match is None:
        return None
    raw = match.group(1).strip()
    # A fenced body can never be valid JSON as-is, so unwrapping the fence
    # only ever turns a guaranteed decode failure into a possible success.
    if len(raw) >= 6 and raw.startswith("```") and raw.endswith("```"):
        inner = raw[3:-3]
        info, newline, rest = inner.partition("\n")
        # Drop the opening fence line when it is empty or a bare info string.
        if newline and (not info.strip() or info.strip().isalnum()):
            inner = rest
        raw = inner.strip()
    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError):
        # json.JSONDecodeError subclasses ValueError. RecursionError is raised
        # for pathologically deep nesting and is treated as invalid input too,
        # so malformed text never escapes as an exception.
        return None
    return payload if isinstance(payload, dict) else None




[docs]
def extract_implementation_contract(text: str) -> str | None:
    """Return the body of the first ``<implementation_contract>`` span.

    Args:
        text: Text to scan. Falsy values are treated as an empty string and
            any other value is coerced with ``str``.

    Returns:
        The whitespace-stripped tag body, or ``None`` when no span is found
        or the body is empty after stripping.
    """
    haystack = str(text or "")
    found = IMPLEMENTATION_CONTRACT_RE.search(haystack)
    if found is None:
        return None
    contract = found.group(1).strip()
    if not contract:
        return None
    return contract




[docs]
def extract_validation_runner(text: str) -> str | None:
    """Return the body of the first ``<validation_runner>`` span.

    Args:
        text: Text to scan. Falsy values are treated as an empty string and
            any other value is coerced with ``str``.

    Returns:
        The whitespace-stripped tag body, or ``None`` when no span is found
        or the body is empty after stripping.
    """
    hit = VALIDATION_RUNNER_RE.search(str(text or ""))
    if hit is None:
        return None
    runner = hit.group(1).strip()
    # An empty body is reported the same way as a missing span.
    return runner if runner else None




[docs]
def extract_analysis_summary(text: str) -> str | None:
    """Return the body of the first ``<analysis_summary>`` span.

    Args:
        text: Text to scan. Falsy values are treated as an empty string and
            any other value is coerced with ``str``.

    Returns:
        The whitespace-stripped tag body, or ``None`` when no span is found
        or the body is empty after stripping.
    """
    candidate = str(text or "")
    located = ANALYSIS_SUMMARY_RE.search(candidate)
    if located is not None:
        summary = located.group(1).strip()
        if summary:
            return summary
    return None




[docs]
def extract_review_notes(text: str) -> str | None:
    """Return the body of the first ``<review_notes>`` span.

    Args:
        text: Text to scan. Falsy values are treated as an empty string and
            any other value is coerced with ``str``.

    Returns:
        The whitespace-stripped tag body, or ``None`` when no span is found
        or the body is empty after stripping.
    """
    result = REVIEW_NOTES_RE.search(str(text or ""))
    if result is None:
        return None
    # ``or None`` collapses an empty body into the "nothing found" value.
    notes = result.group(1).strip() or None
    return notes




[docs]
def build_source_analysis_agents_md(*, goal_rel: str, autogen_rel: str) -> str:
    """Render the agent instruction text for source-analysis mode.

    Args:
        goal_rel: Path text listed as the first item to read.
        autogen_rel: Path text named as the only location that may be written.

    Returns:
        The Markdown instruction text; every line ends with a newline.
    """
    research_guidance = (
        "Use online search when available to ground implementation guidance in "
        "official documentation, upstream source, API references, papers, or "
        "well-established examples. Cite the URLs or publication identifiers you "
        "used in the structured analysis."
    )
    lines = [
        "# FermiLink Implement Source Analysis Mode",
        "",
        "You are analyzing a codebase for a new scientific implementation task.",
        "",
        "Read these first:",
        f"- `{goal_rel}`",
        "- Source files referenced by editable scope",
        "- Existing tests/examples/build files",
        "",
        "You may read any repository file.",
        research_guidance,
        f"You may only write to `{autogen_rel}`.",
        "Do not modify source code.",
    ]
    # The join separates the lines; the final line needs its own newline.
    return "\n".join(lines) + "\n"




[docs]
def build_contract_generation_agents_md(
    *,
    goal_rel: str,
    analysis_rel: str,
    autogen_rel: str,
) -> str:
    """Render the agent instruction text for contract-generation mode.

    Args:
        goal_rel: Path text listed as the first item to read.
        analysis_rel: Path text listed as the second item to read.
        autogen_rel: Path text named as the only location that may be written.

    Returns:
        The Markdown instruction text; every line ends with a newline.
    """
    research_guidance = (
        "Use online search when available to fill implementation-guide gaps with "
        "official documentation, upstream source, API references, papers, or "
        "well-established examples. Cite the URLs or publication identifiers you "
        "used in the contract text."
    )
    lines = [
        "# FermiLink Implement Contract Generation Mode",
        "",
        "You are generating a progressive implementation contract and optional validation runner.",
        "",
        "Read these first:",
        f"- `{goal_rel}`",
        f"- `{analysis_rel}`",
        "",
        "You may read any repository file.",
        research_guidance,
        f"You may only write to `{autogen_rel}`.",
        "Do not modify source code.",
    ]
    # The join separates the lines; the final line needs its own newline.
    return "\n".join(lines) + "\n"




[docs]
def build_source_analysis_prompt(
    *,
    goal_spec: dict[str, Any],
    goal_rel: str,
    tracked_file_summary: str,
) -> str:
    """Build the prompt that requests a structured source analysis.

    Args:
        goal_spec: Goal mapping; only its ``raw_text`` entry is read, and a
            missing or falsy value is rendered as an empty string.
        goal_rel: Path text shown as the goal file.
        tracked_file_summary: Text inserted under the repository file summary
            heading.

    Returns:
        The prompt text: blank-line separated paragraphs ending with a
        newline, including the response tags the reply should use.
    """
    goal_text = goal_spec.get("raw_text") or ""

    intro = "Perform source analysis for FermiLink implement mode."
    goal_section = (
        f"Goal file: `{goal_rel}`\n"
        "Full goal:\n"
        "```\n"
        f"{goal_text}\n"
        "```"
    )
    repository_section = (
        "Repository file summary:\n"
        f"{tracked_file_summary}"
    )
    task_section = (
        "Identify the target API, natural insertion points, existing tests, "
        "build/runtime commands, representative workloads, useful observables, "
        "and risks for cheating or overfitting. Treat goal.md validation text "
        "as optional user intent, not as a required source of shell commands."
    )
    research_section = (
        "Before finalizing the analysis, use online search/web browsing when "
        "available. Prioritize official project documentation, upstream source "
        "repositories, API references, peer-reviewed papers/preprints, and "
        "accepted usage examples relevant to the goal target. Capture concrete "
        "implementation implications, not just links. If online search is not "
        "available in this runtime, state that explicitly in the analysis and "
        "continue from repository/local evidence only."
    )
    response_section = (
        "Return exactly one JSON object inside:\n"
        f"<{SOURCE_ANALYSIS_TAG}>...</{SOURCE_ANALYSIS_TAG}>\n"
        "with keys such as package, language, target_files, existing_tests, "
        "proposed_api, validation_strategy, online_research, external_references, "
        "implementation_notes, and risks. You may also include:\n"
        f"<{ANALYSIS_SUMMARY_TAG}>one sentence</{ANALYSIS_SUMMARY_TAG}>\n"
        f"<{REVIEW_NOTES_TAG}>notes</{REVIEW_NOTES_TAG}>"
    )

    paragraphs = (
        intro,
        goal_section,
        repository_section,
        task_section,
        research_section,
        response_section,
    )
    # Paragraphs are separated by one blank line; the text ends with a newline.
    return "\n\n".join(paragraphs) + "\n"




[docs]
def build_contract_generation_prompt(
    *,
    goal_spec: dict[str, Any],
    goal_rel: str,
    analysis: dict[str, Any],
    analysis_rel: str,
    default_contract_yaml: str,
    contract_rel: str,
    runner_rel: str,
) -> str:
    """Build the prompt that requests an implementation contract.

    Args:
        goal_spec: Goal mapping; only its ``raw_text`` entry is read, and a
            missing or falsy value is rendered as an empty string.
        goal_rel: Path text shown as the goal reference.
        analysis: Structured analysis, embedded as indented JSON with sorted
            keys.
        analysis_rel: Path text shown as the analysis reference.
        default_contract_yaml: Text embedded in a ``yaml`` fence as the
            fallback contract template.
        contract_rel: Path text shown as the contract destination.
        runner_rel: Path text shown as the optional validation runner path.

    Returns:
        The prompt text, including the response tags the reply should use.
    """
    goal_text = goal_spec.get("raw_text") or ""
    analysis_json = json.dumps(analysis, indent=2, sort_keys=True)

    context_section = (
        "Generate a progressive implementation contract for FermiLink implement mode.\n"
        "\n"
        f"Goal: `{goal_rel}`\n"
        f"Analysis: `{analysis_rel}`\n"
        "\n"
        "Goal content:\n"
        "```\n"
        f"{goal_text}\n"
        "```\n"
        "\n"
        "Structured analysis:\n"
        f"{analysis_json}\n"
        "\n"
        "Fallback contract template:\n"
        "```yaml\n"
        f"{default_contract_yaml}\n"
        "```\n"
        "\n"
    )
    destination_section = (
        "Write or return a contract at:\n"
        f"- `{contract_rel}`\n"
        "Optional validation runner path:\n"
        f"- `{runner_rel}`\n"
        "\n"
    )
    requirements_section = (
        "The contract must keep baseline/reference optional, define editable scope, "
        "input API expectations, desired outputs, progressive validation commands, "
        "score-based partial acceptance, final done criteria, workload split, and anti-cheating guardrails. "
        "Derive deterministic worker/controller `pre_commands` from the goal "
        "`## Build` section when present; treat `## Pre Commands` as a legacy alias only.\n"
        "Generate YAML `validation.commands` yourself from the goal, analysis, "
        "existing tests, examples, and scientific target. The initial goal.md "
        "should not need a technical validation section. If goal.md includes "
        "validation prose, treat it as guidance; if it includes fenced command "
        "blocks, preserve or improve them only when they are suitable.\n"
        "When representative workloads are available, split them into "
        "worker-visible and controller-only heldout cases using `workload_split` "
        "with worker/controller workload ids. Prefer explicit train-/test- "
        "prefixes when present; otherwise hold out at least one controller-only "
        "case when two or more workloads exist. Do not put controller-only "
        "case details in worker-visible validation commands.\n"
        "\n"
    )
    target_guide_section = (
        "Most important: expand the YAML `target` value into a comprehensive "
        "implementation guide. Do not merely copy the `## Target` section from "
        "goal.md. Set `target` as a multiline YAML block scalar string (`|`) "
        "that gives the worker enough detail to implement the goal without "
        "re-deriving the plan from scratch. Include:\n"
        "- the concrete objective and success definition;\n"
        "- online research used, with official URLs, upstream source links, "
        "paper/arXiv/DOI identifiers, or a clear note if search was unavailable;\n"
        "- current repository insertion points: files, classes, functions, data "
        "flow, and compatibility constraints;\n"
        "- step-by-step implementation work plan, including algorithms, APIs, "
        "data structures, numerical methods, configuration, and error handling;\n"
        "- integration details for build/runtime setup, public API exposure, "
        "backward compatibility, and non-goals;\n"
        "- validation milestones that map each implementation step to "
        "`validation.commands`, observables, scoring, and done criteria;\n"
        "- edge cases, scientific correctness risks, and anti-hardcoding checks.\n"
        "If the structured analysis lacks enough external grounding, perform "
        "additional online search before writing the contract. Prefer primary "
        "sources and do not invent citations.\n"
        "\n"
    )
    response_section = (
        "Return corrected YAML inside:\n"
        f"<{IMPLEMENTATION_CONTRACT_TAG}>...</{IMPLEMENTATION_CONTRACT_TAG}>\n"
        "If you provide a runner, put it inside:\n"
        f"<{VALIDATION_RUNNER_TAG}>...</{VALIDATION_RUNNER_TAG}>\n"
        f"<{REVIEW_NOTES_TAG}>notes</{REVIEW_NOTES_TAG}>\n"
    )

    # Each section already carries its own trailing newlines and separators.
    return "".join(
        (
            context_section,
            destination_section,
            requirements_section,
            target_guide_section,
            response_section,
        )
    )