from __future__ import annotations
import json
import re
from typing import Any
# Names of the XML-like tags that delimit structured sections in agent replies.
SOURCE_ANALYSIS_TAG = "source_analysis"
IMPLEMENTATION_CONTRACT_TAG = "implementation_contract"
VALIDATION_RUNNER_TAG = "validation_runner"
ANALYSIS_SUMMARY_TAG = "analysis_summary"
REVIEW_NOTES_TAG = "review_notes"


def _compile_tag_block_re(tag: str) -> re.Pattern[str]:
    """Compile a pattern capturing the text between ``<tag>`` and ``</tag>``.

    Whitespace directly inside the tags is excluded from group 1. Matching is
    case-insensitive, and ``.`` also matches newlines so the captured payload
    may span several lines.
    """
    return re.compile(
        rf"<{tag}>\s*(.*?)\s*</{tag}>",
        re.IGNORECASE | re.DOTALL,
    )


# One extractor per tag; group 1 holds the stripped tag payload.
SOURCE_ANALYSIS_RE = _compile_tag_block_re(SOURCE_ANALYSIS_TAG)
IMPLEMENTATION_CONTRACT_RE = _compile_tag_block_re(IMPLEMENTATION_CONTRACT_TAG)
VALIDATION_RUNNER_RE = _compile_tag_block_re(VALIDATION_RUNNER_TAG)
ANALYSIS_SUMMARY_RE = _compile_tag_block_re(ANALYSIS_SUMMARY_TAG)
REVIEW_NOTES_RE = _compile_tag_block_re(REVIEW_NOTES_TAG)
def build_source_analysis_agents_md(*, goal_rel: str, autogen_rel: str) -> str:
    """Return the Markdown instructions for the source-analysis agent.

    The text tells the agent what to read first, encourages online research
    with citations, and restricts writes to a single location.

    Args:
        goal_rel: Path of the goal file, shown to the agent in backticks
            (presumably repository-relative; confirm against the caller).
        autogen_rel: The only path the agent is told it may write to.

    Returns:
        The instruction text, terminated by a newline.
    """
    return (
        "# FermiLink Implement Source Analysis Mode\n"
        "\n"
        "You are analyzing a codebase for a new scientific implementation task.\n"
        "\n"
        "Read these first:\n"
        f"- `{goal_rel}`\n"
        "- Source files referenced by editable scope\n"
        "- Existing tests/examples/build files\n"
        "\n"
        "You may read any repository file.\n"
        "Use online search when available to ground implementation guidance in "
        "official documentation, upstream source, API references, papers, or "
        "well-established examples. Cite the URLs or publication identifiers you "
        "used in the structured analysis.\n"
        f"You may only write to `{autogen_rel}`.\n"
        "Do not modify source code.\n"
    )
def build_contract_generation_agents_md(
    *,
    goal_rel: str,
    analysis_rel: str,
    autogen_rel: str,
) -> str:
    """Return the Markdown instructions for the contract-generation agent.

    The text lists the goal and analysis files as required reading, encourages
    online research with citations, and restricts writes to a single location.

    Args:
        goal_rel: Path of the goal file, shown to the agent in backticks
            (presumably repository-relative; confirm against the caller).
        analysis_rel: Path of the analysis file, listed after the goal file.
        autogen_rel: The only path the agent is told it may write to.

    Returns:
        The instruction text, terminated by a newline.
    """
    return (
        "# FermiLink Implement Contract Generation Mode\n"
        "\n"
        "You are generating a progressive implementation contract and optional validation runner.\n"
        "\n"
        "Read these first:\n"
        f"- `{goal_rel}`\n"
        f"- `{analysis_rel}`\n"
        "\n"
        "You may read any repository file.\n"
        "Use online search when available to fill implementation-guide gaps with "
        "official documentation, upstream source, API references, papers, or "
        "well-established examples. Cite the URLs or publication identifiers you "
        "used in the contract text.\n"
        f"You may only write to `{autogen_rel}`.\n"
        "Do not modify source code.\n"
    )
def build_source_analysis_prompt(
    *,
    goal_spec: dict[str, Any],
    goal_rel: str,
    tracked_file_summary: str,
) -> str:
    """Build the prompt that asks an agent to analyze the repository.

    The prompt embeds the full goal text and a repository file summary, then
    asks for a single JSON object wrapped in the source-analysis tag, with an
    optional one-sentence summary and review notes in their own tags.

    Args:
        goal_spec: Goal mapping; only its ``raw_text`` entry is read. A
            missing or falsy value is rendered as an empty string.
        goal_rel: Path of the goal file, shown to the agent in backticks.
        tracked_file_summary: Pre-rendered summary of repository files,
            inserted verbatim.

    Returns:
        The complete prompt text, terminated by a newline.
    """
    # Fall back to an empty string so the fenced block is still well-formed.
    goal_text = goal_spec.get("raw_text") or ""
    return (
        "Perform source analysis for FermiLink implement mode.\n"
        "\n"
        f"Goal file: `{goal_rel}`\n"
        "Full goal:\n"
        "```\n"
        f"{goal_text}\n"
        "```\n"
        "\n"
        "Repository file summary:\n"
        f"{tracked_file_summary}\n"
        "\n"
        "Identify the target API, natural insertion points, existing tests, "
        "build/runtime commands, representative workloads, useful observables, "
        "and risks for cheating or overfitting. Treat goal.md validation text "
        "as optional user intent, not as a required source of shell commands.\n"
        "\n"
        "Before finalizing the analysis, use online search/web browsing when "
        "available. Prioritize official project documentation, upstream source "
        "repositories, API references, peer-reviewed papers/preprints, and "
        "accepted usage examples relevant to the goal target. Capture concrete "
        "implementation implications, not just links. If online search is not "
        "available in this runtime, state that explicitly in the analysis and "
        "continue from repository/local evidence only.\n"
        "\n"
        "Return exactly one JSON object inside:\n"
        f"<{SOURCE_ANALYSIS_TAG}>...</{SOURCE_ANALYSIS_TAG}>\n"
        "with keys such as package, language, target_files, existing_tests, "
        "proposed_api, validation_strategy, online_research, external_references, "
        "implementation_notes, and risks. You may also include:\n"
        f"<{ANALYSIS_SUMMARY_TAG}>one sentence</{ANALYSIS_SUMMARY_TAG}>\n"
        f"<{REVIEW_NOTES_TAG}>notes</{REVIEW_NOTES_TAG}>\n"
    )
def build_contract_generation_prompt(
    *,
    goal_spec: dict[str, Any],
    goal_rel: str,
    analysis: dict[str, Any],
    analysis_rel: str,
    default_contract_yaml: str,
    contract_rel: str,
    runner_rel: str,
) -> str:
    """Build the prompt that asks an agent to write the implementation contract.

    The prompt embeds the goal text, the structured analysis rendered as JSON,
    and a fallback YAML contract template. It then lists the requirements for
    the contract and names the tags in which the contract, the optional
    validation runner, and review notes are to be returned.

    Args:
        goal_spec: Goal mapping; only its ``raw_text`` entry is read. A
            missing or falsy value is rendered as an empty string.
        goal_rel: Path of the goal file, shown to the agent in backticks.
        analysis: Structured analysis; must be JSON-serializable because it
            is passed to ``json.dumps``.
        analysis_rel: Path of the analysis file, shown in backticks.
        default_contract_yaml: Fallback contract template, inserted verbatim
            inside a fenced ``yaml`` block.
        contract_rel: Path at which the contract should be written.
        runner_rel: Path for the optional validation runner.

    Returns:
        The complete prompt text, terminated by a newline.

    Raises:
        TypeError: If ``analysis`` contains values ``json.dumps`` cannot
            serialize.
    """
    # Fall back to an empty string so the fenced block is still well-formed.
    goal_text = goal_spec.get("raw_text") or ""
    # Sorted keys keep the rendered analysis independent of dict insertion order.
    analysis_json = json.dumps(analysis, indent=2, sort_keys=True)
    return (
        "Generate a progressive implementation contract for FermiLink implement mode.\n"
        "\n"
        f"Goal: `{goal_rel}`\n"
        f"Analysis: `{analysis_rel}`\n"
        "\n"
        "Goal content:\n"
        "```\n"
        f"{goal_text}\n"
        "```\n"
        "\n"
        "Structured analysis:\n"
        f"{analysis_json}\n"
        "\n"
        "Fallback contract template:\n"
        "```yaml\n"
        f"{default_contract_yaml}\n"
        "```\n"
        "\n"
        "Write or return a contract at:\n"
        f"- `{contract_rel}`\n"
        "Optional validation runner path:\n"
        f"- `{runner_rel}`\n"
        "\n"
        "The contract must keep baseline/reference optional, define editable scope, "
        "input API expectations, desired outputs, progressive validation commands, "
        "score-based partial acceptance, final done criteria, workload split, and anti-cheating guardrails. "
        "Derive deterministic worker/controller `pre_commands` from the goal "
        "`## Build` section when present; treat `## Pre Commands` as a legacy alias only.\n"
        "Generate YAML `validation.commands` yourself from the goal, analysis, "
        "existing tests, examples, and scientific target. The initial goal.md "
        "should not need a technical validation section. If goal.md includes "
        "validation prose, treat it as guidance; if it includes fenced command "
        "blocks, preserve or improve them only when they are suitable.\n"
        "When representative workloads are available, split them into "
        "worker-visible and controller-only heldout cases using `workload_split` "
        "with worker/controller workload ids. Prefer explicit train-/test- "
        "prefixes when present; otherwise hold out at least one controller-only "
        "case when two or more workloads exist. Do not put controller-only "
        "case details in worker-visible validation commands.\n"
        "\n"
        "Most important: expand the YAML `target` value into a comprehensive "
        "implementation guide. Do not merely copy the `## Target` section from "
        "goal.md. Set `target` as a multiline YAML block scalar string (`|`) "
        "that gives the worker enough detail to implement the goal without "
        "re-deriving the plan from scratch. Include:\n"
        "- the concrete objective and success definition;\n"
        "- online research used, with official URLs, upstream source links, "
        "paper/arXiv/DOI identifiers, or a clear note if search was unavailable;\n"
        "- current repository insertion points: files, classes, functions, data "
        "flow, and compatibility constraints;\n"
        "- step-by-step implementation work plan, including algorithms, APIs, "
        "data structures, numerical methods, configuration, and error handling;\n"
        "- integration details for build/runtime setup, public API exposure, "
        "backward compatibility, and non-goals;\n"
        "- validation milestones that map each implementation step to "
        "`validation.commands`, observables, scoring, and done criteria;\n"
        "- edge cases, scientific correctness risks, and anti-hardcoding checks.\n"
        "If the structured analysis lacks enough external grounding, perform "
        "additional online search before writing the contract. Prefer primary "
        "sources and do not invent citations.\n"
        "\n"
        "Return corrected YAML inside:\n"
        f"<{IMPLEMENTATION_CONTRACT_TAG}>...</{IMPLEMENTATION_CONTRACT_TAG}>\n"
        "If you provide a runner, put it inside:\n"
        f"<{VALIDATION_RUNNER_TAG}>...</{VALIDATION_RUNNER_TAG}>\n"
        f"<{REVIEW_NOTES_TAG}>notes</{REVIEW_NOTES_TAG}>\n"
    )