# Source code for fermilink.optimize.goal

"""Parse goal.md into a structured optimization goal specification.

Goal mode lets users describe optimization intent in natural-language
markdown instead of authoring a benchmark YAML contract and runner script
by hand.  This module parses the structured markdown into a dict that
the source-analysis / benchmark-generation pipeline can consume.
"""

from __future__ import annotations

import re
from typing import Any


# ---------------------------------------------------------------------------
# Goal detection
# ---------------------------------------------------------------------------

# Lower-cased heading prefixes that identify a goal document.  Some markers
# are prefixes of others (``## correctness`` vs ``## correctness
# constraints``), which is why detection below counts one marker per line.
_GOAL_SECTION_MARKERS = [
    "## package",
    "## target",
    "## editable scope",
    "## performance metric",
    "## correctness constraints",
    "## correctness",
    "## representative workloads",
    "## workloads",
    "# optimization goal",
]


def is_goal_markdown(text: str) -> bool:
    """Return *True* when *text* looks like a goal-structured markdown file.

    Detection is based on the presence of at least two known goal-section
    headings.  This intentionally stays loose so that users can omit optional
    sections while still triggering goal mode: matching is case-insensitive
    and a marker may appear anywhere in a line (so ``### Package`` still
    matches ``## package``).

    Each line contributes at most one marker -- the longest one it contains --
    so a lone ``## Correctness Constraints`` heading is not counted twice
    merely because it also contains the shorter ``## correctness`` marker.
    """
    found: set[str] = set()
    for line in text.lower().splitlines():
        hits = [marker for marker in _GOAL_SECTION_MARKERS if marker in line]
        if hits:
            # Prefer the most specific marker when several overlap on a line.
            found.add(max(hits, key=len))
    return len(found) >= 2
# --------------------------------------------------------------------------- # Section extraction helpers # --------------------------------------------------------------------------- _HEADING_RE = re.compile(r"^(#{1,3})\s+(.+)$") def _extract_sections(text: str) -> dict[str, str]: """Split markdown into ``{lowercased heading: body text}`` pairs.""" sections: dict[str, str] = {} current_heading = "" current_lines: list[str] = [] for line in text.splitlines(): match = _HEADING_RE.match(line) if match: if current_heading: sections[current_heading] = "\n".join(current_lines).strip() current_heading = match.group(2).strip().lower() current_lines = [] else: current_lines.append(line) if current_heading: sections[current_heading] = "\n".join(current_lines).strip() return sections def _text_section(sections: dict[str, str], *keys: str) -> str: """Return the body of the first matching section, or empty string.""" for key in keys: text = sections.get(key, "").strip() if text: return text return "" def _first_line(sections: dict[str, str], key: str) -> str: text = _text_section(sections, key) return text.splitlines()[0].strip() if text else "" def _list_section(sections: dict[str, str], *keys: str) -> list[str]: """Extract a bullet-list section, trying *keys* in order.""" for key in keys: text = sections.get(key, "").strip() if not text: continue items: list[str] = [] for line in text.splitlines(): stripped = line.strip() for prefix in ("- ", "* ", "+ "): if stripped.startswith(prefix): stripped = stripped[len(prefix) :] break stripped = stripped.strip() if stripped: items.append(stripped) if items: return items return [] def _code_blocks(sections: dict[str, str], key: str) -> list[str]: """Extract fenced code blocks from a section body.""" text = sections.get(key, "") blocks: list[str] = [] current: list[str] | None = None for line in text.splitlines(): stripped = line.strip() if stripped.startswith("```"): if current is not None: blocks.append("\n".join(current)) current 
= None else: current = [] elif current is not None: current.append(line) return blocks # --------------------------------------------------------------------------- # Public API # ---------------------------------------------------------------------------
def parse_goal(text: str) -> dict[str, Any]:
    """Turn a goal markdown document into a structured specification.

    The document is split into sections by heading, and each output field is
    read from the first non-empty section among its accepted heading names.
    Missing sections yield an empty string or an empty list.

    Returned keys:

    * ``raw_text`` - the markdown exactly as passed in
    * ``package`` - first line of the ``package`` section
    * ``target`` - body of the ``target`` section
    * ``editable_scope`` - list items from ``editable scope`` / ``scope`` /
      ``editable paths``
    * ``performance_metric`` - body of ``performance metric`` / ``metric`` /
      ``objective``
    * ``correctness_constraints`` - list items from ``correctness
      constraints`` / ``correctness`` / ``constraints``
    * ``workloads`` - list items from ``representative workloads`` /
      ``workloads`` / ``cases`` / ``test cases``
    * ``language`` - first line of the ``language`` section
    * ``notes`` - body of the ``notes`` section
    * ``build_commands`` - fenced code blocks from the first of ``build`` /
      ``setup`` / ``install`` that contains any
    """
    sections = _extract_sections(text)

    # First heading that provides at least one code block wins.
    build_commands: list[str] = []
    for heading in ("build", "setup", "install"):
        build_commands = _code_blocks(sections, heading)
        if build_commands:
            break

    spec: dict[str, Any] = {"raw_text": text}
    spec["package"] = _first_line(sections, "package")
    spec["target"] = _text_section(sections, "target")
    spec["editable_scope"] = _list_section(
        sections, "editable scope", "scope", "editable paths"
    )
    spec["performance_metric"] = _text_section(
        sections, "performance metric", "metric", "objective"
    )
    spec["correctness_constraints"] = _list_section(
        sections, "correctness constraints", "correctness", "constraints"
    )
    spec["workloads"] = _list_section(
        sections, "representative workloads", "workloads", "cases", "test cases"
    )
    spec["language"] = _first_line(sections, "language")
    spec["notes"] = _text_section(sections, "notes")
    spec["build_commands"] = build_commands
    return spec