Source code for clanguru.doc_generator

import re
import textwrap
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Union

from jinja2 import Environment, StrictUndefined, TemplateError, UndefinedError, select_autoescape
from py_app_dev.core.exceptions import UserNotificationException

from clanguru.cparser import CLangParser, Token, TranslationUnit

# Macro names that open a GoogleTest test definition.
GTEST_MACROS = ("TEST", "TEST_P", "TEST_F", "TYPED_TEST", "TYPED_TEST_P")
# Matches `<MACRO>(<first>, <second>)` at the start of a code body, allowing
# whitespace around every token; the two identifiers are captured as groups 1 and 2.
_GTEST_DECL_RE = re.compile(r"^\s*(?:" + "|".join(GTEST_MACROS) + r")\s*\(\s*(\w+)\s*,\s*(\w+)\s*\)")
# Shared Jinja2 environment used to render doc blocks. `StrictUndefined` turns
# references to unknown variables into errors instead of empty output.
# NOTE(review): `select_autoescape` has a separate default for templates created
# from strings (`default_for_string`), so autoescaping may still be active for
# `from_string` templates even though no file extension is enabled — confirm intended.
_TEMPLATE_ENV = Environment(
    undefined=StrictUndefined,
    autoescape=select_autoescape(enabled_extensions=()),
)



[docs]
@dataclass
class GTestInfo:
    """Suite and case identifiers captured from a GoogleTest macro invocation."""

    suite: str
    case: str

    @property
    def test(self) -> str:
        """Return the combined name in the form ``<suite>.<case>``."""
        return "{}.{}".format(self.suite, self.case)




[docs]
class DocsFormat(Enum):
    """Documentation formats the generator can target."""

    myst = "myst"
    md = "md"
    rst = "rst"

    @property
    def format_tag(self) -> str:
        """Return the tag used in source comments for this format."""
        # MyST uses the plain Markdown tag; every other member uses its own value.
        return "md" if self is DocsFormat.myst else self.value




[docs]
@dataclass
class TextContent:
    """A block of prose text stored inside a section."""

    # Raw text; the formatters in this module strip surrounding whitespace when rendering it.
    text: str




[docs]
@dataclass
class CodeContent:
    """A source code snippet together with the options used to render it as a code block."""

    # The code text placed inside the rendered block.
    code: str
    # Language name written after the fence or ``code-block`` directive.
    language: str = "c"
    # When true, formatters that emit directive options add ``:linenos:``.
    linenos: bool = True
    # Line numbers written as ``:emphasize-lines:``; ``None`` or an empty list omits the option.
    highlight_lines: list[int] | None = None
    # Value written as ``:lineno-start:``; ``None`` omits the option.
    start_line: int | None = None



# Any item that may be stored in ``Section.content``.
SectionContent = Union[TextContent, CodeContent]


class Section:
    """A titled documentation node holding content items and nested sections."""

    def __init__(self, title: str):
        """Create an empty section named ``title``."""
        self.title = title
        self.content: list[SectionContent] = []
        self.subsections: list[Section] = []

    def add_subsection(self, subsection: "Section") -> None:
        """Append ``subsection`` after the subsections already present."""
        self.subsections.append(subsection)

    def add_content(self, content: SectionContent) -> None:
        """Append ``content`` after the content items already present."""
        self.content.append(content)



[docs]
class DocStructure:
    """Format independent documentation structure."""

    def __init__(self, title: str):
        """Create a document named ``title`` with no sections yet."""
        self.sections: list[Section] = []
        self.title = title

    def add_section(self, section: Section) -> None:
        """Append ``section`` as the last top-level section of the document."""
        self.sections.append(section)




[docs]
class OutputFormatter(ABC):
    """Abstract base class for output formatters."""

    @property
    @abstractmethod
    def docs_format(self) -> DocsFormat:
        """Return the documentation format for this formatter."""
        ...


[docs]
    @abstractmethod
    def format(self, doc: DocStructure) -> str:
        """Format the entire documentation structure."""
        pass



[docs]
    @abstractmethod
    def format_text(self, text: str) -> str:
        """Format a text block."""
        pass



[docs]
    @abstractmethod
    def format_code(self, content: CodeContent) -> str:
        """Format a code block."""
        pass



[docs]
    @abstractmethod
    def file_extension(self) -> str:
        """Return the file extension for the formatter."""
        pass



[docs]
    @abstractmethod
    def format_table(self, headers: list[str], rows: list[list[str]]) -> str:
        """Format a table with headers and rows."""
        pass





[docs]
class MarkdownFlavour(Enum):
    """Markdown dialects supported by ``MarkdownFormatter``."""

    # Code is rendered through the MyST ``code-block`` directive.
    Myst = "myst"
    # Code is rendered as plain fenced blocks.
    Raw = "raw"




[docs]
class MarkdownFormatter(OutputFormatter):
    """
    Markdown output formatter for documentation.

    Two flavours are supported:
    * Raw: plain GitHub style fenced code blocks.
    * Myst: MystParser extended ``code-block`` directive with options (linenos & highlight lines).
    """

    def __init__(self, flavour: MarkdownFlavour = MarkdownFlavour.Raw, *, jinja_raw_tags: bool = False) -> None:
        """
        Store the formatter configuration.

        ``flavour`` selects how code blocks are rendered. When ``jinja_raw_tags`` is
        true, rendered code blocks are wrapped in ``{% raw %}`` / ``{% endraw %}``.
        """
        super().__init__()
        self.flavour = flavour
        self.jinja_raw_tags = jinja_raw_tags


[docs]
    def format(self, doc: DocStructure) -> str:
        output = f"# {doc.title}\n\n"
        for section in doc.sections:
            output += self._format_section(section, 2)
        return output.rstrip() + "\n"


    def _format_section(self, section: Section, level: int) -> str:
        """
        Render ``section`` and, recursively, its subsections.

        ``level`` is the number of ``#`` characters used for the heading; each
        subsection is rendered one level deeper. Markdown headings only go up to
        six ``#``, so deeper nesting is rendered at level 6 rather than emitting
        a line that is no longer a heading.
        """
        heading_marks = "#" * min(level, 6)
        parts = [f"{heading_marks} {section.title}\n\n"]
        for content in section.content:
            if isinstance(content, TextContent):
                parts.append(self.format_text(content.text) + "\n\n")
            elif isinstance(content, CodeContent):
                parts.append(self.format_code(content) + "\n\n")
        parts.extend(self._format_section(subsection, level + 1) for subsection in section.subsections)
        return "".join(parts)

    @property
    def docs_format(self) -> DocsFormat:
        """Return ``DocsFormat.myst`` for the Myst flavour and ``DocsFormat.md`` otherwise."""
        if self.flavour is MarkdownFlavour.Myst:
            return DocsFormat.myst
        return DocsFormat.md


[docs]
    def format_text(self, text: str) -> str:
        """Return ``text`` with leading and trailing whitespace removed."""
        return text.strip()



[docs]
    def format_code(self, content: CodeContent) -> str:
        """
        Render ``content`` as a code block for the configured flavour.

        The Myst flavour delegates to ``format_code_block_myst``. Otherwise a plain
        fenced block is produced, wrapped in ``{% raw %}`` / ``{% endraw %}`` when
        ``jinja_raw_tags`` is set.
        """
        if self.flavour is MarkdownFlavour.Myst:
            return self.format_code_block_myst(content)
        fenced = f"```{content.language}\n{content.code}\n```"
        if not self.jinja_raw_tags:
            return fenced
        return f"{{% raw %}}\n{fenced}\n{{% endraw %}}"



[docs]
    def format_code_block_myst(self, content: CodeContent) -> str:
        """
        Return a fenced code block or Myst code-block directive.

        Myst format example::

            ```{code-block} c
            :linenos:
            :lineno-start: 5
            :emphasize-lines: 2,4

            int main() {}
            ```
        """
        options: list[str] = []
        if content.linenos:
            options.append(":linenos:")
        if content.start_line is not None:
            options.append(f":lineno-start: {content.start_line}")
        if content.highlight_lines:
            # myst expects a comma separated list
            highlighted = ",".join(str(n) for n in content.highlight_lines)
            options.append(f":emphasize-lines: {highlighted}")
        # Build directive header
        header = f"```{{code-block}} {content.language}".rstrip()
        body_parts = [header]
        body_parts.extend(options)
        # Blank line separating options from code per myst recommendations
        body_parts.append("")
        body_parts.append(content.code)
        body_parts.append("```")
        result = "\n".join(body_parts)
        if self.jinja_raw_tags:
            return f"{{% raw %}}\n{result}\n{{% endraw %}}"
        return result



[docs]
    def format_table(self, headers: list[str], rows: list[list[str]]) -> str:
        header_line = "| " + " | ".join(headers) + " |"
        separator_line = "| " + " | ".join(["---"] * len(headers)) + " |"
        row_lines = ["| " + " | ".join(row) + " |" for row in rows]
        return "\n".join([header_line, separator_line, *row_lines]) + "\n"



[docs]
    def file_extension(self) -> str:
        """Return ``"md"``, without a leading dot, for both flavours."""
        return "md"





[docs]
class RSTFormatter(OutputFormatter):
    """reStructuredText output formatter for documentation."""

    def __init__(self, *, jinja_raw_tags: bool = False) -> None:
        """
        Store the formatter configuration.

        When ``jinja_raw_tags`` is true, rendered code blocks are wrapped in
        ``{% raw %}`` / ``{% endraw %}``.
        """
        super().__init__()
        self.jinja_raw_tags = jinja_raw_tags


[docs]
    def format(self, doc: DocStructure) -> str:
        output = f"{doc.title}\n{'=' * len(doc.title)}\n\n"
        for section in doc.sections:
            output += self._format_section(section, 1)
        return output.rstrip() + "\n"


    def _format_section(self, section: Section, level: int) -> str:
        """
        Render ``section`` and, recursively, its subsections.

        ``level`` selects the underline character from ``"=-~^"``; each subsection
        is rendered one level deeper. Levels past the last character reuse it, so
        deep nesting no longer raises ``IndexError``.
        """
        underlines = "=-~^"
        # Clamp only the upper bound; indexing for in-range levels is unchanged.
        adornment = underlines[min(level, len(underlines) - 1)]
        parts = [f"{section.title}\n{adornment * len(section.title)}\n\n"]
        for content in section.content:
            if isinstance(content, TextContent):
                parts.append(self.format_text(content.text) + "\n\n")
            elif isinstance(content, CodeContent):
                parts.append(self.format_code(content) + "\n\n")
        parts.extend(self._format_section(subsection, level + 1) for subsection in section.subsections)
        return "".join(parts)

    @property
    def docs_format(self) -> DocsFormat:
        """Return ``DocsFormat.rst``."""
        return DocsFormat.rst


[docs]
    def format_text(self, text: str) -> str:
        """Return ``text`` with leading and trailing whitespace removed."""
        return text.strip()



[docs]
    def format_code(self, content: CodeContent) -> str:
        options = []
        if content.linenos:
            options.append("   :linenos:")
        if content.start_line is not None:
            options.append(f"   :lineno-start: {content.start_line}")
        if content.highlight_lines:
            highlighted = ",".join(str(n) for n in content.highlight_lines)
            options.append(f"   :emphasize-lines: {highlighted}")

        options_str = "\n".join(options)
        if options_str:
            options_str = "\n" + options_str + "\n"

        code_block = f".. code-block:: {content.language}{options_str}\n{self._indent_code(content.code)}\n"
        if self.jinja_raw_tags:
            return f"{{% raw %}}\n{code_block}{{% endraw %}}\n"
        return code_block


    def _indent_code(self, code: str) -> str:
        return "\n".join(f"    {line}" for line in code.split("\n"))


[docs]
    def file_extension(self) -> str:
        """Return ``"rst"``, without a leading dot."""
        return "rst"



[docs]
    def format_table(self, headers: list[str], rows: list[list[str]]) -> str:
        """Format a simple grid table in reStructuredText."""
        if not headers:
            return ""

        # Determine column widths based on headers and rows
        col_widths: list[int] = []
        for i, header in enumerate(headers):
            max_cell = max((len(row[i]) for row in rows), default=0)
            col_widths.append(max(len(header), max_cell))

        def sep(char: str) -> str:
            return "+" + "+".join(char * (w + 2) for w in col_widths) + "+"

        def make_row(columns: list[str]) -> str:
            return "|" + "|".join(f" {c.ljust(w)} " for c, w in zip(columns, col_widths)) + "|"

        top = sep("-")
        header_sep = sep("=")
        row_sep = sep("-")

        lines: list[str] = [top, make_row(headers), header_sep]
        for row in rows:
            lines.append(make_row(row))
            lines.append(row_sep)
        return "\n".join(lines) + "\n"




def _extract_doc_contents(raw_content: str, accepted_tags: list[str]) -> list[str]:
    """
    Return the dedented body of every tagged block found in ``raw_content``.

    A block opens with ``@<tag>`` or ``\\<tag>`` and closes with ``@end<tag>`` or
    ``\\end<tag>`` for the same tag, where ``<tag>`` is one of ``accepted_tags``.
    The opening tag is only recognised at the very start of the string or right
    after a newline, so tags mentioned mid-sentence in prose are ignored.
    Whitespace between the tags and the body is not part of the result.
    """
    if not accepted_tags:
        return []
    alternatives = "|".join(map(re.escape, accepted_tags))
    block_re = re.compile(rf"(?:^|(?<=\n))(?:@|\\)({alternatives})\s*(.*?)\s*(?:@|\\)end\1", re.DOTALL)
    return [textwrap.dedent(found[2]) for found in block_re.finditer(raw_content)]


def _detect_gtest(body_code: str) -> GTestInfo | None:
    """Return a `GTestInfo` built from the macro arguments when `body_code` starts with a recognized GTest macro call, else `None`."""
    found = _GTEST_DECL_RE.match(body_code)
    if found is None:
        return None
    suite_name, case_name = found.groups()
    return GTestInfo(suite=suite_name, case=case_name)


def _render_doc_template(content: str, declaration_name: str, gtest: GTestInfo | None) -> str:
    """
    Render a doc block as a Jinja2 template.

    Content containing neither `{{` nor `{%` is returned unchanged without touching
    the template engine. Otherwise it is rendered with `gtest` in the context when
    one was detected. Template failures are re-raised as `UserNotificationException`
    naming `declaration_name`, so authors can locate the faulty block.
    """
    has_template_markup = "{{" in content or "{%" in content
    if not has_template_markup:
        return content

    context = {}
    if gtest is not None:
        context["gtest"] = gtest

    try:
        template = _TEMPLATE_ENV.from_string(content)
        return template.render(context)
    except UndefinedError as error:
        # Handled before the broader TemplateError clause to give a more specific message.
        raise UserNotificationException(f"Undefined template variable in doc block of '{declaration_name}': {error.message}") from error
    except TemplateError as error:
        raise UserNotificationException(f"Template error in doc block of '{declaration_name}': {error}") from error


def _build_declaration_section(name: str, description_tokens: list[Token], body_code: str, body_start_line: int, tags: list[str]) -> Section:
    """
    Build the section documenting one declaration.

    The section is titled `name`. Each doc block found under `tags` in the
    description comments is rendered and added as text, in order, and the
    declaration's code is added last with `body_start_line` as its start line.
    """
    section = Section(name)
    gtest = _detect_gtest(body_code)
    for token in description_tokens:
        comment_text = CLangParser.get_comment_content(token)
        for doc_block in _extract_doc_contents(comment_text, tags):
            rendered = _render_doc_template(doc_block, name, gtest)
            section.add_content(TextContent(rendered))
    code = CodeContent(code=body_code, start_line=body_start_line)
    section.add_content(code)
    return section



[docs]
def generate_doc_structure(translation_unit: TranslationUnit, docs_format: DocsFormat = DocsFormat.md) -> DocStructure:
    """
    Generate documentation structure from a translation unit.

    The document is titled after the source file name. Function definitions are
    collected under a "Functions" section and classes under a "Classes" section;
    a section is only added when it has at least one entry. Doc blocks are read
    from comments tagged with the format's tag or with ``docs``.
    """
    tags = [docs_format.format_tag, "docs"]
    doc = DocStructure(translation_unit.source_file.name)

    def add_group(title: str, declarations: list) -> None:
        # Skip empty groups so no empty heading is produced.
        if not declarations:
            return
        group = Section(title)
        doc.add_section(group)
        for declaration in declarations:
            group.add_subsection(
                _build_declaration_section(
                    declaration.name,
                    declaration.description_tokens,
                    declaration.body.content,
                    declaration.body.start_line,
                    tags,
                )
            )

    add_group("Functions", [f for f in CLangParser.get_functions(translation_unit) if f.is_definition])
    add_group("Classes", CLangParser.get_classes(translation_unit))
    return doc




[docs]
def generate_documentation(translation_unit: TranslationUnit, formatter: OutputFormatter, output_file: Path) -> None:
    """Build the doc structure for ``translation_unit`` in the formatter's format, render it and write it to ``output_file`` as UTF-8."""
    structure = generate_doc_structure(translation_unit, formatter.docs_format)
    rendered = formatter.format(structure)
    output_file.write_text(rendered, encoding="utf-8")