From 86141bfa0fbea5db77be6e4e7d921c58eb17d6dc Mon Sep 17 00:00:00 2001 From: Jason Walker <913443@dadeschools.net> Date: Thu, 2 Jul 2026 14:33:02 -0400 Subject: [PATCH] docs: LLM-Agent-SHA opaque attribution convention, Phase 0 (#86) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the Phase 0 owner decision on #86 (issuecomment-1354): - docs/llm-agent-sha.md: format llm-<12 lowercase hex> (^llm-[0-9a-f]{12}$), generation rules, per-PR/workstream lifetime, visible markdown metadata blocks, no SHA in branch/worktree names, same-SHA vs same-user vs same-profile distinction. Attribution only — never an eligibility input. - docs/llm-workflow-runbooks.md: attribution subsection + handoff/review runbook pointers. - templates start-issue.md / review-pr.md: handoff and review metadata blocks; reviewer rule that a different SHA is not a different actor. - tests/test_llm_agent_sha.py: negative tests — same Gitea user with a different LLM-Agent-SHA still fails self-review and self-merge; eligibility results are identical with/without/across SHA env values; no gate accepts or reads any agent-SHA input. No launcher/env handling, no gitea_whoami fields, no PR auto-injection, no audit schema changes. No eligibility behavior changed. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/llm-agent-sha.md | 138 +++++++++++++ docs/llm-workflow-runbooks.md | 22 +- .../templates/review-pr.md | 10 + .../templates/start-issue.md | 11 + tests/test_llm_agent_sha.py | 195 ++++++++++++++++++ 5 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 docs/llm-agent-sha.md create mode 100644 tests/test_llm_agent_sha.py diff --git a/docs/llm-agent-sha.md b/docs/llm-agent-sha.md new file mode 100644 index 0000000..45d77fb --- /dev/null +++ b/docs/llm-agent-sha.md @@ -0,0 +1,138 @@ +# LLM-Agent-SHA — Opaque Agent Attribution (Phase 0) + +Convention for attributing work to a specific LLM session/workstream across +issues, branches, PRs, and review handoffs, without exposing a human or model +identity. Approved by the owner decision on issue #86 +(`#issuecomment-1354`); this document implements **Phase 0 only**. + +## The one rule that matters + +`LLM-Agent-SHA` is **informational attribution metadata only**. It must never +be used for authentication, authorization, review eligibility, merge +eligibility, profile permissions, or any other security decision. + +The security gates remain, unchanged: + +- the **authenticated Gitea user** (self-review/self-merge protection), +- the **active MCP profile** and its `allowed_operations` + (see [`gitea-execution-profiles.md`](gitea-execution-profiles.md)), +- the fail-closed eligibility checks in `gitea_check_pr_eligibility`. + +Two sessions with different `LLM-Agent-SHA` values that authenticate as the +same Gitea user are **the same actor** for review/merge safety. A different +SHA never unlocks self-review or self-merge. `tests/test_llm_agent_sha.py` +proves the eligibility logic never consults the SHA. + +## Format + +```text +LLM-Agent-SHA: llm-<12 lowercase hex chars> +``` + +Validation regex: + +```text +^llm-[0-9a-f]{12}$ +``` + +Examples: `llm-8f3a9c2d6b41`, `llm-41d0e7aa9f2c`, `llm-b7c93d441a08`. + +### Generation + +Generate 48 random bits, e.g. `python3 -c "import secrets; print('llm-' + +secrets.token_hex(6))"`, or hash a non-secret session UUID. An +operator-provided opaque ID is also fine. + +Do **not** derive the value from any of: + +- a Gitea token or other secret, +- an email address or username, +- a machine hostname or private filesystem path, +- a model or provider name, +- conversation contents. + +The SHA must contain no model name, provider name, human name, email, +hostname, token, private path, or conversation-derived content. It is safe to +include in PR bodies, issue comments, and audit logs — and only there. + +## Lifetime + +Canonical lifetime is **per PR/workstream**: pick one SHA when starting an +issue and keep it through the branch, PR, and handoff for that workstream. A +per-session SHA is acceptable when the session maps cleanly to one +workstream. Do not reuse a SHA across unrelated workstreams. + +## Placement + +Phase 0 uses **visible markdown metadata blocks** (not hidden HTML +comments). Include the block in PR bodies and review handoffs; keep it out of +ordinary comments unless attribution is genuinely useful there. + +**Never put the SHA in branch or worktree names.** Branches stay +issue-linked and human-readable (`docs/issue-86-llm-agent-sha-phase0`), per +the branch standard. + +### Handoff metadata block (implementer → PR body / handoff report) + +```markdown +LLM Handoff Metadata: +- LLM-Agent-SHA: llm-8f3a9c2d6b41 +- LLM-Role: implementer +- Authenticated-Gitea-User: jcwalker3 +- MCP-Profile: gitea-default +- Branch: docs/example-branch +- Worktree: branches/docs-example-branch +- Self-review allowed: no +``` + +### Review metadata block (reviewer → review comment) + +```markdown +Review Metadata: +- LLM-Agent-SHA: llm-41d0e7aa9f2c +- LLM-Role: reviewer +- Authenticated-Gitea-User: sysadmin +- MCP-Profile: prgs-reviewer +- Eligibility: passed +``` + +## Same SHA vs same user vs same profile + +Reviewers and operators must keep three distinct identities straight: + +| Comparison | Meaning | Effect on eligibility | +|---|---|---| +| same `LLM-Agent-SHA` | same LLM session/workstream wrote both artifacts | **none — attribution only** | +| same authenticated Gitea user | same Gitea actor | **blocks** self-review / self-merge, regardless of SHA | +| same MCP profile | same capability set | governs `allowed_operations` (what actions are permitted at all) | + +Concretely: an implementer session (`llm-8f3a…`, user `jcwalker3`) and a +would-be reviewer session (`llm-41d0…`, also user `jcwalker3`) have different +SHAs but the **same Gitea user** — the reviewer session is still the PR +author to Gitea and must not review, approve, or merge. Review handoffs +require a genuinely different authenticated user (e.g. `sysadmin` / +`prgs-reviewer`). + +## Phase 0 scope (and what is deferred) + +Phase 0 is documentation, handoff/review templates, and negative tests only. +Deferred to later owner-approved phases; none of this exists yet: + +- launcher-enforced SHA generation, +- `LLM_AGENT_SHA` / `LLM_AGENT_ROLE` environment injection, +- `gitea_whoami` returning SHA/role, +- automatic PR body injection by MCP tools, +- audit schema changes requiring the SHA, +- release/orchestrator lineage tracking. + +MCP tools neither read nor emit the SHA. Setting an `LLM_AGENT_SHA` +environment variable has no effect on any tool; the negative tests assert +eligibility results are byte-identical with and without it. + +## Related documents + +- [`llm-workflow-runbooks.md`](llm-workflow-runbooks.md) — the runbooks whose + handoffs carry these blocks +- [`gitea-execution-profiles.md`](gitea-execution-profiles.md) — profiles and + `allowed_operations` (the real permission gate) +- [`safety-model.md`](safety-model.md) — audit, redaction, confirmation gates diff --git a/docs/llm-workflow-runbooks.md b/docs/llm-workflow-runbooks.md index 066f62d..4542a90 100644 --- a/docs/llm-workflow-runbooks.md +++ b/docs/llm-workflow-runbooks.md @@ -45,6 +45,18 @@ Use any eligible reviewer profile to review PR #N. Use any eligible merger profile to merge PR #N if checks pass. ``` +### Attribution: `LLM-Agent-SHA` (metadata only) + +Sessions may attribute their work with an opaque `LLM-Agent-SHA` +(`llm-<12 lowercase hex>`, e.g. `llm-8f3a9c2d6b41`) in PR-body and +review-handoff metadata blocks — see +[`llm-agent-sha.md`](llm-agent-sha.md) for the full convention. It is +**attribution only**: eligibility is decided solely by the authenticated +Gitea user and the profile's allowed operations. Two sessions with different +SHAs under the same Gitea user are the same actor — a different SHA never +permits self-review or self-merge. Keep the SHA out of branch and worktree +names. + ## Prerequisites: canonical config + thin launchers Runtime profiles live in **one canonical JSON file**, referenced by every LLM @@ -274,7 +286,8 @@ touching anything. `fix/...` / `docs/...`); `cd` into that worktree; implement narrowly; add or update tests if behavior changes; run the full suite; commit with an issue-linked message; open a PR to `master`. **Do not** review or merge your - own PR. + own PR. Include an `LLM Handoff Metadata` block (with `LLM-Agent-SHA`) in + the PR body — see [`llm-agent-sha.md`](llm-agent-sha.md). - **Prompt:** `Use an author profile to implement issue #N and open a PR to master. Do not self-review or self-merge.` @@ -285,7 +298,11 @@ touching anything. - **Steps:** confirm identity + eligibility (menu eligibility check or `gitea_check_pr_eligibility`); read the diff; confirm scope matches the linked issue; post the review (`comment` / `request_changes` / `approve`) via the - gated review tool. Pin the reviewed head SHA where supported. + gated review tool. Pin the reviewed head SHA where supported. Include a + `Review Metadata` block (with your own `LLM-Agent-SHA`) in the review — + and remember: a different `LLM-Agent-SHA` does **not** make you a different + actor; only a different authenticated Gitea user does + ([`llm-agent-sha.md`](llm-agent-sha.md)). - **Prompt:** `Use any eligible reviewer profile to review PR #N. Approve only if scope matches issue #M and checks pass; otherwise request changes.` @@ -391,6 +408,7 @@ scripts/release-tag v0.4.0 --notes-file /tmp/release-notes.md --push - [`../skills/llm-project-workflow/SKILL.md`](../skills/llm-project-workflow/SKILL.md) — portable cross-project LLM workflow skill. - [`gitea-execution-profiles.md`](gitea-execution-profiles.md) — the profile model. +- [`llm-agent-sha.md`](llm-agent-sha.md) — opaque agent attribution metadata (never an eligibility input). - [`safety-model.md`](safety-model.md) — trust boundaries and audit logging. - [`tool-boundaries.md`](tool-boundaries.md) — per-tool allowed operations. - [`credential-isolation.md`](credential-isolation.md) — credential handling. diff --git a/skills/llm-project-workflow/templates/review-pr.md b/skills/llm-project-workflow/templates/review-pr.md index 6ee8833..49a8f3e 100644 --- a/skills/llm-project-workflow/templates/review-pr.md +++ b/skills/llm-project-workflow/templates/review-pr.md @@ -8,6 +8,8 @@ Task: review PR # for issue #. Rules (llm-project-workflow): - Review in a SEPARATE detached review worktree, never the author's folder. - You must NOT be the PR author. If the authenticated user == PR author, stop. + A different LLM-Agent-SHA does NOT make you a different actor — only a + different authenticated Gitea user does (docs/llm-agent-sha.md). - Do not merge if any check fails. Steps: @@ -21,6 +23,14 @@ Steps: 6. Run the test suite; note results. 7. Post the review verdict: approve only if scope is clean and checks pass; otherwise request changes with specifics. Never merge from this review step. + Include a "Review Metadata" block (attribution only — docs/llm-agent-sha.md): + + Review Metadata: + - LLM-Agent-SHA: llm-<12 lowercase hex, e.g. llm-41d0e7aa9f2c> + - LLM-Role: reviewer + - Authenticated-Gitea-User: + - MCP-Profile: + - Eligibility: passed/failed Handoff: reviewer identity, PR author, scope verdict, checks + results, decision. ``` diff --git a/skills/llm-project-workflow/templates/start-issue.md b/skills/llm-project-workflow/templates/start-issue.md index 64dc5eb..1bbdb6c 100644 --- a/skills/llm-project-workflow/templates/start-issue.md +++ b/skills/llm-project-workflow/templates/start-issue.md @@ -23,6 +23,17 @@ Steps: 6. Checks: run the test suite, compile/lint changed files, git diff --check, and scan the diff for secrets. 7. Commit (issue-linked message), push the branch, open a PR to master. + Include an "LLM Handoff Metadata" block in the PR body (attribution only; + never an eligibility input — docs/llm-agent-sha.md): + + LLM Handoff Metadata: + - LLM-Agent-SHA: llm-<12 lowercase hex, e.g. llm-8f3a9c2d6b41> + - LLM-Role: implementer + - Authenticated-Gitea-User: + - MCP-Profile: + - Branch: + - Worktree: + - Self-review allowed: no 8. Stop before review/merge — you are the author. Handoff: issue #, branch, worktree path, files changed, checks + results, PR URL. diff --git a/tests/test_llm_agent_sha.py b/tests/test_llm_agent_sha.py new file mode 100644 index 0000000..edbe518 --- /dev/null +++ b/tests/test_llm_agent_sha.py @@ -0,0 +1,195 @@ +"""Negative tests for LLM-Agent-SHA attribution (#86, Phase 0). + +``LLM-Agent-SHA`` (docs/llm-agent-sha.md) is attribution metadata ONLY. These +tests prove it can never bypass the review/merge safety gates: + +1. Same Gitea user + different LLM-Agent-SHA still fails self-review/approval. +2. Same Gitea user + different LLM-Agent-SHA still fails self-merge. +3. The eligibility logic does not consult SHA metadata at all — results are + identical with no SHA, one SHA, or a different SHA in the environment, and + no gate accepts an agent-SHA input. + +Phase 0 adds no SHA support to any MCP tool; the environment variables set +here (``LLM_AGENT_SHA`` / ``LLM_AGENT_ROLE``) simulate a future launcher and +must be ignored by every gate. +""" +import inspect +import os +import re +import sys +import unittest +from unittest.mock import patch + +sys.path.insert(0, str(__import__("pathlib").Path(__file__).resolve().parent.parent)) + +from mcp_server import ( # noqa: E402 + gitea_check_pr_eligibility, + gitea_merge_pr, + gitea_review_pr, + gitea_submit_pr_review, +) + +FAKE_AUTH = "Basic dGVzdDp0ZXN0" +SHA_PATTERN = re.compile(r"^llm-[0-9a-f]{12}$") +# Two distinct, well-formed agent SHAs "belonging" to the same Gitea user. +SHA_IMPLEMENTER = "llm-8f3a9c2d6b41" +SHA_WOULD_BE_REVIEWER = "llm-41d0e7aa9f2c" + + +def _pr(author, state="open", sha="abc123", mergeable=True): + return { + "user": {"login": author}, + "state": state, + "head": {"sha": sha}, + "mergeable": mergeable, + } + + +class TestShaFormatConvention(unittest.TestCase): + """The documented format: llm-<12 lowercase hex>, nothing identifying.""" + + def test_documented_examples_are_valid(self): + for value in (SHA_IMPLEMENTER, SHA_WOULD_BE_REVIEWER, "llm-b7c93d441a08"): + self.assertRegex(value, SHA_PATTERN) + + def test_identifying_or_malformed_values_are_rejected(self): + for bad in ( + "llm-8F3A9C2D6B41", # uppercase + "llm-8f3a9c2d6b4", # too short + "llm-8f3a9c2d6b411", # too long + "llm-opus4", # model name, not hex + "claude-8f3a9c2d6b41", # provider prefix + "jcwalker3", # username + "llm-user@example.com", # email + "8f3a9c2d6b41", # missing prefix + "", + ): + self.assertNotRegex(bad, SHA_PATTERN) + + +class TestShaCannotBypassSelfReview(unittest.TestCase): + """Scenario: user A (SHA 1) authored the PR; user A (SHA 2) tries to act.""" + + def _env(self, agent_sha, role): + # Reviewer-capable profile + a simulated launcher-injected agent SHA. + return { + "GITEA_PROFILE_NAME": "gitea-reviewer", + "GITEA_ALLOWED_OPERATIONS": "read,review,approve,merge", + "LLM_AGENT_SHA": agent_sha, + "LLM_AGENT_ROLE": role, + } + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_same_user_different_sha_cannot_approve(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "reviewer") + with patch.dict(os.environ, env, clear=True): + r = gitea_check_pr_eligibility(pr_number=9, action="approve", remote="prgs") + self.assertFalse(r["eligible"]) + self.assertIn("authenticated user is PR author", r["reasons"]) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_same_user_different_sha_cannot_merge(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "merger") + with patch.dict(os.environ, env, clear=True): + r = gitea_check_pr_eligibility(pr_number=9, action="merge", remote="prgs") + self.assertFalse(r["eligible"]) + self.assertIn("authenticated user is PR author", r["reasons"]) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_gated_merge_tool_refuses_self_merge_despite_sha(self, _auth, mock_api): + # Even the fully-confirmed gated merge path must refuse: correct + # confirmation string, mergeable PR, merge-capable profile — but the + # authenticated user is the PR author, whatever the agent SHA says. + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "merger") + with patch.dict(os.environ, env, clear=True): + r = gitea_merge_pr( + pr_number=9, confirmation="MERGE PR 9", remote="prgs") + self.assertFalse(r.get("performed")) + for call in mock_api.call_args_list: + method, url = call.args[0], call.args[1] + self.assertFalse( + method == "POST" and url.endswith("/merge"), + f"self-merge mutation reached the API: {method} {url}", + ) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_review_tool_refuses_self_approval_despite_sha(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "reviewer") + with patch.dict(os.environ, env, clear=True): + r = gitea_review_pr( + pr_number=9, event="APPROVE", body="self approve", merge=False, + remote="prgs") + self.assertFalse(r["success"]) + self.assertIn("authenticated user is PR author", r["message"]) + for call in mock_api.call_args_list: + method, url = call.args[0], call.args[1] + self.assertFalse( + method == "POST" and url.endswith("/reviews"), + f"self-review mutation reached the API: {method} {url}", + ) + + +class TestEligibilityNeverConsultsSha(unittest.TestCase): + """The gates have no SHA input: not via env, not via parameters.""" + + def _run_eligibility(self, extra_env): + env = { + "GITEA_PROFILE_NAME": "gitea-reviewer", + "GITEA_ALLOWED_OPERATIONS": "read,review,approve", + } + env.update(extra_env) + with patch("mcp_server.get_auth_header", return_value=FAKE_AUTH), \ + patch("mcp_server.api_request") as mock_api: + mock_api.side_effect = [{"login": "reviewer-bot"}, _pr("author-bot")] + with patch.dict(os.environ, env, clear=True): + return gitea_check_pr_eligibility( + pr_number=5, action="approve", remote="prgs") + + def test_result_identical_with_without_and_across_shas(self): + baseline = self._run_eligibility({}) + with_one = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_IMPLEMENTER, "LLM_AGENT_ROLE": "implementer"}) + with_other = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_WOULD_BE_REVIEWER, "LLM_AGENT_ROLE": "reviewer"}) + self.assertEqual(baseline, with_one) + self.assertEqual(baseline, with_other) + self.assertTrue(baseline["eligible"]) # sanity: a real decision ran + + def test_eligibility_result_carries_no_agent_sha(self): + r = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_IMPLEMENTER, "LLM_AGENT_ROLE": "implementer"}) + blob = repr(r) + self.assertNotIn(SHA_IMPLEMENTER, blob) + self.assertNotIn("LLM_AGENT", blob) + + def test_gate_functions_accept_no_agent_sha_parameter(self): + for fn in (gitea_check_pr_eligibility, gitea_merge_pr, + gitea_review_pr, gitea_submit_pr_review): + for param in inspect.signature(fn).parameters: + lowered = param.lower() + self.assertNotIn("agent", lowered, + f"{fn.__name__} accepts agent param {param!r}") + self.assertNotIn("llm", lowered, + f"{fn.__name__} accepts llm param {param!r}") + + def test_gate_sources_never_read_agent_sha(self): + # Phase 0 guarantee: no gate reads LLM_AGENT_* metadata at all. + for fn in (gitea_check_pr_eligibility, gitea_merge_pr, + gitea_review_pr, gitea_submit_pr_review): + src = inspect.getsource(fn) + self.assertNotIn("LLM_AGENT", src, + f"{fn.__name__} reads LLM_AGENT metadata") + self.assertNotIn("agent_sha", src, + f"{fn.__name__} consults an agent SHA") + + +if __name__ == "__main__": + unittest.main() -- 2.43.7