diff --git a/README.md b/README.md index 2f052d9..ceae647 100644 --- a/README.md +++ b/README.md @@ -381,8 +381,15 @@ python3 -m pytest tests/ -v | `test_python_cli.py` | `close_issue.py` + `mark_issue.py` CLI validation | | `test_mirror_refs.py` | Flags, safety defaults, local integration tests | +(Core suites — the table is non-exhaustive; see `tests/` for the full set.) + All tests mock network and keychain access — no real API calls are made. +For how to write tests — mocking the API/auth safely, testing profile and +self-review/self-merge gates, no-secret regression expectations, and unit vs. +integration guidance — see +[`docs/developer-testing-guidelines.md`](docs/developer-testing-guidelines.md). + ## Troubleshooting ### macOS: `com.apple.provenance` blocks Python execution (#3) diff --git a/docs/architecture/jenkins-job-mapping-design.md b/docs/architecture/jenkins-job-mapping-design.md new file mode 100644 index 0000000..e0b04ff --- /dev/null +++ b/docs/architecture/jenkins-job-mapping-design.md @@ -0,0 +1,165 @@ +# Jenkins Repo/Branch/PR → Job Mapping — Design Notes + +- **Status:** Design (implementation-ready notes; **no implementation in this repo**) +- **Issue:** #77 (parent: #72 read-only tools design; umbrella: #75; boundary: ADR-0001, #71) +- **Related docs:** [`jenkins-readonly-build-status-design.md`](jenkins-readonly-build-status-design.md) +- **Date:** 2026-07-02 + +## 1. Purpose + +The #72 tool set addresses Jenkins jobs by **explicit fully-qualified job +path**. This document designs the layer above it: how a *(repository, branch, +PR)* tuple — the vocabulary of Gitea workflows — resolves deterministically to +a Jenkins job path, so an LLM can ask "did the build for `Gitea-Tools` +`master` pass?" without knowing Jenkins internals. + +Hard constraints inherited from #72 / ADR-0001: + +- **No silent guessing of job names.** Unmapped input returns an explicit + "no mapping" result — never a fuzzy match, never a constructed-and-probed + name. +- **Read-only.** Mapping introduces no Jenkins write actions. +- Lives in the **`jenkins-mcp`** boundary; no Gitea credentials involved. + +## 2. Mapping format + +Declarative, versioned config (TOML or JSON — match whatever config format +`jenkins-mcp` adopts; illustrated here as TOML): + +```toml +version = 1 + +[[mapping]] +# Source side (what the caller supplies) +repo = "Scaled-Tech-Consulting/Gitea-Tools" # org/repo, exact +# Target side (where it lives in Jenkins) +job = "scaled-tech/gitea-tools" # foldered job path +type = "multibranch" # "multibranch" | "single" | "parameterized-view" + +[[mapping]] +repo = "Scaled-Tech-Consulting/Timesheet" +branch = "master" # optional: branch-specific override +job = "scaled-tech/timesheet-master" +type = "single" +``` + +Field semantics: + +| Field | Required | Meaning | +|---|---|---| +| `repo` | yes | Exact `org/repo` (case-insensitive compare, stored canonical) | +| `branch` | no | Exact branch name this entry pins; absent = all branches | +| `job` | yes | Fully-qualified Jenkins job path, folders `/`-joined | +| `type` | yes | How branch/PR resolves under the job (§3) | + +Rules: + +- **Exact matching only** on `repo` and `branch`. No globs in v1 (globs invite + accidental over-matching; add later behind an explicit `pattern = true` flag + if ever needed). +- Unknown `type` or malformed entry ⇒ config load fails closed with a clear + error naming the entry — a broken mapping file must not half-load. +- Duplicate `(repo, branch)` keys ⇒ load error (ambiguity is refused, not + resolved). + +## 3. Resolution semantics by job type + +Given caller input `(repo, branch?, pr?)`: + +- **`multibranch`** — branch job addressed as `/` + (e.g. `feature/x` → `feature%2Fx`). PRs addressed as `/PR-` + (Jenkins multibranch PR-discovery naming). Both per #72 §8. +- **`single`** — the job path is used as-is; `branch`/`pr` input beyond the + entry's pinned branch is a **no-mapping** result (a single job cannot answer + for arbitrary branches). +- **`parameterized-view`** — read-only variant for jobs that encode branch as + a build parameter: resolution returns the base job path plus a + `branch_param` filter hint the status tools may apply client-side when + scanning recent builds. It never triggers anything (read-only rule). + +## 4. Precedence + +Most-specific entry wins, evaluated in this order: + +1. `(repo, branch)` exact entry — branch-pinned override. +2. `(repo)` entry — repo-wide (multibranch typical). +3. Nothing → **no mapping** (§5). + +PR input resolves through the same chain: a PR belongs to its **base repo**'s +mapping; forks never introduce their own mapping (a fork's head repo is not +consulted — CI runs live under the base repo's job). If the base repo is +unmapped, the PR is unmapped. + +Ties are impossible by construction (duplicate keys refused at load). + +## 5. No-match behavior + +```json +{ + "mapped": false, + "repo": "org/unknown-repo", + "branch": "master", + "error": "no Jenkins job mapping for this repo/branch", + "hint": "add an entry to the jenkins-mcp mapping config" +} +``` + +- Deterministic, explicit, machine-checkable (`mapped: false`). +- **Never** falls back to name construction ("repo name probably equals job + name"), never probes Jenkins for candidates, never string-similarity ranks. +- The hint names the config, not a guessed job. + +## 6. Where the mapping config lives + +- **In the `jenkins-mcp` package/deployment** (e.g. `jenkins-mcp/mapping.toml`), + version-controlled next to the server that consumes it — *not* in Gitea-Tools + and *not* in per-user env vars (mappings are shared team facts, not + credentials). +- Path overridable via env (`JENKINS_MCP_MAPPING_FILE`) for tests/containers. +- Contains **no secrets** — job paths and repo names only — so it is safe to + commit and review like any other config. +- Reloaded at server start; a hot-reload tool is out of scope (restart is the + documented path). + +## 7. Exposed tool surface (read-only) + +One addition to the #72 tool set: + +| Tool | Purpose | +|---|---| +| `jenkins_resolve_job` | `(repo, branch?, pr?)` → `{mapped, job, addressed_path, type}` or the §5 no-match result. Pure config lookup — **no Jenkins API call at all.** | + +Status tools (`jenkins_latest_build` etc.) accept either an explicit job path +(as designed in #72) **or** `(repo, branch)` which they resolve via the same +mapping layer first. Resolution failure surfaces the §5 payload rather than +querying Jenkins. + +## 8. Testing strategy (mocked; for the implementing package) + +Config-layer tests (no network at all): + +- Exact-match hit: repo-wide and branch-pinned entries. +- Precedence: branch-pinned beats repo-wide. +- Multibranch encoding: `feature/x` → `/feature%2Fx`; PR → `/PR-7`. +- `single` type with non-pinned branch ⇒ no-mapping. +- Fork PR resolves through base repo; unmapped base ⇒ no-mapping. +- Unknown repo/branch ⇒ §5 payload, and **no Jenkins client call** + (`mock_api.assert_not_called()`). +- Malformed config / duplicate keys / unknown type ⇒ load fails closed with + entry-naming error. +- No-secret check: mapping load/error paths never touch or print credentials. + +Integration with mocked Jenkins API (per #72 §9): resolved path is used +verbatim in the GET URL; no write verbs anywhere. + +## 9. Standalone-worthiness and readiness + +#77 was split from #72 on the condition it stays "standalone only if mapping +is nontrivial." The precedence rules, fork/PR semantics, three job types, and +fail-closed config loading above are the nontrivial part; this document is the +justification. + +Ready to implement in `jenkins-mcp` when #72's readiness checklist clears +(ADR-0001 owner decision #1; profile schema per #76 or hand-rolled +`jenkins-readonly`). Nothing here unlocks build triggers, deploys, or +parameterized launches. diff --git a/docs/architecture/jenkins-readonly-build-status-design.md b/docs/architecture/jenkins-readonly-build-status-design.md new file mode 100644 index 0000000..7e231e5 --- /dev/null +++ b/docs/architecture/jenkins-readonly-build-status-design.md @@ -0,0 +1,151 @@ +# Jenkins Read-Only Build Status Tools — Design Notes + +- **Status:** Design (implementation-ready notes; **no implementation in this repo**) +- **Issue:** #72 (parent umbrella: #75; boundary decision: ADR-0001, #71) +- **Related:** #77 (repo/branch/PR → job mapping, designed separately) +- **Date:** 2026-07-02 + +## 1. Purpose and scope + +Define the minimum **read-only** Jenkins MCP tool set that lets an LLM answer: +*"Did the latest build for this project/branch succeed or fail?"* — plus enough +detail (build URL, number, timing, result) to report or investigate. + +Phase 1 is **strictly read-only**, per ADR-0001 +([`adr-0001-mcp-control-plane-boundaries.md`](adr-0001-mcp-control-plane-boundaries.md)): + +- **Excluded: build triggers.** +- **Excluded: deploy triggers.** +- **Excluded: parameterized job launches.** +- Excluded: job creation/deletion/config changes, queue manipulation, node + management — any Jenkins mutation whatsoever. + +## 2. Boundary placement + +These tools belong to the **`jenkins-mcp`** package/server of the MCP Control +Plane — **never** inside `gitea-mcp` (`mcp_server.py` in this repo). +Consequences (from `tool-boundaries.md`, `credential-isolation.md`, ADR-0001): + +- `jenkins-mcp` runs as its own server process with its own `.env`. +- **Jenkins credentials never enter the Gitea MCP runtime**, and Gitea + credentials never enter `jenkins-mcp`. +- This document lands in this repo only because the repo currently hosts the + Control Plane's architecture docs; the code ships elsewhere (owner decision + #1 of ADR-0001). + +## 3. Minimum read-only tool set + +| Tool | Purpose | +|---|---| +| `jenkins_whoami` | Verify authenticated Jenkins identity + active profile (mirror of `gitea_whoami`; fail-closed identity proof before anything else) | +| `jenkins_list_jobs` | List visible jobs (supports folder paths), with pagination bounds | +| `jenkins_latest_build` | The primary question: latest build of a job (or job+branch for multibranch) → status summary | +| `jenkins_build_status` | Status of a specific build number (job, number) | +| `jenkins_get_build` | Full safe detail of a build (fields in §4) | +| `jenkins_console_tail` | Bounded, redacted tail of a build's console log (§6) — optional, approval-gated addition | + +All tools are `GET`-only against the Jenkins JSON API (`/api/json`, +`.../lastBuild/api/json`, `.../consoleText`). No tool issues POST/PUT/DELETE. + +## 4. Return payloads (safe fields) + +`jenkins_latest_build` / `jenkins_build_status` / `jenkins_get_build` return: + +| Field | Source | Notes | +|---|---|---| +| `job` | request | Fully-qualified job path (folders joined with `/`) | +| `build_number` | `number` | int | +| `result` | `result` | `SUCCESS` / `FAILURE` / `UNSTABLE` / `ABORTED` / `NOT_BUILT`; `null` → `IN_PROGRESS` when `building=true` | +| `building` | `building` | bool | +| `url` | `url` | Build URL | +| `branch` | multibranch job name / SCM action | Best-effort; omitted when unknown | +| `timestamp` | `timestamp` | ISO-8601 UTC (converted from epoch ms) | +| `duration_seconds` | `duration` | 0/omitted while building | +| `commit_sha` | SCM build action | Best-effort; omitted when unknown | + +Rules: no raw Jenkins payload passthrough (allowlist projection only); no +`Authorization` header, token, or crumb material in any output or error +(reuse the shared redaction approach of `safety-model.md` §3 / `gitea_audit`). + +## 5. Failure behavior (fail closed, clear, safe) + +| Condition | Behavior | +|---|---| +| Unknown job | Explicit `{"found": false, "job": "", "error": "job not found"}` — never guess or fuzzy-match a job name (hard rule; see also #77) | +| Jenkins unreachable (DNS/timeout/conn refused) | Clear `"network error contacting Jenkins: "`; no retry storm — mirror `gitea_auth.api_request` timeout + failure conversion | +| 502/503/504 | Explicit "Jenkins upstream unavailable" | +| 401/403 | "Jenkins auth failed / insufficient permissions" — **without** echoing credentials or the request's auth material | +| Malformed JSON | "malformed JSON response from Jenkins" (no raw-body dump) | +| Missing profile/creds | Fail closed before any network call (§7) | + +## 6. Console tail safety (`jenkins_console_tail`) + +Console logs are the highest-risk surface (secrets, tokens, internal hosts +routinely leak into build logs). If included at all (owner may defer it): + +- **Bounded:** hard server-side cap (default: last 200 lines AND ≤ 64 KiB, + whichever is smaller; caller may request less, never more). +- **Redacted:** pass through the shared secret redactor (token/`Basic`/`Bearer`/ + password/key-value patterns) before returning; redaction failure ⇒ return an + error, never the raw text. +- **Default off:** summary fields (`result`, failing stage if cheaply available) + are preferred; the tail requires an explicit `allowed_operations` entry + (`jenkins.console.read`) distinct from plain `jenkins.build.read`. + +## 7. Credentials and profile requirements + +Follows the per-service profile model (`gitea-execution-profiles.md`, extended +by #76): + +- Env/config: `JENKINS_URL`, `JENKINS_USER`, `JENKINS_TOKEN_SOURCE_NAME` + (name-of-secret only — value resolved at runtime, never logged/committed). +- Profile: e.g. `jenkins-readonly` with namespaced + `allowed_operations: ["jenkins.read", "jenkins.build.read"]` + (+ `jenkins.console.read` only if the tail tool is approved); + `forbidden_operations: ["jenkins.build.trigger", "jenkins.deploy", "jenkins.job.configure"]` + as belt-and-braces even though no mutating tool exists. +- Missing URL/user/token/profile ⇒ **fail closed** with a clear message. +- Since every tool is read-only, no confirmation gates are needed — but + identity (`jenkins_whoami`) must still work so workflows can prove which + Jenkins account they act as. + +## 8. Job addressing and mapping + +Tools accept an explicit fully-qualified job path (folder-aware: +`folder/subfolder/job`). How a *repo/branch/PR* resolves to that job path is +**out of scope here** and designed in **#77**, with these fixed constraints: + +- No silent guessing of job names — unmapped input returns an explicit + "no mapping" result. +- Multibranch pipelines address a branch job as `/` with proper + URL-encoding of branch names (e.g. `feature%2Fx`). + +## 9. Testing strategy (for the implementing package) + +Mocked-Jenkins unit tests only (no live Jenkins in unit CI), mirroring this +repo's conventions (`docs/developer-testing-guidelines.md`): + +- Patch the HTTP client; assert method is always `GET` and URL shape is correct + (folders, multibranch encoding). +- Success projections: field allowlist exactly as §4; unknown fields dropped. +- `result=null + building=true` ⇒ `IN_PROGRESS`. +- Unknown job ⇒ found:false, no fuzzy match, no API retry. +- Timeout/DNS/5xx/malformed-JSON ⇒ safe errors, no secret/credential leakage + (explicit no-token-in-error assertions). +- Console tail: cap enforcement (lines and bytes), redaction applied, redaction + failure ⇒ error not raw text, gated behind `jenkins.console.read`. +- Profile gate: missing/insufficient profile ⇒ no network call + (`mock_api.assert_not_called()` pattern). + +## 10. Implementation-readiness checklist + +Ready to implement in `jenkins-mcp` once: + +1. ADR-0001 owner decision #1 (where `jenkins-mcp` lives) is made. +2. #76 profile schema exists (or a minimal `jenkins-readonly` profile is + hand-rolled to the same rules). +3. #77 mapping design is accepted (or tools ship path-addressed only, mapping + deferred). + +Explicitly **not** unlocked by this document: build triggers, deploys, +parameterized launches, any Jenkins code in `mcp_server.py`. diff --git a/docs/developer-testing-guidelines.md b/docs/developer-testing-guidelines.md new file mode 100644 index 0000000..dc5693c --- /dev/null +++ b/docs/developer-testing-guidelines.md @@ -0,0 +1,240 @@ +# Developer Testing Guidelines + +How to write and run tests for Gitea-Tools. This guide reflects the current +repository behavior and the safety model documented in +[`safety-model.md`](safety-model.md), +[`credential-isolation.md`](credential-isolation.md), and +[`gitea-execution-profiles.md`](gitea-execution-profiles.md). + +Core principle: **tests never make real network calls and never touch real +credentials.** Every test mocks the HTTP client and the keychain/auth lookup. + +--- + +## 1. Standard test commands + +The test suite needs the project virtualenv (it provides the MCP SDK): + +```bash +# From the repository root +source venv/bin/activate +python3 -m pytest tests/ -q +``` + +Or invoke the venv interpreter directly without activating: + +```bash +./venv/bin/python -m pytest tests/ -q +``` + +Use `-q` for a compact summary and `-v` to see individual test names. + +### Run the full suite + +```bash +./venv/bin/python -m pytest tests/ -q +``` + +### Run targeted tests + +```bash +# One file +./venv/bin/python -m pytest tests/test_mcp_server.py -q + +# One class +./venv/bin/python -m pytest tests/test_merge_pr.py::TestMergeDisabled -q + +# One test, by node id +./venv/bin/python -m pytest tests/test_review_pr.py::TestAPIPayload::test_payload_fields_and_workflow -q + +# By keyword expression +./venv/bin/python -m pytest tests/ -q -k "merge and fails_closed" +``` + +--- + +## 2. Syntax and formatting checks + +These are fast and belong in any pre-PR loop: + +```bash +# Byte-compile the main modules (catches syntax errors) +python3 -m py_compile mcp_server.py +python3 -m py_compile manage_labels.py + +# Lint shell scripts without executing them +bash -n scripts/clear-provenance + +# Detect stray conflict markers and whitespace errors in the diff +git diff --check +``` + +Run `git diff --check` before every commit; it flags leftover merge-conflict +markers and trailing-whitespace/whitespace-error lines. + +--- + +## 3. How to add an MCP tool test + +MCP tools live in `mcp_server.py` and are exercised in +`tests/test_mcp_server.py`. Tests call the underlying tool function directly +with the network layer and auth mocked. The established pattern: + +```python +from unittest.mock import patch, MagicMock + +FAKE_AUTH = "Basic ZmFrZTpmYWtl" # not a real credential + +class TestCreateIssue(unittest.TestCase): + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_creates_issue(self, _auth, mock_api): + mock_api.return_value = {"number": 1, "html_url": "https://gitea.example.com/issues/1"} + + result = mcp_server.gitea_create_issue(title="Add tests", remote="prgs") + + # Assert on the request the tool would have made + mock_api.assert_called_once() + method, url = mock_api.call_args[0][0], mock_api.call_args[0][1] + self.assertEqual(method, "POST") + self.assertIn("/issues", url) +``` + +Checklist when adding a tool test: + +* Patch `mcp_server.api_request` — never hit the network. +* Patch `mcp_server.get_auth_header` to return a fake header — never read the + keychain. +* Assert on the **method, URL, and payload** the tool builds, and on the shape + of the returned payload. +* Cover both `dadeschools` and `prgs` remotes when the tool takes `remote`, and + confirm the correct host/org/repo are used. +* Cover the error path (e.g. `api_request` raising) and confirm the tool + surfaces a clear message without leaking secrets. + +--- + +## 4. How to mock API requests safely + +* **Always patch `mcp_server.api_request`** (or `gitea_auth.api_request` for the + CLI/auth-level tests). No test should open a socket. +* **Always patch the auth lookup** (`get_auth_header` / `get_credentials`) and + return an obviously fake value. Do not put a real token or password in a test, + a fixture, or an environment file. +* Prefer asserting on `mock_api.call_args` (method/URL/payload) over asserting on + a real response body. +* For keychain behavior specifically, see `tests/test_credentials.py`, which + mocks the `git credential fill` subprocess (`Popen`) and the environment. + +--- + +## 5. How to test profile / allowed-operation failures + +The execution-profile model (see +[`gitea-execution-profiles.md`](gitea-execution-profiles.md)) enforces that a +tool may only perform operations in its profile's `allowed_operations`, and that +`forbidden_operations` always override `allowed_operations`. Mutating tools must +**fail closed** when the active profile does not permit the operation. + +When adding or changing a gated tool, add tests that: + +* Configure a profile whose `allowed_operations` does **not** include the + requested operation, and assert the tool refuses **without** calling + `api_request` (assert `mock_api.assert_not_called()`). +* Configure a profile where the operation is in **both** allowed and forbidden, + and assert forbidden wins (still refused). +* Confirm the refusal message names the missing operation and does not include + any secret material. +* Confirm the happy path (operation allowed) still reaches `api_request`. + +The guiding assertion is: **no mutation path may reach `api_request` unless the +profile/allowed-operation check passed first.** + +--- + +## 6. How to test self-review / self-merge gates + +Author-cannot-review and author-cannot-merge are hard safety gates. The merge +path is gated (`gitea_merge_pr`), the legacy review wrapper fails closed on +`merge=True`, and `gitea_submit_pr_review` never merges. Existing coverage lives +in `tests/test_merge_pr.py` and `tests/test_review_pr.py`. + +Patterns to follow (see those files for concrete examples): + +* **Self-merge blocked:** authenticated user == PR author → the tool returns a + refusal and **never calls the merge endpoint** (`mock_api.assert_not_called()` + or assert no `POST .../merge`). +* **Fail-closed inputs:** missing confirmation string, or an unexpected + `expected_head_sha`/changed-file set → refuse before any API call. +* **Legacy wrapper:** `merge=True` on the review wrapper fails closed and points + to the gated workflow, with no API call + (`test_merge_flag_fails_closed_without_api_call`). +* **Self-approval blocked:** authenticated user == PR author → `approve` / + `request_changes` refused. + +Every new gate should have a test proving the mutating endpoint is **not** +reached when the gate should block. + +--- + +## 7. No-secret / no-token regression expectations + +Secrets must never appear in logs, tool return values, audit records, or test +output (see [`safety-model.md`](safety-model.md) §3). The audit module +(`gitea_audit.py`) redacts secret-like keys and value prefixes; see +`tests/test_audit.py`. + +Expectations for new tests: + +* Assert that token/authorization/password fields are replaced with + `gitea_audit.REDACTED` in any structured output or audit record + (`test_redacts_secret_keys`, `test_redacts_nested_and_lists`). +* Assert that credential-looking substrings in free-text (error messages, + reasons) are redacted (`test_redacts_credential_value_prefixes`, + `test_metadata_and_reason_redacted`). +* Never commit a real token/password, even in a fixture. Use obviously fake + values (e.g. `FAKE_AUTH` above). +* When a tool returns identity/profile metadata, assert it contains the + non-secret fields (username, profile name) and **not** the token. + +There is no third-party secret scanner wired into this repo today; secret safety +is enforced by `gitea_audit.redact` plus the regression tests above. A quick +manual sweep before a PR: + +```bash +# Look for accidentally committed credentials in the diff +git diff --cached | grep -nEi "authorization: (basic|bearer)|password|token=[A-Za-z0-9]" || echo "clean" +``` + +--- + +## 8. Unit tests vs. future Docker integration tests + +* **Unit tests (today, default):** fast, fully mocked, no network, no keychain. + This is where the vast majority of coverage lives and where new tests should + go. They must stay fast and must not require credentials. +* **Docker/local-Gitea integration tests (planned, see #66):** opt-in and + skipped by default, gated behind an explicit environment variable and run + against a pinned, disposable Gitea container. They validate real API behavior + (pagination, permissions, label/PR-review endpoints, error payloads) that + mocks cannot prove. They must not require production credentials and must not + leak tokens. + +Rule of thumb: prove **logic and request-shaping** with unit tests; reserve +integration tests for **real-server compatibility**. Do not convert unit tests +into network tests. + +--- + +## 9. Read-only vs. mutating tool expectations + +* **Read-only tools** (e.g. `gitea_whoami`, `gitea_view_*`, `gitea_list_*`, + `gitea_get_profile`): test that they never issue a mutating HTTP method and + never require a mutation gate. Assert the request method is `GET`. +* **Mutating tools** (create/edit/close/label, review, merge, mirror): test that + they (a) pass the profile/allowed-operation gate, (b) honor confirmation and + self-action gates, (c) emit an audit record with the authenticated identity + and outcome, and (d) fail closed — no `api_request` call — when any gate fails. + +Keep this split explicit in test names and assertions so a reviewer can see, per +tool, which category it belongs to and which gates it must respect. diff --git a/docs/llm-agent-sha.md b/docs/llm-agent-sha.md new file mode 100644 index 0000000..45d77fb --- /dev/null +++ b/docs/llm-agent-sha.md @@ -0,0 +1,138 @@ +# LLM-Agent-SHA — Opaque Agent Attribution (Phase 0) + +Convention for attributing work to a specific LLM session/workstream across +issues, branches, PRs, and review handoffs, without exposing a human or model +identity. Approved by the owner decision on issue #86 +(`#issuecomment-1354`); this document implements **Phase 0 only**. + +## The one rule that matters + +`LLM-Agent-SHA` is **informational attribution metadata only**. It must never +be used for authentication, authorization, review eligibility, merge +eligibility, profile permissions, or any other security decision. + +The security gates remain, unchanged: + +- the **authenticated Gitea user** (self-review/self-merge protection), +- the **active MCP profile** and its `allowed_operations` + (see [`gitea-execution-profiles.md`](gitea-execution-profiles.md)), +- the fail-closed eligibility checks in `gitea_check_pr_eligibility`. + +Two sessions with different `LLM-Agent-SHA` values that authenticate as the +same Gitea user are **the same actor** for review/merge safety. A different +SHA never unlocks self-review or self-merge. `tests/test_llm_agent_sha.py` +proves the eligibility logic never consults the SHA. + +## Format + +```text +LLM-Agent-SHA: llm-<12 lowercase hex chars> +``` + +Validation regex: + +```text +^llm-[0-9a-f]{12}$ +``` + +Examples: `llm-8f3a9c2d6b41`, `llm-41d0e7aa9f2c`, `llm-b7c93d441a08`. + +### Generation + +Generate 48 random bits, e.g. `python3 -c "import secrets; print('llm-' + +secrets.token_hex(6))"`, or hash a non-secret session UUID. An +operator-provided opaque ID is also fine. + +Do **not** derive the value from any of: + +- a Gitea token or other secret, +- an email address or username, +- a machine hostname or private filesystem path, +- a model or provider name, +- conversation contents. + +The SHA must contain no model name, provider name, human name, email, +hostname, token, private path, or conversation-derived content. It is safe to +include in PR bodies, issue comments, and audit logs — and only there. + +## Lifetime + +Canonical lifetime is **per PR/workstream**: pick one SHA when starting an +issue and keep it through the branch, PR, and handoff for that workstream. A +per-session SHA is acceptable when the session maps cleanly to one +workstream. Do not reuse a SHA across unrelated workstreams. + +## Placement + +Phase 0 uses **visible markdown metadata blocks** (not hidden HTML +comments). Include the block in PR bodies and review handoffs; keep it out of +ordinary comments unless attribution is genuinely useful there. + +**Never put the SHA in branch or worktree names.** Branches stay +issue-linked and human-readable (`docs/issue-86-llm-agent-sha-phase0`), per +the branch standard. + +### Handoff metadata block (implementer → PR body / handoff report) + +```markdown +LLM Handoff Metadata: +- LLM-Agent-SHA: llm-8f3a9c2d6b41 +- LLM-Role: implementer +- Authenticated-Gitea-User: jcwalker3 +- MCP-Profile: gitea-default +- Branch: docs/example-branch +- Worktree: branches/docs-example-branch +- Self-review allowed: no +``` + +### Review metadata block (reviewer → review comment) + +```markdown +Review Metadata: +- LLM-Agent-SHA: llm-41d0e7aa9f2c +- LLM-Role: reviewer +- Authenticated-Gitea-User: sysadmin +- MCP-Profile: prgs-reviewer +- Eligibility: passed +``` + +## Same SHA vs same user vs same profile + +Reviewers and operators must keep three distinct identities straight: + +| Comparison | Meaning | Effect on eligibility | +|---|---|---| +| same `LLM-Agent-SHA` | same LLM session/workstream wrote both artifacts | **none — attribution only** | +| same authenticated Gitea user | same Gitea actor | **blocks** self-review / self-merge, regardless of SHA | +| same MCP profile | same capability set | governs `allowed_operations` (what actions are permitted at all) | + +Concretely: an implementer session (`llm-8f3a…`, user `jcwalker3`) and a +would-be reviewer session (`llm-41d0…`, also user `jcwalker3`) have different +SHAs but the **same Gitea user** — the reviewer session is still the PR +author to Gitea and must not review, approve, or merge. Review handoffs +require a genuinely different authenticated user (e.g. `sysadmin` / +`prgs-reviewer`). + +## Phase 0 scope (and what is deferred) + +Phase 0 is documentation, handoff/review templates, and negative tests only. +Deferred to later owner-approved phases; none of this exists yet: + +- launcher-enforced SHA generation, +- `LLM_AGENT_SHA` / `LLM_AGENT_ROLE` environment injection, +- `gitea_whoami` returning SHA/role, +- automatic PR body injection by MCP tools, +- audit schema changes requiring the SHA, +- release/orchestrator lineage tracking. + +MCP tools neither read nor emit the SHA. Setting an `LLM_AGENT_SHA` +environment variable has no effect on any tool; the negative tests assert +eligibility results are byte-identical with and without it. + +## Related documents + +- [`llm-workflow-runbooks.md`](llm-workflow-runbooks.md) — the runbooks whose + handoffs carry these blocks +- [`gitea-execution-profiles.md`](gitea-execution-profiles.md) — profiles and + `allowed_operations` (the real permission gate) +- [`safety-model.md`](safety-model.md) — audit, redaction, confirmation gates diff --git a/docs/llm-workflow-runbooks.md b/docs/llm-workflow-runbooks.md index 066f62d..4542a90 100644 --- a/docs/llm-workflow-runbooks.md +++ b/docs/llm-workflow-runbooks.md @@ -45,6 +45,18 @@ Use any eligible reviewer profile to review PR #N. Use any eligible merger profile to merge PR #N if checks pass. ``` +### Attribution: `LLM-Agent-SHA` (metadata only) + +Sessions may attribute their work with an opaque `LLM-Agent-SHA` +(`llm-<12 lowercase hex>`, e.g. `llm-8f3a9c2d6b41`) in PR-body and +review-handoff metadata blocks — see +[`llm-agent-sha.md`](llm-agent-sha.md) for the full convention. It is +**attribution only**: eligibility is decided solely by the authenticated +Gitea user and the profile's allowed operations. Two sessions with different +SHAs under the same Gitea user are the same actor — a different SHA never +permits self-review or self-merge. Keep the SHA out of branch and worktree +names. + ## Prerequisites: canonical config + thin launchers Runtime profiles live in **one canonical JSON file**, referenced by every LLM @@ -274,7 +286,8 @@ touching anything. `fix/...` / `docs/...`); `cd` into that worktree; implement narrowly; add or update tests if behavior changes; run the full suite; commit with an issue-linked message; open a PR to `master`. **Do not** review or merge your - own PR. + own PR. Include an `LLM Handoff Metadata` block (with `LLM-Agent-SHA`) in + the PR body — see [`llm-agent-sha.md`](llm-agent-sha.md). - **Prompt:** `Use an author profile to implement issue #N and open a PR to master. Do not self-review or self-merge.` @@ -285,7 +298,11 @@ touching anything. - **Steps:** confirm identity + eligibility (menu eligibility check or `gitea_check_pr_eligibility`); read the diff; confirm scope matches the linked issue; post the review (`comment` / `request_changes` / `approve`) via the - gated review tool. Pin the reviewed head SHA where supported. + gated review tool. Pin the reviewed head SHA where supported. Include a + `Review Metadata` block (with your own `LLM-Agent-SHA`) in the review — + and remember: a different `LLM-Agent-SHA` does **not** make you a different + actor; only a different authenticated Gitea user does + ([`llm-agent-sha.md`](llm-agent-sha.md)). - **Prompt:** `Use any eligible reviewer profile to review PR #N. Approve only if scope matches issue #M and checks pass; otherwise request changes.` @@ -391,6 +408,7 @@ scripts/release-tag v0.4.0 --notes-file /tmp/release-notes.md --push - [`../skills/llm-project-workflow/SKILL.md`](../skills/llm-project-workflow/SKILL.md) — portable cross-project LLM workflow skill. - [`gitea-execution-profiles.md`](gitea-execution-profiles.md) — the profile model. +- [`llm-agent-sha.md`](llm-agent-sha.md) — opaque agent attribution metadata (never an eligibility input). - [`safety-model.md`](safety-model.md) — trust boundaries and audit logging. - [`tool-boundaries.md`](tool-boundaries.md) — per-tool allowed operations. - [`credential-isolation.md`](credential-isolation.md) — credential handling. diff --git a/gitea_auth.py b/gitea_auth.py index 244e998..a429b7b 100644 --- a/gitea_auth.py +++ b/gitea_auth.py @@ -14,6 +14,7 @@ import datetime import subprocess import urllib.request import urllib.error +import urllib.parse from email.utils import parsedate_to_datetime from dotenv import dotenv_values, load_dotenv @@ -188,6 +189,39 @@ DEFAULT_MAX_RETRIES = _env_int("GITEA_MAX_RETRIES", 3) DEFAULT_BASE_DELAY = _env_float("GITEA_RETRY_BASE_DELAY", 1.0) # seconds DEFAULT_MAX_DELAY = _env_float("GITEA_RETRY_MAX_DELAY", 60.0) # seconds +# Per-request socket timeout (seconds). Overridable via environment. +DEFAULT_HTTP_TIMEOUT = _env_float("GITEA_HTTP_TIMEOUT", 30.0) + + +def _redact(text): + """Best-effort strip of credential-like substrings from error text. + + Reuses the audit module's redactor so error messages never surface tokens, + Basic/Bearer headers, or password-like values. Falls back to the plain + string if the audit helper is unavailable. + """ + try: + from gitea_audit import _redact_str + return _redact_str(str(text)) + except Exception: + return str(text) + + +def _add_query(url, **params): + """Return *url* with the given query parameters added or overridden. + + Preserves any existing query string on *url* (e.g. ``?state=open``) so + pagination params can be layered on top of an already-filtered endpoint. + """ + parts = urllib.parse.urlsplit(url) + query = dict(urllib.parse.parse_qsl(parts.query, keep_blank_values=True)) + for key, value in params.items(): + query[str(key)] = str(value) + new_query = urllib.parse.urlencode(query) + return urllib.parse.urlunsplit( + (parts.scheme, parts.netloc, parts.path, new_query, parts.fragment) + ) + def parse_retry_after(value, now=None): """Parse a ``Retry-After`` header into a non-negative delay in seconds. @@ -239,16 +273,31 @@ def backoff_delay(attempt, base=DEFAULT_BASE_DELAY, cap=DEFAULT_MAX_DELAY, rand= def api_request(method, url, auth_header, payload=None, *, max_retries=None, base_delay=None, max_delay=None, + timeout=None, sleep_func=time.sleep, rand_func=random.random, now_func=time.time): """Make an authenticated JSON request to the Gitea API. - Returns parsed JSON on success, raises ``RuntimeError`` on HTTP errors. + Returns parsed JSON on success (or ``None`` for an empty body), and raises + ``RuntimeError`` on failure. On HTTP 429 the request is retried up to *max_retries* times: honoring a valid ``Retry-After`` header (seconds or HTTP-date) when present, otherwise - using capped jittered exponential backoff. Non-429 errors and successful - responses are unchanged. The ``*_func`` parameters are injection points for + using capped jittered exponential backoff. Successful responses are + unchanged. + + All failures are converted to a ``RuntimeError`` with a clear, secret + -redacted message (no raw stack traces or credential material): + + - Non-429 HTTP errors surface the status code and a redacted response body. + 502/503/504 upstream errors get an explicit "Gitea upstream unavailable" + message. + - Timeouts and network/DNS failures (``URLError`` / ``TimeoutError``) surface + a generic "network error contacting Gitea" message. + - A malformed (non-JSON) success body surfaces a "malformed JSON response" + message rather than a raw decode error. + + The ``*_func`` parameters and ``timeout`` are injection points for deterministic testing. """ if max_retries is None: @@ -257,6 +306,8 @@ def api_request(method, url, auth_header, payload=None, *, base_delay = DEFAULT_BASE_DELAY if max_delay is None: max_delay = DEFAULT_MAX_DELAY + if timeout is None: + timeout = DEFAULT_HTTP_TIMEOUT data = json.dumps(payload).encode("utf-8") if payload is not None else None req = urllib.request.Request(url, data=data, method=method) @@ -267,9 +318,8 @@ def api_request(method, url, auth_header, payload=None, *, attempt = 0 while True: try: - with urllib.request.urlopen(req) as resp: + with urllib.request.urlopen(req, timeout=timeout) as resp: body = resp.read().decode("utf-8") - return json.loads(body) if body else None except urllib.error.HTTPError as e: if e.code == 429 and attempt < max_retries: header = e.headers.get("Retry-After") if e.headers else None @@ -279,8 +329,71 @@ def api_request(method, url, auth_header, payload=None, *, attempt += 1 sleep_func(delay) continue - error_body = e.read().decode("utf-8", errors="replace") - raise RuntimeError(f"HTTP {e.code}: {error_body}") from e + try: + error_body = e.read().decode("utf-8", errors="replace") + except Exception: + error_body = "" + detail = _redact(error_body).strip() + if e.code in (502, 503, 504): + msg = f"HTTP {e.code}: Gitea upstream unavailable" + raise RuntimeError(f"{msg}: {detail}" if detail else msg) from e + raise RuntimeError(f"HTTP {e.code}: {detail}") from e + except (urllib.error.URLError, TimeoutError) as e: + reason = getattr(e, "reason", e) + raise RuntimeError( + f"network error contacting Gitea: {_redact(reason)}" + ) from e + + if not body: + return None + try: + return json.loads(body) + except ValueError as e: + raise RuntimeError("malformed JSON response from Gitea") from e + + +def api_get_all(url, auth_header, *, limit=None, page_size=50, max_pages=100, + **kwargs): + """Fetch a paginated Gitea collection, following page-based pagination. + + Issues successive ``GET`` requests with ``page`` and ``limit`` (per-page) + query parameters, accumulating list items until one of: + + - a page returns fewer items than the page size (the last page), + - an empty or ``None`` page is returned (also treated as the end — this is + how missing/malformed pagination metadata degrades safely), + - *limit* total items have been collected, or + - *max_pages* pages have been fetched (a safety cap against runaway loops). + + Pagination relies on the *length of each returned page*, not on + ``X-Total-Count`` / ``Link`` headers, so it tolerates missing or malformed + pagination metadata. Returns a list (possibly empty). Raises ``RuntimeError`` + (via :func:`api_request`) on network/HTTP/malformed failures, or if a page is + not a JSON list. Extra ``kwargs`` pass through to :func:`api_request`. + """ + if page_size < 1: + page_size = 1 + if page_size > 50: + page_size = 50 # Gitea caps per-page results at 50 + if limit is not None and limit < page_size: + page_size = max(1, limit) + + results = [] + for page in range(1, max_pages + 1): + page_url = _add_query(url, page=page, limit=page_size) + data = api_request("GET", page_url, auth_header, **kwargs) + if data is None: + break + if not isinstance(data, list): + raise RuntimeError( + f"expected a list page from Gitea, got {type(data).__name__}" + ) + results.extend(data) + if limit is not None and len(results) >= limit: + return results[:limit] + if len(data) < page_size: + break + return results def repo_api_url(host, org, repo): diff --git a/mcp_server.py b/mcp_server.py index 4a5fe82..26850df 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -38,6 +38,7 @@ from gitea_auth import ( # noqa: E402 get_credentials, get_auth_header, api_request, + api_get_all, repo_api_url, get_profile, ) @@ -384,7 +385,7 @@ def gitea_list_prs( h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) url = f"{repo_api_url(h, o, r)}/pulls?state={state}" - prs = api_request("GET", url, auth) or [] + prs = api_get_all(url, auth) return [ { "number": pr["number"], @@ -1266,7 +1267,7 @@ def gitea_list_issues( Args: state: Filter by state — 'open', 'closed', or 'all'. label: Filter by label name (e.g. 'important'). - limit: Max number of issues to return (default: 50). + limit: Max number of issues to return across all pages (default: 50). remote: Known instance — 'dadeschools' or 'prgs'. host: Override the Gitea host. org: Override the owner/organization. @@ -1277,11 +1278,11 @@ def gitea_list_issues( """ h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) - params = f"state={state}&limit={limit}&type=issues" + params = f"state={state}&type=issues" if label: params += f"&labels={label}" url = f"{repo_api_url(h, o, r)}/issues?{params}" - issues = api_request("GET", url, auth) + issues = api_get_all(url, auth, limit=limit) return [ { "number": i["number"], @@ -1554,7 +1555,7 @@ def gitea_list_labels( h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) base = repo_api_url(h, o, r) - return api_request("GET", f"{base}/labels?limit=100", auth) + return api_get_all(f"{base}/labels", auth) @mcp.tool() diff --git a/skills/llm-project-workflow/templates/review-pr.md b/skills/llm-project-workflow/templates/review-pr.md index 6ee8833..49a8f3e 100644 --- a/skills/llm-project-workflow/templates/review-pr.md +++ b/skills/llm-project-workflow/templates/review-pr.md @@ -8,6 +8,8 @@ Task: review PR # for issue #. Rules (llm-project-workflow): - Review in a SEPARATE detached review worktree, never the author's folder. - You must NOT be the PR author. If the authenticated user == PR author, stop. + A different LLM-Agent-SHA does NOT make you a different actor — only a + different authenticated Gitea user does (docs/llm-agent-sha.md). - Do not merge if any check fails. Steps: @@ -21,6 +23,14 @@ Steps: 6. Run the test suite; note results. 7. Post the review verdict: approve only if scope is clean and checks pass; otherwise request changes with specifics. Never merge from this review step. + Include a "Review Metadata" block (attribution only — docs/llm-agent-sha.md): + + Review Metadata: + - LLM-Agent-SHA: llm-<12 lowercase hex, e.g. llm-41d0e7aa9f2c> + - LLM-Role: reviewer + - Authenticated-Gitea-User: + - MCP-Profile: + - Eligibility: passed/failed Handoff: reviewer identity, PR author, scope verdict, checks + results, decision. ``` diff --git a/skills/llm-project-workflow/templates/start-issue.md b/skills/llm-project-workflow/templates/start-issue.md index 64dc5eb..1bbdb6c 100644 --- a/skills/llm-project-workflow/templates/start-issue.md +++ b/skills/llm-project-workflow/templates/start-issue.md @@ -23,6 +23,17 @@ Steps: 6. Checks: run the test suite, compile/lint changed files, git diff --check, and scan the diff for secrets. 7. Commit (issue-linked message), push the branch, open a PR to master. + Include an "LLM Handoff Metadata" block in the PR body (attribution only; + never an eligibility input — docs/llm-agent-sha.md): + + LLM Handoff Metadata: + - LLM-Agent-SHA: llm-<12 lowercase hex, e.g. llm-8f3a9c2d6b41> + - LLM-Role: implementer + - Authenticated-Gitea-User: + - MCP-Profile: + - Branch: + - Worktree: + - Self-review allowed: no 8. Stop before review/merge — you are the author. Handoff: issue #, branch, worktree path, files changed, checks + results, PR URL. diff --git a/tests/test_api_reliability.py b/tests/test_api_reliability.py new file mode 100644 index 0000000..e159b23 --- /dev/null +++ b/tests/test_api_reliability.py @@ -0,0 +1,205 @@ +"""Unit coverage for shared API pagination and failure handling (#67). + +Covers gitea_auth.api_request failure conversion (timeouts, DNS/network, +502/503, malformed error payloads, malformed success JSON, no-secret leakage, +preserved success + 429 behavior) and gitea_auth.api_get_all pagination +(single/multi page, missing/malformed metadata, limit cap, max_pages, query +handling). Everything is mocked — no real network calls are made. +""" +import io +import unittest +import urllib.error +from unittest.mock import patch + +import gitea_auth +import gitea_audit + +FAKE_AUTH = "Basic ZmFrZTpmYWtl" # not a real credential +URL = "https://gitea.example.com/api/v1/repos/o/r/issues" + + +class FakeResp: + """Minimal context-manager stand-in for a urlopen response.""" + + def __init__(self, body): + self._body = body.encode("utf-8") if isinstance(body, str) else body + + def read(self): + return self._body + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + +def http_error(code, body="", headers=None): + return urllib.error.HTTPError( + url=URL, code=code, msg="err", + hdrs=headers or {}, fp=io.BytesIO(body.encode("utf-8")), + ) + + +# --------------------------------------------------------------------------- +# api_request — success path preserved +# --------------------------------------------------------------------------- +class TestApiRequestSuccess(unittest.TestCase): + + @patch("gitea_auth.urllib.request.urlopen") + def test_success_returns_parsed_json(self, mock_open): + mock_open.return_value = FakeResp('{"number": 1, "title": "ok"}') + self.assertEqual( + gitea_auth.api_request("GET", URL, FAKE_AUTH), + {"number": 1, "title": "ok"}, + ) + + @patch("gitea_auth.urllib.request.urlopen") + def test_empty_body_returns_none(self, mock_open): + mock_open.return_value = FakeResp("") + self.assertIsNone(gitea_auth.api_request("GET", URL, FAKE_AUTH)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_429_then_success_still_retries(self, mock_open): + mock_open.side_effect = [http_error(429), FakeResp('{"ok": true}')] + calls = [] + result = gitea_auth.api_request( + "GET", URL, FAKE_AUTH, + sleep_func=lambda d: calls.append(d), rand_func=lambda: 0.0, + ) + self.assertEqual(result, {"ok": True}) + self.assertEqual(len(calls), 1) # slept once between the two attempts + + +# --------------------------------------------------------------------------- +# api_request — failure handling +# --------------------------------------------------------------------------- +class TestApiRequestFailures(unittest.TestCase): + + @patch("gitea_auth.urllib.request.urlopen") + def test_timeout_converted_to_runtimeerror(self, mock_open): + mock_open.side_effect = TimeoutError("timed out") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("network error contacting Gitea", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_dns_network_failure_converted(self, mock_open): + mock_open.side_effect = urllib.error.URLError("Name or service not known") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("network error contacting Gitea", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_502_upstream_message(self, mock_open): + mock_open.side_effect = http_error(502, "bad gateway") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + msg = str(ctx.exception) + self.assertIn("HTTP 502", msg) + self.assertIn("upstream unavailable", msg) + + @patch("gitea_auth.urllib.request.urlopen") + def test_503_upstream_message(self, mock_open): + mock_open.side_effect = http_error(503, "") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("HTTP 503", str(ctx.exception)) + self.assertIn("upstream unavailable", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_malformed_error_payload_does_not_crash(self, mock_open): + # Non-JSON garbage error body must still yield a clean RuntimeError. + mock_open.side_effect = http_error(500, "garbage") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("HTTP 500", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_malformed_success_json_raises_clean_error(self, mock_open): + mock_open.return_value = FakeResp("not json{") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("malformed JSON response", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_no_secret_leak_in_error_body(self, mock_open): + mock_open.side_effect = http_error( + 400, "failed: token supersecret123 rejected") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + msg = str(ctx.exception) + self.assertNotIn("supersecret123", msg) + self.assertIn(gitea_audit.REDACTED, msg) + + @patch("gitea_auth.urllib.request.urlopen") + def test_auth_header_never_in_error(self, mock_open): + mock_open.side_effect = http_error(400, "bad request") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertNotIn(FAKE_AUTH, str(ctx.exception)) + + +# --------------------------------------------------------------------------- +# api_get_all — pagination +# --------------------------------------------------------------------------- +class TestApiGetAll(unittest.TestCase): + + @patch("gitea_auth.api_request") + def test_single_page(self, mock_req): + mock_req.return_value = [{"id": 1}, {"id": 2}] # short page (< page_size) + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=50) + self.assertEqual(result, [{"id": 1}, {"id": 2}]) + self.assertEqual(mock_req.call_count, 1) + + @patch("gitea_auth.api_request") + def test_multi_page(self, mock_req): + mock_req.side_effect = [ + [{"id": 1}, {"id": 2}], # full page + [{"id": 3}, {"id": 4}], # full page + [{"id": 5}], # short page -> stop + ] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2) + self.assertEqual([r["id"] for r in result], [1, 2, 3, 4, 5]) + self.assertEqual(mock_req.call_count, 3) + + @patch("gitea_auth.api_request") + def test_missing_metadata_none_page_ends(self, mock_req): + mock_req.return_value = None # empty/malformed metadata -> treated as end + self.assertEqual(gitea_auth.api_get_all(URL, FAKE_AUTH), []) + self.assertEqual(mock_req.call_count, 1) + + @patch("gitea_auth.api_request") + def test_malformed_metadata_non_list_raises(self, mock_req): + mock_req.return_value = {"message": "not a list"} + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_get_all(URL, FAKE_AUTH) + self.assertIn("expected a list page", str(ctx.exception)) + + @patch("gitea_auth.api_request") + def test_limit_caps_results(self, mock_req): + mock_req.side_effect = [[{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}]] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, limit=3) + self.assertEqual([r["id"] for r in result], [1, 2, 3]) + + @patch("gitea_auth.api_request") + def test_max_pages_safety_cap(self, mock_req): + mock_req.side_effect = [ + [{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}], [{"id": 5}, {"id": 6}], + ] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, max_pages=2) + self.assertEqual(len(result), 4) + self.assertEqual(mock_req.call_count, 2) + + @patch("gitea_auth.api_request") + def test_query_params_appended_and_preserved(self, mock_req): + mock_req.return_value = [] # first (empty) page ends immediately + gitea_auth.api_get_all(URL + "?state=open", FAKE_AUTH, page_size=2) + called_url = mock_req.call_args[0][1] + self.assertIn("state=open", called_url) + self.assertIn("page=1", called_url) + self.assertIn("limit=2", called_url) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_llm_agent_sha.py b/tests/test_llm_agent_sha.py new file mode 100644 index 0000000..edbe518 --- /dev/null +++ b/tests/test_llm_agent_sha.py @@ -0,0 +1,195 @@ +"""Negative tests for LLM-Agent-SHA attribution (#86, Phase 0). + +``LLM-Agent-SHA`` (docs/llm-agent-sha.md) is attribution metadata ONLY. These +tests prove it can never bypass the review/merge safety gates: + +1. Same Gitea user + different LLM-Agent-SHA still fails self-review/approval. +2. Same Gitea user + different LLM-Agent-SHA still fails self-merge. +3. The eligibility logic does not consult SHA metadata at all — results are + identical with no SHA, one SHA, or a different SHA in the environment, and + no gate accepts an agent-SHA input. + +Phase 0 adds no SHA support to any MCP tool; the environment variables set +here (``LLM_AGENT_SHA`` / ``LLM_AGENT_ROLE``) simulate a future launcher and +must be ignored by every gate. +""" +import inspect +import os +import re +import sys +import unittest +from unittest.mock import patch + +sys.path.insert(0, str(__import__("pathlib").Path(__file__).resolve().parent.parent)) + +from mcp_server import ( # noqa: E402 + gitea_check_pr_eligibility, + gitea_merge_pr, + gitea_review_pr, + gitea_submit_pr_review, +) + +FAKE_AUTH = "Basic dGVzdDp0ZXN0" +SHA_PATTERN = re.compile(r"^llm-[0-9a-f]{12}$") +# Two distinct, well-formed agent SHAs "belonging" to the same Gitea user. +SHA_IMPLEMENTER = "llm-8f3a9c2d6b41" +SHA_WOULD_BE_REVIEWER = "llm-41d0e7aa9f2c" + + +def _pr(author, state="open", sha="abc123", mergeable=True): + return { + "user": {"login": author}, + "state": state, + "head": {"sha": sha}, + "mergeable": mergeable, + } + + +class TestShaFormatConvention(unittest.TestCase): + """The documented format: llm-<12 lowercase hex>, nothing identifying.""" + + def test_documented_examples_are_valid(self): + for value in (SHA_IMPLEMENTER, SHA_WOULD_BE_REVIEWER, "llm-b7c93d441a08"): + self.assertRegex(value, SHA_PATTERN) + + def test_identifying_or_malformed_values_are_rejected(self): + for bad in ( + "llm-8F3A9C2D6B41", # uppercase + "llm-8f3a9c2d6b4", # too short + "llm-8f3a9c2d6b411", # too long + "llm-opus4", # model name, not hex + "claude-8f3a9c2d6b41", # provider prefix + "jcwalker3", # username + "llm-user@example.com", # email + "8f3a9c2d6b41", # missing prefix + "", + ): + self.assertNotRegex(bad, SHA_PATTERN) + + +class TestShaCannotBypassSelfReview(unittest.TestCase): + """Scenario: user A (SHA 1) authored the PR; user A (SHA 2) tries to act.""" + + def _env(self, agent_sha, role): + # Reviewer-capable profile + a simulated launcher-injected agent SHA. + return { + "GITEA_PROFILE_NAME": "gitea-reviewer", + "GITEA_ALLOWED_OPERATIONS": "read,review,approve,merge", + "LLM_AGENT_SHA": agent_sha, + "LLM_AGENT_ROLE": role, + } + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_same_user_different_sha_cannot_approve(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "reviewer") + with patch.dict(os.environ, env, clear=True): + r = gitea_check_pr_eligibility(pr_number=9, action="approve", remote="prgs") + self.assertFalse(r["eligible"]) + self.assertIn("authenticated user is PR author", r["reasons"]) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_same_user_different_sha_cannot_merge(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "merger") + with patch.dict(os.environ, env, clear=True): + r = gitea_check_pr_eligibility(pr_number=9, action="merge", remote="prgs") + self.assertFalse(r["eligible"]) + self.assertIn("authenticated user is PR author", r["reasons"]) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_gated_merge_tool_refuses_self_merge_despite_sha(self, _auth, mock_api): + # Even the fully-confirmed gated merge path must refuse: correct + # confirmation string, mergeable PR, merge-capable profile — but the + # authenticated user is the PR author, whatever the agent SHA says. + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "merger") + with patch.dict(os.environ, env, clear=True): + r = gitea_merge_pr( + pr_number=9, confirmation="MERGE PR 9", remote="prgs") + self.assertFalse(r.get("performed")) + for call in mock_api.call_args_list: + method, url = call.args[0], call.args[1] + self.assertFalse( + method == "POST" and url.endswith("/merge"), + f"self-merge mutation reached the API: {method} {url}", + ) + + @patch("mcp_server.api_request") + @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) + def test_review_tool_refuses_self_approval_despite_sha(self, _auth, mock_api): + mock_api.side_effect = [{"login": "jcwalker3"}, _pr("jcwalker3")] + env = self._env(SHA_WOULD_BE_REVIEWER, "reviewer") + with patch.dict(os.environ, env, clear=True): + r = gitea_review_pr( + pr_number=9, event="APPROVE", body="self approve", merge=False, + remote="prgs") + self.assertFalse(r["success"]) + self.assertIn("authenticated user is PR author", r["message"]) + for call in mock_api.call_args_list: + method, url = call.args[0], call.args[1] + self.assertFalse( + method == "POST" and url.endswith("/reviews"), + f"self-review mutation reached the API: {method} {url}", + ) + + +class TestEligibilityNeverConsultsSha(unittest.TestCase): + """The gates have no SHA input: not via env, not via parameters.""" + + def _run_eligibility(self, extra_env): + env = { + "GITEA_PROFILE_NAME": "gitea-reviewer", + "GITEA_ALLOWED_OPERATIONS": "read,review,approve", + } + env.update(extra_env) + with patch("mcp_server.get_auth_header", return_value=FAKE_AUTH), \ + patch("mcp_server.api_request") as mock_api: + mock_api.side_effect = [{"login": "reviewer-bot"}, _pr("author-bot")] + with patch.dict(os.environ, env, clear=True): + return gitea_check_pr_eligibility( + pr_number=5, action="approve", remote="prgs") + + def test_result_identical_with_without_and_across_shas(self): + baseline = self._run_eligibility({}) + with_one = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_IMPLEMENTER, "LLM_AGENT_ROLE": "implementer"}) + with_other = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_WOULD_BE_REVIEWER, "LLM_AGENT_ROLE": "reviewer"}) + self.assertEqual(baseline, with_one) + self.assertEqual(baseline, with_other) + self.assertTrue(baseline["eligible"]) # sanity: a real decision ran + + def test_eligibility_result_carries_no_agent_sha(self): + r = self._run_eligibility( + {"LLM_AGENT_SHA": SHA_IMPLEMENTER, "LLM_AGENT_ROLE": "implementer"}) + blob = repr(r) + self.assertNotIn(SHA_IMPLEMENTER, blob) + self.assertNotIn("LLM_AGENT", blob) + + def test_gate_functions_accept_no_agent_sha_parameter(self): + for fn in (gitea_check_pr_eligibility, gitea_merge_pr, + gitea_review_pr, gitea_submit_pr_review): + for param in inspect.signature(fn).parameters: + lowered = param.lower() + self.assertNotIn("agent", lowered, + f"{fn.__name__} accepts agent param {param!r}") + self.assertNotIn("llm", lowered, + f"{fn.__name__} accepts llm param {param!r}") + + def test_gate_sources_never_read_agent_sha(self): + # Phase 0 guarantee: no gate reads LLM_AGENT_* metadata at all. + for fn in (gitea_check_pr_eligibility, gitea_merge_pr, + gitea_review_pr, gitea_submit_pr_review): + src = inspect.getsource(fn) + self.assertNotIn("LLM_AGENT", src, + f"{fn.__name__} reads LLM_AGENT metadata") + self.assertNotIn("agent_sha", src, + f"{fn.__name__} consults an agent SHA") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 646c9b8..79a42d9 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -103,7 +103,7 @@ class TestCloseIssue(unittest.TestCase): # --------------------------------------------------------------------------- class TestListIssues(unittest.TestCase): - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_returns_formatted_list(self, _auth, mock_api): mock_api.return_value = [ @@ -124,20 +124,20 @@ class TestListIssues(unittest.TestCase): self.assertEqual(result[0]["assignee"], "alice") self.assertEqual(result[1]["assignee"], "") - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_passes_label_filter(self, _auth, mock_api): mock_api.return_value = [] gitea_list_issues(label="important") - url = mock_api.call_args[0][1] + url = mock_api.call_args[0][0] self.assertIn("labels=important", url) - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_passes_state_filter(self, _auth, mock_api): mock_api.return_value = [] gitea_list_issues(state="closed") - url = mock_api.call_args[0][1] + url = mock_api.call_args[0][0] self.assertIn("state=closed", url) @@ -259,7 +259,7 @@ class TestMirrorRefs(unittest.TestCase): # --------------------------------------------------------------------------- class TestListPRs(unittest.TestCase): - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_list_prs(self, _auth, mock_api): mock_api.return_value = [