From cfe3ff67556a159d54c9f02da3ff94928ea3b385 Mon Sep 17 00:00:00 2001 From: Jason Walker <913443@dadeschools.net> Date: Thu, 2 Jul 2026 13:27:06 -0400 Subject: [PATCH] fix: add shared API pagination and failure handling (#67) Harden gitea_auth.api_request: add a per-request timeout (env GITEA_HTTP_TIMEOUT), convert timeouts and DNS/network failures (URLError/TimeoutError) into clear RuntimeErrors, give 502/503/504 an explicit 'upstream unavailable' message, convert malformed success JSON into a clean error, and redact credential-like substrings from all error text. Preserves the success path and existing 429 retry/backoff. Add shared gitea_auth.api_get_all: page-based pagination that tolerates missing/malformed metadata (relies on page length, not Link/X-Total-Count headers), honors an optional overall limit, and caps pages. Wire it into the read-only list tools gitea_list_issues, gitea_list_prs, and gitea_list_labels (return shape unchanged). Add tests/test_api_reliability.py (18 cases) and update the three list-tool tests to the new call path. No auth/profile/merge/review/tracker behavior changed. No modular #65 refactor. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitea_auth.py | 127 +++++++++++++++++++-- mcp_server.py | 11 +- tests/test_api_reliability.py | 205 ++++++++++++++++++++++++++++++++++ tests/test_mcp_server.py | 12 +- 4 files changed, 337 insertions(+), 18 deletions(-) create mode 100644 tests/test_api_reliability.py diff --git a/gitea_auth.py b/gitea_auth.py index 244e998..a429b7b 100644 --- a/gitea_auth.py +++ b/gitea_auth.py @@ -14,6 +14,7 @@ import datetime import subprocess import urllib.request import urllib.error +import urllib.parse from email.utils import parsedate_to_datetime from dotenv import dotenv_values, load_dotenv @@ -188,6 +189,39 @@ DEFAULT_MAX_RETRIES = _env_int("GITEA_MAX_RETRIES", 3) DEFAULT_BASE_DELAY = _env_float("GITEA_RETRY_BASE_DELAY", 1.0) # seconds DEFAULT_MAX_DELAY = _env_float("GITEA_RETRY_MAX_DELAY", 60.0) # seconds +# Per-request socket timeout (seconds). Overridable via environment. +DEFAULT_HTTP_TIMEOUT = _env_float("GITEA_HTTP_TIMEOUT", 30.0) + + +def _redact(text): + """Best-effort strip of credential-like substrings from error text. + + Reuses the audit module's redactor so error messages never surface tokens, + Basic/Bearer headers, or password-like values. Falls back to the plain + string if the audit helper is unavailable. + """ + try: + from gitea_audit import _redact_str + return _redact_str(str(text)) + except Exception: + return str(text) + + +def _add_query(url, **params): + """Return *url* with the given query parameters added or overridden. + + Preserves any existing query string on *url* (e.g. ``?state=open``) so + pagination params can be layered on top of an already-filtered endpoint. + """ + parts = urllib.parse.urlsplit(url) + query = dict(urllib.parse.parse_qsl(parts.query, keep_blank_values=True)) + for key, value in params.items(): + query[str(key)] = str(value) + new_query = urllib.parse.urlencode(query) + return urllib.parse.urlunsplit( + (parts.scheme, parts.netloc, parts.path, new_query, parts.fragment) + ) + def parse_retry_after(value, now=None): """Parse a ``Retry-After`` header into a non-negative delay in seconds. @@ -239,16 +273,31 @@ def backoff_delay(attempt, base=DEFAULT_BASE_DELAY, cap=DEFAULT_MAX_DELAY, rand= def api_request(method, url, auth_header, payload=None, *, max_retries=None, base_delay=None, max_delay=None, + timeout=None, sleep_func=time.sleep, rand_func=random.random, now_func=time.time): """Make an authenticated JSON request to the Gitea API. - Returns parsed JSON on success, raises ``RuntimeError`` on HTTP errors. + Returns parsed JSON on success (or ``None`` for an empty body), and raises + ``RuntimeError`` on failure. On HTTP 429 the request is retried up to *max_retries* times: honoring a valid ``Retry-After`` header (seconds or HTTP-date) when present, otherwise - using capped jittered exponential backoff. Non-429 errors and successful - responses are unchanged. The ``*_func`` parameters are injection points for + using capped jittered exponential backoff. Successful responses are + unchanged. + + All failures are converted to a ``RuntimeError`` with a clear, secret + -redacted message (no raw stack traces or credential material): + + - Non-429 HTTP errors surface the status code and a redacted response body. + 502/503/504 upstream errors get an explicit "Gitea upstream unavailable" + message. + - Timeouts and network/DNS failures (``URLError`` / ``TimeoutError``) surface + a generic "network error contacting Gitea" message. + - A malformed (non-JSON) success body surfaces a "malformed JSON response" + message rather than a raw decode error. + + The ``*_func`` parameters and ``timeout`` are injection points for deterministic testing. """ if max_retries is None: @@ -257,6 +306,8 @@ def api_request(method, url, auth_header, payload=None, *, base_delay = DEFAULT_BASE_DELAY if max_delay is None: max_delay = DEFAULT_MAX_DELAY + if timeout is None: + timeout = DEFAULT_HTTP_TIMEOUT data = json.dumps(payload).encode("utf-8") if payload is not None else None req = urllib.request.Request(url, data=data, method=method) @@ -267,9 +318,8 @@ def api_request(method, url, auth_header, payload=None, *, attempt = 0 while True: try: - with urllib.request.urlopen(req) as resp: + with urllib.request.urlopen(req, timeout=timeout) as resp: body = resp.read().decode("utf-8") - return json.loads(body) if body else None except urllib.error.HTTPError as e: if e.code == 429 and attempt < max_retries: header = e.headers.get("Retry-After") if e.headers else None @@ -279,8 +329,71 @@ def api_request(method, url, auth_header, payload=None, *, attempt += 1 sleep_func(delay) continue - error_body = e.read().decode("utf-8", errors="replace") - raise RuntimeError(f"HTTP {e.code}: {error_body}") from e + try: + error_body = e.read().decode("utf-8", errors="replace") + except Exception: + error_body = "" + detail = _redact(error_body).strip() + if e.code in (502, 503, 504): + msg = f"HTTP {e.code}: Gitea upstream unavailable" + raise RuntimeError(f"{msg}: {detail}" if detail else msg) from e + raise RuntimeError(f"HTTP {e.code}: {detail}") from e + except (urllib.error.URLError, TimeoutError) as e: + reason = getattr(e, "reason", e) + raise RuntimeError( + f"network error contacting Gitea: {_redact(reason)}" + ) from e + + if not body: + return None + try: + return json.loads(body) + except ValueError as e: + raise RuntimeError("malformed JSON response from Gitea") from e + + +def api_get_all(url, auth_header, *, limit=None, page_size=50, max_pages=100, + **kwargs): + """Fetch a paginated Gitea collection, following page-based pagination. + + Issues successive ``GET`` requests with ``page`` and ``limit`` (per-page) + query parameters, accumulating list items until one of: + + - a page returns fewer items than the page size (the last page), + - an empty or ``None`` page is returned (also treated as the end — this is + how missing/malformed pagination metadata degrades safely), + - *limit* total items have been collected, or + - *max_pages* pages have been fetched (a safety cap against runaway loops). + + Pagination relies on the *length of each returned page*, not on + ``X-Total-Count`` / ``Link`` headers, so it tolerates missing or malformed + pagination metadata. Returns a list (possibly empty). Raises ``RuntimeError`` + (via :func:`api_request`) on network/HTTP/malformed failures, or if a page is + not a JSON list. Extra ``kwargs`` pass through to :func:`api_request`. + """ + if page_size < 1: + page_size = 1 + if page_size > 50: + page_size = 50 # Gitea caps per-page results at 50 + if limit is not None and limit < page_size: + page_size = max(1, limit) + + results = [] + for page in range(1, max_pages + 1): + page_url = _add_query(url, page=page, limit=page_size) + data = api_request("GET", page_url, auth_header, **kwargs) + if data is None: + break + if not isinstance(data, list): + raise RuntimeError( + f"expected a list page from Gitea, got {type(data).__name__}" + ) + results.extend(data) + if limit is not None and len(results) >= limit: + return results[:limit] + if len(data) < page_size: + break + return results def repo_api_url(host, org, repo): diff --git a/mcp_server.py b/mcp_server.py index 4a5fe82..26850df 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -38,6 +38,7 @@ from gitea_auth import ( # noqa: E402 get_credentials, get_auth_header, api_request, + api_get_all, repo_api_url, get_profile, ) @@ -384,7 +385,7 @@ def gitea_list_prs( h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) url = f"{repo_api_url(h, o, r)}/pulls?state={state}" - prs = api_request("GET", url, auth) or [] + prs = api_get_all(url, auth) return [ { "number": pr["number"], @@ -1266,7 +1267,7 @@ def gitea_list_issues( Args: state: Filter by state — 'open', 'closed', or 'all'. label: Filter by label name (e.g. 'important'). - limit: Max number of issues to return (default: 50). + limit: Max number of issues to return across all pages (default: 50). remote: Known instance — 'dadeschools' or 'prgs'. host: Override the Gitea host. org: Override the owner/organization. @@ -1277,11 +1278,11 @@ def gitea_list_issues( """ h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) - params = f"state={state}&limit={limit}&type=issues" + params = f"state={state}&type=issues" if label: params += f"&labels={label}" url = f"{repo_api_url(h, o, r)}/issues?{params}" - issues = api_request("GET", url, auth) + issues = api_get_all(url, auth, limit=limit) return [ { "number": i["number"], @@ -1554,7 +1555,7 @@ def gitea_list_labels( h, o, r = _resolve(remote, host, org, repo) auth = _auth(h) base = repo_api_url(h, o, r) - return api_request("GET", f"{base}/labels?limit=100", auth) + return api_get_all(f"{base}/labels", auth) @mcp.tool() diff --git a/tests/test_api_reliability.py b/tests/test_api_reliability.py new file mode 100644 index 0000000..e159b23 --- /dev/null +++ b/tests/test_api_reliability.py @@ -0,0 +1,205 @@ +"""Unit coverage for shared API pagination and failure handling (#67). + +Covers gitea_auth.api_request failure conversion (timeouts, DNS/network, +502/503, malformed error payloads, malformed success JSON, no-secret leakage, +preserved success + 429 behavior) and gitea_auth.api_get_all pagination +(single/multi page, missing/malformed metadata, limit cap, max_pages, query +handling). Everything is mocked — no real network calls are made. +""" +import io +import unittest +import urllib.error +from unittest.mock import patch + +import gitea_auth +import gitea_audit + +FAKE_AUTH = "Basic ZmFrZTpmYWtl" # not a real credential +URL = "https://gitea.example.com/api/v1/repos/o/r/issues" + + +class FakeResp: + """Minimal context-manager stand-in for a urlopen response.""" + + def __init__(self, body): + self._body = body.encode("utf-8") if isinstance(body, str) else body + + def read(self): + return self._body + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + +def http_error(code, body="", headers=None): + return urllib.error.HTTPError( + url=URL, code=code, msg="err", + hdrs=headers or {}, fp=io.BytesIO(body.encode("utf-8")), + ) + + +# --------------------------------------------------------------------------- +# api_request — success path preserved +# --------------------------------------------------------------------------- +class TestApiRequestSuccess(unittest.TestCase): + + @patch("gitea_auth.urllib.request.urlopen") + def test_success_returns_parsed_json(self, mock_open): + mock_open.return_value = FakeResp('{"number": 1, "title": "ok"}') + self.assertEqual( + gitea_auth.api_request("GET", URL, FAKE_AUTH), + {"number": 1, "title": "ok"}, + ) + + @patch("gitea_auth.urllib.request.urlopen") + def test_empty_body_returns_none(self, mock_open): + mock_open.return_value = FakeResp("") + self.assertIsNone(gitea_auth.api_request("GET", URL, FAKE_AUTH)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_429_then_success_still_retries(self, mock_open): + mock_open.side_effect = [http_error(429), FakeResp('{"ok": true}')] + calls = [] + result = gitea_auth.api_request( + "GET", URL, FAKE_AUTH, + sleep_func=lambda d: calls.append(d), rand_func=lambda: 0.0, + ) + self.assertEqual(result, {"ok": True}) + self.assertEqual(len(calls), 1) # slept once between the two attempts + + +# --------------------------------------------------------------------------- +# api_request — failure handling +# --------------------------------------------------------------------------- +class TestApiRequestFailures(unittest.TestCase): + + @patch("gitea_auth.urllib.request.urlopen") + def test_timeout_converted_to_runtimeerror(self, mock_open): + mock_open.side_effect = TimeoutError("timed out") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("network error contacting Gitea", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_dns_network_failure_converted(self, mock_open): + mock_open.side_effect = urllib.error.URLError("Name or service not known") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("network error contacting Gitea", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_502_upstream_message(self, mock_open): + mock_open.side_effect = http_error(502, "bad gateway") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + msg = str(ctx.exception) + self.assertIn("HTTP 502", msg) + self.assertIn("upstream unavailable", msg) + + @patch("gitea_auth.urllib.request.urlopen") + def test_503_upstream_message(self, mock_open): + mock_open.side_effect = http_error(503, "") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("HTTP 503", str(ctx.exception)) + self.assertIn("upstream unavailable", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_malformed_error_payload_does_not_crash(self, mock_open): + # Non-JSON garbage error body must still yield a clean RuntimeError. + mock_open.side_effect = http_error(500, "garbage") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("HTTP 500", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_malformed_success_json_raises_clean_error(self, mock_open): + mock_open.return_value = FakeResp("not json{") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertIn("malformed JSON response", str(ctx.exception)) + + @patch("gitea_auth.urllib.request.urlopen") + def test_no_secret_leak_in_error_body(self, mock_open): + mock_open.side_effect = http_error( + 400, "failed: token supersecret123 rejected") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + msg = str(ctx.exception) + self.assertNotIn("supersecret123", msg) + self.assertIn(gitea_audit.REDACTED, msg) + + @patch("gitea_auth.urllib.request.urlopen") + def test_auth_header_never_in_error(self, mock_open): + mock_open.side_effect = http_error(400, "bad request") + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_request("GET", URL, FAKE_AUTH) + self.assertNotIn(FAKE_AUTH, str(ctx.exception)) + + +# --------------------------------------------------------------------------- +# api_get_all — pagination +# --------------------------------------------------------------------------- +class TestApiGetAll(unittest.TestCase): + + @patch("gitea_auth.api_request") + def test_single_page(self, mock_req): + mock_req.return_value = [{"id": 1}, {"id": 2}] # short page (< page_size) + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=50) + self.assertEqual(result, [{"id": 1}, {"id": 2}]) + self.assertEqual(mock_req.call_count, 1) + + @patch("gitea_auth.api_request") + def test_multi_page(self, mock_req): + mock_req.side_effect = [ + [{"id": 1}, {"id": 2}], # full page + [{"id": 3}, {"id": 4}], # full page + [{"id": 5}], # short page -> stop + ] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2) + self.assertEqual([r["id"] for r in result], [1, 2, 3, 4, 5]) + self.assertEqual(mock_req.call_count, 3) + + @patch("gitea_auth.api_request") + def test_missing_metadata_none_page_ends(self, mock_req): + mock_req.return_value = None # empty/malformed metadata -> treated as end + self.assertEqual(gitea_auth.api_get_all(URL, FAKE_AUTH), []) + self.assertEqual(mock_req.call_count, 1) + + @patch("gitea_auth.api_request") + def test_malformed_metadata_non_list_raises(self, mock_req): + mock_req.return_value = {"message": "not a list"} + with self.assertRaises(RuntimeError) as ctx: + gitea_auth.api_get_all(URL, FAKE_AUTH) + self.assertIn("expected a list page", str(ctx.exception)) + + @patch("gitea_auth.api_request") + def test_limit_caps_results(self, mock_req): + mock_req.side_effect = [[{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}]] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, limit=3) + self.assertEqual([r["id"] for r in result], [1, 2, 3]) + + @patch("gitea_auth.api_request") + def test_max_pages_safety_cap(self, mock_req): + mock_req.side_effect = [ + [{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}], [{"id": 5}, {"id": 6}], + ] + result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, max_pages=2) + self.assertEqual(len(result), 4) + self.assertEqual(mock_req.call_count, 2) + + @patch("gitea_auth.api_request") + def test_query_params_appended_and_preserved(self, mock_req): + mock_req.return_value = [] # first (empty) page ends immediately + gitea_auth.api_get_all(URL + "?state=open", FAKE_AUTH, page_size=2) + called_url = mock_req.call_args[0][1] + self.assertIn("state=open", called_url) + self.assertIn("page=1", called_url) + self.assertIn("limit=2", called_url) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 646c9b8..79a42d9 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -103,7 +103,7 @@ class TestCloseIssue(unittest.TestCase): # --------------------------------------------------------------------------- class TestListIssues(unittest.TestCase): - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_returns_formatted_list(self, _auth, mock_api): mock_api.return_value = [ @@ -124,20 +124,20 @@ class TestListIssues(unittest.TestCase): self.assertEqual(result[0]["assignee"], "alice") self.assertEqual(result[1]["assignee"], "") - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_passes_label_filter(self, _auth, mock_api): mock_api.return_value = [] gitea_list_issues(label="important") - url = mock_api.call_args[0][1] + url = mock_api.call_args[0][0] self.assertIn("labels=important", url) - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_passes_state_filter(self, _auth, mock_api): mock_api.return_value = [] gitea_list_issues(state="closed") - url = mock_api.call_args[0][1] + url = mock_api.call_args[0][0] self.assertIn("state=closed", url) @@ -259,7 +259,7 @@ class TestMirrorRefs(unittest.TestCase): # --------------------------------------------------------------------------- class TestListPRs(unittest.TestCase): - @patch("mcp_server.api_request") + @patch("mcp_server.api_get_all") @patch("mcp_server.get_auth_header", return_value=FAKE_AUTH) def test_list_prs(self, _auth, mock_api): mock_api.return_value = [