fix: add shared API pagination and failure handling (#67)

Harden gitea_auth.api_request: add a per-request timeout (env
GITEA_HTTP_TIMEOUT), convert timeouts and DNS/network failures
(URLError/TimeoutError) into clear RuntimeErrors, give 502/503/504 an
explicit 'upstream unavailable' message, convert malformed success JSON
into a clean error, and redact credential-like substrings from all error
text. Preserves the success path and existing 429 retry/backoff.

Add shared gitea_auth.api_get_all: page-based pagination that tolerates
missing/malformed metadata (relies on page length, not Link/X-Total-Count
headers), honors an optional overall limit, and caps pages. Wire it into
the read-only list tools gitea_list_issues, gitea_list_prs, and
gitea_list_labels (return shape unchanged).

Add tests/test_api_reliability.py (18 cases) and update the three list-tool
tests to the new call path. No auth/profile/merge/review/tracker behavior
changed. No modular #65 refactor.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-07-02 13:27:06 -04:00
parent 093945254d
commit cfe3ff6755
4 changed files with 337 additions and 18 deletions
+120 -7
View File
@@ -14,6 +14,7 @@ import datetime
import subprocess
import urllib.request
import urllib.error
import urllib.parse
from email.utils import parsedate_to_datetime
from dotenv import dotenv_values, load_dotenv
@@ -188,6 +189,39 @@ DEFAULT_MAX_RETRIES = _env_int("GITEA_MAX_RETRIES", 3)
DEFAULT_BASE_DELAY = _env_float("GITEA_RETRY_BASE_DELAY", 1.0) # seconds
DEFAULT_MAX_DELAY = _env_float("GITEA_RETRY_MAX_DELAY", 60.0) # seconds
# Per-request socket timeout (seconds). Overridable via environment.
DEFAULT_HTTP_TIMEOUT = _env_float("GITEA_HTTP_TIMEOUT", 30.0)
def _redact(text):
"""Best-effort strip of credential-like substrings from error text.
Reuses the audit module's redactor so error messages never surface tokens,
Basic/Bearer headers, or password-like values. Falls back to the plain
string if the audit helper is unavailable.
"""
try:
from gitea_audit import _redact_str
return _redact_str(str(text))
except Exception:
return str(text)
def _add_query(url, **params):
"""Return *url* with the given query parameters added or overridden.
Preserves any existing query string on *url* (e.g. ``?state=open``) so
pagination params can be layered on top of an already-filtered endpoint.
"""
parts = urllib.parse.urlsplit(url)
query = dict(urllib.parse.parse_qsl(parts.query, keep_blank_values=True))
for key, value in params.items():
query[str(key)] = str(value)
new_query = urllib.parse.urlencode(query)
return urllib.parse.urlunsplit(
(parts.scheme, parts.netloc, parts.path, new_query, parts.fragment)
)
def parse_retry_after(value, now=None):
"""Parse a ``Retry-After`` header into a non-negative delay in seconds.
@@ -239,16 +273,31 @@ def backoff_delay(attempt, base=DEFAULT_BASE_DELAY, cap=DEFAULT_MAX_DELAY, rand=
def api_request(method, url, auth_header, payload=None, *,
max_retries=None, base_delay=None, max_delay=None,
timeout=None,
sleep_func=time.sleep, rand_func=random.random,
now_func=time.time):
"""Make an authenticated JSON request to the Gitea API.
Returns parsed JSON on success, raises ``RuntimeError`` on HTTP errors.
Returns parsed JSON on success (or ``None`` for an empty body), and raises
``RuntimeError`` on failure.
On HTTP 429 the request is retried up to *max_retries* times: honoring a
valid ``Retry-After`` header (seconds or HTTP-date) when present, otherwise
using capped jittered exponential backoff. Non-429 errors and successful
responses are unchanged. The ``*_func`` parameters are injection points for
using capped jittered exponential backoff. Successful responses are
unchanged.
All failures are converted to a ``RuntimeError`` with a clear, secret
-redacted message (no raw stack traces or credential material):
- Non-429 HTTP errors surface the status code and a redacted response body.
502/503/504 upstream errors get an explicit "Gitea upstream unavailable"
message.
- Timeouts and network/DNS failures (``URLError`` / ``TimeoutError``) surface
a generic "network error contacting Gitea" message.
- A malformed (non-JSON) success body surfaces a "malformed JSON response"
message rather than a raw decode error.
The ``*_func`` parameters and ``timeout`` are injection points for
deterministic testing.
"""
if max_retries is None:
@@ -257,6 +306,8 @@ def api_request(method, url, auth_header, payload=None, *,
base_delay = DEFAULT_BASE_DELAY
if max_delay is None:
max_delay = DEFAULT_MAX_DELAY
if timeout is None:
timeout = DEFAULT_HTTP_TIMEOUT
data = json.dumps(payload).encode("utf-8") if payload is not None else None
req = urllib.request.Request(url, data=data, method=method)
@@ -267,9 +318,8 @@ def api_request(method, url, auth_header, payload=None, *,
attempt = 0
while True:
try:
with urllib.request.urlopen(req) as resp:
with urllib.request.urlopen(req, timeout=timeout) as resp:
body = resp.read().decode("utf-8")
return json.loads(body) if body else None
except urllib.error.HTTPError as e:
if e.code == 429 and attempt < max_retries:
header = e.headers.get("Retry-After") if e.headers else None
@@ -279,8 +329,71 @@ def api_request(method, url, auth_header, payload=None, *,
attempt += 1
sleep_func(delay)
continue
error_body = e.read().decode("utf-8", errors="replace")
raise RuntimeError(f"HTTP {e.code}: {error_body}") from e
try:
error_body = e.read().decode("utf-8", errors="replace")
except Exception:
error_body = ""
detail = _redact(error_body).strip()
if e.code in (502, 503, 504):
msg = f"HTTP {e.code}: Gitea upstream unavailable"
raise RuntimeError(f"{msg}: {detail}" if detail else msg) from e
raise RuntimeError(f"HTTP {e.code}: {detail}") from e
except (urllib.error.URLError, TimeoutError) as e:
reason = getattr(e, "reason", e)
raise RuntimeError(
f"network error contacting Gitea: {_redact(reason)}"
) from e
if not body:
return None
try:
return json.loads(body)
except ValueError as e:
raise RuntimeError("malformed JSON response from Gitea") from e
def api_get_all(url, auth_header, *, limit=None, page_size=50, max_pages=100,
**kwargs):
"""Fetch a paginated Gitea collection, following page-based pagination.
Issues successive ``GET`` requests with ``page`` and ``limit`` (per-page)
query parameters, accumulating list items until one of:
- a page returns fewer items than the page size (the last page),
- an empty or ``None`` page is returned (also treated as the end — this is
how missing/malformed pagination metadata degrades safely),
- *limit* total items have been collected, or
- *max_pages* pages have been fetched (a safety cap against runaway loops).
Pagination relies on the *length of each returned page*, not on
``X-Total-Count`` / ``Link`` headers, so it tolerates missing or malformed
pagination metadata. Returns a list (possibly empty). Raises ``RuntimeError``
(via :func:`api_request`) on network/HTTP/malformed failures, or if a page is
not a JSON list. Extra ``kwargs`` pass through to :func:`api_request`.
"""
if page_size < 1:
page_size = 1
if page_size > 50:
page_size = 50 # Gitea caps per-page results at 50
if limit is not None and limit < page_size:
page_size = max(1, limit)
results = []
for page in range(1, max_pages + 1):
page_url = _add_query(url, page=page, limit=page_size)
data = api_request("GET", page_url, auth_header, **kwargs)
if data is None:
break
if not isinstance(data, list):
raise RuntimeError(
f"expected a list page from Gitea, got {type(data).__name__}"
)
results.extend(data)
if limit is not None and len(results) >= limit:
return results[:limit]
if len(data) < page_size:
break
return results
def repo_api_url(host, org, repo):
+6 -5
View File
@@ -38,6 +38,7 @@ from gitea_auth import ( # noqa: E402
get_credentials,
get_auth_header,
api_request,
api_get_all,
repo_api_url,
get_profile,
)
@@ -384,7 +385,7 @@ def gitea_list_prs(
h, o, r = _resolve(remote, host, org, repo)
auth = _auth(h)
url = f"{repo_api_url(h, o, r)}/pulls?state={state}"
prs = api_request("GET", url, auth) or []
prs = api_get_all(url, auth)
return [
{
"number": pr["number"],
@@ -1266,7 +1267,7 @@ def gitea_list_issues(
Args:
state: Filter by state — 'open', 'closed', or 'all'.
label: Filter by label name (e.g. 'important').
limit: Max number of issues to return (default: 50).
limit: Max number of issues to return across all pages (default: 50).
remote: Known instance — 'dadeschools' or 'prgs'.
host: Override the Gitea host.
org: Override the owner/organization.
@@ -1277,11 +1278,11 @@ def gitea_list_issues(
"""
h, o, r = _resolve(remote, host, org, repo)
auth = _auth(h)
params = f"state={state}&limit={limit}&type=issues"
params = f"state={state}&type=issues"
if label:
params += f"&labels={label}"
url = f"{repo_api_url(h, o, r)}/issues?{params}"
issues = api_request("GET", url, auth)
issues = api_get_all(url, auth, limit=limit)
return [
{
"number": i["number"],
@@ -1554,7 +1555,7 @@ def gitea_list_labels(
h, o, r = _resolve(remote, host, org, repo)
auth = _auth(h)
base = repo_api_url(h, o, r)
return api_request("GET", f"{base}/labels?limit=100", auth)
return api_get_all(f"{base}/labels", auth)
@mcp.tool()
+205
View File
@@ -0,0 +1,205 @@
"""Unit coverage for shared API pagination and failure handling (#67).
Covers gitea_auth.api_request failure conversion (timeouts, DNS/network,
502/503, malformed error payloads, malformed success JSON, no-secret leakage,
preserved success + 429 behavior) and gitea_auth.api_get_all pagination
(single/multi page, missing/malformed metadata, limit cap, max_pages, query
handling). Everything is mocked — no real network calls are made.
"""
import io
import unittest
import urllib.error
from unittest.mock import patch
import gitea_auth
import gitea_audit
FAKE_AUTH = "Basic ZmFrZTpmYWtl" # not a real credential
URL = "https://gitea.example.com/api/v1/repos/o/r/issues"
class FakeResp:
"""Minimal context-manager stand-in for a urlopen response."""
def __init__(self, body):
self._body = body.encode("utf-8") if isinstance(body, str) else body
def read(self):
return self._body
def __enter__(self):
return self
def __exit__(self, *exc):
return False
def http_error(code, body="", headers=None):
return urllib.error.HTTPError(
url=URL, code=code, msg="err",
hdrs=headers or {}, fp=io.BytesIO(body.encode("utf-8")),
)
# ---------------------------------------------------------------------------
# api_request — success path preserved
# ---------------------------------------------------------------------------
class TestApiRequestSuccess(unittest.TestCase):
@patch("gitea_auth.urllib.request.urlopen")
def test_success_returns_parsed_json(self, mock_open):
mock_open.return_value = FakeResp('{"number": 1, "title": "ok"}')
self.assertEqual(
gitea_auth.api_request("GET", URL, FAKE_AUTH),
{"number": 1, "title": "ok"},
)
@patch("gitea_auth.urllib.request.urlopen")
def test_empty_body_returns_none(self, mock_open):
mock_open.return_value = FakeResp("")
self.assertIsNone(gitea_auth.api_request("GET", URL, FAKE_AUTH))
@patch("gitea_auth.urllib.request.urlopen")
def test_429_then_success_still_retries(self, mock_open):
mock_open.side_effect = [http_error(429), FakeResp('{"ok": true}')]
calls = []
result = gitea_auth.api_request(
"GET", URL, FAKE_AUTH,
sleep_func=lambda d: calls.append(d), rand_func=lambda: 0.0,
)
self.assertEqual(result, {"ok": True})
self.assertEqual(len(calls), 1) # slept once between the two attempts
# ---------------------------------------------------------------------------
# api_request — failure handling
# ---------------------------------------------------------------------------
class TestApiRequestFailures(unittest.TestCase):
@patch("gitea_auth.urllib.request.urlopen")
def test_timeout_converted_to_runtimeerror(self, mock_open):
mock_open.side_effect = TimeoutError("timed out")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertIn("network error contacting Gitea", str(ctx.exception))
@patch("gitea_auth.urllib.request.urlopen")
def test_dns_network_failure_converted(self, mock_open):
mock_open.side_effect = urllib.error.URLError("Name or service not known")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertIn("network error contacting Gitea", str(ctx.exception))
@patch("gitea_auth.urllib.request.urlopen")
def test_502_upstream_message(self, mock_open):
mock_open.side_effect = http_error(502, "bad gateway")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
msg = str(ctx.exception)
self.assertIn("HTTP 502", msg)
self.assertIn("upstream unavailable", msg)
@patch("gitea_auth.urllib.request.urlopen")
def test_503_upstream_message(self, mock_open):
mock_open.side_effect = http_error(503, "")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertIn("HTTP 503", str(ctx.exception))
self.assertIn("upstream unavailable", str(ctx.exception))
@patch("gitea_auth.urllib.request.urlopen")
def test_malformed_error_payload_does_not_crash(self, mock_open):
# Non-JSON garbage error body must still yield a clean RuntimeError.
mock_open.side_effect = http_error(500, "<html>garbage</html>")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertIn("HTTP 500", str(ctx.exception))
@patch("gitea_auth.urllib.request.urlopen")
def test_malformed_success_json_raises_clean_error(self, mock_open):
mock_open.return_value = FakeResp("not json{")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertIn("malformed JSON response", str(ctx.exception))
@patch("gitea_auth.urllib.request.urlopen")
def test_no_secret_leak_in_error_body(self, mock_open):
mock_open.side_effect = http_error(
400, "failed: token supersecret123 rejected")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
msg = str(ctx.exception)
self.assertNotIn("supersecret123", msg)
self.assertIn(gitea_audit.REDACTED, msg)
@patch("gitea_auth.urllib.request.urlopen")
def test_auth_header_never_in_error(self, mock_open):
mock_open.side_effect = http_error(400, "bad request")
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_request("GET", URL, FAKE_AUTH)
self.assertNotIn(FAKE_AUTH, str(ctx.exception))
# ---------------------------------------------------------------------------
# api_get_all — pagination
# ---------------------------------------------------------------------------
class TestApiGetAll(unittest.TestCase):
@patch("gitea_auth.api_request")
def test_single_page(self, mock_req):
mock_req.return_value = [{"id": 1}, {"id": 2}] # short page (< page_size)
result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=50)
self.assertEqual(result, [{"id": 1}, {"id": 2}])
self.assertEqual(mock_req.call_count, 1)
@patch("gitea_auth.api_request")
def test_multi_page(self, mock_req):
mock_req.side_effect = [
[{"id": 1}, {"id": 2}], # full page
[{"id": 3}, {"id": 4}], # full page
[{"id": 5}], # short page -> stop
]
result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2)
self.assertEqual([r["id"] for r in result], [1, 2, 3, 4, 5])
self.assertEqual(mock_req.call_count, 3)
@patch("gitea_auth.api_request")
def test_missing_metadata_none_page_ends(self, mock_req):
mock_req.return_value = None # empty/malformed metadata -> treated as end
self.assertEqual(gitea_auth.api_get_all(URL, FAKE_AUTH), [])
self.assertEqual(mock_req.call_count, 1)
@patch("gitea_auth.api_request")
def test_malformed_metadata_non_list_raises(self, mock_req):
mock_req.return_value = {"message": "not a list"}
with self.assertRaises(RuntimeError) as ctx:
gitea_auth.api_get_all(URL, FAKE_AUTH)
self.assertIn("expected a list page", str(ctx.exception))
@patch("gitea_auth.api_request")
def test_limit_caps_results(self, mock_req):
mock_req.side_effect = [[{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}]]
result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, limit=3)
self.assertEqual([r["id"] for r in result], [1, 2, 3])
@patch("gitea_auth.api_request")
def test_max_pages_safety_cap(self, mock_req):
mock_req.side_effect = [
[{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}], [{"id": 5}, {"id": 6}],
]
result = gitea_auth.api_get_all(URL, FAKE_AUTH, page_size=2, max_pages=2)
self.assertEqual(len(result), 4)
self.assertEqual(mock_req.call_count, 2)
@patch("gitea_auth.api_request")
def test_query_params_appended_and_preserved(self, mock_req):
mock_req.return_value = [] # first (empty) page ends immediately
gitea_auth.api_get_all(URL + "?state=open", FAKE_AUTH, page_size=2)
called_url = mock_req.call_args[0][1]
self.assertIn("state=open", called_url)
self.assertIn("page=1", called_url)
self.assertIn("limit=2", called_url)
if __name__ == "__main__":
unittest.main()
+6 -6
View File
@@ -103,7 +103,7 @@ class TestCloseIssue(unittest.TestCase):
# ---------------------------------------------------------------------------
class TestListIssues(unittest.TestCase):
@patch("mcp_server.api_request")
@patch("mcp_server.api_get_all")
@patch("mcp_server.get_auth_header", return_value=FAKE_AUTH)
def test_returns_formatted_list(self, _auth, mock_api):
mock_api.return_value = [
@@ -124,20 +124,20 @@ class TestListIssues(unittest.TestCase):
self.assertEqual(result[0]["assignee"], "alice")
self.assertEqual(result[1]["assignee"], "")
@patch("mcp_server.api_request")
@patch("mcp_server.api_get_all")
@patch("mcp_server.get_auth_header", return_value=FAKE_AUTH)
def test_passes_label_filter(self, _auth, mock_api):
mock_api.return_value = []
gitea_list_issues(label="important")
url = mock_api.call_args[0][1]
url = mock_api.call_args[0][0]
self.assertIn("labels=important", url)
@patch("mcp_server.api_request")
@patch("mcp_server.api_get_all")
@patch("mcp_server.get_auth_header", return_value=FAKE_AUTH)
def test_passes_state_filter(self, _auth, mock_api):
mock_api.return_value = []
gitea_list_issues(state="closed")
url = mock_api.call_args[0][1]
url = mock_api.call_args[0][0]
self.assertIn("state=closed", url)
@@ -259,7 +259,7 @@ class TestMirrorRefs(unittest.TestCase):
# ---------------------------------------------------------------------------
class TestListPRs(unittest.TestCase):
@patch("mcp_server.api_request")
@patch("mcp_server.api_get_all")
@patch("mcp_server.get_auth_header", return_value=FAKE_AUTH)
def test_list_prs(self, _auth, mock_api):
mock_api.return_value = [