fix: add shared API pagination and failure handling (#67)

Harden gitea_auth.api_request: add a per-request timeout (env
GITEA_HTTP_TIMEOUT), convert timeouts and DNS/network failures
(URLError/TimeoutError) into clear RuntimeErrors, give 502/503/504 an
explicit 'upstream unavailable' message, convert malformed success JSON
into a clean error, and redact credential-like substrings from all error
text. Preserves the success path and existing 429 retry/backoff.

Add shared gitea_auth.api_get_all: page-based pagination that tolerates
missing/malformed metadata (relies on page length, not Link/X-Total-Count
headers), honors an optional overall limit, and caps pages. Wire it into
the read-only list tools gitea_list_issues, gitea_list_prs, and
gitea_list_labels (return shape unchanged).

Add tests/test_api_reliability.py (18 cases) and update the three list-tool
tests to the new call path. Auth, profile, merge, review, and tracker
behavior is unchanged. The modular refactor tracked in #65 is not included.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-07-02 13:27:06 -04:00
parent 093945254d
commit cfe3ff6755
4 changed files with 337 additions and 18 deletions
+120 -7
View File
@@ -14,6 +14,7 @@ import datetime
import subprocess
import urllib.request
import urllib.error
import urllib.parse
from email.utils import parsedate_to_datetime
from dotenv import dotenv_values, load_dotenv
@@ -188,6 +189,39 @@ DEFAULT_MAX_RETRIES = _env_int("GITEA_MAX_RETRIES", 3)
# Retry/backoff timing defaults. backoff_delay() uses these as the defaults
# for its base/cap parameters, and api_request() falls back to them when its
# base_delay/max_delay arguments are None.
# NOTE(review): the second argument is presumably the value used when the
# environment variable is unset or unparsable -- confirm against _env_float.
DEFAULT_BASE_DELAY = _env_float("GITEA_RETRY_BASE_DELAY", 1.0) # seconds
DEFAULT_MAX_DELAY = _env_float("GITEA_RETRY_MAX_DELAY", 60.0) # seconds
# Per-request socket timeout (seconds). Overridable via environment
# (GITEA_HTTP_TIMEOUT). api_request() passes this value to
# urllib.request.urlopen(timeout=...) when its own timeout argument is None.
DEFAULT_HTTP_TIMEOUT = _env_float("GITEA_HTTP_TIMEOUT", 30.0)
def _redact(text):
"""Best-effort strip of credential-like substrings from error text.
Reuses the audit module's redactor so error messages never surface tokens,
Basic/Bearer headers, or password-like values. Falls back to the plain
string if the audit helper is unavailable.
"""
try:
from gitea_audit import _redact_str
return _redact_str(str(text))
except Exception:
return str(text)
def _add_query(url, **params):
"""Return *url* with the given query parameters added or overridden.
Preserves any existing query string on *url* (e.g. ``?state=open``) so
pagination params can be layered on top of an already-filtered endpoint.
"""
parts = urllib.parse.urlsplit(url)
query = dict(urllib.parse.parse_qsl(parts.query, keep_blank_values=True))
for key, value in params.items():
query[str(key)] = str(value)
new_query = urllib.parse.urlencode(query)
return urllib.parse.urlunsplit(
(parts.scheme, parts.netloc, parts.path, new_query, parts.fragment)
)
def parse_retry_after(value, now=None):
"""Parse a ``Retry-After`` header into a non-negative delay in seconds.
@@ -239,16 +273,31 @@ def backoff_delay(attempt, base=DEFAULT_BASE_DELAY, cap=DEFAULT_MAX_DELAY, rand=
def api_request(method, url, auth_header, payload=None, *,
max_retries=None, base_delay=None, max_delay=None,
timeout=None,
sleep_func=time.sleep, rand_func=random.random,
now_func=time.time):
"""Make an authenticated JSON request to the Gitea API.
Returns parsed JSON on success, raises ``RuntimeError`` on HTTP errors.
Returns parsed JSON on success (or ``None`` for an empty body), and raises
``RuntimeError`` on failure.
On HTTP 429 the request is retried up to *max_retries* times: honoring a
valid ``Retry-After`` header (seconds or HTTP-date) when present, otherwise
using capped jittered exponential backoff. Non-429 errors and successful
responses are unchanged. The ``*_func`` parameters are injection points for
using capped jittered exponential backoff. Successful responses are
unchanged.
All failures are converted to a ``RuntimeError`` with a clear, secret
-redacted message (no raw stack traces or credential material):
- Non-429 HTTP errors surface the status code and a redacted response body.
502/503/504 upstream errors get an explicit "Gitea upstream unavailable"
message.
- Timeouts and network/DNS failures (``URLError`` / ``TimeoutError``) surface
a generic "network error contacting Gitea" message.
- A malformed (non-JSON) success body surfaces a "malformed JSON response"
message rather than a raw decode error.
The ``*_func`` parameters and ``timeout`` are injection points for
deterministic testing.
"""
if max_retries is None:
@@ -257,6 +306,8 @@ def api_request(method, url, auth_header, payload=None, *,
base_delay = DEFAULT_BASE_DELAY
if max_delay is None:
max_delay = DEFAULT_MAX_DELAY
if timeout is None:
timeout = DEFAULT_HTTP_TIMEOUT
data = json.dumps(payload).encode("utf-8") if payload is not None else None
req = urllib.request.Request(url, data=data, method=method)
@@ -267,9 +318,8 @@ def api_request(method, url, auth_header, payload=None, *,
attempt = 0
while True:
try:
with urllib.request.urlopen(req) as resp:
with urllib.request.urlopen(req, timeout=timeout) as resp:
body = resp.read().decode("utf-8")
return json.loads(body) if body else None
except urllib.error.HTTPError as e:
if e.code == 429 and attempt < max_retries:
header = e.headers.get("Retry-After") if e.headers else None
@@ -279,8 +329,71 @@ def api_request(method, url, auth_header, payload=None, *,
attempt += 1
sleep_func(delay)
continue
error_body = e.read().decode("utf-8", errors="replace")
raise RuntimeError(f"HTTP {e.code}: {error_body}") from e
try:
error_body = e.read().decode("utf-8", errors="replace")
except Exception:
error_body = ""
detail = _redact(error_body).strip()
if e.code in (502, 503, 504):
msg = f"HTTP {e.code}: Gitea upstream unavailable"
raise RuntimeError(f"{msg}: {detail}" if detail else msg) from e
raise RuntimeError(f"HTTP {e.code}: {detail}") from e
except (urllib.error.URLError, TimeoutError) as e:
reason = getattr(e, "reason", e)
raise RuntimeError(
f"network error contacting Gitea: {_redact(reason)}"
) from e
if not body:
return None
try:
return json.loads(body)
except ValueError as e:
raise RuntimeError("malformed JSON response from Gitea") from e
def api_get_all(url, auth_header, *, limit=None, page_size=50, max_pages=100,
                **kwargs):
    """Fetch a paginated Gitea collection, following page-based pagination.

    Issues successive ``GET`` requests with ``page`` and ``limit`` (per-page)
    query parameters, accumulating list items until one of:

    - a page returns fewer items than the page size (the last page),
    - an empty or ``None`` page is returned (also treated as the end; this is
      how missing/malformed pagination metadata degrades safely),
    - *limit* total items have been collected, or
    - *max_pages* pages have been fetched (a safety cap against runaway
      loops; results are silently truncated at that point).

    Pagination relies on the *length of each returned page*, not on
    ``X-Total-Count`` / ``Link`` headers, so it tolerates missing or malformed
    pagination metadata.

    Args:
        url: Collection endpoint; any existing query string is kept.
        auth_header: Passed through unchanged to :func:`api_request`.
        limit: Optional overall cap on returned items. ``None`` means no cap.
            A value of zero or less returns ``[]`` without contacting the
            server.
        page_size: Items requested per page, clamped to ``1..50``.
        max_pages: Maximum number of pages to request.
        **kwargs: Extra keyword arguments forwarded to :func:`api_request`.

    Returns:
        A list of items (possibly empty), never longer than *limit*.

    Raises:
        RuntimeError: Propagated from :func:`api_request` on network, HTTP or
            malformed-JSON failures, or raised here if a page is not a JSON
            list.
    """
    # Nothing was asked for: skip the request entirely. Previously this still
    # fetched one page and then sliced with a non-positive index.
    if limit is not None and limit <= 0:
        return []

    # Gitea caps per-page results at 50; never request fewer than one item.
    page_size = min(max(page_size, 1), 50)
    if limit is not None:
        # Do not request more per page than the caller wants in total.
        page_size = min(page_size, limit)

    # NOTE(review): the "short page means last page" test below assumes the
    # server really returns up to page_size items per page. If the server
    # enforces a smaller per-page cap, a full page would look short and
    # pagination would stop early -- confirm against the server's API limits.
    results = []
    for page in range(1, max_pages + 1):
        page_url = _add_query(url, page=page, limit=page_size)
        data = api_request("GET", page_url, auth_header, **kwargs)
        if data is None:
            # Empty body: treat as the end of the collection.
            break
        if not isinstance(data, list):
            raise RuntimeError(
                f"expected a list page from Gitea, got {type(data).__name__}"
            )
        results.extend(data)
        if limit is not None and len(results) >= limit:
            return results[:limit]
        if len(data) < page_size:
            # Short (or empty) page: no further pages to fetch.
            break
    return results
def repo_api_url(host, org, repo):