Skip to content

Flaky tests detected

If your AI agent has permission to run `git push` and `gh workflow run`, it can develop a dangerous habit: “the build didn’t pass — I’ll just rerun it.” Flaky tests never get fixed that way, and you burn a lot of CI minutes.

A simple detector on top of GitHub Actions:

# Scheduled YC function: checks the most recent workflow runs (up to 50, see per_page below)
import os, requests, collections
# GitHub REST endpoint listing workflow runs for one repository.
# `<owner>/<repo>` is a placeholder that must be filled in; per_page=50 asks
# for at most 50 runs in the single page this script reads.
GH = "https://api.github.com/repos/<owner>/<repo>/actions/runs?per_page=50"
# Authorization header, built once at import time: a missing GH_TOKEN
# environment variable raises KeyError when the module is loaded.
H = {"Authorization": f"Bearer {os.environ['GH_TOKEN']}"}
def handler(event, context):
    """Scheduled entry point: alert when a commit has both passing and failing runs.

    Fetches the workflow runs listed at ``GH``, groups their conclusions by
    ``head_sha`` and treats a commit as flaky when it has at least three
    finished runs that include both a ``"success"`` and a ``"failure"``.
    When any such commit exists, a single notification is sent via ``push``.

    Args:
        event: Trigger payload; not used.
        context: Invocation context; not used.

    Returns:
        ``{"statusCode": 200}`` whether or not anything was reported.

    Raises:
        requests.HTTPError: If the GitHub API answers with an error status
            (bad token, rate limit, unknown repository).
    """
    resp = requests.get(GH, headers=H, timeout=10)
    # Fail with the real HTTP error instead of a KeyError on "workflow_runs".
    resp.raise_for_status()
    runs = resp.json()["workflow_runs"]

    # NOTE(review): runs of *different* workflows on the same commit share a
    # bucket, so one failing workflow next to passing ones also matches.
    # Consider keying on (head_sha, workflow_id) if that proves noisy.
    by_sha = collections.defaultdict(list)
    for r in runs:
        conclusion = r["conclusion"]
        # Runs that have not finished report a null conclusion; they carry no
        # pass/fail signal and would break the str.join below.
        if conclusion is None:
            continue
        by_sha[r["head_sha"]].append(conclusion)

    flaky = {
        sha: c
        for sha, c in by_sha.items()
        if len(c) >= 3 and "success" in c and "failure" in c
    }
    if flaky:
        body = "\n".join(f"• {sha[:7]}: {' '.join(c)}" for sha, c in flaky.items())
        # Priority is passed positionally: push() names this parameter `p`,
        # so the former `priority=7` keyword raised TypeError.
        push("🪲 Flaky тесты", f"{len(flaky)} коммита прыгают:\n{body}", 7)
    return {"statusCode": 200}
def push(t, m, p=None, *, priority=None):
    """Post a notification to the service at ``NOTIFLY_URL``.

    Sends ``POST {NOTIFLY_URL}/message`` with the token from ``NOTIFLY_TOKEN``
    as a query parameter and the title, message and priority as a JSON body.
    The response is not inspected.

    Args:
        t: Notification title.
        m: Notification message text.
        p: Notification priority (positional form).
        priority: Keyword-only alias for ``p``, so that calls written as
            ``push(title, message, priority=7)`` are accepted; takes
            precedence over ``p`` when both are given.

    Raises:
        TypeError: If neither ``p`` nor ``priority`` is supplied.
        KeyError: If ``NOTIFLY_URL`` or ``NOTIFLY_TOKEN`` is not set.
    """
    if priority is not None:
        p = priority
    if p is None:
        # The priority was a required argument before the alias existed;
        # keep it required rather than inventing a default.
        raise TypeError("push() missing required argument: 'p' (or 'priority')")
    requests.post(
        f"{os.environ['NOTIFLY_URL']}/message",
        params={"token": os.environ["NOTIFLY_TOKEN"]},
        json={"title": t, "message": m, "priority": p},
        timeout=5,
    )