Skip to content

LLM job queue is growing

A large backlog in the queue is a late symptom. Alert as soon as the queue length has grown on every sample for several consecutive minutes (that is, its derivative stays positive):

import os, time, json, requests, redis
# Redis client, created once at import time from the REDIS_URL environment variable.
R = redis.from_url(os.environ["REDIS_URL"])
# Name of the Redis list whose length is sampled on each invocation.
QUEUE = "llm-jobs"
# Path of the JSON file that carries the recent length samples between invocations.
# NOTE(review): presumably /tmp is per-instance scratch space on the target
# runtime — confirm the history actually survives between timer invocations.
S = "/tmp/queue-trend.json"
def handler(event, context):
    """Record the current queue length and alert on sustained growth.

    Appends the current length of the ``QUEUE`` Redis list to a rolling
    window of the last 10 samples persisted as JSON at ``S``. Sends a
    priority-8 notification when the window is full and strictly increasing;
    otherwise sends a priority-7 notification when the current length
    exceeds 1000.

    Args:
        event: Trigger payload; unused.
        context: Runtime context; unused.

    Returns:
        ``{"statusCode": 200}`` whenever no exception is raised.
    """
    window = 10
    cur = R.llen(QUEUE)

    # A missing or unreadable state file restarts the window instead of making
    # every subsequent invocation fail inside json.load.
    try:
        with open(S, encoding="utf-8") as fh:
            state = json.load(fh)
        hist = list(state["hist"])
    except (FileNotFoundError, ValueError, KeyError, TypeError):
        state, hist = {}, []

    hist = (hist + [cur])[-window:]
    state["hist"] = hist
    # Context manager guarantees the samples are flushed and the handle closed.
    with open(S, "w", encoding="utf-8") as fh:
        json.dump(state, fh)

    growing = len(hist) == window and all(
        later > earlier for earlier, later in zip(hist, hist[1:])
    )
    if growing:
        # Separate the samples so the trend is readable (not "101112...").
        trail = " → ".join(map(str, hist))
        # push() takes the priority as its third positional parameter; it has
        # no keyword named ``priority``.
        push(
            "📥 LLM-очередь растёт",
            f"Длина очереди: {trail}\n"
            "Производная положительная 10 шагов — нужно больше воркеров или есть deadlock.",
            8,
        )
    elif cur > 1000:
        push("📥 LLM-очередь длинная", f"len={cur}", 7)
    return {"statusCode": 200}
def push(t, m, p):
    """Send a notification with title *t*, message *m* and priority *p*.

    POSTs a JSON body to ``$NOTIFLY_URL/message``, passing the
    ``NOTIFLY_TOKEN`` environment variable as the ``token`` query parameter.
    The request uses a 5-second timeout; the response is not inspected.
    """
    endpoint = f"{os.environ['NOTIFLY_URL']}/message"
    query = {"token": os.environ["NOTIFLY_TOKEN"]}
    payload = {"title": t, "message": m, "priority": p}
    requests.post(endpoint, params=query, json=payload, timeout=5)

Run every 30 seconds via a timer trigger. With a 10-sample window, the growth alert fires after roughly 4.5 minutes of uninterrupted growth.