Skip to content

Voice API quotas (TTS / STT)

Voice providers bill by the number of seconds or characters used per month. Unlike LLM quotas, these limits are hard: once they are exceeded the API returns 403 and the user receives no audio. So we count usage ourselves and alert before the limit is reached:

import os, requests, json, time
# JSON file that stores the usage counters, keyed by month and then provider.
S = "/tmp/voice-quota.json"

# Monthly allowance per provider, in characters or seconds.
LIMITS = dict(
    elevenlabs=100_000,
    deepgram=45_000,
    whisper=60_000,
)
def observe(provider: str, units: int) -> None:
    """Add ``units`` to this month's usage of ``provider`` and alert on thresholds.

    The running totals live in the JSON file ``S``, keyed by month
    (``time.strftime("%Y-%m")``) and then by provider. After the update,
    every threshold (0.7, 0.9, 0.99 of the provider's limit) that the total
    has reached triggers one ``push`` notification; a flag file under
    ``/tmp`` records that the threshold was already reported for that
    provider and month.

    Args:
        provider: Key for the counter and for the lookup in ``LIMITS``.
        units: Amount to add to the current month's total.

    Raises:
        Errors from reading or writing the state file (``OSError``,
        ``json.JSONDecodeError``) and anything ``push`` raises. The counter
        is persisted before notifications are attempted.
    """
    # A missing state file simply means nothing has been recorded yet.
    if os.path.exists(S):
        with open(S, encoding="utf-8") as fh:
            state = json.load(fh)
    else:
        state = {}

    month = time.strftime("%Y-%m")
    per_month = state.setdefault(month, {})
    per_month[provider] = per_month.get(provider, 0) + units
    used = per_month[provider]

    # Write to a sibling temp file and rename it into place, so an interrupted
    # write cannot leave a truncated state file that breaks every later call.
    tmp_path = f"{S}.{os.getpid()}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as fh:
        json.dump(state, fh)
    os.replace(tmp_path, S)

    # Providers missing from LIMITS fall back to a limit of 1, so a total of
    # one unit or more reaches every threshold at once.
    limit = LIMITS.get(provider, 1)
    for thr in (0.7, 0.9, 0.99):
        flag = f"/tmp/voice-{provider}-{month}-{int(thr*100)}.flag"
        if used / limit >= thr and not os.path.exists(flag):
            push(f"🎙️ {provider}: {int(thr*100)}% квоты",
                 f"{used:,} / {limit:,}",
                 9 if thr >= 0.9 else 5)
            # The flag is created only after push returns, so a notification
            # that raised is attempted again on the next call.
            with open(flag, "w"):
                pass
def push(t: str, m: str, p: int) -> None:
    """Post a notification to the service configured in the environment.

    The request goes to ``$NOTIFLY_URL/message`` with ``NOTIFLY_TOKEN`` as
    the ``token`` query parameter and a 5-second timeout.

    Args:
        t: Notification title.
        m: Notification message text.
        p: Priority value, forwarded unchanged.

    Raises:
        KeyError: If ``NOTIFLY_URL`` or ``NOTIFLY_TOKEN`` is not set.
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    response = requests.post(
        f"{os.environ['NOTIFLY_URL']}/message",
        params={"token": os.environ["NOTIFLY_TOKEN"]},
        json={"title": t, "message": m, "priority": p},
        timeout=5,
    )
    # Treat a rejected request as a failure instead of reporting it as sent;
    # otherwise the caller would mark the alert as delivered.
    response.raise_for_status()

In parallel, an HTTP monitor polls api.elevenlabs.io/v1/user to catch quota_exceeded in the provider's own response.