Update wrapper/wrapper.py

Added playwright and chromium for data scraping, and simplified env designation
This commit is contained in:
frarol96 2025-08-16 05:06:23 +00:00
parent eac918e317
commit 34a4582791

View File

@ -1,255 +1,262 @@
# Self-updating launcher for bot.py: clones the configured repository into a
# cache directory, starts the bot from there, re-checks the repository once a
# day at a fixed UTC time, and rolls back to the previous cache on failure.
import datetime
import json
import os
import pathlib
import re
import shutil
import signal
import subprocess
import sys
import time
from typing import Tuple

# ---------- Config (env) ----------
REPO_URL = os.getenv("REPO_URL", "").strip()
REPO_BRANCH = os.getenv("REPO_BRANCH", "main").strip()
REPO_TOKEN = os.getenv("REPO_TOKEN", "").strip()  # optional (private repos); unused if empty

# prefer CHECK_TIME_UTC; fall back to old RECHECK_UTC
CHECK_TIME_UTC = os.getenv("CHECK_TIME_UTC", os.getenv("RECHECK_UTC", "03:00")).strip()  # HH:MM (UTC)

# 1 = ignore test-only bumps (e.g. v1.2.3.4-T3 -> v1.2.3.4-T4)
IGNORE_TEST_LEVEL = os.getenv("IGNORE_TEST_LEVEL", "1").strip() == "1"

PIP_INSTALL = os.getenv("PIP_INSTALL_REQUIREMENTS", "1").strip() == "1"

CACHE_DIR = pathlib.Path("/cache/app")   # current code
TMP_DIR = pathlib.Path("/cache/tmp")     # temp checkout
PREV_DIR = pathlib.Path("/cache/prev")   # rollback copy
DATA_DIR = pathlib.Path("/data")         # persistent data volume
RUN_TIMEOUT = int(os.getenv("WRAPPER_STOP_TIMEOUT", "25"))  # seconds to wait after terminate()
ROLLBACK_MAX_FAILS = 3


# ---------- Helpers ----------
def _redact(text: str) -> str:
    """Return *text* with every occurrence of REPO_TOKEN replaced by ``***``."""
    return text.replace(REPO_TOKEN, "***") if REPO_TOKEN else text


def log(msg: str):
    """Print *msg* with the ``[wrapper]`` prefix, flushing immediately.

    The message is redacted first: the clone command and any exception text
    derived from it contain the token-bearing URL.
    """
    print(f"[wrapper] {_redact(msg)}", flush=True)


def run(*cmd, cwd=None, check=True) -> subprocess.CompletedProcess:
    """Log and execute *cmd*, capturing stdout/stderr as text.

    Raises:
        subprocess.CalledProcessError: non-zero exit status while *check* is true.
        OSError: the executable could not be started.
    """
    log(f"$ {' '.join(cmd)}")
    return subprocess.run(cmd, cwd=cwd, check=check, text=True, capture_output=True)


def ensure_git():
    """Exit the process with status 1 when ``git --version`` cannot be run."""
    try:
        run("git", "--version")
    except (OSError, subprocess.CalledProcessError) as e:
        # A missing binary surfaces as FileNotFoundError (an OSError), which
        # has no captured stderr; fall back to the exception text.
        detail = getattr(e, "stderr", None) or e
        log(f"git missing? {detail}")
        sys.exit(1)


def utc_now() -> datetime.datetime:
    """Return the current UTC time as a naive ``datetime``."""
    # Same value the deprecated datetime.utcnow() produced.
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)


def next_utc(hhmm: str) -> float:
    """Return the seconds from now until the next occurrence of *hhmm* (UTC).

    Raises:
        ValueError: *hhmm* is not ``HH:MM`` or is out of range.
    """
    hh, mm = map(int, hhmm.split(":"))
    now = utc_now()
    tgt = now.replace(hour=hh, minute=mm, second=0, microsecond=0)
    if tgt <= now:
        # Today's slot has already passed; aim for tomorrow's.
        tgt += datetime.timedelta(days=1)
    return (tgt - now).total_seconds()


_VERSION_RE = re.compile(r'^\s*VERSION\s*=\s*[\'"]([^\'"]+)[\'"]', re.M)
_LEADING_INT_RE = re.compile(r"\d+")


def extract_version_from(path: pathlib.Path) -> str:
    """Return the ``VERSION = "..."`` string found in *path*.

    Returns ``"v0.0.0.0"`` when the file cannot be read or has no such line.
    """
    try:
        txt = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return "v0.0.0.0"
    m = _VERSION_RE.search(txt)
    return m.group(1).strip() if m else "v0.0.0.0"


def _version_component(text: str) -> int:
    """Return the leading integer of *text*, or 0 when it has none."""
    m = _LEADING_INT_RE.match(text.strip())
    return int(m.group()) if m else 0


def _test_suffix(ver: str) -> str:
    """Return whatever follows ``-T`` in *ver* (empty when there is no suffix)."""
    return ver.partition("-T")[2].strip()


def parse_version(ver: str) -> Tuple[int, int, int, int, bool]:
    """Split ``vMajor.Minor.Enh.Patch[-T...]`` into four ints and a test flag.

    Example: ``v1.2.3.4-T7`` -> ``(1, 2, 3, 4, True)``. Missing or non-numeric
    components count as 0, so a malformed version string never raises.
    """
    test = "-T" in ver
    core = ver.split("-T")[0].lstrip("v")
    fields = (core.split(".") + ["0", "0", "0", "0"])[:4]
    major, minor, enh, patch = (_version_component(f) for f in fields)
    return major, minor, enh, patch, test


def should_update(old: str, new: str) -> bool:
    """Decide whether *new* should replace *old*.

    Any difference in the numeric tuple triggers an update (a lower number is
    deployed too). When only the ``-T`` suffix differs (e.g. v1.2.3.4-T1 ->
    v1.2.3.4-T2), the update happens only if IGNORE_TEST_LEVEL is off.
    """
    if parse_version(old)[:4] != parse_version(new)[:4]:
        return True
    if IGNORE_TEST_LEVEL:
        return False
    # Compare the suffix text, not the "has a suffix" flag: the flag is True
    # for both -T3 and -T4 and would hide the bump.
    return _test_suffix(old) != _test_suffix(new)


def clone_or_fetch(target: pathlib.Path):
    """Bring *target* to the tip of REPO_BRANCH.

    An existing checkout is fetched and hard-reset; if that fails, or there
    is no checkout, *target* is (re)created with a shallow clone.

    Raises:
        subprocess.CalledProcessError: the clone failed.
    """
    if target.exists() and (target / ".git").exists():
        try:
            run("git", "fetch", "--all", "-p", cwd=target)
            run("git", "reset", "--hard", f"origin/{REPO_BRANCH}", cwd=target)
            return
        except Exception as e:
            log(f"fetch failed, recloning: {e}")
            shutil.rmtree(target, ignore_errors=True)

    url = REPO_URL
    # optional token (only if provided); touch the scheme prefix only
    if REPO_TOKEN and REPO_URL.startswith("https://"):
        url = REPO_URL.replace("https://", f"https://{REPO_TOKEN}@", 1)
    run("git", "clone", "--branch", REPO_BRANCH, "--depth", "1", url, str(target))


def prime_tmp_then_decide():
    """Check out the remote into TMP_DIR and decide whether to swap the cache.

    Returns:
        ``(do_swap, old_ver, new_ver, reason)``, where the versions are the
        ``VERSION`` strings of the cached and the freshly fetched ``bot.py``.
    """
    # Start from an empty TMP_DIR; mkdir first so its parent exists.
    TMP_DIR.mkdir(parents=True, exist_ok=True)
    shutil.rmtree(TMP_DIR, ignore_errors=True)
    clone_or_fetch(TMP_DIR)

    cached_bot = CACHE_DIR / "bot.py"
    have_cache = cached_bot.exists()
    new_ver = extract_version_from(TMP_DIR / "bot.py")
    old_ver = extract_version_from(cached_bot) if have_cache else "v0.0.0.0"
    log(f"cached version: {old_ver} / remote version: {new_ver}")

    if not have_cache:
        # First time
        do_swap = True
        reason = f"first fetch -> {new_ver}"
    else:
        do_swap = should_update(old_ver, new_ver)
        reason = f"update allowed: {do_swap} (old={old_ver}, new={new_ver})"

    return do_swap, old_ver, new_ver, reason


def swap_cache_to_prev():
    """Move the current cache to PREV_DIR, replacing any older rollback copy."""
    PREV_DIR.mkdir(parents=True, exist_ok=True)
    shutil.rmtree(PREV_DIR, ignore_errors=True)
    if CACHE_DIR.exists():
        shutil.move(str(CACHE_DIR), str(PREV_DIR))


def copy_tmp_to_cache():
    """Replace CACHE_DIR with a copy of the fresh checkout in TMP_DIR."""
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    shutil.copytree(TMP_DIR, CACHE_DIR, dirs_exist_ok=False)


def pip_install(cwd: pathlib.Path):
    """Install ``cwd/requirements.txt`` if enabled and present (best effort)."""
    req = cwd / "requirements.txt"
    if not (PIP_INSTALL and req.exists()):
        return
    try:
        run(sys.executable, "-m", "pip", "install", "-r", str(req))
    except subprocess.CalledProcessError as e:
        log("pip install failed (will continue anyway)")
        log((e.stdout or "") + "\n" + (e.stderr or ""))


def set_boot_env(status: str, old_ver: str, new_ver: str, commit: str = "", subject: str = ""):
    """Export boot details through SHAI_* variables for the bot to read."""
    # Env passed to the bot; bot should read and post to modlog on_ready
    os.environ["SHAI_BOOT_STATUS"] = status
    os.environ["SHAI_BOOT_OLDVER"] = old_ver
    os.environ["SHAI_BOOT_NEWVER"] = new_ver
    os.environ["SHAI_BUILD_COMMIT"] = commit
    os.environ["SHAI_BUILD_SUBJECT"] = subject


def get_head_info(cwd: pathlib.Path) -> Tuple[str, str]:
    """Return ``(short_sha, subject)`` of HEAD in *cwd*, or ``("", "")`` on error."""
    try:
        sha = run("git", "rev-parse", "--short", "HEAD", cwd=cwd, check=True).stdout.strip()
        subj = run("git", "log", "-1", "--pretty=%s", cwd=cwd, check=True).stdout.strip()
        return (sha, subj)
    except Exception:
        return ("", "")


def start_bot(cwd: pathlib.Path) -> subprocess.Popen:
    """Launch ``bot.py`` from *cwd* with the current environment."""
    env = os.environ.copy()
    # Default data file location (the bot should use SHAI_DATA or
    # SHAI_DATA_FILE or config); the directory itself is not created here.
    env.setdefault("SHAI_DATA", "/data/data.json")
    # Run from the cached code directory
    return subprocess.Popen([sys.executable, "-u", "bot.py"], cwd=cwd, env=env)


def graceful_restart(proc: subprocess.Popen) -> bool:
    """Stop *proc*: terminate, wait RUN_TIMEOUT seconds, then kill.

    Returns True once the process has exited, False if stopping it raised.
    """
    try:
        proc.terminate()
        try:
            proc.wait(timeout=RUN_TIMEOUT)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait(timeout=10)
        return True
    except Exception:
        return False


def run_loop():
    """Fetch, boot the bot, then check for updates at CHECK_TIME_UTC forever."""
    ensure_git()

    # initial fetch/decide
    updated, old_ver, new_ver, reason = prime_tmp_then_decide()
    if updated:
        log(f"updating cache: {reason}")
        swap_cache_to_prev()
        copy_tmp_to_cache()
    else:
        log(f"no update: {reason}")

    # pip install if needed (requirements.txt inside repo)
    pip_install(CACHE_DIR)

    # boot status env
    sha, subj = get_head_info(CACHE_DIR)
    if updated:
        set_boot_env(
            "Successfully fetched, cached, and booted new version",
            old_ver, new_ver, sha, subj
        )
    else:
        msg = "Successfully booted from cached version"
        if sha or subj:
            msg += " (repo reachable)"
        set_boot_env(msg, old_ver, new_ver, sha, subj)

    # start bot
    proc = start_bot(CACHE_DIR)
    log(f"bot started pid={proc.pid}")

    consecutive_failures = 0

    while True:
        # sleep until next check (03:00 UTC by default)
        delay = next_utc(CHECK_TIME_UTC)
        log(f"sleeping {int(delay)}s until {CHECK_TIME_UTC} UTC for update check")
        time.sleep(delay)

        # check for update
        try:
            upd, cur_ver, remote_ver, why = prime_tmp_then_decide()
            log(f"nightly check: {why}")
            if not upd:
                # no update -> continue loop
                continue

            # graceful restart into new version
            log("updating to new version at nightly window")
            if not graceful_restart(proc):
                log("warning: bot did not stop cleanly")

            # swap and boot
            swap_cache_to_prev()
            copy_tmp_to_cache()
            pip_install(CACHE_DIR)
            sha, subj = get_head_info(CACHE_DIR)
            set_boot_env("Successfully fetched, cached, and booted new version", cur_ver, remote_ver, sha, subj)
            proc = start_bot(CACHE_DIR)
            log(f"bot restarted on new version pid={proc.pid}")
            consecutive_failures = 0

        except Exception as e:
            log(f"nightly update failed: {e}")
            consecutive_failures += 1
            # NOTE(review): a failure before anything was swapped (e.g. the
            # fetch itself) also lands here and triggers a rollback - confirm
            # that this is intended.
            if consecutive_failures < ROLLBACK_MAX_FAILS and PREV_DIR.exists() and (PREV_DIR / "bot.py").exists():
                log("attempting rollback to previous cached version")
                if proc.poll() is None:
                    graceful_restart(proc)
                shutil.rmtree(CACHE_DIR, ignore_errors=True)
                shutil.copytree(PREV_DIR, CACHE_DIR, dirs_exist_ok=False)
                pip_install(CACHE_DIR)
                set_boot_env("Rolled back to last known working version", "-", extract_version_from(CACHE_DIR / "bot.py"))
                proc = start_bot(CACHE_DIR)
            elif consecutive_failures >= ROLLBACK_MAX_FAILS:
                log("critical: failed 3 times to update/restart; entering freeze mode")
                # Optional: DM owner could be done in a tiny fallback bot here if OWNER_ID provided.
                # For now, just idle to allow SSH/exec into container.
                try:
                    if proc.poll() is None:
                        graceful_restart(proc)
                except Exception as stop_err:
                    log(f"could not stop bot before freeze: {stop_err}")
                while True:
                    time.sleep(3600)


if __name__ == "__main__":
    run_loop()