0.3.9.5.a2

Fixed issues relating to RSS feed not giving enough details
This commit is contained in:
Franz Rolfsvaag 2025-08-11 01:18:01 +02:00
parent cd11e3106f
commit 2a898802b6
2 changed files with 266 additions and 211 deletions

2
bot.py
View File

@ -9,7 +9,7 @@ from modules.common.boot_notice import post_boot_notice
# Version consists of:
# Major.Enhancement.Minor.Patch.Test (Test is alphanumeric; doesn't trigger auto update)
VERSION = "0.3.9.5.a1"
VERSION = "0.3.9.5.a2"
# ---------- Env loading ----------

View File

@ -1,177 +1,32 @@
# modules/common/boot_notice.py
import os
import re
import html
import base64
import json
import time
from datetime import datetime, timezone, timedelta
from urllib.parse import urlparse
import discord
import aiohttp
import xml.etree.ElementTree as ET
from urllib.parse import urlparse
from modules.common.settings import cfg
# ---------------- Utilities ----------------
# ---------------- Version helpers ----------------
def _strip_html_keep_text(s: str) -> str:
"""Remove HTML tags, unescape entities, collapse excessive blank lines."""
if not s:
return ""
s = re.sub(r'(?i)<\s*br\s*/?\s*>', '\n', s)
s = re.sub(r'(?i)</\s*p\s*>', '\n', s)
s = re.sub(r'(?i)<\s*p\s*>', '', s)
s = re.sub(r'<[^>]+>', '', s)
s = html.unescape(s)
s = '\n'.join(line.rstrip() for line in s.splitlines())
s = re.sub(r'\n{3,}', '\n\n', s).strip()
return s
_VERSION_RE = re.compile(r'\b\d+\.\d+\.\d+\.\d+(?:\.[A-Za-z0-9]+)?\b')
def _build_status_line(status: str, old_v: str, new_v: str, desc: str) -> str | None:
status = (status or "").strip()
old_v = (old_v or "").strip()
new_v = (new_v or "").strip()
if status == "fetched_new":
line = f"✅ Booted new version: v{old_v or '0.0.0.0'} → **v{new_v}**"
elif status == "cached_no_update":
line = f"🟢 Booted cached version: **v{new_v}** — no new update found"
elif status == "cache_only_error":
line = f"🟡 Booted cached version: **v{new_v}** — repository not accessible"
elif status == "scheduled_restart":
line = "🕒 Scheduled restart executed"
else:
def _extract_version(subject: str) -> str | None:
if not subject:
return None
return f"{line}\n_{desc.strip()}_" if desc else line
m = _VERSION_RE.search(subject)
return m.group(0) if m else None
def _only_version_and_details(subject: str | None, body: str | None) -> str | None:
"""Format to 'Version number' (bold) + 'Version details' (md)."""
if not subject and not body:
return None
version = None
if subject:
m = re.search(r'\bv?(\d+\.\d+(?:\.\d+){0,2})\b', subject)
version = m.group(0) if m else subject.strip()
if version and body:
return f"**{version}**\n{body.strip()}"
if version:
return f"**{version}**"
return body.strip() if body else None
# ---------------- RSS helpers ----------------
async def _fetch_latest_rss_item(rss_url: str):
    """Fetch the first ``<item>`` of an RSS feed.

    Returns a ``(title, body, link)`` tuple whose members are each a string
    or ``None``.  Any failure (non-200 response, parse error, timeout, no
    item in the feed) yields ``(None, None, None)``.

    The title and description are passed through ``_strip_html_keep_text``.
    When the title contains "pushed", "commit" or "committed" as a whole
    word, the first line of the body replaces it, if there is one.
    """
    nothing = (None, None, None)
    try:
        session_timeout = aiohttp.ClientTimeout(total=8)
        async with aiohttp.ClientSession(timeout=session_timeout) as session:
            async with session.get(rss_url) as response:
                if response.status != 200:
                    return nothing
                payload = await response.text()

        # NOTE(review): the feed is parsed with the stdlib ElementTree; confirm
        # the feed URL is always a trusted source.
        newest = ET.fromstring(payload).find('./channel/item')
        if newest is None:
            return nothing

        def field(tag):
            return (newest.findtext(tag) or '').strip()

        title = field('title')
        body = _strip_html_keep_text(field('description')) or None
        link = field('link') or None

        # Activity-style titles carry no version info; use the body's first line.
        if title and re.search(r'\b(pushed|commit|committed)\b', title, re.I):
            first_line = body.splitlines()[0].strip() if body else ""
            if first_line:
                title = first_line

        return _strip_html_keep_text(title) or None, body, link
    except Exception:
        return nothing
def _parse_gitea_link_for_api(link: str):
"""
From a Gitea commit link like:
https://git.example.com/owner/repo/commit/abcdef...
derive:
api_base: https://git.example.com/api/v1
owner: owner
repo: repo
sha: abcdef...
"""
try:
pr = urlparse(link)
parts = [p for p in pr.path.split('/') if p]
# Expect: [owner, repo, 'commit', sha]
if len(parts) >= 4 and parts[2] == 'commit':
owner, repo, sha = parts[0], parts[1], parts[3]
api_base = f"{pr.scheme}://{pr.netloc}/api/v1"
return api_base, owner, repo, sha
except Exception:
pass
return None, None, None, None
async def _fetch_gitea_commit_message(commit_link: str, token: str | None):
    """Request the full commit message for a commit link from the Gitea API.

    Two endpoints are tried in order, each best-effort:
      1) /api/v1/repos/{owner}/{repo}/git/commits/{sha}
      2) /api/v1/repos/{owner}/{repo}/commits/{sha}

    Returns the message as ``str`` from the first endpoint that answers 200
    with a non-empty message, otherwise ``None``.  ``token``, when given, is
    sent as an ``Authorization: token ...`` header.
    """
    api_base, owner, repo, sha = _parse_gitea_link_for_api(commit_link)
    if not (api_base and owner and repo and sha):
        return None

    auth_headers = {'Authorization': f'token {token}'} if token else {}

    # (url, top_level_first): the first endpoint looks at the top-level
    # "message" before "commit.message"; the second does the opposite.
    attempts = (
        (f"{api_base}/repos/{owner}/{repo}/git/commits/{sha}", True),
        (f"{api_base}/repos/{owner}/{repo}/commits/{sha}", False),
    )

    session_timeout = aiohttp.ClientTimeout(total=8)
    async with aiohttp.ClientSession(timeout=session_timeout, headers=auth_headers) as session:
        for url, top_level_first in attempts:
            try:
                async with session.get(url) as response:
                    if response.status != 200:
                        continue
                    payload = await response.json()
                    if not isinstance(payload, dict):
                        continue
                    if top_level_first:
                        message = payload.get('message') or (payload.get('commit') or {}).get('message')
                    else:
                        message = (payload.get('commit') or {}).get('message') or payload.get('message')
                    if message:
                        return str(message)
            except Exception:
                # Best effort: a failing endpoint falls through to the next.
                pass
    return None
def _split_subject_body(full_message: str):
"""
Split a full commit message into (subject, body).
Subject = first non-empty line; body = rest (preserve markdown).
"""
def _split_subject_body(full_message: str) -> tuple[str | None, str | None]:
if not full_message:
return None, None
lines = [ln.rstrip() for ln in full_message.splitlines()]
# find first non-empty line
# subject = first non-empty line
subject = None
i = 0
while i < len(lines) and subject is None:
@ -181,72 +36,272 @@ def _split_subject_body(full_message: str):
body = '\n'.join(lines[i:]).strip() if i < len(lines) else ''
return subject or None, (body or None)
def _cmp_versions(a: str | None, b: str | None) -> int:
"""
Compare your version style: 1.2.3.4.a2 (last segment alnum optional).
Returns: -1 if a<b, 0 if equal/unknown, +1 if a>b.
If either is None, treat as equal (0) to avoid false rollback/upgrade.
"""
if not a or not b:
return 0
pa = a.split('.')
pb = b.split('.')
# pad to 5 parts
while len(pa) < 5: pa.append('0')
while len(pb) < 5: pb.append('0')
def part_key(x: str):
# numeric if digits; else (numeric_prefix, alpha_suffix)
if x.isdigit():
return (int(x), '', 1)
# split alnum: digits prefix (if any) + rest
m = re.match(r'(\d+)(.*)', x)
if m:
return (int(m.group(1)), m.group(2), 2)
return (0, x, 3)
for xa, xb in zip(pa, pb):
ka, kb = part_key(xa), part_key(xb)
if ka[0] != kb[0]:
return 1 if ka[0] > kb[0] else -1
if ka[2] != kb[2]:
return 1 if ka[2] < kb[2] else -1 # prefer pure numeric (1) > num+alpha (2) > alpha (3)
if ka[1] != kb[1]:
return 1 if ka[1] > kb[1] else -1
return 0
# ---------------- Gitea helpers ----------------
def _parse_repo_url(repo_url: str) -> tuple[str | None, str | None, str | None]:
"""
From https://host/owner/repo(.git) -> (api_base, owner, repo)
api_base = https://host/api/v1
"""
try:
pr = urlparse(repo_url)
parts = [p for p in pr.path.split('/') if p]
if len(parts) >= 2:
owner = parts[0]
repo = parts[1]
if repo.endswith('.git'):
repo = repo[:-4]
api_base = f"{pr.scheme}://{pr.netloc}/api/v1"
return api_base, owner, repo
except Exception:
pass
return None, None, None
async def _gitea_get_json(url: str, token: str | None, user: str | None, timeout_sec: int = 10):
    """GET ``url`` and return the decoded JSON body.

    Authentication: with both ``user`` and ``token`` a Basic header built
    from ``user:token`` is sent; with only ``token`` an
    ``Authorization: token ...`` header is sent; otherwise no auth header.

    Raises:
        RuntimeError: if the response status is not 200 (the message carries
            the status and the first 200 characters of the response body).
    """
    if token and user:
        # Basic auth with user:token
        basic = base64.b64encode(f"{user}:{token}".encode()).decode()
        auth_headers = {'Authorization': f"Basic {basic}"}
    elif token:
        auth_headers = {'Authorization': f"token {token}"}
    else:
        auth_headers = {}

    session_timeout = aiohttp.ClientTimeout(total=timeout_sec)
    async with aiohttp.ClientSession(timeout=session_timeout, headers=auth_headers) as session:
        async with session.get(url) as response:
            if response.status == 200:
                return await response.json()
            text = await response.text()
            raise RuntimeError(f"Gitea GET {url} -> {response.status}: {text[:200]}")
async def _fetch_latest_commit(api_base: str, owner: str, repo: str, branch: str,
                               token: str | None, user: str | None) -> tuple[str | None, str | None, str | None]:
    """Return ``(sha, subject, body)`` for the latest commit on ``branch``.

    Steps:
      1. Resolve the branch head via ``/branches/{branch}``; if that fails,
         fall back to the commit list endpoint (``/commits?sha=<branch>``)
         and take its first entry.
      2. Fetch the commit's full message, trying ``/git/commits/{sha}`` and
         then ``/commits/{sha}``, and split it with ``_split_subject_body``.

    ``subject`` and ``body`` are returned as ``""`` when an endpoint answered
    but carried no message.

    Raises:
        RuntimeError: if no commit sha could be determined, or if neither
            detail endpoint could be fetched.
    """
    from urllib.parse import quote  # local: only needed for the fallback query

    repo_api = f"{api_base}/repos/{owner}/{repo}"

    # Fast path: get branch -> commit sha
    try:
        bjson = await _gitea_get_json(f"{repo_api}/branches/{branch}", token, user)
        head = bjson.get('commit') or {}
        sha = head.get('id') or head.get('sha')
        if not sha:
            raise RuntimeError("No commit sha on branch")
    except Exception as e:
        # Fallback: list commits starting at the branch. The list endpoint
        # takes the ref as the `sha` query parameter and returns a JSON array.
        commits_url = f"{repo_api}/commits?sha={quote(branch, safe='')}&limit=1"
        try:
            cjson = await _gitea_get_json(commits_url, token, user)
            if not (isinstance(cjson, list) and cjson):
                raise RuntimeError("Empty commits list")
            sha = cjson[0].get('sha') or cjson[0].get('id')
            if not sha:
                raise RuntimeError("No commit sha in commits list")
        except Exception as e2:
            raise RuntimeError(f"Failed to get latest commit: {e} / {e2}") from e2

    # Now fetch the full commit message; try git/commits first.
    answered = False
    for endpoint in (f"{repo_api}/git/commits/{sha}",
                     f"{repo_api}/commits/{sha}"):
        try:
            data = await _gitea_get_json(endpoint, token, user)
        except Exception:
            continue
        answered = True
        msg = None
        if isinstance(data, dict):
            nested = data.get('commit')
            msg = data.get('message') or (
                nested.get('message') if isinstance(nested, dict) else None
            )
        if msg:
            subject, body = _split_subject_body(str(msg))
            return sha, (subject or ""), (body or "")
        # Answered without a message: give the next endpoint a chance.

    if answered:
        return sha, "", ""
    raise RuntimeError("Unable to fetch commit details")
# ---------------- Boot reason inference ----------------
def _is_near_scheduled(now_utc: datetime, hhmm_utc: str | None, window_min: int = 5) -> bool:
if not hhmm_utc:
return False
try:
hh, mm = [int(x) for x in hhmm_utc.strip().split(':', 1)]
except Exception:
return False
sched = now_utc.replace(hour=hh, minute=mm, second=0, microsecond=0)
delta = abs((now_utc - sched).total_seconds())
return delta <= window_min * 60
def _format_status_line(kind: str, old_ver: str | None, new_ver: str | None) -> str:
if kind == "updated":
return f"✅ Updated from **{old_ver or 'unknown'}** → **{new_ver or 'unknown'}**"
if kind == "scheduled":
return "🕒 Scheduled restart executed"
if kind == "manual":
return "🟢 Manual restart detected"
if kind == "rollback":
return f"⚠️ Version rollback detected: **{old_ver or 'unknown'}** → **{new_ver or 'unknown'}**"
return "🟢 Bot started"
# ---------------- Main entry ----------------
async def post_boot_notice(bot):
"""
Posts concise boot status to the modlog channel.
- Always: one status line (if SHAI_BOOT_STATUS is set to a known value).
- If SHAI_BOOT_STATUS == 'fetched_new': fetch **full commit message**.
Prefer Gitea API (requires only the commit link from RSS).
Fallback to RSS subject/description if API fails.
Then post ONLY: Version number (bold) + markdown details.
Always post a boot status to the modlog channel.
Logic:
- Wait until bot is ready (guilds/channels cached).
- Resolve repo from cfg(repo_url/repo_branch); attempt to fetch latest commit (sha, subject, body).
- Compare to stored boot_state (last_sha/last_version/last_boot_ts):
* sha/version advanced -> Updated
* sha same and near scheduled time -> Scheduled restart
* sha same and not near schedule -> Manual restart
* version decreased -> Rollback (ping guild owner)
- Post status line.
- Post commit message (bold version + md body).
- Persist new boot_state.
"""
status = os.getenv("SHAI_BOOT_STATUS", "").strip() # fetched_new | cached_no_update | cache_only_error | scheduled_restart | ''
desc = os.getenv("SHAI_BOOT_DESC", "").strip()
old_v = os.getenv("SHAI_BOOT_OLD", "").strip()
new_v = os.getenv("SHAI_BOOT_NEW", "").strip()
rss = os.getenv("SHAI_REPO_RSS", "").strip()
token = os.getenv("SHAI_GITEA_TOKEN", "").strip() or None # optional
status_line = _build_status_line(status, old_v, new_v, desc)
if not status_line:
return # nothing to say
try:
await bot.wait_until_ready()
except Exception as e:
print(f"[boot_notice] wait_until_ready failed: {e}")
# Resolve modlog channel
modlog_channel_id = cfg(bot).int('modlog_channel_id', 0)
if not modlog_channel_id:
print("[boot_notice] modlog_channel_id not configured; skipping.")
return
ch = None
ch = bot.get_channel(modlog_channel_id)
if not ch:
# fallback: search across guilds
for g in bot.guilds:
ch = g.get_channel(modlog_channel_id)
if ch:
break
if not ch:
print(f"[boot_notice] channel id {modlog_channel_id} not found; skipping.")
return
# 1) Post the status line
# Repo info
r = cfg(bot)
repo_url = r.get('repo_url', '')
branch = r.get('repo_branch', 'main')
api_base = owner = repo = None
if repo_url:
api_base, owner, repo = _parse_repo_url(repo_url)
token = os.getenv("SHAI_GITEA_TOKEN", "").strip() or None
user = os.getenv("SHAI_GITEA_USER", "").strip() or None
check_time_utc = r.get('check_time_utc', '') # e.g., "03:00"
now_utc = datetime.now(timezone.utc)
# State store
dm = getattr(bot, "data_manager", None)
if not dm:
print("[boot_notice] data_manager missing on bot; cannot persist state.")
return
prev = (dm.get('boot_state') or [{}])[-1] if dm.get('boot_state') else {}
prev_sha = prev.get('last_sha') or None
prev_ver = prev.get('last_version') or None
# Fetch latest commit (sha, subject, body)
sha = subject = body = None
if api_base and owner and repo:
try:
await ch.send(status_line, allowed_mentions=discord.AllowedMentions.none())
except Exception:
return
sha, subject, body = await _fetch_latest_commit(api_base, owner, repo, branch, token, user)
except Exception as e:
print(f"[boot_notice] fetch latest commit failed: {e}")
# 2) If updated, post ONLY the commit message (version + details)
if status == "fetched_new" and rss:
subj, body, link = await _fetch_latest_rss_item(rss)
# Derive current version (from subject)
curr_ver = _extract_version(subject) if subject else None
# Try to get the full commit message from Gitea API using the link's SHA
full_msg = None
if link:
full_msg = await _fetch_gitea_commit_message(link, token)
# Decide reason
reason = "manual"
mention_owner = False
# If API failed, fall back to what RSS gave us
if not full_msg:
# if RSS body is a single-line/empty, we only have subject anyway
if body and subj and body.startswith(subj):
# avoid duplicate subject at start
body_trimmed = body[len(subj):].lstrip()
full_msg = f"{subj}\n{body_trimmed}".strip() if body_trimmed else subj
if prev_ver and curr_ver:
cmpv = _cmp_versions(prev_ver, curr_ver)
if cmpv < 0:
reason = "updated"
elif cmpv > 0:
reason = "rollback"
mention_owner = True
else: # same version
reason = "scheduled" if _is_near_scheduled(now_utc, check_time_utc) else "manual"
else:
combined = "\n".join(x for x in [subj or "", body or ""] if x).strip()
full_msg = combined or (subj or body or "")
# Fall back to sha compare if versions missing
if prev_sha and sha and prev_sha != sha:
reason = "updated"
else:
reason = "scheduled" if _is_near_scheduled(now_utc, check_time_utc) else "manual"
# Split into subject/body and format as Version + Details
c_subject, c_body = _split_subject_body(full_msg or "")
commit_msg = _only_version_and_details(c_subject, c_body)
if commit_msg:
# Post status line
status_line = _format_status_line(reason, prev_ver, curr_ver)
try:
# ping owner only on rollback
allowed = discord.AllowedMentions(everyone=False, users=True if mention_owner else False, roles=False, replied_user=False)
if mention_owner and ch.guild and ch.guild.owner_id:
status_line = f"{status_line}\n<@{ch.guild.owner_id}>"
await ch.send(status_line, allowed_mentions=allowed)
except Exception as e:
print(f"[boot_notice] failed to send status line: {e}")
return
# Post commit message (if we have it)
# Format: **Version**\n<md body>
try:
title = curr_ver or (subject or "Latest commit")
if title or body:
# Always post a commit message on start; it's the core “what's running now”
if body:
commit_msg = f"**{title}**\n{body}"
else:
commit_msg = f"**{title}**"
await ch.send(commit_msg, allowed_mentions=discord.AllowedMentions.none())
except Exception:
pass
except Exception as e:
print(f"[boot_notice] failed to send commit message: {e}")
# Persist new state
try:
new_state = {
'last_sha': sha,
'last_version': curr_ver,
'last_subject': subject,
'last_boot_ts': time.time(),
}
# keep boot_state as list to preserve history
dm.add('boot_state', new_state)
except Exception as e:
print(f"[boot_notice] failed to persist boot_state: {e}")