0.3.9.5.a2

Fixed issues relating to RSS feed not giving enough details
This commit is contained in:
Franz Rolfsvaag 2025-08-11 01:18:01 +02:00
parent cd11e3106f
commit 2a898802b6
2 changed files with 266 additions and 211 deletions

2
bot.py
View File

@ -9,7 +9,7 @@ from modules.common.boot_notice import post_boot_notice
# Version consists of: # Version consists of:
# Major.Enhancement.Minor.Patch.Test (Test is alphanumeric; doesn't trigger auto update) # Major.Enhancement.Minor.Patch.Test (Test is alphanumeric; doesn't trigger auto update)
VERSION = "0.3.9.5.a1" VERSION = "0.3.9.5.a2"
# ---------- Env loading ---------- # ---------- Env loading ----------

View File

@ -1,177 +1,32 @@
# modules/common/boot_notice.py # modules/common/boot_notice.py
import os import os
import re import re
import html import base64
import json import json
import time
from datetime import datetime, timezone, timedelta
from urllib.parse import urlparse
import discord import discord
import aiohttp import aiohttp
import xml.etree.ElementTree as ET
from urllib.parse import urlparse
from modules.common.settings import cfg from modules.common.settings import cfg
# ---------------- Utilities ---------------- # ---------------- Version helpers ----------------
def _strip_html_keep_text(s: str) -> str: _VERSION_RE = re.compile(r'\b\d+\.\d+\.\d+\.\d+(?:\.[A-Za-z0-9]+)?\b')
"""Remove HTML tags, unescape entities, collapse excessive blank lines."""
if not s:
return ""
s = re.sub(r'(?i)<\s*br\s*/?\s*>', '\n', s)
s = re.sub(r'(?i)</\s*p\s*>', '\n', s)
s = re.sub(r'(?i)<\s*p\s*>', '', s)
s = re.sub(r'<[^>]+>', '', s)
s = html.unescape(s)
s = '\n'.join(line.rstrip() for line in s.splitlines())
s = re.sub(r'\n{3,}', '\n\n', s).strip()
return s
def _build_status_line(status: str, old_v: str, new_v: str, desc: str) -> str | None: def _extract_version(subject: str) -> str | None:
status = (status or "").strip() if not subject:
old_v = (old_v or "").strip()
new_v = (new_v or "").strip()
if status == "fetched_new":
line = f"✅ Booted new version: v{old_v or '0.0.0.0'} → **v{new_v}**"
elif status == "cached_no_update":
line = f"🟢 Booted cached version: **v{new_v}** — no new update found"
elif status == "cache_only_error":
line = f"🟡 Booted cached version: **v{new_v}** — repository not accessible"
elif status == "scheduled_restart":
line = "🕒 Scheduled restart executed"
else:
return None return None
return f"{line}\n_{desc.strip()}_" if desc else line m = _VERSION_RE.search(subject)
return m.group(0) if m else None
def _only_version_and_details(subject: str | None, body: str | None) -> str | None: def _split_subject_body(full_message: str) -> tuple[str | None, str | None]:
"""Format to 'Version number' (bold) + 'Version details' (md)."""
if not subject and not body:
return None
version = None
if subject:
m = re.search(r'\bv?(\d+\.\d+(?:\.\d+){0,2})\b', subject)
version = m.group(0) if m else subject.strip()
if version and body:
return f"**{version}**\n{body.strip()}"
if version:
return f"**{version}**"
return body.strip() if body else None
# ---------------- RSS helpers ----------------
async def _fetch_latest_rss_item(rss_url: str):
    """
    Download the feed at ``rss_url`` and describe its first ``<item>``.

    Returns a ``(title, body, link)`` tuple whose elements are each a string
    or None. Every failure mode (non-200 response, no ``<item>`` under
    ``./channel``, or any exception while fetching/parsing) produces
    ``(None, None, None)``.

    When the title contains the word "pushed", "commit" or "committed" it is
    treated as noise and replaced by the first line of the cleaned
    description, if there is one.
    """
    nothing = (None, None, None)
    try:
        client_timeout = aiohttp.ClientTimeout(total=8)
        async with aiohttp.ClientSession(timeout=client_timeout) as session:
            async with session.get(rss_url) as response:
                if response.status != 200:
                    return nothing
                xml_text = await response.text()

        newest = ET.fromstring(xml_text).find('./channel/item')
        if newest is None:
            return nothing

        def field(tag: str) -> str:
            # Missing elements read as an empty string.
            return (newest.findtext(tag) or '').strip()

        title = field('title')
        body = _strip_html_keep_text(field('description')) or None
        link = field('link') or None

        # Noise titles ("pushed", etc.) give way to the body's first line.
        looks_noisy = bool(title) and re.search(
            r'\b(pushed|commit|committed)\b', title, re.I) is not None
        if looks_noisy and body:
            lead_line = body.splitlines()[0].strip()
            if lead_line:
                title = lead_line

        return (_strip_html_keep_text(title) or None), body, link
    except Exception:
        return nothing
def _parse_gitea_link_for_api(link: str):
"""
From a Gitea commit link like:
https://git.example.com/owner/repo/commit/abcdef...
derive:
api_base: https://git.example.com/api/v1
owner: owner
repo: repo
sha: abcdef...
"""
try:
pr = urlparse(link)
parts = [p for p in pr.path.split('/') if p]
# Expect: [owner, repo, 'commit', sha]
if len(parts) >= 4 and parts[2] == 'commit':
owner, repo, sha = parts[0], parts[1], parts[3]
api_base = f"{pr.scheme}://{pr.netloc}/api/v1"
return api_base, owner, repo, sha
except Exception:
pass
return None, None, None, None
async def _fetch_gitea_commit_message(commit_link: str, token: str | None):
    """
    Retrieve the full commit message for ``commit_link`` via the Gitea API.

    Two endpoints are tried in order, and the first non-empty message wins:
      1) /api/v1/repos/{owner}/{repo}/git/commits/{sha}
         (top-level "message" first, then "commit"."message")
      2) /api/v1/repos/{owner}/{repo}/commits/{sha}
         ("commit"."message" first, then top-level "message")

    Returns the message as a str, or None when the link cannot be parsed or
    neither endpoint yields a message. Errors on an individual request are
    swallowed so the next endpoint still gets its turn.
    """
    api_base, owner, repo, sha = _parse_gitea_link_for_api(commit_link)
    if not (api_base and owner and repo and sha):
        return None

    auth_headers = {'Authorization': f'token {token}'} if token else {}
    repo_api = f"{api_base}/repos/{owner}/{repo}"
    # (url, whether the top-level "message" key is consulted first)
    attempts = (
        (f"{repo_api}/git/commits/{sha}", True),
        (f"{repo_api}/commits/{sha}", False),
    )

    client_timeout = aiohttp.ClientTimeout(total=8)
    async with aiohttp.ClientSession(timeout=client_timeout, headers=auth_headers) as session:
        for url, top_level_first in attempts:
            try:
                async with session.get(url) as response:
                    if response.status != 200:
                        continue
                    payload = await response.json()
                    if not isinstance(payload, dict):
                        continue
                    if top_level_first:
                        message = payload.get('message')
                        if not message:
                            message = (payload.get('commit') or {}).get('message')
                    else:
                        message = ((payload.get('commit') or {}).get('message')
                                   or payload.get('message'))
                    if message:
                        return str(message)
            except Exception:
                continue
    return None
def _split_subject_body(full_message: str):
"""
Split a full commit message into (subject, body).
Subject = first non-empty line; body = rest (preserve markdown).
"""
if not full_message: if not full_message:
return None, None return None, None
lines = [ln.rstrip() for ln in full_message.splitlines()] lines = [ln.rstrip() for ln in full_message.splitlines()]
# find first non-empty line # subject = first non-empty line
subject = None subject = None
i = 0 i = 0
while i < len(lines) and subject is None: while i < len(lines) and subject is None:
@ -181,72 +36,272 @@ def _split_subject_body(full_message: str):
body = '\n'.join(lines[i:]).strip() if i < len(lines) else '' body = '\n'.join(lines[i:]).strip() if i < len(lines) else ''
return subject or None, (body or None) return subject or None, (body or None)
def _cmp_versions(a: str | None, b: str | None) -> int:
"""
Compare your version style: 1.2.3.4.a2 (last segment alnum optional).
Returns: -1 if a<b, 0 if equal/unknown, +1 if a>b.
If either is None, treat as equal (0) to avoid false rollback/upgrade.
"""
if not a or not b:
return 0
pa = a.split('.')
pb = b.split('.')
# pad to 5 parts
while len(pa) < 5: pa.append('0')
while len(pb) < 5: pb.append('0')
def part_key(x: str):
# numeric if digits; else (numeric_prefix, alpha_suffix)
if x.isdigit():
return (int(x), '', 1)
# split alnum: digits prefix (if any) + rest
m = re.match(r'(\d+)(.*)', x)
if m:
return (int(m.group(1)), m.group(2), 2)
return (0, x, 3)
for xa, xb in zip(pa, pb):
ka, kb = part_key(xa), part_key(xb)
if ka[0] != kb[0]:
return 1 if ka[0] > kb[0] else -1
if ka[2] != kb[2]:
return 1 if ka[2] < kb[2] else -1 # prefer pure numeric (1) > num+alpha (2) > alpha (3)
if ka[1] != kb[1]:
return 1 if ka[1] > kb[1] else -1
return 0
# ---------------- Gitea helpers ----------------
def _parse_repo_url(repo_url: str) -> tuple[str | None, str | None, str | None]:
"""
From https://host/owner/repo(.git) -> (api_base, owner, repo)
api_base = https://host/api/v1
"""
try:
pr = urlparse(repo_url)
parts = [p for p in pr.path.split('/') if p]
if len(parts) >= 2:
owner = parts[0]
repo = parts[1]
if repo.endswith('.git'):
repo = repo[:-4]
api_base = f"{pr.scheme}://{pr.netloc}/api/v1"
return api_base, owner, repo
except Exception:
pass
return None, None, None
async def _gitea_get_json(url: str, token: str | None, user: str | None, timeout_sec: int = 10):
    """
    GET ``url`` and return the decoded JSON body.

    Authentication header:
      * ``user`` and ``token`` both set -> HTTP Basic with ``user:token``
      * only ``token`` set              -> ``Authorization: token <token>``
      * otherwise                       -> no Authorization header

    ``timeout_sec`` is the total timeout for the client session.

    Raises RuntimeError for any non-200 response; the message carries the
    URL, the status code and the first 200 characters of the response text.
    """
    if token and user:
        # Basic auth with user:token
        encoded = base64.b64encode(f"{user}:{token}".encode()).decode()
        auth_headers = {'Authorization': f"Basic {encoded}"}
    elif token:
        auth_headers = {'Authorization': f"token {token}"}
    else:
        auth_headers = {}

    client_timeout = aiohttp.ClientTimeout(total=timeout_sec)
    async with aiohttp.ClientSession(timeout=client_timeout, headers=auth_headers) as session:
        async with session.get(url) as response:
            if response.status == 200:
                return await response.json()
            text = await response.text()
            raise RuntimeError(f"Gitea GET {url} -> {response.status}: {text[:200]}")
async def _fetch_latest_commit(api_base: str, owner: str, repo: str, branch: str,
                               token: str | None, user: str | None) -> tuple[str | None, str | None, str | None]:
    """
    Return ``(sha, subject, body)`` for the latest commit on ``branch``.

    Step 1 resolves the branch head sha:
      * fast path: ``GET {api_base}/repos/{owner}/{repo}/branches/{branch}``
        and read ``commit.id`` (or ``commit.sha``);
      * fallback: list commits starting at the branch via
        ``GET .../commits?sha={branch}&limit=1`` and take the first entry.

    Step 2 fetches the full commit message, trying ``.../git/commits/{sha}``
    and then ``.../commits/{sha}``. The first response that carries a message
    is split with ``_split_subject_body``.

    ``subject`` and ``body`` are returned as empty strings (never None) when
    an endpoint answered but no message was found.

    Raises:
        RuntimeError: if the sha cannot be resolved by either route, or if
            every commit-detail request fails.
    """
    # Local import: only the fallback query string needs quoting.
    from urllib.parse import quote

    repo_api = f"{api_base}/repos/{owner}/{repo}"

    # Fast path: get branch -> commit sha
    try:
        bjson = await _gitea_get_json(f"{repo_api}/branches/{branch}", token, user)
        head = bjson.get('commit') or {}
        sha = head.get('id') or head.get('sha')
        if not sha:
            raise RuntimeError("No commit sha on branch")
    except Exception as e:
        # Fallback: list commits. The list endpoint takes the branch as the
        # `sha` query parameter; it is not a path segment.
        commits_url = f"{repo_api}/commits?sha={quote(branch, safe='')}&limit=1"
        try:
            cjson = await _gitea_get_json(commits_url, token, user)
            if not (isinstance(cjson, list) and cjson):
                raise RuntimeError("Empty commits list")
            sha = cjson[0].get('sha') or cjson[0].get('id')
            if not sha:
                # Without a sha the detail lookup below would query ".../None".
                raise RuntimeError("No commit sha in commits list")
        except Exception as e2:
            raise RuntimeError(f"Failed to get latest commit: {e} / {e2}") from e2

    # Now fetch full commit message; try git/commits first.
    answered = False
    for endpoint in (f"{repo_api}/git/commits/{sha}",
                     f"{repo_api}/commits/{sha}"):
        try:
            data = await _gitea_get_json(endpoint, token, user)
        except Exception:
            # Best effort: this endpoint failed, the other may still work.
            continue
        answered = True
        msg = None
        if isinstance(data, dict):
            msg = data.get('message') or (data.get('commit') or {}).get('message')
        if msg:
            subject, body = _split_subject_body(msg)
            return sha, (subject or ""), (body or "")
        # Responded without a message: give the next endpoint a chance.

    if answered:
        # The sha is known even though no message could be read.
        return sha, "", ""
    raise RuntimeError("Unable to fetch commit details")
# ---------------- Boot reason inference ----------------
def _is_near_scheduled(now_utc: datetime, hhmm_utc: str | None, window_min: int = 5) -> bool:
if not hhmm_utc:
return False
try:
hh, mm = [int(x) for x in hhmm_utc.strip().split(':', 1)]
except Exception:
return False
sched = now_utc.replace(hour=hh, minute=mm, second=0, microsecond=0)
delta = abs((now_utc - sched).total_seconds())
return delta <= window_min * 60
def _format_status_line(kind: str, old_ver: str | None, new_ver: str | None) -> str:
if kind == "updated":
return f"✅ Updated from **{old_ver or 'unknown'}** → **{new_ver or 'unknown'}**"
if kind == "scheduled":
return "🕒 Scheduled restart executed"
if kind == "manual":
return "🟢 Manual restart detected"
if kind == "rollback":
return f"⚠️ Version rollback detected: **{old_ver or 'unknown'}** → **{new_ver or 'unknown'}**"
return "🟢 Bot started"
# ---------------- Main entry ---------------- # ---------------- Main entry ----------------
async def post_boot_notice(bot): async def post_boot_notice(bot):
""" """
Posts concise boot status to the modlog channel. Always post a boot status to the modlog channel.
- Always: one status line (if SHAI_BOOT_STATUS is set to a known value). Logic:
- If SHAI_BOOT_STATUS == 'fetched_new': fetch **full commit message**. - Wait until bot is ready (guilds/channels cached).
Prefer Gitea API (requires only the commit link from RSS). - Resolve repo from cfg(repo_url/repo_branch); attempt to fetch latest commit (sha, subject, body).
Fallback to RSS subject/description if API fails. - Compare to stored boot_state (last_sha/last_version/last_boot_ts):
Then post ONLY: Version number (bold) + markdown details. * sha/version advanced -> Updated
* sha same and near scheduled time -> Scheduled restart
* sha same and not near schedule -> Manual restart
* version decreased -> Rollback (ping guild owner)
- Post status line.
- Post commit message (bold version + md body).
- Persist new boot_state.
""" """
status = os.getenv("SHAI_BOOT_STATUS", "").strip() # fetched_new | cached_no_update | cache_only_error | scheduled_restart | '' try:
desc = os.getenv("SHAI_BOOT_DESC", "").strip() await bot.wait_until_ready()
old_v = os.getenv("SHAI_BOOT_OLD", "").strip() except Exception as e:
new_v = os.getenv("SHAI_BOOT_NEW", "").strip() print(f"[boot_notice] wait_until_ready failed: {e}")
rss = os.getenv("SHAI_REPO_RSS", "").strip()
token = os.getenv("SHAI_GITEA_TOKEN", "").strip() or None # optional
status_line = _build_status_line(status, old_v, new_v, desc)
if not status_line:
return # nothing to say
# Resolve modlog channel # Resolve modlog channel
modlog_channel_id = cfg(bot).int('modlog_channel_id', 0) modlog_channel_id = cfg(bot).int('modlog_channel_id', 0)
if not modlog_channel_id: if not modlog_channel_id:
print("[boot_notice] modlog_channel_id not configured; skipping.")
return return
ch = None
for g in bot.guilds: ch = bot.get_channel(modlog_channel_id)
ch = g.get_channel(modlog_channel_id)
if ch:
break
if not ch: if not ch:
# fallback: search across guilds
for g in bot.guilds:
ch = g.get_channel(modlog_channel_id)
if ch:
break
if not ch:
print(f"[boot_notice] channel id {modlog_channel_id} not found; skipping.")
return return
# 1) Post the status line # Repo info
r = cfg(bot)
repo_url = r.get('repo_url', '')
branch = r.get('repo_branch', 'main')
api_base = owner = repo = None
if repo_url:
api_base, owner, repo = _parse_repo_url(repo_url)
token = os.getenv("SHAI_GITEA_TOKEN", "").strip() or None
user = os.getenv("SHAI_GITEA_USER", "").strip() or None
check_time_utc = r.get('check_time_utc', '') # e.g., "03:00"
now_utc = datetime.now(timezone.utc)
# State store
dm = getattr(bot, "data_manager", None)
if not dm:
print("[boot_notice] data_manager missing on bot; cannot persist state.")
return
prev = (dm.get('boot_state') or [{}])[-1] if dm.get('boot_state') else {}
prev_sha = prev.get('last_sha') or None
prev_ver = prev.get('last_version') or None
# Fetch latest commit (sha, subject, body)
sha = subject = body = None
if api_base and owner and repo:
try:
sha, subject, body = await _fetch_latest_commit(api_base, owner, repo, branch, token, user)
except Exception as e:
print(f"[boot_notice] fetch latest commit failed: {e}")
# Derive current version (from subject)
curr_ver = _extract_version(subject) if subject else None
# Decide reason
reason = "manual"
mention_owner = False
if prev_ver and curr_ver:
cmpv = _cmp_versions(prev_ver, curr_ver)
if cmpv < 0:
reason = "updated"
elif cmpv > 0:
reason = "rollback"
mention_owner = True
else: # same version
reason = "scheduled" if _is_near_scheduled(now_utc, check_time_utc) else "manual"
else:
# Fall back to sha compare if versions missing
if prev_sha and sha and prev_sha != sha:
reason = "updated"
else:
reason = "scheduled" if _is_near_scheduled(now_utc, check_time_utc) else "manual"
# Post status line
status_line = _format_status_line(reason, prev_ver, curr_ver)
try: try:
await ch.send(status_line, allowed_mentions=discord.AllowedMentions.none()) # ping owner only on rollback
except Exception: allowed = discord.AllowedMentions(everyone=False, users=True if mention_owner else False, roles=False, replied_user=False)
if mention_owner and ch.guild and ch.guild.owner_id:
status_line = f"{status_line}\n<@{ch.guild.owner_id}>"
await ch.send(status_line, allowed_mentions=allowed)
except Exception as e:
print(f"[boot_notice] failed to send status line: {e}")
return return
# 2) If updated, post ONLY the commit message (version + details) # Post commit message (if we have it)
if status == "fetched_new" and rss: # Format: **Version**\n<md body>
subj, body, link = await _fetch_latest_rss_item(rss) try:
title = curr_ver or (subject or "Latest commit")
# Try to get the full commit message from Gitea API using the link's SHA if title or body:
full_msg = None # Always post a commit message on start; its the core “whats running now”
if link: if body:
full_msg = await _fetch_gitea_commit_message(link, token) commit_msg = f"**{title}**\n{body}"
# If API failed, fall back to what RSS gave us
if not full_msg:
# if RSS body is a single-line/empty, we only have subject anyway
if body and subj and body.startswith(subj):
# avoid duplicate subject at start
body_trimmed = body[len(subj):].lstrip()
full_msg = f"{subj}\n{body_trimmed}".strip() if body_trimmed else subj
else: else:
combined = "\n".join(x for x in [subj or "", body or ""] if x).strip() commit_msg = f"**{title}**"
full_msg = combined or (subj or body or "") await ch.send(commit_msg, allowed_mentions=discord.AllowedMentions.none())
except Exception as e:
print(f"[boot_notice] failed to send commit message: {e}")
# Split into subject/body and format as Version + Details # Persist new state
c_subject, c_body = _split_subject_body(full_msg or "") try:
commit_msg = _only_version_and_details(c_subject, c_body) new_state = {
'last_sha': sha,
if commit_msg: 'last_version': curr_ver,
try: 'last_subject': subject,
await ch.send(commit_msg, allowed_mentions=discord.AllowedMentions.none()) 'last_boot_ts': time.time(),
except Exception: }
pass # keep boot_state as list to preserve history
dm.add('boot_state', new_state)
except Exception as e:
print(f"[boot_notice] failed to persist boot_state: {e}")