autotako

Service to monitor moombox for completed livestream downloads to upload for distribution
git clone https://code.alwayswait.ing/autotako.git
Log | Files | Refs

commit 912eb461ef29ce4bb781a429752c3f52fd7ae2b9
parent 13a5e2a6c440079de909e07e2574bb49e06b4b8b
Author: archiveanon <>
Date:   Sat, 25 Jan 2025 10:33:57 +0000

Implement stream topic extraction

Diffstat:
M .justfile | 7 ++++---
M src/autotako/job_render.py | 20 ++++++++++++++++++++
A tests/test_job_render.py | 38 ++++++++++++++++++++++++++++++++++++++
3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/.justfile b/.justfile
@@ -1,7 +1,8 @@
 test:
-    ruff check src
+    ruff check src tests
     mypy -p src
+    pytest tests
 
 format:
-    ruff check src --select I001 --fix
-    ruff format src
+    ruff check src tests --select I001 --fix
+    ruff format src tests
diff --git a/src/autotako/job_render.py b/src/autotako/job_render.py
@@ -5,6 +5,7 @@ import datetime
 import enum
 import itertools
 import pathlib
+import re
 import traceback
 from typing import Self
 
@@ -25,6 +26,13 @@ background_tasks = set()
 render_tasks: dict[str, asyncio.Task] = {}
 upload_tasks: dict[pathlib.Path, asyncio.Task] = {}
 
+# https://www.japanesewithanime.com/2017/05/quotation-marks-japanese.html#sumitsukikakko
+# strip extraneous spaces and hashtag
+_pattern_bracketed_kw = re.compile(r"^【[\s#]*(?P<topic>.*?)[\s]?】")
+
+# bae prefers the double angle brackets because she's quirky like that
+_pattern_angled_bracketed_kw = re.compile(r"^≪[\s#]*(?P<topic>.*?)[\s]?≫")
+
 
 class JobUploadCriteria(enum.StrEnum):
     MANUALLY = "manual"
@@ -88,6 +96,16 @@ class MoomboxJobInfo(msgspec.Struct, kw_only=True, frozen=True):
         return set(map(pathlib.Path, self.output_paths or []))
 
 
+def _extract_name_from_stream_title(title: str) -> str | None:
+    """
+    Attempts to extract a preset name from the stream title.
+    """
+    match = _pattern_bracketed_kw.search(title) or _pattern_angled_bracketed_kw.search(title)
+    if match:
+        return str(match["topic"]).strip()
+    return None
+
+
 class JobConfig(msgspec.Struct):
     name: str
     upload: JobUploadCriteria
@@ -101,6 +119,8 @@ class JobConfig(msgspec.Struct):
         if not job.title:
             return cls(name, upload)
 
+        name = _extract_name_from_stream_title(job.title) or name
+
         # mapping of phrases and any possible synonyms
         # if a synonym isn't present then an empty set should be present
         phrases = {
diff --git a/tests/test_job_render.py b/tests/test_job_render.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python3
+
+import pytest
+from autotako.job_render import _extract_name_from_stream_title
+
+
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        (
+            # extract topic from brackets
+            "【100% Orange Juice】game for good friends!! #calliolive",
+            "100% Orange Juice",
+        ),
+        (
+            # extract topic from brackets
+            "【ART & YAP】 So a cat and an owl walk into a room… and start drawing! 😼🦉w/ @NanashiMumei",
+            "ART & YAP",
+        ),
+        (
+            # ensure search for closing bracket is non-greedy
+            "【HuniePop 2】OUT TO SAVE THE WORLD (WITH RIZZ)【Pavolia Reine/hololiveID 2nd gen】",
+            "HuniePop 2",
+        ),
+        (
+            # strip hashtag from topic
+            "【 #ねっこよ24 】後半戦一発目Start✨マシュマロ読んでく!【 桃鈴ねね / 博衣こより / ホロライブ 】",
+            "ねっこよ24",
+        ),
+        (
+            # strip hashtag from topic; angle brackets
+            "≪#holocountdown2024 - OFFCOLLAB WATCHALONG≫ Starting 2025 THE RIGHT WAY w/ @IRyS",
+            "holocountdown2024 - OFFCOLLAB WATCHALONG",
+        ),
+    ],
+)
+def test_extract(input: str, expected: str | None):
+    assert _extract_name_from_stream_title(input) == expected