commit 912eb461ef29ce4bb781a429752c3f52fd7ae2b9
parent 13a5e2a6c440079de909e07e2574bb49e06b4b8b
Author: archiveanon <>
Date: Sat, 25 Jan 2025 10:33:57 +0000
Implement stream topic extraction
Diffstat:
3 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/.justfile b/.justfile
@@ -1,7 +1,8 @@
test:
- ruff check src
+ ruff check src tests
mypy -p src
+ pytest tests
format:
- ruff check src --select I001 --fix
- ruff format src
+ ruff check src tests --select I001 --fix
+ ruff format src tests
diff --git a/src/autotako/job_render.py b/src/autotako/job_render.py
@@ -5,6 +5,7 @@ import datetime
import enum
import itertools
import pathlib
+import re
import traceback
from typing import Self
@@ -25,6 +26,13 @@ background_tasks = set()
render_tasks: dict[str, asyncio.Task] = {}
upload_tasks: dict[pathlib.Path, asyncio.Task] = {}
+# https://www.japanesewithanime.com/2017/05/quotation-marks-japanese.html#sumitsukikakko
+# title-leading 【topic】: skips whitespace / '#' after the opener and one whitespace before the closer
+_pattern_bracketed_kw = re.compile(r"^【[\s#]*(?P<topic>.*?)[\s]?】")
+
+# same shape using ≪topic≫; bae prefers the double angle brackets in her stream titles
+_pattern_angled_bracketed_kw = re.compile(r"^≪[\s#]*(?P<topic>.*?)[\s]?≫")
+
class JobUploadCriteria(enum.StrEnum):
MANUALLY = "manual"
@@ -88,6 +96,16 @@ class MoomboxJobInfo(msgspec.Struct, kw_only=True, frozen=True):
return set(map(pathlib.Path, self.output_paths or []))
+def _extract_name_from_stream_title(title: str) -> str | None:
+    """
+    Attempts to extract a preset name from the stream title.
+
+    The name is the text captured as ``topic`` by the leading 【...】 pattern,
+    falling back to the leading ≪...≫ pattern, with surrounding whitespace
+    removed.
+
+    Returns the extracted name, or ``None`` when neither pattern matches or
+    the brackets contain no usable text.
+    """
+    match = _pattern_bracketed_kw.search(title) or _pattern_angled_bracketed_kw.search(title)
+    if match is None:
+        return None
+    # empty brackets (e.g. "【】" or "【 # 】") carry no name; report that as
+    # "nothing found" rather than handing callers an empty string
+    return str(match["topic"]).strip() or None
+
+
class JobConfig(msgspec.Struct):
name: str
upload: JobUploadCriteria
@@ -101,6 +119,8 @@ class JobConfig(msgspec.Struct):
if not job.title:
return cls(name, upload)
+ name = _extract_name_from_stream_title(job.title) or name
+
# mapping of phrases and any possible synonyms
# if a synonym isn't present then an empty set should be present
phrases = {
diff --git a/tests/test_job_render.py b/tests/test_job_render.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python3
+
+import pytest
+from autotako.job_render import _extract_name_from_stream_title
+
+
+@pytest.mark.parametrize(
+    "title, expected",
+    [
+        (
+            # extract topic from brackets
+            "【100% Orange Juice】game for good friends!! #calliolive",
+            "100% Orange Juice",
+        ),
+        (
+            # extract topic containing punctuation, followed by a space
+            "【ART & YAP】 So a cat and an owl walk into a room… and start drawing! 😼🦉w/ @NanashiMumei",
+            "ART & YAP",
+        ),
+        (
+            # ensure search for closing bracket is non-greedy
+            "【HuniePop 2】OUT TO SAVE THE WORLD (WITH RIZZ)【Pavolia Reine/hololiveID 2nd gen】",
+            "HuniePop 2",
+        ),
+        (
+            # strip hashtag from topic
+            "【 #ねっこよ24 】後半戦一発目Start✨マシュマロ読んでく!【 桃鈴ねね / 博衣こより / ホロライブ 】",
+            "ねっこよ24",
+        ),
+        (
+            # strip hashtag from topic; angle brackets
+            "≪#holocountdown2024 - OFFCOLLAB WATCHALONG≫ Starting 2025 THE RIGHT WAY w/ @IRyS",
+            "holocountdown2024 - OFFCOLLAB WATCHALONG",
+        ),
+        (
+            # no brackets at all: nothing to extract
+            "Starting 2025 THE RIGHT WAY",
+            None,
+        ),
+        (
+            # brackets that do not open the title are ignored
+            "OUT TO SAVE THE WORLD (WITH RIZZ)【Pavolia Reine/hololiveID 2nd gen】",
+            None,
+        ),
+    ],
+)
+def test_extract(title: str, expected: str | None):
+    """Check the name extracted from each stream title against the expected value."""
+    # parameter is named `title` rather than `input` to avoid shadowing the builtin
+    assert _extract_name_from_stream_title(title) == expected