Initial haunt-fm implementation

Full music recommendation pipeline: listening history capture via webhook,
Last.fm candidate discovery, iTunes preview download, CLAP audio embeddings
(512-dim), pgvector cosine similarity recommendations, playlist generation
with known/new track interleaving, and Music Assistant playback via HA.

Includes: FastAPI app, SQLAlchemy models, Alembic migrations, Docker Compose
with pgvector/pg17, status dashboard, and all API endpoints.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 08:36:36 -06:00
parent 897d0fe1fb
commit 7ff69449d6
39 changed files with 2049 additions and 0 deletions

View File

59
src/haunt_fm/api/admin.py Normal file
View File

@@ -0,0 +1,59 @@
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
from haunt_fm.models.track import ListenEvent, Track
from haunt_fm.services.lastfm_client import discover_similar_for_track
from haunt_fm.services.taste_profile import build_taste_profile
# Router for the admin endpoints; routes declared on it get the /api/admin prefix.
router = APIRouter(prefix="/api/admin")
class DiscoverRequest(BaseModel):
    """Request body for ``POST /api/admin/discover``."""

    # Upper bound on how many tracks from the listening history get expanded.
    limit: int = 50
@router.post("/discover")
async def discover(req: DiscoverRequest, session: AsyncSession = Depends(get_session)):
    """Expand listening history via Last.fm track.getSimilar.

    Selects the ``req.limit`` tracks with the most listen events and calls
    ``discover_similar_for_track`` for each of them in turn. A failure on one
    track is recorded and does not stop the remaining tracks from being
    processed.

    Returns:
        A dict with ``tracks_expanded`` (number of tracks selected),
        ``candidates_discovered`` (sum of the counts returned per track) and
        ``errors`` (one ``{"track", "error"}`` entry per failed track).
    """
    # Tracks ranked by listen-event count, most played first.
    # NOTE(review): the query does not filter out tracks that were already
    # expanded on a previous run - confirm whether that is intended.
    most_played_query = (
        select(Track)
        .join(ListenEvent, ListenEvent.track_id == Track.id)
        .group_by(Track.id)
        .order_by(func.count(ListenEvent.id).desc())
        .limit(req.limit)
    )
    query_result = await session.execute(most_played_query)
    seed_tracks = query_result.scalars().all()

    candidates_found = 0
    failures = []
    for seed in seed_tracks:
        try:
            found = await discover_similar_for_track(session, seed)
            candidates_found += found
        except Exception as exc:
            # Best effort: report the failure to the caller and keep going.
            failures.append(
                {"track": f"{seed.artist} - {seed.title}", "error": str(exc)}
            )

    return {
        "tracks_expanded": len(seed_tracks),
        "candidates_discovered": candidates_found,
        "errors": failures,
    }
@router.post("/build-taste-profile")
async def build_profile(session: AsyncSession = Depends(get_session)):
    """Rebuild the taste profile from listened-track embeddings.

    Returns:
        ``{"ok": True, "track_count", "updated_at"}`` when
        ``build_taste_profile`` yields a profile, otherwise
        ``{"ok": False, "error": ...}``.
    """
    rebuilt = await build_taste_profile(session)
    if rebuilt is not None:
        return {
            "ok": True,
            "track_count": rebuilt.track_count,
            "updated_at": rebuilt.updated_at.isoformat(),
        }
    # build_taste_profile returned None: nothing to build a profile from.
    return {"ok": False, "error": "No listened tracks with embeddings found"}

View File

@@ -0,0 +1,19 @@
from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
# Router for the health-check endpoint; declared without a path prefix.
router = APIRouter()
@router.get("/health")
async def health(session: AsyncSession = Depends(get_session)):
    """Report whether the database answers a trivial query.

    Returns:
        ``{"status": "healthy" | "degraded", "db_connected": bool}``. A failed
        probe is reported in the body rather than raised.
    """
    reachable = True
    try:
        # Cheapest possible round trip to the database.
        await session.execute(text("SELECT 1"))
    except Exception:
        reachable = False

    return {
        "status": "healthy" if reachable else "degraded",
        "db_connected": reachable,
    }

View File

@@ -0,0 +1,39 @@
from datetime import datetime, timezone
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
from haunt_fm.services.history_ingest import ingest_listen_event
# Router for listening-history ingestion; routes declared on it get the /api/history prefix.
router = APIRouter(prefix="/api/history")
class WebhookPayload(BaseModel):
    """Body accepted by ``POST /api/history/webhook`` describing one listen."""

    # Required identification of the track.
    title: str
    artist: str

    # Optional details; each is forwarded unchanged to ingest_listen_event.
    album: str | None = None
    speaker_name: str | None = None
    duration_played: int | None = None  # NOTE(review): unit not visible here - presumably seconds, confirm

    # Origin of the event; defaults to Music Assistant.
    source: str = "music_assistant"

    # When the listen happened; the endpoint fills in the current UTC time if omitted.
    listened_at: datetime | None = None
@router.post("/webhook")
async def receive_webhook(payload: WebhookPayload, session: AsyncSession = Depends(get_session)):
    """Record one listen event posted to the webhook.

    When the payload carries no ``listened_at``, the current UTC time is
    written onto the payload before it is stored, so the JSON snapshot saved
    as ``raw_payload`` contains the stamped value as well.

    Returns:
        ``{"ok": True, "track_id", "event_id"}`` taken from the event returned
        by ``ingest_listen_event``.
    """
    if payload.listened_at is None:
        payload.listened_at = datetime.now(timezone.utc)

    # Snapshot taken after the timestamp default has been applied.
    snapshot = payload.model_dump(mode="json")

    recorded = await ingest_listen_event(
        session=session,
        title=payload.title,
        artist=payload.artist,
        album=payload.album,
        speaker_name=payload.speaker_name,
        duration_played=payload.duration_played,
        source=payload.source,
        listened_at=payload.listened_at,
        raw_payload=snapshot,
    )
    return {"ok": True, "track_id": recorded.track_id, "event_id": recorded.id}

View File

@@ -0,0 +1,63 @@
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
from haunt_fm.models.track import PlaylistTrack, Track
from haunt_fm.services.music_assistant import play_playlist_on_speaker
from haunt_fm.services.playlist_generator import generate_playlist
# Router for playlist endpoints; routes declared on it get the /api/playlists prefix.
router = APIRouter(prefix="/api/playlists")
class GenerateRequest(BaseModel):
    """Request body for ``POST /api/playlists/generate``."""

    # Forwarded to generate_playlist as ``total_tracks``.
    total_tracks: int = 20

    # Forwarded to generate_playlist as ``known_pct``.
    # NOTE(review): presumably the percentage of already-known tracks - confirm.
    known_pct: int = 30

    # Optional playlist name, forwarded to generate_playlist.
    name: str | None = None

    # Speaker to play on; playback is only attempted when this is set and auto_play is true.
    speaker_entity: str | None = None

    # Ask for the generated playlist to be played right away.
    auto_play: bool = False
@router.post("/generate")
async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_session)):
    """Generate a playlist, return its tracks and optionally start playback.

    The playlist is created by ``generate_playlist``; its entries are then
    loaded together with their track rows, ordered by position. Playback is
    started only when ``auto_play`` is true and ``speaker_entity`` is a
    non-empty value.

    Returns:
        A dict with ``playlist_id``, ``name``, ``total_tracks``, ``known_pct``,
        ``tracks`` (list of per-track dicts) and ``auto_played`` (whether
        playback was actually requested on a speaker).
    """
    playlist = await generate_playlist(
        session,
        total_tracks=req.total_tracks,
        known_pct=req.known_pct,
        name=req.name,
    )

    # Load playlist entries joined with their track rows, in playlist order.
    result = await session.execute(
        select(PlaylistTrack, Track)
        .join(Track, PlaylistTrack.track_id == Track.id)
        .where(PlaylistTrack.playlist_id == playlist.id)
        .order_by(PlaylistTrack.position)
    )
    track_list = [
        {
            "position": pt.position,
            "artist": t.artist,
            "title": t.title,
            "album": t.album,
            "is_known": pt.is_known,
            "similarity_score": pt.similarity_score,
        }
        for pt, t in result.all()
    ]

    # Decide once whether to play, and reuse that decision for the response.
    # Previously playback was gated on truthiness while the response used
    # ``is not None``, so an empty speaker_entity reported auto_played=True
    # even though nothing had been played.
    should_play = bool(req.auto_play and req.speaker_entity)
    if should_play:
        await play_playlist_on_speaker(track_list, req.speaker_entity)

    return {
        "playlist_id": playlist.id,
        "name": playlist.name,
        "total_tracks": playlist.total_tracks,
        "known_pct": playlist.known_pct,
        "tracks": track_list,
        "auto_played": should_play,
    }

View File

@@ -0,0 +1,19 @@
from fastapi import APIRouter, Depends, Query
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
from haunt_fm.services.recommender import get_recommendations
# Router for the recommendations endpoint; routes declared on it get the /api prefix.
router = APIRouter(prefix="/api")
@router.get("/recommendations")
async def recommendations(
    limit: int = Query(default=50, ge=1, le=200),
    include_known: bool = Query(default=False),
    session: AsyncSession = Depends(get_session),
):
    """Return recommendations produced by ``get_recommendations``.

    Args:
        limit: Forwarded as ``limit``; validated to lie between 1 and 200.
        include_known: When false (the default), ``exclude_known=True`` is
            passed to the recommender.
        session: Database session injected by FastAPI.

    Returns:
        ``{"recommendations": [...], "count": <length of that list>}``.
    """
    # The query parameter is phrased positively, the recommender negatively.
    exclude_known = not include_known
    items = await get_recommendations(
        session, limit=limit, exclude_known=exclude_known
    )
    return {"recommendations": items, "count": len(items)}

113
src/haunt_fm/api/status.py Normal file
View File

@@ -0,0 +1,113 @@
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.db import get_session
from haunt_fm.models.track import (
ListenEvent,
Playlist,
TasteProfile,
Track,
TrackEmbedding,
)
from haunt_fm.config import settings
from haunt_fm.services.embedding import is_model_loaded
from haunt_fm.services.embedding_worker import is_running as is_worker_running
from haunt_fm.services.embedding_worker import last_processed as worker_last_processed
# Router for the JSON status endpoint; routes declared on it get the /api prefix.
router = APIRouter(prefix="/api")
@router.get("/status")
async def status(session: AsyncSession = Depends(get_session)):
    """Report database health, pipeline counters and dependency configuration.

    The database is probed first; if the probe fails, a minimal
    ``{"healthy": False, "db_connected": False}`` response is returned and no
    further queries are attempted.

    Returns:
        A dict with ``healthy``, ``db_connected``, ``clap_model_loaded``, a
        ``pipeline`` section (listen events, tracks, embeddings, taste
        profile, playlists) and a ``dependencies`` section.
    """

    async def _scalar(stmt):
        """Execute *stmt* and return its scalar result (may be ``None``)."""
        return (await session.execute(stmt)).scalar()

    async def _count(stmt):
        """Execute a counting *stmt*, mapping a missing result to 0."""
        return (await _scalar(stmt)) or 0

    # DB connectivity
    try:
        await session.execute(text("SELECT 1"))
    except Exception:
        return {"healthy": False, "db_connected": False}
    db_connected = True

    day_ago = datetime.now(timezone.utc) - timedelta(days=1)

    # Listen events
    total_events = await _count(select(func.count(ListenEvent.id)))
    events_24h = await _count(
        select(func.count(ListenEvent.id)).where(ListenEvent.listened_at >= day_ago)
    )
    latest_event = await _scalar(select(func.max(ListenEvent.listened_at)))

    # Tracks: every track without a listen event is counted as discovered.
    total_tracks = await _count(select(func.count(Track.id)))
    from_history = await _count(
        select(func.count(func.distinct(ListenEvent.track_id)))
    )
    from_discovery = total_tracks - from_history

    # Embeddings: one count per embedding_status value, in response order.
    embedding_counts = {}
    for state in ("done", "pending", "failed", "no_preview"):
        embedding_counts[state] = await _count(
            select(func.count(Track.id)).where(Track.embedding_status == state)
        )

    # Taste profile
    taste = await _scalar(select(TasteProfile).where(TasteProfile.name == "default"))

    # Playlists
    total_playlists = await _count(select(func.count(Playlist.id)))
    last_playlist = await _scalar(select(func.max(Playlist.created_at)))

    # Read the worker timestamp exactly once: calling the accessor separately
    # for the check and for the formatting could observe two different values.
    last_processed_at = worker_last_processed()

    return {
        "healthy": db_connected,
        "db_connected": db_connected,
        "clap_model_loaded": is_model_loaded(),
        "pipeline": {
            "listen_events": {
                "total": total_events,
                "last_24h": events_24h,
                "latest": latest_event.isoformat() if latest_event else None,
            },
            "tracks": {
                "total": total_tracks,
                "from_history": from_history,
                "from_discovery": from_discovery,
            },
            "embeddings": {
                **embedding_counts,
                "worker_running": is_worker_running(),
                "worker_last_processed": (
                    last_processed_at.isoformat() if last_processed_at else None
                ),
            },
            "taste_profile": {
                "exists": taste is not None,
                "track_count": taste.track_count if taste else 0,
                "updated_at": taste.updated_at.isoformat() if taste else None,
            },
            "playlists": {
                "total_generated": total_playlists,
                "last_generated": last_playlist.isoformat() if last_playlist else None,
            },
        },
        "dependencies": {
            "lastfm_api": "configured" if settings.lastfm_api_key else "not_configured",
            "itunes_api": "ok",  # no auth needed
            # Both flags only reflect that an HA token is configured; no
            # network reachability check is performed here.
            "ha_reachable": bool(settings.ha_token),
            "music_assistant_reachable": bool(settings.ha_token),
        },
    }

View File

@@ -0,0 +1,23 @@
from datetime import datetime, timezone
from pathlib import Path
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from jinja2 import Environment, FileSystemLoader
from sqlalchemy.ext.asyncio import AsyncSession
from haunt_fm.api.status import status as get_status_data
from haunt_fm.db import get_session
# Router for the HTML status page; declared without a path prefix.
router = APIRouter()
# Templates are read from the "templates" directory that sits beside this
# module's parent directory.
_template_dir = Path(__file__).parent.parent / "templates"
# A single shared Jinja environment with autoescaping enabled.
_jinja_env = Environment(loader=FileSystemLoader(str(_template_dir)), autoescape=True)
@router.get("/", response_class=HTMLResponse)
async def status_page(request: Request, session: AsyncSession = Depends(get_session)):
    """Render the status dashboard as HTML.

    Reuses the JSON status endpoint's handler to collect the data, then
    renders ``status.html`` with that data and the current UTC time.
    """
    status_data = await get_status_data(session)
    page_template = _jinja_env.get_template("status.html")
    rendered_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    page = page_template.render(data=status_data, now=rendered_at)
    return HTMLResponse(page)