Add vibe-aware playlists with CLAP text embeddings

Blend taste profile with text-embedded mood descriptions (e.g. "chill
ambient lo-fi") using pre-blended vector search against the existing
HNSW index. New optional `vibe` and `alpha` params on playlist generate
and recommendations endpoints. Backward compatible — no vibe = pure
taste profile (alpha=1.0).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 13:14:28 -06:00
parent 23fd0e9804
commit 1b739fbd20
9 changed files with 146 additions and 12 deletions

View File

@@ -41,6 +41,16 @@ curl http://192.168.86.51:8321/api/recommendations?limit=20
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{"total_tracks":20,"known_pct":30,"speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
# Generate a vibe-based playlist (mood/activity matching)
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{"total_tracks":15,"vibe":"chill ambient lo-fi","speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
# Vibe with custom blend (alpha: 0=pure vibe, 0.5=blend, 1=pure taste)
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{"total_tracks":15,"vibe":"upbeat party music","alpha":0.3,"auto_play":true,"speaker_entity":"media_player.living_room_speaker_2"}'
```
## Environment Variables

View File

@@ -51,7 +51,7 @@ docker exec haunt-fm alembic upgrade head
| POST | `/api/history/webhook` | Log a listen event (from HA automation) |
| POST | `/api/admin/discover` | Expand listening history via Last.fm |
| POST | `/api/admin/build-taste-profile` | Rebuild taste profile from embeddings |
| GET | `/api/recommendations?limit=50` | Get ranked recommendations |
| GET | `/api/recommendations?limit=50&vibe=chill+ambient` | Get ranked recommendations (optional vibe) |
| POST | `/api/playlists/generate` | Generate and optionally play a playlist |
## Usage
@@ -69,11 +69,26 @@ curl -X POST http://192.168.86.51:8321/api/playlists/generate \
}'
```
### Generate a vibe-based playlist
```bash
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{
"total_tracks": 15,
"vibe": "chill ambient lo-fi",
"speaker_entity": "media_player.living_room_speaker_2",
"auto_play": true
}'
```
**Parameters:**
- `total_tracks` — number of tracks in the playlist (default 20)
- `known_pct` — percentage of known-liked tracks vs new discoveries (default 30)
- `speaker_entity` — Music Assistant entity ID (must be a `_2` suffix entity)
- `auto_play` — `true` to immediately play on the speaker
- `vibe` — text description of the desired mood/vibe (e.g. "chill lo-fi beats", "upbeat party music"). Uses CLAP text embeddings to match tracks in the same vector space as audio.
- `alpha` — blend factor between taste profile and vibe (default 0.5). `1.0` = pure taste profile, `0.0` = pure vibe match, `0.5` = equal blend. Ignored when no vibe is provided.
### Speaker entities
@@ -113,6 +128,9 @@ curl -X POST http://192.168.86.51:8321/api/admin/build-taste-profile
# Get recommendations (without playing)
curl "http://192.168.86.51:8321/api/recommendations?limit=20"
# Get vibe-matched recommendations
curl "http://192.168.86.51:8321/api/recommendations?limit=20&vibe=dark+electronic&alpha=0.3"
```
## Pipeline Stages

View File

@@ -0,0 +1,26 @@
"""Add vibe and alpha columns to playlists
Revision ID: 002
Revises: 001
Create Date: 2026-02-22
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers: this migration is "002" and applies on top of "001"
# (matching the "Revision ID" / "Revises" lines in the module docstring).
# No branch labels or cross-revision dependencies are declared.
revision: str = "002"
down_revision: Union[str, None] = "001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable ``vibe`` (text) and ``alpha`` (real) columns to ``playlists``."""
    new_columns = (
        sa.Column("vibe", sa.Text, nullable=True),
        sa.Column("alpha", sa.REAL, nullable=True),
    )
    # Columns are added in declaration order: vibe first, then alpha.
    for column in new_columns:
        op.add_column("playlists", column)
def downgrade() -> None:
    """Drop the ``alpha`` and ``vibe`` columns from ``playlists``."""
    # Reverse of the order used by upgrade(): alpha goes first, then vibe.
    for column_name in ("alpha", "vibe"):
        op.drop_column("playlists", column_name)

View File

@@ -1,5 +1,5 @@
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -17,15 +17,31 @@ class GenerateRequest(BaseModel):
name: str | None = None
speaker_entity: str | None = None
auto_play: bool = False
vibe: str | None = None
alpha: float = Field(default=0.5, ge=0.0, le=1.0)
@router.post("/generate")
async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_session)):
# Compute text embedding for vibe description
vibe_embedding = None
if req.vibe:
from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model
if not is_model_loaded():
load_model()
vibe_embedding = embed_text(req.vibe)
# Force pure taste when no vibe provided (preserves current behavior)
alpha = req.alpha if req.vibe else 1.0
playlist = await generate_playlist(
session,
total_tracks=req.total_tracks,
known_pct=req.known_pct,
name=req.name,
vibe_embedding=vibe_embedding,
alpha=alpha,
vibe_text=req.vibe,
)
# Load playlist tracks with track info
@@ -58,6 +74,8 @@ async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_ses
"name": playlist.name,
"total_tracks": playlist.total_tracks,
"known_pct": playlist.known_pct,
"vibe": playlist.vibe,
"alpha": playlist.alpha,
"tracks": track_list,
"auto_played": req.auto_play and req.speaker_entity is not None,
}

View File

@@ -11,9 +11,22 @@ router = APIRouter(prefix="/api")
async def recommendations(
limit: int = Query(default=50, ge=1, le=200),
include_known: bool = Query(default=False),
vibe: str | None = Query(default=None),
alpha: float = Query(default=0.5, ge=0.0, le=1.0),
session: AsyncSession = Depends(get_session),
):
vibe_embedding = None
if vibe:
from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model
if not is_model_loaded():
load_model()
vibe_embedding = embed_text(vibe)
# Force pure taste when no vibe provided
effective_alpha = alpha if vibe else 1.0
results = await get_recommendations(
session, limit=limit, exclude_known=not include_known
session, limit=limit, exclude_known=not include_known,
vibe_embedding=vibe_embedding, alpha=effective_alpha,
)
return {"recommendations": results, "count": len(results)}
return {"recommendations": results, "count": len(results), "vibe": vibe, "alpha": effective_alpha}

View File

@@ -94,6 +94,8 @@ class Playlist(Base):
name: Mapped[str | None] = mapped_column(Text)
known_pct: Mapped[int] = mapped_column(Integer, nullable=False)
total_tracks: Mapped[int] = mapped_column(Integer, nullable=False)
vibe: Mapped[str | None] = mapped_column(Text)
alpha: Mapped[float | None] = mapped_column(REAL)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
tracks: Mapped[list["PlaylistTrack"]] = relationship(back_populates="playlist", cascade="all, delete-orphan")

View File

@@ -53,3 +53,23 @@ def embed_audio(audio: np.ndarray, sample_rate: int = 48000) -> np.ndarray:
# Normalize to unit vector
emb = emb / np.linalg.norm(emb)
return emb
def embed_text(text: str) -> np.ndarray:
    """Embed a text description into the same 512-dim CLAP space as audio.

    Args:
        text: Free-form description to embed (e.g. a mood or vibe phrase).

    Returns:
        The embedding of ``text`` as a 1-D array scaled to unit length. If the
        model yields an all-zero vector it is returned unscaled instead of
        being turned into NaNs by a division by zero.

    Raises:
        RuntimeError: If the CLAP model or processor has not been loaded yet.
    """
    import torch

    if _model is None or _processor is None:
        raise RuntimeError("CLAP model not loaded. Call load_model() first.")

    inputs = _processor(text=[text], return_tensors="pt", padding=True)
    with torch.no_grad():
        output = _model.get_text_features(**inputs)

    # The call may hand back either a bare tensor or an output object that
    # exposes ``pooler_output``; accept both shapes of result.
    features = output.pooler_output if hasattr(output, "pooler_output") else output

    # detach() + cpu() keep the NumPy conversion valid even when the tensor
    # tracks gradients or lives on a non-CPU device.
    emb = features[0].detach().cpu().numpy()

    # Normalize to unit length, guarding the degenerate zero vector so NaNs
    # never reach the vector search downstream.
    norm = np.linalg.norm(emb)
    if norm > 0:
        emb = emb / norm
    return emb

View File

@@ -1,6 +1,7 @@
import logging
import random
import numpy as np
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -20,6 +21,9 @@ async def generate_playlist(
total_tracks: int = 20,
known_pct: int = 30,
name: str | None = None,
vibe_embedding: np.ndarray | None = None,
alpha: float = 0.5,
vibe_text: str | None = None,
) -> Playlist:
"""Generate a playlist mixing known-liked tracks with new recommendations.
@@ -48,7 +52,10 @@ async def generate_playlist(
new_count = total_tracks - known_count
# Get new recommendations
recs = await get_recommendations(session, limit=new_count * 2, exclude_known=True)
recs = await get_recommendations(
session, limit=new_count * 2, exclude_known=True,
vibe_embedding=vibe_embedding, alpha=alpha,
)
new_tracks = [(r["track_id"], r["similarity"]) for r in recs[:new_count]]
# Interleave: spread known tracks throughout the playlist
@@ -84,6 +91,8 @@ async def generate_playlist(
name=name or f"haunt-fm mix ({len(interleaved)} tracks)",
known_pct=known_pct,
total_tracks=len(interleaved),
vibe=vibe_text,
alpha=alpha if vibe_text else None,
)
session.add(playlist)
await session.flush()

View File

@@ -1,5 +1,6 @@
import logging
import numpy as np
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
@@ -18,22 +19,40 @@ async def get_recommendations(
limit: int = 50,
exclude_known: bool = True,
profile_name: str = "default",
vibe_embedding: np.ndarray | None = None,
alpha: float = 0.5,
) -> list[dict]:
"""Get track recommendations ranked by cosine similarity to taste profile."""
"""Get track recommendations ranked by cosine similarity to taste profile.
Args:
vibe_embedding: Optional 512-dim text embedding for vibe/mood matching.
alpha: Blend factor. 1.0 = pure taste, 0.0 = pure vibe, 0.5 = equal blend.
"""
# Load taste profile
profile = (
await session.execute(select(TasteProfile).where(TasteProfile.name == profile_name))
).scalar_one_or_none()
if profile is None:
if profile is None and vibe_embedding is None:
return []
# Determine query vector: blend taste profile with vibe embedding
if vibe_embedding is not None and profile is not None:
taste_emb = np.array(profile.embedding, dtype=np.float32)
vibe_emb = vibe_embedding.astype(np.float32)
query_emb = alpha * taste_emb + (1.0 - alpha) * vibe_emb
norm = np.linalg.norm(query_emb)
if norm > 0:
query_emb = query_emb / norm
elif vibe_embedding is not None:
# No taste profile yet — pure vibe (cold start)
query_emb = vibe_embedding.astype(np.float32)
else:
query_emb = np.array(profile.embedding, dtype=np.float32)
# Use pgvector cosine distance operator (<=>)
# Lower distance = more similar
if exclude_known:
# Subquery: track IDs that have listen events
known_ids_subq = select(ListenEvent.track_id).distinct().subquery()
query = text("""
SELECT t.id, t.title, t.artist, t.album, t.genre,
1 - (te.embedding <=> :profile_embedding) AS similarity
@@ -54,8 +73,7 @@ async def get_recommendations(
""")
# Format embedding as pgvector literal: [n1,n2,...]
emb = profile.embedding
emb_str = "[" + ",".join(str(float(x)) for x in emb) + "]"
emb_str = "[" + ",".join(str(float(x)) for x in query_emb) + "]"
result = await session.execute(
query,