Add vibe-aware playlists with CLAP text embeddings
Blend taste profile with text-embedded mood descriptions (e.g. "chill ambient lo-fi") using pre-blended vector search against the existing HNSW index. New optional `vibe` and `alpha` params on playlist generate and recommendations endpoints. Backward compatible — no vibe = pure taste profile (alpha=1.0). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
10
CLAUDE.md
10
CLAUDE.md
@@ -41,6 +41,16 @@ curl http://192.168.86.51:8321/api/recommendations?limit=20
|
||||
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"total_tracks":20,"known_pct":30,"speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
|
||||
|
||||
# Generate a vibe-based playlist (mood/activity matching)
|
||||
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"total_tracks":15,"vibe":"chill ambient lo-fi","speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
|
||||
|
||||
# Vibe with custom blend (alpha: 0=pure vibe, 0.5=blend, 1=pure taste)
|
||||
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"total_tracks":15,"vibe":"upbeat party music","alpha":0.3,"auto_play":true,"speaker_entity":"media_player.living_room_speaker_2"}'
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
20
README.md
20
README.md
@@ -51,7 +51,7 @@ docker exec haunt-fm alembic upgrade head
|
||||
| POST | `/api/history/webhook` | Log a listen event (from HA automation) |
|
||||
| POST | `/api/admin/discover` | Expand listening history via Last.fm |
|
||||
| POST | `/api/admin/build-taste-profile` | Rebuild taste profile from embeddings |
|
||||
| GET | `/api/recommendations?limit=50` | Get ranked recommendations |
|
||||
| GET | `/api/recommendations?limit=50&vibe=chill+ambient` | Get ranked recommendations (optional vibe) |
|
||||
| POST | `/api/playlists/generate` | Generate and optionally play a playlist |
|
||||
|
||||
## Usage
|
||||
@@ -69,11 +69,26 @@ curl -X POST http://192.168.86.51:8321/api/playlists/generate \
|
||||
}'
|
||||
```
|
||||
|
||||
### Generate a vibe-based playlist
|
||||
|
||||
```bash
|
||||
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"total_tracks": 15,
|
||||
"vibe": "chill ambient lo-fi",
|
||||
"speaker_entity": "media_player.living_room_speaker_2",
|
||||
"auto_play": true
|
||||
}'
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `total_tracks` — number of tracks in the playlist (default 20)
|
||||
- `known_pct` — percentage of known-liked tracks vs new discoveries (default 30)
|
||||
- `speaker_entity` — Music Assistant entity ID (must be an entity whose ID ends in `_2`)
|
||||
- `auto_play` — `true` to immediately play on the speaker
|
||||
- `vibe` — text description of the desired mood/vibe (e.g. "chill lo-fi beats", "upbeat party music"). Uses CLAP text embeddings to match tracks in the same vector space as audio.
|
||||
- `alpha` — blend factor between taste profile and vibe (default 0.5). `1.0` = pure taste profile, `0.0` = pure vibe match, `0.5` = equal blend. Ignored when no vibe is provided: the request then always uses the pure taste profile (`alpha` = `1.0`).
|
||||
|
||||
### Speaker entities
|
||||
|
||||
@@ -113,6 +128,9 @@ curl -X POST http://192.168.86.51:8321/api/admin/build-taste-profile
|
||||
|
||||
# Get recommendations (without playing)
|
||||
curl "http://192.168.86.51:8321/api/recommendations?limit=20"
|
||||
|
||||
# Get vibe-matched recommendations
|
||||
curl "http://192.168.86.51:8321/api/recommendations?limit=20&vibe=dark+electronic&alpha=0.3"
|
||||
```
|
||||
|
||||
## Pipeline Stages
|
||||
|
||||
26
alembic/versions/002_add_playlist_vibe.py
Normal file
26
alembic/versions/002_add_playlist_vibe.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Add vibe and alpha columns to playlists
|
||||
|
||||
Revision ID: 002
|
||||
Revises: 001
|
||||
Create Date: 2026-02-22
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "002"
|
||||
down_revision: Union[str, None] = "001"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable ``vibe`` and ``alpha`` columns to ``playlists``."""
    new_columns = (
        sa.Column("vibe", sa.Text, nullable=True),
        sa.Column("alpha", sa.REAL, nullable=True),
    )
    # Columns are added in declaration order: vibe first, then alpha.
    for column in new_columns:
        op.add_column("playlists", column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``alpha`` and ``vibe`` columns from ``playlists``."""
    # Reverse of upgrade(): alpha is removed first, then vibe.
    for column_name in ("alpha", "vibe"):
        op.drop_column("playlists", column_name)
|
||||
@@ -1,5 +1,5 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -17,15 +17,31 @@ class GenerateRequest(BaseModel):
|
||||
name: str | None = None
|
||||
speaker_entity: str | None = None
|
||||
auto_play: bool = False
|
||||
vibe: str | None = None
|
||||
alpha: float = Field(default=0.5, ge=0.0, le=1.0)
|
||||
|
||||
|
||||
@router.post("/generate")
|
||||
async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_session)):
|
||||
# Compute text embedding for vibe description
|
||||
vibe_embedding = None
|
||||
if req.vibe:
|
||||
from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model
|
||||
|
||||
if not is_model_loaded():
|
||||
load_model()
|
||||
vibe_embedding = embed_text(req.vibe)
|
||||
# Force pure taste when no vibe provided (preserves current behavior)
|
||||
alpha = req.alpha if req.vibe else 1.0
|
||||
|
||||
playlist = await generate_playlist(
|
||||
session,
|
||||
total_tracks=req.total_tracks,
|
||||
known_pct=req.known_pct,
|
||||
name=req.name,
|
||||
vibe_embedding=vibe_embedding,
|
||||
alpha=alpha,
|
||||
vibe_text=req.vibe,
|
||||
)
|
||||
|
||||
# Load playlist tracks with track info
|
||||
@@ -58,6 +74,8 @@ async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_ses
|
||||
"name": playlist.name,
|
||||
"total_tracks": playlist.total_tracks,
|
||||
"known_pct": playlist.known_pct,
|
||||
"vibe": playlist.vibe,
|
||||
"alpha": playlist.alpha,
|
||||
"tracks": track_list,
|
||||
"auto_played": req.auto_play and req.speaker_entity is not None,
|
||||
}
|
||||
|
||||
@@ -11,9 +11,22 @@ router = APIRouter(prefix="/api")
|
||||
async def recommendations(
|
||||
limit: int = Query(default=50, ge=1, le=200),
|
||||
include_known: bool = Query(default=False),
|
||||
vibe: str | None = Query(default=None),
|
||||
alpha: float = Query(default=0.5, ge=0.0, le=1.0),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
vibe_embedding = None
|
||||
if vibe:
|
||||
from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model
|
||||
|
||||
if not is_model_loaded():
|
||||
load_model()
|
||||
vibe_embedding = embed_text(vibe)
|
||||
# Force pure taste when no vibe provided
|
||||
effective_alpha = alpha if vibe else 1.0
|
||||
|
||||
results = await get_recommendations(
|
||||
session, limit=limit, exclude_known=not include_known
|
||||
session, limit=limit, exclude_known=not include_known,
|
||||
vibe_embedding=vibe_embedding, alpha=effective_alpha,
|
||||
)
|
||||
return {"recommendations": results, "count": len(results)}
|
||||
return {"recommendations": results, "count": len(results), "vibe": vibe, "alpha": effective_alpha}
|
||||
|
||||
@@ -94,6 +94,8 @@ class Playlist(Base):
|
||||
name: Mapped[str | None] = mapped_column(Text)
|
||||
known_pct: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
total_tracks: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
vibe: Mapped[str | None] = mapped_column(Text)
|
||||
alpha: Mapped[float | None] = mapped_column(REAL)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
tracks: Mapped[list["PlaylistTrack"]] = relationship(back_populates="playlist", cascade="all, delete-orphan")
|
||||
|
||||
@@ -53,3 +53,23 @@ def embed_audio(audio: np.ndarray, sample_rate: int = 48000) -> np.ndarray:
|
||||
# Normalize to unit vector
|
||||
emb = emb / np.linalg.norm(emb)
|
||||
return emb
|
||||
|
||||
|
||||
def embed_text(text: str) -> np.ndarray:
    """Embed a text description into the same 512-dim CLAP space as audio.

    Args:
        text: Free-form description of the desired mood or vibe.

    Returns:
        The embedding of ``text`` as a 1-D NumPy array scaled to unit length.
        An all-zero embedding is returned unscaled instead of being turned
        into NaNs by a division by zero.

    Raises:
        RuntimeError: If the CLAP model or processor has not been loaded.
    """
    import torch

    if _model is None or _processor is None:
        raise RuntimeError("CLAP model not loaded. Call load_model() first.")

    # The text comes from API callers and has no length limit, so let the
    # tokenizer truncate it instead of feeding an over-long sequence to the
    # text encoder. Inputs that already fit are unaffected.
    inputs = _processor(
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        output = _model.get_text_features(**inputs)

    # get_text_features may hand back either an object exposing
    # ``pooler_output`` or something indexable directly; handle both.
    features = output.pooler_output if hasattr(output, "pooler_output") else output
    # detach()/cpu() are no-ops for a CPU tensor without grad, and keep the
    # NumPy conversion valid otherwise.
    emb = features[0].detach().cpu().numpy()

    # Normalize to unit vector; skip the division for a zero vector so the
    # result never contains NaN.
    norm = np.linalg.norm(emb)
    if norm > 0:
        emb = emb / norm
    return emb
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -20,6 +21,9 @@ async def generate_playlist(
|
||||
total_tracks: int = 20,
|
||||
known_pct: int = 30,
|
||||
name: str | None = None,
|
||||
vibe_embedding: np.ndarray | None = None,
|
||||
alpha: float = 0.5,
|
||||
vibe_text: str | None = None,
|
||||
) -> Playlist:
|
||||
"""Generate a playlist mixing known-liked tracks with new recommendations.
|
||||
|
||||
@@ -48,7 +52,10 @@ async def generate_playlist(
|
||||
new_count = total_tracks - known_count
|
||||
|
||||
# Get new recommendations
|
||||
recs = await get_recommendations(session, limit=new_count * 2, exclude_known=True)
|
||||
recs = await get_recommendations(
|
||||
session, limit=new_count * 2, exclude_known=True,
|
||||
vibe_embedding=vibe_embedding, alpha=alpha,
|
||||
)
|
||||
new_tracks = [(r["track_id"], r["similarity"]) for r in recs[:new_count]]
|
||||
|
||||
# Interleave: spread known tracks throughout the playlist
|
||||
@@ -84,6 +91,8 @@ async def generate_playlist(
|
||||
name=name or f"haunt-fm mix ({len(interleaved)} tracks)",
|
||||
known_pct=known_pct,
|
||||
total_tracks=len(interleaved),
|
||||
vibe=vibe_text,
|
||||
alpha=alpha if vibe_text else None,
|
||||
)
|
||||
session.add(playlist)
|
||||
await session.flush()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -18,22 +19,40 @@ async def get_recommendations(
|
||||
limit: int = 50,
|
||||
exclude_known: bool = True,
|
||||
profile_name: str = "default",
|
||||
vibe_embedding: np.ndarray | None = None,
|
||||
alpha: float = 0.5,
|
||||
) -> list[dict]:
|
||||
"""Get track recommendations ranked by cosine similarity to taste profile."""
|
||||
"""Get track recommendations ranked by cosine similarity to taste profile.
|
||||
|
||||
Args:
|
||||
vibe_embedding: Optional 512-dim text embedding for vibe/mood matching.
|
||||
alpha: Blend factor. 1.0 = pure taste, 0.0 = pure vibe, 0.5 = equal blend.
|
||||
"""
|
||||
# Load taste profile
|
||||
profile = (
|
||||
await session.execute(select(TasteProfile).where(TasteProfile.name == profile_name))
|
||||
).scalar_one_or_none()
|
||||
|
||||
if profile is None:
|
||||
if profile is None and vibe_embedding is None:
|
||||
return []
|
||||
|
||||
# Determine query vector: blend taste profile with vibe embedding
|
||||
if vibe_embedding is not None and profile is not None:
|
||||
taste_emb = np.array(profile.embedding, dtype=np.float32)
|
||||
vibe_emb = vibe_embedding.astype(np.float32)
|
||||
query_emb = alpha * taste_emb + (1.0 - alpha) * vibe_emb
|
||||
norm = np.linalg.norm(query_emb)
|
||||
if norm > 0:
|
||||
query_emb = query_emb / norm
|
||||
elif vibe_embedding is not None:
|
||||
# No taste profile yet — pure vibe (cold start)
|
||||
query_emb = vibe_embedding.astype(np.float32)
|
||||
else:
|
||||
query_emb = np.array(profile.embedding, dtype=np.float32)
|
||||
|
||||
# Use pgvector cosine distance operator (<=>)
|
||||
# Lower distance = more similar
|
||||
if exclude_known:
|
||||
# Subquery: track IDs that have listen events
|
||||
known_ids_subq = select(ListenEvent.track_id).distinct().subquery()
|
||||
|
||||
query = text("""
|
||||
SELECT t.id, t.title, t.artist, t.album, t.genre,
|
||||
1 - (te.embedding <=> :profile_embedding) AS similarity
|
||||
@@ -54,8 +73,7 @@ async def get_recommendations(
|
||||
""")
|
||||
|
||||
# Format embedding as pgvector literal: [n1,n2,...]
|
||||
emb = profile.embedding
|
||||
emb_str = "[" + ",".join(str(float(x)) for x in emb) + "]"
|
||||
emb_str = "[" + ",".join(str(float(x)) for x in query_emb) + "]"
|
||||
|
||||
result = await session.execute(
|
||||
query,
|
||||
|
||||
Reference in New Issue
Block a user