-
Notifications
You must be signed in to change notification settings - Fork 9
Feat/tmdb cache #47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat/tmdb cache #47
Changes from all commits
394bb7b
c58f887
331c24c
e9d9971
bf4ed6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,8 +2,13 @@ | |||||||
| * TMDB Enrichment — fetches rich metadata from TMDB using an IMDB ID. | ||||||||
| * Makes 2-3 API calls: /find (IMDB→TMDB) + /movie or /tv (details+credits). | ||||||||
| * Falls back to /search if /find returns nothing (common for obscure IMDB entries). | ||||||||
| * | ||||||||
| * Results are cached in the tmdb_data table to avoid repeated API calls. | ||||||||
| */ | ||||||||
|
|
||||||||
| import { createClient } from '@supabase/supabase-js'; | ||||||||
| import { createHash } from 'crypto'; | ||||||||
|
|
||||||||
| export interface TmdbData { | ||||||||
| posterUrl: string | null; | ||||||||
| backdropUrl: string | null; | ||||||||
|
|
@@ -20,10 +25,98 @@ const EMPTY: TmdbData = { | |||||||
| cast: null, writers: null, contentRating: null, tmdbId: null, | ||||||||
| }; | ||||||||
|
|
||||||||
| function getCacheKey(imdbId: string, titleHint?: string): string { | ||||||||
| if (imdbId) return imdbId; | ||||||||
| if (titleHint) return 'title:' + createHash('sha256').update(titleHint.toLowerCase().trim()).digest('hex').slice(0, 32); | ||||||||
| return ''; | ||||||||
| } | ||||||||
|
|
||||||||
| function getSupabaseClient() { | ||||||||
| const url = process.env.SUPABASE_URL || process.env.NEXT_PUBLIC_SUPABASE_URL; | ||||||||
| const key = process.env.SUPABASE_SERVICE_ROLE_KEY; | ||||||||
| if (!url || !key) return null; | ||||||||
| return createClient(url, key); | ||||||||
| } | ||||||||
|
|
||||||||
| async function getCached(key: string): Promise<TmdbData | null> { | ||||||||
| if (!key) return null; | ||||||||
| try { | ||||||||
| const supabase = getSupabaseClient(); | ||||||||
| if (!supabase) return null; | ||||||||
| const { data } = await supabase | ||||||||
| .from('tmdb_data') | ||||||||
| .select('*') | ||||||||
|
||||||||
| .select('*') | |
| .select('poster_url, backdrop_url, overview, tagline, cast_names, writers, content_rating, tmdb_id') |
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
setCache upserts rows but doesn’t set updated_at. Without a DB trigger, repeated refreshes won’t bump updated_at, which undermines the “stale entry cleanup” index/purpose. Either add a BEFORE UPDATE trigger on tmdb_data (preferred) or include updated_at: now()/new Date().toISOString() in the upsert payload for conflict updates.
| content_rating: data.contentRating, | |
| content_rating: data.contentRating, | |
| updated_at: new Date().toISOString(), |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -303,8 +303,9 @@ function getMemoryThresholds(): { warning: number; critical: number; severe: num | |||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| const { warning: MEMORY_WARNING_THRESHOLD, critical: MEMORY_CRITICAL_THRESHOLD, severe: MEMORY_SEVERE_THRESHOLD } = getMemoryThresholds(); | ||||||||||||||||||||||||||||
| const MEMORY_CHECK_INTERVAL_MS = 30000; // Check every 30 seconds | ||||||||||||||||||||||||||||
| const MEMORY_CHECK_INTERVAL_MS = 15000; // Check every 15 seconds | ||||||||||||||||||||||||||||
| const TORRENT_MIN_AGE_MS = 30 * 60 * 1000; // Don't cleanup torrents younger than 30 minutes | ||||||||||||||||||||||||||||
| const TORRENT_MIN_AGE_CRITICAL_MS = 5 * 60 * 1000; // Shortened to 5 minutes under critical/severe pressure | ||||||||||||||||||||||||||||
| const ORPHAN_TORRENT_MAX_AGE_MS = 2 * 60 * 60 * 1000; // Auto-cleanup torrents with 0 watchers after 2 hours | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
|
|
@@ -344,7 +345,7 @@ export class StreamingService { | |||||||||||||||||||||||||||
| ensureDir(this.downloadPath); | ||||||||||||||||||||||||||||
| this.options = options; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| this.maxConcurrentStreams = options.maxConcurrentStreams ?? 10; | ||||||||||||||||||||||||||||
| this.maxConcurrentStreams = options.maxConcurrentStreams ?? 4; | ||||||||||||||||||||||||||||
| this.streamTimeout = options.streamTimeout ?? 120000; | ||||||||||||||||||||||||||||
|
Comment on lines
+348
to
349
|
||||||||||||||||||||||||||||
| this.torrentCleanupDelay = options.torrentCleanupDelay ?? DEFAULT_CLEANUP_DELAY; | ||||||||||||||||||||||||||||
| this.activeStreams = new Map(); | ||||||||||||||||||||||||||||
|
|
@@ -724,6 +725,7 @@ export class StreamingService { | |||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||
| this.killOldestStreams(Math.max(3, Math.floor(this.activeStreams.size / 2))); | ||||||||||||||||||||||||||||
| this.emergencyCleanup(); | ||||||||||||||||||||||||||||
| this.aggressiveCleanup('severe'); | ||||||||||||||||||||||||||||
| } else if (memUsage.rss >= MEMORY_CRITICAL_THRESHOLD) { | ||||||||||||||||||||||||||||
| logger.error('CRITICAL memory pressure - triggering emergency cleanup', { | ||||||||||||||||||||||||||||
| rssMB, | ||||||||||||||||||||||||||||
|
|
@@ -732,6 +734,7 @@ export class StreamingService { | |||||||||||||||||||||||||||
| activeStreams: this.activeStreams.size, | ||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||
| this.emergencyCleanup(); | ||||||||||||||||||||||||||||
| this.aggressiveCleanup('critical'); | ||||||||||||||||||||||||||||
| } else if (memUsage.rss >= MEMORY_WARNING_THRESHOLD) { | ||||||||||||||||||||||||||||
| logger.warn('High memory pressure - triggering aggressive cleanup', { | ||||||||||||||||||||||||||||
| rssMB, | ||||||||||||||||||||||||||||
|
|
@@ -747,19 +750,60 @@ export class StreamingService { | |||||||||||||||||||||||||||
| * Kill the oldest N streams to free memory during severe pressure | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| private killOldestStreams(count: number): void { | ||||||||||||||||||||||||||||
| // NEVER kill active streams. Users are watching these — killing them mid-playback | ||||||||||||||||||||||||||||
| // is a terrible UX. If memory is truly critical, systemd's MemoryMax will handle it. | ||||||||||||||||||||||||||||
| // Emergency/aggressive cleanup already removes idle torrents without active watchers. | ||||||||||||||||||||||||||||
| logger.warn('killOldestStreams called but SKIPPING — active streams are protected', { | ||||||||||||||||||||||||||||
| requestedKill: count, | ||||||||||||||||||||||||||||
| activeStreams: this.activeStreams.size, | ||||||||||||||||||||||||||||
| // Only kill streams that have NO active watchers (nobody is watching). | ||||||||||||||||||||||||||||
| // Active watchers = someone has an SSE connection open for this torrent. | ||||||||||||||||||||||||||||
| // This protects users mid-playback while still freeing unwatched resources. | ||||||||||||||||||||||||||||
| const unwatchedStreams: Array<[string, ActiveStream]> = []; | ||||||||||||||||||||||||||||
|
Comment on lines
752
to
+756
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| for (const [id, stream] of this.activeStreams) { | ||||||||||||||||||||||||||||
| const watcherInfo = this.torrentWatchers.get(stream.infohash); | ||||||||||||||||||||||||||||
| const watcherCount = watcherInfo?.watchers.size ?? 0; | ||||||||||||||||||||||||||||
| if (watcherCount === 0) { | ||||||||||||||||||||||||||||
| unwatchedStreams.push([id, stream]); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| if (unwatchedStreams.length === 0) { | ||||||||||||||||||||||||||||
| logger.warn('killOldestStreams: all streams have active watchers — skipping to protect playback', { | ||||||||||||||||||||||||||||
| requestedKill: count, | ||||||||||||||||||||||||||||
| activeStreams: this.activeStreams.size, | ||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||
| return; | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Sort by creation time (oldest first) and kill up to `count` | ||||||||||||||||||||||||||||
| const toKill = unwatchedStreams.slice(0, count); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
| const toKill = unwatchedStreams.slice(0, count); | |
| const sortedUnwatched = unwatchedStreams.slice().sort((a, b) => { | |
| const aCreated = a[1].createdAt; | |
| const bCreated = b[1].createdAt; | |
| if (aCreated == null || bCreated == null) { | |
| // If creation time is missing, keep original relative order | |
| return 0; | |
| } | |
| if (aCreated < bCreated) return -1; | |
| if (aCreated > bCreated) return 1; | |
| return 0; | |
| }); | |
| const toKill = sortedUnwatched.slice(0, count); |
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Before deleting this.torrentWatchers for an infohash, clear any pending cleanupTimer on that entry. Otherwise the scheduled cleanup callback will still run later with stale state (and can log misleading messages / do redundant work).
| this.activeStreams.delete(id); | |
| this.activeStreams.delete(id); | |
| const watcherInfo = this.torrentWatchers.get(stream.infohash); | |
| if (watcherInfo?.cleanupTimer) { | |
| clearTimeout(watcherInfo.cleanupTimer); | |
| // Avoid holding onto a stale timeout reference | |
| (watcherInfo as { cleanupTimer?: NodeJS.Timeout | undefined }).cleanupTimer = undefined; | |
| } |
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic destroys the underlying torrent for a single ActiveStream (torrent.destroy(...)), but multiple activeStreams can share the same infohash (e.g., different files from the same torrent). Destroying the torrent here will break those other streams, and the code doesn’t remove/close the other activeStreams entries for that infohash. Consider selecting victims at the torrent level (by infohash) and, when destroying a torrent, also destroy/remove all streams that reference that infohash (including calling activeStream.stream.destroy()).
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
skippedWatched is computed after mutating this.activeStreams (you delete entries in the loop). Because unwatchedStreams.length was captured before deletions but this.activeStreams.size is post-deletion, this value can be wrong/negative. Compute watchedCount / unwatchedCount and killedCount before deleting, then log those stable counts.
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,33 @@ | ||||||||||||||||
| -- Cache table for TMDB API responses | ||||||||||||||||
| -- Avoids repeated API calls for the same content | ||||||||||||||||
| CREATE TABLE IF NOT EXISTS tmdb_data ( | ||||||||||||||||
| -- Lookup key: either an IMDB ID (tt1234567) or a cleaned title hash | ||||||||||||||||
| lookup_key text PRIMARY KEY, | ||||||||||||||||
| tmdb_id integer, | ||||||||||||||||
| poster_url text, | ||||||||||||||||
| backdrop_url text, | ||||||||||||||||
| overview text, | ||||||||||||||||
| tagline text, | ||||||||||||||||
| cast_names text, | ||||||||||||||||
| writers text, | ||||||||||||||||
| content_rating text, | ||||||||||||||||
| -- Track freshness | ||||||||||||||||
| created_at timestamptz NOT NULL DEFAULT now(), | ||||||||||||||||
| updated_at timestamptz NOT NULL DEFAULT now() | ||||||||||||||||
| ); | ||||||||||||||||
|
|
||||||||||||||||
|
||||||||||||||||
| -- Keep updated_at current on updates/upserts | |
| CREATE TRIGGER set_tmdb_data_updated_at | |
| BEFORE UPDATE ON tmdb_data | |
| FOR EACH ROW | |
| EXECUTE FUNCTION update_updated_at_column(); |
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CREATE POLICY ... FOR SELECT USING (true) makes the entire shared TMDB cache readable to anyone who has table SELECT privileges (often anon/authenticated in Supabase setups). If that’s not explicitly intended, restrict reads to service_role (or to authenticated users / an RPC that only returns a single key) to avoid easy scraping and unbounded data exposure.
| -- Public read/write (no user-scoping needed, this is shared cache) | |
| CREATE POLICY "tmdb_data_public_read" ON tmdb_data FOR SELECT USING (true); | |
| -- Restrict direct table reads/writes to service_role; clients should use controlled RPCs | |
| CREATE POLICY "tmdb_data_service_read" ON tmdb_data | |
| FOR SELECT | |
| USING (auth.role() = 'service_role'); |
Copilot
AI
Mar 6, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment says “Public read/write”, but the policies only allow public read; writes are restricted to service_role. Please update the comment to match the actual policy intent (or adjust the policies if public write was intended).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For title-based lookups,
`getCacheKey` hashes the raw `titleHint`, while the TMDB search uses a cleaned title (`cleanTitleForSearch`). This will fragment the cache (the same content gets multiple keys) and can grow the table quickly. Consider hashing `cleanTitleForSearch(titleHint)` (and/or including the year) so equivalent titles map to the same cache key.