From 5f60d1299603ed31d6064fb531635225d9ab00ac Mon Sep 17 00:00:00 2001
From: Mark Zheleznyakov
Date: Wed, 19 Nov 2025 14:43:39 +0300
Subject: [PATCH] =?UTF-8?q?feat:=20=D0=A1=D0=B5=D1=80=D0=B2=D0=B8=D1=81=20?=
 =?UTF-8?q?=D0=B8=20=D0=BA=D0=BE=D0=BD=D1=82=D1=80=D0=BE=D0=BB=D0=BB=D0=B5?=
 =?UTF-8?q?=D1=80=20=D1=81=D0=B5=D0=B9=D0=B2=D0=BE=D0=B2=20=D1=81=20=D0=BC?=
 =?UTF-8?q?=D0=B5=D1=82=D0=BE=D0=B4=D0=B0=D0=BC=D0=B8=20=D0=BF=D0=BE=D0=BB?=
 =?UTF-8?q?=D1=83=D1=87=D0=B5=D0=BD=D0=B8=D1=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the contents of saves.controller.ts, saves.service.ts and
scraper.service.ts were revised during review. Hunk lengths and the diffstat
are updated accordingly; the blob ids on the "index" lines are carried over
from the original patch and no longer match the new contents.

 apps/backend/package.json                     |   5 +
 .../src/controllers/saves.controller.ts       | 232 ++++++++
 apps/backend/src/drizzle.config.ts            |  12 -
 apps/backend/src/index.ts                     |  10 +-
 apps/backend/src/services/saves.service.ts    | 110 ++++
 apps/backend/src/services/scraper.service.ts  | 516 ++++++++++++++++++
 apps/backend/tsconfig.json                    |   3 +
 7 files changed, 872 insertions(+), 16 deletions(-)
 create mode 100644 apps/backend/src/controllers/saves.controller.ts
 delete mode 100644 apps/backend/src/drizzle.config.ts
 create mode 100644 apps/backend/src/services/saves.service.ts
 create mode 100644 apps/backend/src/services/scraper.service.ts

diff --git a/apps/backend/package.json b/apps/backend/package.json
index 7a3b7a1..3a0caaa 100644
--- a/apps/backend/package.json
+++ b/apps/backend/package.json
@@ -5,6 +5,10 @@
     "dev": "bun run --watch src/index.ts",
     "build": "bun build src/index.ts --outdir ./dist --target bun",
     "start": "bun run dist/index.js",
+    "test": "bun test",
+    "test:watch": "bun test --watch",
+    "test:unit": "bun test ./src/tests/unit/**/*.test.ts",
+    "test:e2e": "bun test --preload ./src/tests/setup.ts ./src/tests/e2e/**/*.test.ts",
     "db:generate": "drizzle-kit generate",
     "db:migrate": "bun run src/db/migrate.ts",
     "db:studio": "drizzle-kit studio"
@@ -13,6 +17,7 @@
     "@elysiajs/cors": "^1.1.1",
     "@elysiajs/eden": "^1.4.4",
     "@elysiajs/openapi": "^1.4.11",
+    "@p1ctos4ve/shared-types": "workspace:*",
     "better-auth": "^1.1.6",
     "drizzle-orm": "^0.36.4",
     "elysia": "^1.1.23",
diff --git a/apps/backend/src/controllers/saves.controller.ts b/apps/backend/src/controllers/saves.controller.ts
new file mode 100644
index 0000000..163d750
--- /dev/null
+++ b/apps/backend/src/controllers/saves.controller.ts
@@ -0,0 +1,232 @@
+import { Elysia, t } from 'elysia';
+import { savesService } from '@/services/saves.service';
+import { s3Service } from '@/services/s3.service';
+import { auth } from '@/lib/auth';
+import { betterAuthMiddleware } from '@/lib/auth/middleware';
+
+/** Lifetime passed to `s3Service.getSignedUrl` (presumably seconds - confirm). */
+const DOWNLOAD_URL_TTL = 3600;
+
+/**
+ * Serialises a timestamp to ISO-8601. Accepts date strings as well as `Date`
+ * instances, because a row that went through a JSON round trip (for example a
+ * cache hit) no longer carries real `Date` objects.
+ */
+const toIso = (value: Date | string): string => new Date(value).toISOString();
+
+/** Returns the message of a thrown `Error`, or `fallback` for anything else. */
+const errorMessage = (error: unknown, fallback: string): string =>
+  error instanceof Error ? error.message : fallback;
+
+/**
+ * Parses the `:id` route parameter.
+ *
+ * @returns the id, or `null` unless the value is a plain decimal integer
+ * (rejects ' ', '1.5', '1e3' and '0x10', all of which `Number()` accepts).
+ */
+const parseSaveId = (raw: string): number | null => {
+  if (!/^\d+$/.test(raw)) return null;
+  const id = Number(raw);
+  return Number.isSafeInteger(id) ? id : null;
+};
+
+/**
+ * Resolves the id of the signed-in user, or `undefined` for anonymous requests.
+ * A failed session lookup is logged and treated as anonymous so that public and
+ * link-shared saves stay reachable.
+ */
+const resolveUserId = async (headers: Headers): Promise<string | undefined> => {
+  try {
+    const session = await auth.api.getSession({ headers });
+    return session?.user?.id;
+  } catch (error) {
+    console.warn('Failed to resolve session:', error);
+    return undefined;
+  }
+};
+
+/**
+ * Read-only HTTP API for saves, mounted under `/saves`.
+ *
+ * - `GET /saves/my` - saves of the current user
+ * - `GET /saves/u/:slug` - public saves of one user
+ * - `GET /saves/:id` - a single save, subject to access control
+ * - `GET /saves/:id/download` - redirect to a presigned file URL
+ */
+export const savesController = new Elysia({ prefix: '/saves' })
+  .use(betterAuthMiddleware)
+  .get(
+    '/my',
+    async ({ user, set }) => {
+      try {
+        const saves = await savesService.getUserSaves(user.id);
+
+        return saves.map((save) => ({
+          id: save.id,
+          name: save.name,
+          type: save.type,
+          description: save.description,
+          tags: save.tags,
+          visibility: save.visibility,
+          shareUrl: save.visibility === 'link' ? save.shareUrl : undefined,
+          url: save.url,
+          createdAt: toIso(save.createdAt),
+          updatedAt: toIso(save.updatedAt),
+        }));
+      } catch (error) {
+        set.status = 500;
+        return { error: errorMessage(error, 'Failed to get saves') };
+      }
+    },
+    {
+      detail: {
+        tags: ['Saves'],
+        summary: 'Get my saves',
+        description: 'Returns all saves of the current user',
+      },
+      auth: true,
+    }
+  )
+  .get(
+    '/u/:slug',
+    async ({ params: { slug }, set }) => {
+      try {
+        const saves = await savesService.getPublicSavesByUser(slug);
+
+        return saves.map((save) => ({
+          id: save.id,
+          name: save.name,
+          type: save.type,
+          description: save.description,
+          tags: save.tags,
+          visibility: save.visibility,
+          url: save.url,
+          createdAt: toIso(save.createdAt),
+          updatedAt: toIso(save.updatedAt),
+        }));
+      } catch (error) {
+        set.status = 500;
+        return { error: errorMessage(error, 'Failed to get public saves') };
+      }
+    },
+    {
+      params: t.Object({
+        slug: t.String(),
+      }),
+      detail: {
+        tags: ['Saves'],
+        summary: 'Get public saves by user slug',
+        description: 'Returns only public saves of a specific user',
+      },
+    }
+  )
+  .get(
+    '/:id',
+    async ({ params: { id }, query, request, set }) => {
+      const saveId = parseSaveId(id);
+      if (saveId === null) {
+        set.status = 400;
+        return { error: 'Invalid save ID' };
+      }
+
+      try {
+        // The session is looked up here rather than demanded through the `auth`
+        // route option, so anonymous visitors can open public and link-shared
+        // saves. NOTE(review): assumes `auth: true` rejects requests without a
+        // session - confirm against betterAuthMiddleware.
+        const userId = await resolveUserId(request.headers);
+        const save = await savesService.getById(
+          saveId,
+          userId,
+          query.share || undefined
+        );
+
+        if (!save) {
+          set.status = 404;
+          return { error: 'Save not found' };
+        }
+
+        return {
+          id: save.id,
+          name: save.name,
+          type: save.type,
+          description: save.description,
+          tags: save.tags,
+          visibility: save.visibility,
+          shareUrl: save.visibility === 'link' ? save.shareUrl : undefined,
+          userId: save.userId,
+          url: save.url,
+          createdAt: toIso(save.createdAt),
+          updatedAt: toIso(save.updatedAt),
+        };
+      } catch (error) {
+        set.status = 500;
+        return { error: errorMessage(error, 'Failed to get save') };
+      }
+    },
+    {
+      params: t.Object({
+        id: t.String(),
+      }),
+      query: t.Object({
+        share: t.Optional(t.String()),
+      }),
+      detail: {
+        tags: ['Saves'],
+        summary: 'Get save by ID',
+        description: 'Returns a specific save with access control',
+      },
+    }
+  )
+  .get(
+    '/:id/download',
+    async ({ params: { id }, query, request, set }) => {
+      const saveId = parseSaveId(id);
+      if (saveId === null) {
+        set.status = 400;
+        return { error: 'Invalid save ID' };
+      }
+
+      try {
+        const userId = await resolveUserId(request.headers);
+        const save = await savesService.getById(
+          saveId,
+          userId,
+          query.share || undefined
+        );
+
+        if (!save) {
+          set.status = 404;
+          return { error: 'Save not found' };
+        }
+
+        // `await` is a no-op if getSignedUrl is synchronous and prevents a
+        // pending Promise from ending up in the header if it is not.
+        const signedUrl = await s3Service.getSignedUrl(
+          save.s3Key,
+          DOWNLOAD_URL_TTL
+        );
+
+        set.status = 302;
+        set.headers['Location'] = signedUrl;
+
+        return null;
+      } catch (error) {
+        set.status = 500;
+        return { error: errorMessage(error, 'Failed to download save') };
+      }
+    },
+    {
+      params: t.Object({
+        id: t.String(),
+      }),
+      query: t.Object({
+        share: t.Optional(t.String()),
+      }),
+      detail: {
+        tags: ['Saves'],
+        summary: 'Download save file',
+        description: 'Redirects to a presigned URL for downloading the file',
+      },
+    }
+  );
diff --git a/apps/backend/src/drizzle.config.ts b/apps/backend/src/drizzle.config.ts
deleted file mode 100644
index e70df46..0000000
--- a/apps/backend/src/drizzle.config.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import type { Config } from 'drizzle-kit';
-import { env } from './src/config/env';
-
-export default {
-  schema: './src/db/schema.ts',
-  out: './drizzle',
-  dialect: 'postgresql',
-  dbCredentials: {
-    url: env.DATABASE_URL,
-  },
-} satisfies Config;
-
diff --git a/apps/backend/src/index.ts b/apps/backend/src/index.ts
index caa45f8..d8eb349 100644
--- a/apps/backend/src/index.ts
+++ b/apps/backend/src/index.ts
@@ -1,10 +1,11 @@
 import { Elysia } from "elysia";
 import { openapi } from '@elysiajs/openapi'
 import { auth } from "@/lib/auth";
-import { env } from "./config/env";
-import { AuthOpenAPI } from "./lib/auth/openapi";
-import { purple } from "./lib/term/color";
-import { usersController } from "./controllers/users.controller";
+import { env } from "@/config/env";
+import { AuthOpenAPI } from "@/lib/auth/openapi";
+import { purple } from "@/lib/term/color";
+import { usersController } from "@/controllers/users.controller";
+import { savesController } from "@/controllers/saves.controller";
 
 const app = new Elysia()
   .use(openapi({
@@ -15,6 +16,7 @@ const app = new Elysia()
   }))
   .mount('/auth', auth.handler)
   .use(usersController)
+  .use(savesController)
   .listen(env.PORT);
 
 const hostname = app.server?.hostname
diff --git a/apps/backend/src/services/saves.service.ts b/apps/backend/src/services/saves.service.ts
new file mode 100644
index 0000000..03b3aae
--- /dev/null
+++ b/apps/backend/src/services/saves.service.ts
@@ -0,0 +1,110 @@
+import { eq, and, desc } from 'drizzle-orm';
+import { db, save, type Save } from '@/db';
+import { redis } from './redis.service';
+
+/** Read access to saves, with a Redis read-through cache in front of the database. */
+class SavesService {
+  /** Expiry handed to `redis.set` for every entry (presumably seconds - confirm). */
+  private readonly CACHE_TTL = 3600;
+
+  /**
+   * Loads a save by id and applies access control.
+   *
+   * The row itself is cached per id and shared between requesters; the access
+   * check runs on every call, so a cache hit never bypasses it.
+   *
+   * @param id - id of the save
+   * @param requestUserId - id of the requesting user, if signed in
+   * @param shareToken - share token supplied with the request, if any
+   * @returns the save, or `null` when it does not exist or is not accessible
+   */
+  async getById(
+    id: number,
+    requestUserId?: string,
+    shareToken?: string
+  ): Promise<Save | null> {
+    const cacheKey = `save:${id}`;
+    let savedItem = await redis.get<Save>(cacheKey);
+
+    if (!savedItem) {
+      const [row] = await db
+        .select()
+        .from(save)
+        .where(eq(save.id, id))
+        .limit(1);
+
+      if (!row) return null;
+
+      savedItem = row;
+      await redis.set(cacheKey, savedItem, this.CACHE_TTL);
+    }
+
+    return this.hasAccess(savedItem, requestUserId, shareToken) ? savedItem : null;
+  }
+
+  /**
+   * Returns every save owned by `userId`, newest first.
+   * Served from cache when available.
+   */
+  async getUserSaves(userId: string): Promise<Save[]> {
+    const cacheKey = `user_saves:${userId}`;
+    const cached = await redis.get<Save[]>(cacheKey);
+    if (cached) return cached;
+
+    const userSaves = await db
+      .select()
+      .from(save)
+      .where(eq(save.userId, userId))
+      .orderBy(desc(save.createdAt));
+
+    await redis.set(cacheKey, userSaves, this.CACHE_TTL);
+
+    return userSaves;
+  }
+
+  /**
+   * Returns the saves of `userId` whose visibility is `public`, newest first.
+   * Served from cache when available.
+   */
+  async getPublicSavesByUser(userId: string): Promise<Save[]> {
+    const cacheKey = `public_saves:${userId}`;
+    const cached = await redis.get<Save[]>(cacheKey);
+    if (cached) return cached;
+
+    const publicSaves = await db
+      .select()
+      .from(save)
+      .where(and(eq(save.userId, userId), eq(save.visibility, 'public')))
+      .orderBy(desc(save.createdAt));
+
+    await redis.set(cacheKey, publicSaves, this.CACHE_TTL);
+
+    return publicSaves;
+  }
+
+  /**
+   * Decides whether a requester may read `savedItem`: the owner always may,
+   * `public` saves are open to everyone, and `link` saves require the matching
+   * share token.
+   */
+  private hasAccess(
+    savedItem: Save,
+    requestUserId?: string,
+    shareToken?: string
+  ): boolean {
+    // Anonymous requesters never match as owner, even if a row had no owner.
+    if (requestUserId !== undefined && savedItem.userId === requestUserId) {
+      return true;
+    }
+
+    if (savedItem.visibility === 'public') return true;
+
+    return (
+      savedItem.visibility === 'link' &&
+      Boolean(savedItem.shareUrl) &&
+      shareToken === savedItem.shareUrl
+    );
+  }
+}
+
+export const savesService = new SavesService();
diff --git a/apps/backend/src/services/scraper.service.ts b/apps/backend/src/services/scraper.service.ts
new file mode 100644
index 0000000..79a4cae
--- /dev/null
+++ b/apps/backend/src/services/scraper.service.ts
@@ -0,0 +1,516 @@
+/** Media item resolved from a user-supplied URL. */
+interface ScrapedMedia {
+  url: string;
+  title?: string;
+  description?: string;
+  type: 'image' | 'video' | 'gif';
+  source: 'pinterest' | 'tenor' | 'direct';
+  filename?: string;
+}
+
+/** One rendition of a Tenor item, as found in the page's embedded JSON. */
+type MediaVariant = {
+  url: string;
+  width?: number;
+  height?: number;
+  label?: string;
+};
+
+type MediaKind = ScrapedMedia['type'];
+
+/** Media kind by file extension. A Map, so keys like `constructor` never match. */
+const MEDIA_KIND_BY_EXTENSION = new Map<string, MediaKind>([
+  ['jpg', 'image'],
+  ['jpeg', 'image'],
+  ['png', 'image'],
+  ['gif', 'gif'],
+  ['webp', 'image'],
+  ['avif', 'image'],
+  ['mp4', 'video'],
+  ['webm', 'video'],
+  ['mov', 'video'],
+]);
+
+// Host patterns are anchored on a label boundary so that look-alike hosts such
+// as `evilpinterest.com` or `pinterest.com.example.org` are not matched.
+const PINTEREST_HOST = /(^|\.)(pinterest\.com(\.[a-z]{2})?|pin\.it)$/;
+const TENOR_HOST = /(^|\.)tenor\.com$/;
+
+/**
+ * Resolves a page or file URL to a downloadable media URL. Handles Pinterest
+ * pins, Tenor pages and direct links to image/video files.
+ */
+class ScraperService {
+  private readonly USER_AGENT =
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
+
+  /**
+   * Resolves `url` to a media descriptor.
+   *
+   * @throws TypeError when `url` cannot be parsed
+   * @throws Error when the URL is not http(s), no media can be identified, or
+   * scraping a supported site fails
+   */
+  async scrapeUrl(url: string): Promise<ScrapedMedia> {
+    const urlObj = new URL(url);
+
+    // The URL is user input and ends up in fetch(); allow only http(s) so that
+    // schemes such as `file:` are rejected up front.
+    if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') {
+      throw new Error(`Unsupported URL: ${url}`);
+    }
+
+    const hostname = urlObj.hostname.toLowerCase();
+
+    if (PINTEREST_HOST.test(hostname)) {
+      return this.scrapePinterest(url);
+    }
+
+    if (TENOR_HOST.test(hostname)) {
+      return this.scrapeTenor(url);
+    }
+
+    if (this.isDirectMediaUrl(url)) {
+      return { url, type: this.guessMediaType(url), source: 'direct' };
+    }
+
+    const mediaType = await this.checkMediaContentType(url);
+    if (mediaType) {
+      return { url, type: mediaType, source: 'direct' };
+    }
+
+    throw new Error(`Unsupported URL: ${url}`);
+  }
+
+  /**
+   * Resolves a Pinterest pin or `pin.it` short link to its media file,
+   * preferring an `.mp4` video over a `.jpg`/`.gif` image.
+   *
+   * @throws Error prefixed with `Failed to scrape Pinterest:` on any failure
+   */
+  private async scrapePinterest(url: string): Promise<ScrapedMedia> {
+    try {
+      let pinId = this.extractPinterestId(url);
+
+      const shortLink = url.match(/pin\.it\/([^\/?#]+)/)?.[1];
+      if (shortLink) {
+        const redirectUrl = await this.resolveRedirect(
+          `https://api.pinterest.com/url_shortener/${shortLink}/redirect/`
+        );
+        if (redirectUrl) {
+          pinId = this.extractPinterestId(redirectUrl);
+        }
+      }
+
+      if (!pinId) {
+        throw new Error('Could not extract Pinterest pin ID');
+      }
+
+      // Keep only the part after `--` (presumably `<slug>--<id>` - confirm).
+      if (pinId.includes('--')) {
+        pinId = pinId.split('--')[1] ?? pinId;
+      }
+
+      const response = await fetch(
+        `https://www.pinterest.com/pin/${encodeURIComponent(pinId)}/`,
+        { headers: { 'User-Agent': this.USER_AGENT } }
+      );
+      const html = await response.text();
+
+      if (/"__typename"\s*:\s*"PinNotFound"/.test(html)) {
+        throw new Error('Pinterest pin not found');
+      }
+
+      const videoRegex = /"url":"(https:\/\/v1\.pinimg\.com\/videos\/.*?)"/g;
+      const videoLink = [...html.matchAll(videoRegex)]
+        .map(([, link]) => link)
+        .find((link) => link?.endsWith('.mp4'));
+
+      if (videoLink) {
+        return {
+          url: videoLink,
+          type: 'video',
+          source: 'pinterest',
+          filename: `pinterest_${pinId}.mp4`,
+        };
+      }
+
+      const imageRegex = /src="(https:\/\/i\.pinimg\.com\/.*?\.(jpg|gif))"/g;
+      const imageLink = [...html.matchAll(imageRegex)]
+        .map(([, link]) => link)
+        .find((link) => link?.endsWith('.jpg') || link?.endsWith('.gif'));
+
+      if (imageLink) {
+        const isGif = imageLink.endsWith('.gif');
+        return {
+          url: imageLink,
+          type: isGif ? 'gif' : 'image',
+          source: 'pinterest',
+          filename: `pinterest_${pinId}.${isGif ? 'gif' : 'jpg'}`,
+        };
+      }
+
+      throw new Error('Could not extract media URL from Pinterest');
+    } catch (error) {
+      throw new Error(
+        `Failed to scrape Pinterest: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
+  }
+
+  /**
+   * Resolves a Tenor `/view/<slug>-<id>` page to its mp4 rendition by reading
+   * the JSON state embedded in the page.
+   *
+   * @throws Error prefixed with `Failed to scrape Tenor:` on any failure
+   */
+  private async scrapeTenor(url: string): Promise<ScrapedMedia> {
+    try {
+      const gifId = url.match(/\/view\/[^\/]+-(\d+)/)?.[1];
+      if (!gifId) {
+        throw new Error('Invalid Tenor URL format');
+      }
+
+      const response = await fetch(url, {
+        redirect: 'follow',
+        headers: {
+          'User-Agent': this.USER_AGENT,
+          'Accept': 'text/html,application/xhtml+xml',
+        },
+      });
+      const html = await response.text();
+
+      const mediaJson = this.extractTenorMediaJson(html);
+      const mediaVariants = this.collectTenorMedia(mediaJson);
+
+      const chosen = this.pickTenorVariant(mediaVariants, 'mp4', 'medium');
+
+      if (!chosen) {
+        throw new Error('Could not find suitable media variant');
+      }
+
+      return {
+        url: chosen.url,
+        // Tenor items are reported as GIFs even though the file is an mp4.
+        type: 'gif',
+        source: 'tenor',
+        filename: `tenor_${gifId}.mp4`,
+      };
+    } catch (error) {
+      throw new Error(
+        `Failed to scrape Tenor: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
+  }
+
+  /**
+   * Finds and parses the JSON state embedded in a Tenor page. Tries, in
+   * order: `window.__STATE__ = {...};`, an inline `{"appConfig"...` object and
+   * the `__NEXT_DATA__` script tag.
+   *
+   * @throws Error when none of the candidates parses as JSON
+   */
+  private extractTenorMediaJson(html: string): unknown {
+    const candidates: string[] = [];
+
+    const stateMatch = html.match(/window\.__STATE__\s*=\s*(\{[\s\S]*?\});/);
+    if (stateMatch?.[1]) {
+      candidates.push(stateMatch[1]);
+    }
+
+    const appConfigIdx = html.indexOf('{"appConfig"');
+    if (appConfigIdx !== -1) {
+      // Look at no more than 2,000,000 characters from the match onwards.
+      const slice = html.slice(appConfigIdx, appConfigIdx + 2_000_000);
+      const endTagIdx = slice.indexOf('</script>');
+      candidates.push(endTagIdx !== -1 ? slice.slice(0, endTagIdx) : slice);
+    }
+
+    const nextDataMatch = html.match(
+      /<script[^>]*id="__NEXT_DATA__"[^>]*>([\s\S]*?)<\/script>/
+    );
+    if (nextDataMatch?.[1]) {
+      candidates.push(nextDataMatch[1]);
+    }
+
+    for (const raw of candidates) {
+      try {
+        return JSON.parse(raw);
+      } catch {
+        // The candidate may carry trailing script code; retry with just the
+        // first balanced `{...}` block.
+        const extracted = this.extractFirstJsonObject(raw);
+        if (extracted) {
+          try {
+            return JSON.parse(extracted);
+          } catch {
+            // Not JSON either - fall through to the next candidate.
+          }
+        }
+      }
+    }
+
+    throw new Error('Unable to locate Tenor media JSON in page');
+  }
+
+  /**
+   * Returns the first balanced `{...}` block in `s`, or `null` if there is
+   * none. Braces inside single- or double-quoted strings are ignored and
+   * backslash escapes are honoured.
+   */
+  private extractFirstJsonObject(s: string): string | null {
+    let depth = 0;
+    let inStr: string | null = null;
+    let escape = false;
+    let start = -1;
+
+    for (let i = 0; i < s.length; i++) {
+      const c = s[i];
+
+      if (inStr) {
+        if (escape) {
+          escape = false;
+        } else if (c === '\\') {
+          escape = true;
+        } else if (c === inStr) {
+          inStr = null;
+        }
+        continue;
+      }
+
+      if (c === '"' || c === "'") {
+        inStr = c;
+      } else if (c === '{') {
+        if (depth === 0) start = i;
+        depth++;
+      } else if (c === '}' && depth > 0) {
+        depth--;
+        if (depth === 0 && start !== -1) {
+          return s.slice(start, i + 1);
+        }
+      }
+    }
+
+    return null;
+  }
+
+  /**
+   * Walks arbitrary parsed JSON and collects every media rendition found under
+   * a known Tenor format key, grouped by that key.
+   */
+  private collectTenorMedia(data: unknown): Record<string, MediaVariant[]> {
+    const out: Record<string, MediaVariant[]> = {};
+
+    const formatKeys = new Set<string>([
+      'gif',
+      'mediumgif',
+      'tinygif',
+      'nanogif',
+      'mp4',
+      'loopedmp4',
+      'tinymp4',
+      'nanomp4',
+      'webm',
+      'tinywebm',
+      'nanowebm',
+      'preview',
+    ]);
+
+    const push = (format: string, url: string, meta?: Partial<MediaVariant>) => {
+      (out[format] ??= []).push({ url, ...meta });
+    };
+
+    const asNumber = (value: unknown): number | undefined =>
+      typeof value === 'number' ? value : undefined;
+
+    const visit = (node: unknown): void => {
+      if (!node || typeof node !== 'object') return;
+      if (Array.isArray(node)) {
+        for (const item of node) visit(item);
+        return;
+      }
+
+      const entries = Object.entries(node);
+
+      // Record this node's own renditions before descending into children.
+      for (const [key, value] of entries) {
+        if (!formatKeys.has(key)) continue;
+
+        if (typeof value === 'string') {
+          // Rendition given as a bare URL rather than a `{ url, dims }` object.
+          if (/^https?:\/\//.test(value)) push(key, value);
+        } else if (value && typeof value === 'object') {
+          const variant = value as Record<string, unknown>;
+          if (typeof variant.url === 'string') {
+            const dims = Array.isArray(variant.dims) ? variant.dims : [];
+            push(key, variant.url, {
+              width: asNumber(dims[0]) ?? asNumber(variant.width),
+              height: asNumber(dims[1]) ?? asNumber(variant.height),
+            });
+          }
+        }
+      }
+
+      for (const [, value] of entries) {
+        visit(value);
+      }
+    };
+
+    visit(data);
+    return out;
+  }
+
+  /**
+   * Picks the best rendition for a preferred format and size. Candidate format
+   * keys are tried in preference order and the widest entry of the first
+   * non-empty one wins; failing that, the first entry of any key containing
+   * `prefFormat` is used.
+   *
+   * @returns the chosen rendition, or `null` when nothing matches
+   */
+  private pickTenorVariant(
+    media: Record<string, MediaVariant[]>,
+    prefFormat: 'gif' | 'mp4' | 'webm',
+    prefSize: 'original' | 'medium' | 'tiny' | 'nano'
+  ): MediaVariant | null {
+    const sizeToFormats: Record<
+      'gif' | 'mp4' | 'webm',
+      Record<'original' | 'medium' | 'tiny' | 'nano', string[]>
+    > = {
+      gif: {
+        original: ['gif'],
+        medium: ['mediumgif', 'gif'],
+        tiny: ['tinygif', 'nanogif', 'gif'],
+        nano: ['nanogif', 'tinygif', 'gif'],
+      },
+      mp4: {
+        original: ['mp4', 'loopedmp4'],
+        medium: ['mp4', 'tinymp4'],
+        tiny: ['tinymp4', 'nanomp4', 'mp4'],
+        nano: ['nanomp4', 'tinymp4', 'mp4'],
+      },
+      webm: {
+        original: ['webm'],
+        medium: ['webm', 'tinywebm'],
+        tiny: ['tinywebm', 'nanowebm', 'webm'],
+        nano: ['nanowebm', 'tinywebm', 'webm'],
+      },
+    };
+
+    for (const key of sizeToFormats[prefFormat][prefSize]) {
+      const list = media[key];
+      if (list?.length) {
+        // Sort a copy: the caller's lists must not be reordered.
+        const [best] = [...list].sort((a, b) => (b.width ?? 0) - (a.width ?? 0));
+        if (best) return best;
+      }
+    }
+
+    for (const key of Object.keys(media)) {
+      const first = key.includes(prefFormat) ? media[key]?.[0] : undefined;
+      if (first) return first;
+    }
+
+    return null;
+  }
+
+  /** Returns the path segment after `/pin/` in `url`, or `null` if absent. */
+  private extractPinterestId(url: string): string | null {
+    return url.match(/\/pin\/([^\/\?]+)/)?.[1] ?? null;
+  }
+
+  /**
+   * Requests `url` without following redirects and returns the `Location`
+   * header, or `null` when there is none or the request fails.
+   */
+  private async resolveRedirect(url: string): Promise<string | null> {
+    try {
+      const response = await fetch(url, {
+        redirect: 'manual',
+        headers: {
+          'User-Agent': this.USER_AGENT,
+        },
+      });
+
+      return response.headers.get('location');
+    } catch (error) {
+      console.warn('Failed to resolve redirect:', error);
+      return null;
+    }
+  }
+
+  /**
+   * Classifies `url` by the file extension at the end of its path. Query string
+   * and fragment are ignored, so `?next=/a.gif` does not count as a GIF.
+   */
+  private mediaKindFromPath(url: string): MediaKind | null {
+    let pathname: string;
+    try {
+      pathname = new URL(url).pathname;
+    } catch {
+      pathname = url;
+    }
+
+    const extension = pathname.toLowerCase().match(/\.([a-z0-9]+)$/)?.[1];
+    return (extension && MEDIA_KIND_BY_EXTENSION.get(extension)) || null;
+  }
+
+  /** Whether the path of `url` ends in a known image or video extension. */
+  private isDirectMediaUrl(url: string): boolean {
+    return this.mediaKindFromPath(url) !== null;
+  }
+
+  /** Media kind implied by the extension of `url`; `image` when unknown. */
+  private guessMediaType(url: string): MediaKind {
+    return this.mediaKindFromPath(url) ?? 'image';
+  }
+
+  /** Maps a `Content-Type` header value to a media kind, ignoring parameters. */
+  private mediaKindFromContentType(header: string | null): MediaKind | null {
+    const mime = header?.split(';')[0]?.trim().toLowerCase() ?? '';
+
+    if (mime === 'image/gif') return 'gif';
+    if (mime.startsWith('image/')) return 'image';
+    if (mime.startsWith('video/')) return 'video';
+    return null;
+  }
+
+  /**
+   * Probes `url` for its content type, first with `HEAD` and, when that fails
+   * or yields no `Content-Type`, with a ranged `GET` (some servers reject
+   * `HEAD`).
+   *
+   * @returns the media kind, or `null` when the resource is not media or could
+   * not be fetched
+   */
+  private async checkMediaContentType(url: string): Promise<MediaKind | null> {
+    const probes: { method: string; headers?: Record<string, string> }[] = [
+      { method: 'HEAD' },
+      { method: 'GET', headers: { Range: 'bytes=0-1023' } },
+    ];
+
+    for (const probe of probes) {
+      try {
+        const response = await fetch(url, {
+          method: probe.method,
+          headers: { 'User-Agent': this.USER_AGENT, ...probe.headers },
+        });
+
+        const contentType = response.headers.get('content-type');
+        // Only headers are needed; drop the body so a server that ignores
+        // `Range` does not stream the whole file.
+        void response.body?.cancel().catch(() => undefined);
+
+        if (response.ok && contentType) {
+          return this.mediaKindFromContentType(contentType);
+        }
+      } catch {
+        // Network-level failure: try the next probe.
+      }
+    }
+
+    return null;
+  }
+}
+
+export const scraperService = new ScraperService();
diff --git a/apps/backend/tsconfig.json b/apps/backend/tsconfig.json
index 709cec4..26de728 100644
--- a/apps/backend/tsconfig.json
+++ b/apps/backend/tsconfig.json
@@ -27,6 +27,9 @@
     "paths": {
       "@/*": [
         "./src/*"
+      ],
+      "@p1ctos4ve/shared-types": [
+        "../../packages/shared-types/index.ts"
       ]
     }
   },