diff --git a/apps/web/.env.example b/apps/web/.env.example index a87a0d2bd33..16cbe59788f 100644 --- a/apps/web/.env.example +++ b/apps/web/.env.example @@ -169,6 +169,9 @@ YOUTUBE_API_KEY= # Scrape Creators SCRAPECREATORS_API_KEY= +# X API (Twitter) - for social engagement tracking +X_API_BEARER_TOKEN= + # Paypal PAYPAL_CLIENT_ID= PAYPAL_CLIENT_SECRET= diff --git a/apps/web/app/(ee)/api/bounties/[bountyId]/sync-social-metrics/route.ts b/apps/web/app/(ee)/api/bounties/[bountyId]/sync-social-metrics/route.ts index ebc6c786f2f..69ad14bd7fb 100644 --- a/apps/web/app/(ee)/api/bounties/[bountyId]/sync-social-metrics/route.ts +++ b/apps/web/app/(ee)/api/bounties/[bountyId]/sync-social-metrics/route.ts @@ -2,6 +2,7 @@ import { DubApiError } from "@/lib/api/errors"; import { getDefaultProgramIdOrThrow } from "@/lib/api/programs/get-default-program-id-or-throw"; import { parseRequestBody } from "@/lib/api/utils"; import { withWorkspace } from "@/lib/auth"; +import { detectBountySubmissionFraud } from "@/lib/bounty/api/detect-bounty-submission-fraud"; import { getBountyOrThrow } from "@/lib/bounty/api/get-bounty-or-throw"; import { getSocialMetricsUpdates } from "@/lib/bounty/api/get-social-metrics-updates"; import { resolveBountyDetails } from "@/lib/bounty/utils"; @@ -51,6 +52,7 @@ export const POST = withWorkspace( id: true, urls: true, status: true, + partnerId: true, partner: true, }, }, @@ -142,6 +144,35 @@ export const POST = withWorkspace( socialMetricsLastSyncedAt, }; + if (socialMetricCount != null) { + const partnerPlatformBaseline = await prisma.partnerPlatform.findUnique( + { + where: { + partnerId_type: { + partnerId: submission.partnerId, + type: bountyInfo.socialPlatform!.value, + }, + }, + select: { + medianViews: true, + medianLikes: true, + medianComments: true, + medianEngagementRate: true, + subscribers: true, + }, + }, + ); + + const fraudResult = detectBountySubmissionFraud({ + socialMetricCount, + bountyMetric: bountyInfo.socialMetrics!.metric, + partnerPlatform: partnerPlatformBaseline, + }); + + updateData.fraudRiskLevel = fraudResult.fraudRiskLevel; + updateData.fraudFlags = fraudResult.fraudFlags; + } + const hasMetCriteria = socialMetricCount != null && bountyInfo.socialMetrics?.minCount != null && diff --git a/apps/web/app/(ee)/api/cron/bounties/sync-social-metrics/route.ts b/apps/web/app/(ee)/api/cron/bounties/sync-social-metrics/route.ts index 73a49bc11f7..001fe126a7c 100644 --- a/apps/web/app/(ee)/api/cron/bounties/sync-social-metrics/route.ts +++ b/apps/web/app/(ee)/api/cron/bounties/sync-social-metrics/route.ts @@ -1,3 +1,4 @@ +import { detectBountySubmissionFraud } from "@/lib/bounty/api/detect-bounty-submission-fraud"; import { getSocialMetricsUpdates } from "@/lib/bounty/api/get-social-metrics-updates"; import { resolveBountyDetails } from "@/lib/bounty/utils"; import { qstash } from "@/lib/cron"; @@ -70,6 +71,7 @@ export const POST = withCron(async ({ rawBody }) => { urls: true, socialMetricCount: true, status: true, + partnerId: true, partner: { select: { email: true, @@ -99,6 +101,31 @@ export const POST = withCron(async ({ rawBody }) => { submissions, }); + // Batch-fetch partner platform baselines for fraud detection + const partnerIds = [...new Set(submissions.map((s) => s.partnerId))]; + const partnerPlatforms = await prisma.partnerPlatform.findMany({ + where: { + partnerId: { + in: partnerIds, + }, + type: bountyInfo.socialPlatform!.value, + verifiedAt: { + not: null, + }, + }, + select: { + partnerId: true, + medianViews: true, + medianLikes: true, + medianComments: true, + medianEngagementRate: true, + subscribers: true, + }, + }); + const baselineByPartnerId = new Map( + partnerPlatforms.map((p) => [p.partnerId, p]), + ); + const minCount = bountyInfo.socialMetrics?.minCount; if (!minCount) { @@ -134,6 +161,17 @@ export const POST = withCron(async ({ rawBody }) => { socialMetricsLastSyncedAt, }; + if (socialMetricCount != null) { + const fraudResult = detectBountySubmissionFraud({ + socialMetricCount, + bountyMetric: bountyInfo.socialMetrics!.metric, + partnerPlatform: baselineByPartnerId.get(submission.partnerId) ?? null, + }); + + updateData.fraudRiskLevel = fraudResult.fraudRiskLevel; + updateData.fraudFlags = fraudResult.fraudFlags; + } + if (shouldTransitionToSubmitted) { updateData.status = "submitted"; updateData.completedAt = now; diff --git a/apps/web/app/(ee)/api/cron/partner-platforms/route.ts b/apps/web/app/(ee)/api/cron/partner-platforms/route.ts index 5ffeee63c42..f07407c6ad1 100644 --- a/apps/web/app/(ee)/api/cron/partner-platforms/route.ts +++ b/apps/web/app/(ee)/api/cron/partner-platforms/route.ts @@ -1,9 +1,9 @@ +import { qstash } from "@/lib/cron"; +import { withCron } from "@/lib/cron/with-cron"; import { AccountNotFoundError, getSocialProfile, -} from "@/lib/api/scrape-creators/get-social-profile"; -import { qstash } from "@/lib/cron"; -import { withCron } from "@/lib/cron/with-cron"; +} from "@/lib/social-platforms/get-social-profile"; import { prisma } from "@dub/prisma"; import { APP_DOMAIN_WITH_NGROK } from "@dub/utils"; import { subDays } from "date-fns"; @@ -89,6 +89,9 @@ export const POST = withCron(async ({ rawBody }) => { }); const newStats = { + ...(socialProfile.platformId && { + platformId: socialProfile.platformId, + }), subscribers: socialProfile.subscribers, posts: socialProfile.posts, avatarUrl: socialProfile.avatarUrl, diff --git a/apps/web/app/(ee)/api/cron/queue-sync-social-engagement/route.ts b/apps/web/app/(ee)/api/cron/queue-sync-social-engagement/route.ts new file mode 100644 index 00000000000..f4c0083e26b --- /dev/null +++ b/apps/web/app/(ee)/api/cron/queue-sync-social-engagement/route.ts @@ -0,0 +1,72 @@ +import { enqueueBatchJobs } from "@/lib/cron/enqueue-batch-jobs"; +import { withCron } from "@/lib/cron/with-cron"; +import { prisma } from "@dub/prisma"; +import { APP_DOMAIN_WITH_NGROK } from "@dub/utils"; +import { logAndRespond } from "../utils"; + +export const dynamic = "force-dynamic"; + +const BATCH_SIZE = 100; + +// GET /api/cron/queue-sync-social-engagement +// Runs daily at 07:00 UTC. Queues one sync job per eligible partner platform. +export const GET = withCron(async () => { + let cursor: string | undefined; + let totalQueued = 0; + + while (true) { + const batch = await prisma.partnerPlatform.findMany({ + where: { + verifiedAt: { + not: null, + }, + platformId: { + not: null, + }, + type: { + in: ["twitter", "youtube"], + }, + }, + select: { + id: true, + }, + take: BATCH_SIZE, + orderBy: { + id: "asc", + }, + ...(cursor + ? { + cursor: { + id: cursor, + }, + skip: 1, + } + : {}), + }); + + if (batch.length === 0) { + break; + } + + const jobs = await enqueueBatchJobs( + batch.map((pp) => ({ + queueName: "sync-social-engagement", + url: `${APP_DOMAIN_WITH_NGROK}/api/cron/sync-social-engagement`, + deduplicationId: pp.id, + body: { + partnerPlatformId: pp.id, + }, + })), + ); + + totalQueued += jobs.length; + + if (batch.length < BATCH_SIZE) { + break; + } + + cursor = batch[batch.length - 1].id; + } + + return logAndRespond(`Queued ${totalQueued} social engagement sync jobs.`); +}); diff --git a/apps/web/app/(ee)/api/cron/sync-social-engagement/route.ts b/apps/web/app/(ee)/api/cron/sync-social-engagement/route.ts new file mode 100644 index 00000000000..c422ac943f0 --- /dev/null +++ b/apps/web/app/(ee)/api/cron/sync-social-engagement/route.ts @@ -0,0 +1,270 @@ +import { withCron } from "@/lib/cron/with-cron"; +import { getPlatformAdapter } from "@/lib/social-platforms"; +import { prisma } from "@dub/prisma"; +import { startOfDay, subDays } from "date-fns"; +import * as z from "zod/v4"; +import { logAndRespond } from "../utils"; + +export const dynamic = "force-dynamic"; + +const bodySchema = z.object({ + partnerPlatformId: z.string(), +}); + +// Max number of posts to retain per partner platform when pruning; older posts are deleted. +const MAX_POSTS_PER_PARTNER = 50; + +// Days to look back from now when no prior engagement data exists (first sync). +const INITIAL_LOOKBACK_DAYS = 30; + +// POST /api/cron/sync-social-engagement +// Processes a single partner platform: fetches engagement data, upserts daily +// snapshots + individual posts, prunes old data, and recomputes baselines. +export const POST = withCron(async ({ rawBody }) => { + const { partnerPlatformId } = bodySchema.parse(JSON.parse(rawBody)); + + const partnerPlatform = await prisma.partnerPlatform.findUnique({ + where: { + id: partnerPlatformId, + }, + select: { + id: true, + platformId: true, + identifier: true, + type: true, + }, + }); + + if (!partnerPlatform) { + return logAndRespond( + `Partner platform ${partnerPlatformId} not found. Skipping.`, + ); + } + + if (!partnerPlatform.platformId) { + return logAndRespond( + `Partner platform ${partnerPlatformId} (@${partnerPlatform.identifier}) has no platformId. Skipping.`, + ); + } + + const platform = getPlatformAdapter(partnerPlatform.type); + + if (!platform) { + return logAndRespond( + `No engagement adapter for platform type "${partnerPlatform.type}". Skipping.`, + ); + } + + const now = new Date(); + const todayStart = startOfDay(now); + + // Check if we have any existing engagement data to determine the fetch window. + // First sync: look back 30 days. Subsequent syncs: just fetch yesterday. + const existingCount = await prisma.partnerPlatformEngagement.count({ + where: { + partnerPlatformId, + }, + }); + + const startTime = + existingCount > 0 + ? startOfDay(subDays(now, 1)) + : startOfDay(subDays(now, INITIAL_LOOKBACK_DAYS)); + const endTime = todayStart; + + const posts = await platform.fetchPosts({ + platformId: partnerPlatform.platformId, + identifier: partnerPlatform.identifier, + startTime, + endTime, + }); + + const dailyEngagements = platform.calculateDailyEngagement(posts); + + // Daily aggregate upserts + const engagementResults = await Promise.allSettled( + dailyEngagements.map((day) => + prisma.partnerPlatformEngagement.upsert({ + where: { + partnerPlatformId_date: { + partnerPlatformId, + date: day.date, + }, + }, + create: { + partnerPlatformId, + date: day.date, + totalPosts: day.totalPosts, + totalImpressions: BigInt(day.totalImpressions), + totalLikes: BigInt(day.totalLikes), + totalComments: BigInt(day.totalComments), + engagementRate: day.engagementRate, + }, + update: { + totalPosts: day.totalPosts, + totalImpressions: BigInt(day.totalImpressions), + totalLikes: BigInt(day.totalLikes), + totalComments: BigInt(day.totalComments), + engagementRate: day.engagementRate, + }, + }), + ), + ); + + const engagementFailures = engagementResults.filter( + (r) => r.status === "rejected", + ); + + if (engagementFailures.length > 0) { + console.warn( + `Failed to upsert ${engagementFailures.length}/${dailyEngagements.length} daily engagements for ${partnerPlatformId}`, + ); + } + + // Prune old daily engagements (keep 31 days for timezone safety) + await prisma.partnerPlatformEngagement.deleteMany({ + where: { + partnerPlatformId, + date: { + lt: startOfDay(subDays(now, 31)), + }, + }, + }); + + // Per-post upserts + const postResults = await Promise.allSettled( + posts.map((post) => + prisma.partnerPlatformPost.upsert({ + where: { + partnerPlatformId_postId: { + partnerPlatformId, + postId: post.postId, + }, + }, + create: { + partnerPlatformId, + postId: post.postId, + publishedAt: post.publishedAt, + title: post.title, + views: BigInt(post.views), + likes: BigInt(post.likes), + comments: BigInt(post.comments), + engagementRate: post.engagementRate, + }, + update: { + views: BigInt(post.views), + likes: BigInt(post.likes), + comments: BigInt(post.comments), + engagementRate: post.engagementRate, + }, + }), + ), + ); + + const postFailures = postResults.filter((r) => r.status === "rejected"); + + if (postFailures.length > 0) { + console.warn( + `Failed to upsert ${postFailures.length}/${posts.length} posts for ${partnerPlatformId}`, + ); + } + + // Prune to last N posts per partner + const postCount = await prisma.partnerPlatformPost.count({ + where: { + partnerPlatformId, + }, + }); + + if (postCount > MAX_POSTS_PER_PARTNER) { + const postsToKeep = await prisma.partnerPlatformPost.findMany({ + where: { + partnerPlatformId, + }, + orderBy: { + publishedAt: "desc", + }, + take: MAX_POSTS_PER_PARTNER, + select: { + id: true, + }, + }); + + const keepIds = postsToKeep.map((p) => p.id); + + await prisma.partnerPlatformPost.deleteMany({ + where: { + partnerPlatformId, + id: { + notIn: keepIds, + }, + }, + }); + } + + // Recompute avgEngagementRate + const avgResult = await prisma.partnerPlatformEngagement.aggregate({ + where: { + partnerPlatformId, + date: { + gte: startOfDay(subDays(now, 30)), + }, + }, + _avg: { + engagementRate: true, + }, + }); + + const avgEngagementRate = avgResult._avg.engagementRate ?? 0; + + // Compute median baselines from stored posts + const allPosts = await prisma.partnerPlatformPost.findMany({ + where: { + partnerPlatformId, + }, + select: { + views: true, + likes: true, + comments: true, + engagementRate: true, + }, + }); + + const medianViews = computeMedian(allPosts.map((p) => Number(p.views))); + const medianLikes = computeMedian(allPosts.map((p) => Number(p.likes))); + const medianComments = computeMedian(allPosts.map((p) => Number(p.comments))); + const medianEngagementRate = computeMedian( + allPosts.map((p) => p.engagementRate), + ); + + // Update PartnerPlatform with baselines + await prisma.partnerPlatform.update({ + where: { + id: partnerPlatformId, + }, + data: { + avgEngagementRate, + medianViews: BigInt(Math.round(medianViews)), + medianLikes: BigInt(Math.round(medianLikes)), + medianComments: BigInt(Math.round(medianComments)), + medianEngagementRate, + }, + }); + + return logAndRespond( + `Synced @${partnerPlatform.identifier}: ${dailyEngagements.length} days, ${posts.length} posts, avgRate=${avgEngagementRate.toFixed(6)}, medianViews=${medianViews}`, + ); +}); + +function computeMedian(values: number[]): number { + if (values.length === 0) { + return 0; + } + + const sorted = [...values].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + + // For odd-length arrays, return the middle element; + // for even-length, average the two middle elements + return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; +} diff --git a/apps/web/app/(ee)/api/partner-profile/programs/[programId]/bounties/[bountyId]/social-content-stats/route.ts b/apps/web/app/(ee)/api/partner-profile/programs/[programId]/bounties/[bountyId]/social-content-stats/route.ts index e21566b7e17..f1b92b62140 100644 --- a/apps/web/app/(ee)/api/partner-profile/programs/[programId]/bounties/[bountyId]/social-content-stats/route.ts +++ b/apps/web/app/(ee)/api/partner-profile/programs/[programId]/bounties/[bountyId]/social-content-stats/route.ts @@ -1,9 +1,9 @@ import { DubApiError } from "@/lib/api/errors"; import { getProgramEnrollmentOrThrow } from "@/lib/api/programs/get-program-enrollment-or-throw"; -import { getSocialContent } from "@/lib/api/scrape-creators/get-social-content"; import { withPartnerProfile } from "@/lib/auth/partner"; import { getBountyOrThrow } from "@/lib/bounty/api/get-bounty-or-throw"; import { resolveBountyDetails } from "@/lib/bounty/utils"; +import { getSocialContent } from "@/lib/social-platforms/get-social-content"; import { ratelimit } from "@/lib/upstash"; import { NextResponse } from "next/server"; import * as z from "zod/v4"; diff --git a/apps/web/app/(ee)/api/partners/platforms/callback/route.ts b/apps/web/app/(ee)/api/partners/platforms/callback/route.ts index d7a97e44c27..09d5cd766f7 100644 --- a/apps/web/app/(ee)/api/partners/platforms/callback/route.ts +++ b/apps/web/app/(ee)/api/partners/platforms/callback/route.ts @@ -1,6 +1,6 @@ import { PARTNER_PLATFORMS_PROVIDERS } from "@/lib/api/partner-profile/partner-platforms-providers"; -import { getSocialProfile } from "@/lib/api/scrape-creators/get-social-profile"; import { getSession } from "@/lib/auth/utils"; +import { getSocialProfile } from "@/lib/social-platforms/get-social-profile"; import { redis } from "@/lib/upstash/redis"; import { prisma } from "@dub/prisma"; import { PartnerPlatform, PlatformType } from "@dub/prisma/client"; @@ -151,8 +151,9 @@ export async function GET(req: Request) { let socialStats: Pick< PartnerPlatform, - "subscribers" | "posts" | "views" | "avatarUrl" + "platformId" | "subscribers" | "posts" | "views" | "avatarUrl" > = { + platformId: null, subscribers: BigInt(0), posts: BigInt(0), views: BigInt(0), @@ -167,6 +168,7 @@ export async function GET(req: Request) { }); socialStats = { + platformId: socialProfile.platformId, subscribers: socialProfile.subscribers, posts: socialProfile.posts, views: socialProfile.views, @@ -190,6 +192,7 @@ export async function GET(req: Request) { data: { verifiedAt: new Date(), ...(metadata && { metadata }), + ...(socialStats.platformId && { platformId: socialStats.platformId }), subscribers: socialStats.subscribers, posts: socialStats.posts, views: socialStats.views, diff --git a/apps/web/app/api/ai/sync-embeddings/route.ts b/apps/web/app/api/ai/sync-embeddings/route.ts index 855c659b63d..6368faab8a1 100644 --- a/apps/web/app/api/ai/sync-embeddings/route.ts +++ b/apps/web/app/api/ai/sync-embeddings/route.ts @@ -39,7 +39,10 @@ export const POST = async (req: Request) => { try { const body = await req.json().catch(() => { - throw new DubApiError({ code: "bad_request", message: "Invalid JSON body." }); + throw new DubApiError({ + code: "bad_request", + message: "Invalid JSON body.", + }); }); const { url, delay } = schema.parse(body); const normalizedUrl = new URL(url).toString(); diff --git a/apps/web/lib/actions/partners/create-bounty-submission.ts b/apps/web/lib/actions/partners/create-bounty-submission.ts index 11af1708f42..5fc0761e0ee 100644 --- a/apps/web/lib/actions/partners/create-bounty-submission.ts +++ b/apps/web/lib/actions/partners/create-bounty-submission.ts @@ -3,12 +3,13 @@ import { createId } from "@/lib/api/create-id"; import { getWorkspaceUsers } from "@/lib/api/get-workspace-users"; import { getProgramEnrollmentOrThrow } from "@/lib/api/programs/get-program-enrollment-or-throw"; -import { getSocialContent } from "@/lib/api/scrape-creators/get-social-content"; +import { detectBountySubmissionFraud } from "@/lib/bounty/api/detect-bounty-submission-fraud"; import { BOUNTY_MAX_SUBMISSION_URLS } from "@/lib/bounty/constants"; import { getPlatformFromSocialUrl, resolveBountyDetails, } from "@/lib/bounty/utils"; +import { getSocialContent } from "@/lib/social-platforms/get-social-content"; import { createBountySubmissionInputSchema, submissionRequirementsSchema, @@ -235,6 +236,11 @@ export const createBountySubmissionAction = authPartnerActionClient select: { identifier: true, verifiedAt: true, + medianViews: true, + medianLikes: true, + medianComments: true, + medianEngagementRate: true, + subscribers: true, }, }); @@ -296,6 +302,15 @@ export const createBountySubmissionAction = authPartnerActionClient socialMetricsLastSyncedAt: new Date(), }; + const fraudResult = detectBountySubmissionFraud({ + socialMetricCount: metricValue, + bountyMetric: bountyInfo.socialMetrics.metric, + partnerPlatform: partnerPlatform, + }); + + submissionData.fraudRiskLevel = fraudResult.fraudRiskLevel; + submissionData.fraudFlags = fraudResult.fraudFlags; + if ( metricValue && bountyInfo.socialMetrics.minCount && diff --git a/apps/web/lib/actions/partners/verify-social-account-by-code.ts b/apps/web/lib/actions/partners/verify-social-account-by-code.ts index fc130f29588..90a8788da7c 100644 --- a/apps/web/lib/actions/partners/verify-social-account-by-code.ts +++ b/apps/web/lib/actions/partners/verify-social-account-by-code.ts @@ -1,6 +1,6 @@ "use server"; -import { getSocialProfile } from "@/lib/api/scrape-creators/get-social-profile"; +import { getSocialProfile } from "@/lib/social-platforms/get-social-profile"; import { ratelimit } from "@/lib/upstash"; import { redis } from "@/lib/upstash/redis"; import { prisma } from "@dub/prisma"; diff --git a/apps/web/lib/api/scrape-creators/client.ts b/apps/web/lib/api/scrape-creators/client.ts deleted file mode 100644 index 19d3a7aa304..00000000000 --- a/apps/web/lib/api/scrape-creators/client.ts +++ /dev/null @@ -1,58 +0,0 @@ -import { createFetch, createSchema } from "@better-fetch/fetch"; -import { PlatformType } from "@dub/prisma/client"; -import * as z from "zod/v4"; -import { socialContentSchema, socialProfileSchema } from "./schema"; - -export const scrapeCreatorsFetch = createFetch({ - baseURL: "https://api.scrapecreators.com", - retry: { - type: "linear", - attempts: 1, - delay: 3000, - }, - headers: { - "x-api-key": process.env.SCRAPECREATORS_API_KEY!, - }, - schema: createSchema( - { - // Fetch social profile - "/v1/:platform/:handleType": { - method: "get", - params: z.object({ - platform: z.enum(PlatformType), - handleType: z.enum(["channel", "profile"]), - }), - query: z.object({ - handle: z.string(), - }), - output: socialProfileSchema, - }, - - // Fetch social content - "/:version/:platform/:contentType": { - method: "get", - params: z.object({ - version: z.enum(["v1", "v2"]), - platform: z.enum(PlatformType), - contentType: z.enum(["post", "video", "tweet"]), - }), - query: z.object({ - url: z.string(), - }), - output: socialContentSchema, - }, - }, - { - strict: true, - }, - ), - onError: ({ error }) => { - console.error("[ScrapeCreators] Error", error); - }, - // onResponse: async ({ response }) => { - // console.log( - // "[ScrapeCreators] Response", - // prettyPrint(await response.clone().json()), - // ); - // }, -}); diff --git a/apps/web/lib/api/scrape-creators/get-social-content.ts b/apps/web/lib/api/scrape-creators/get-social-content.ts deleted file mode 100644 index bd3a1255bb4..00000000000 --- a/apps/web/lib/api/scrape-creators/get-social-content.ts +++ /dev/null @@ -1,224 +0,0 @@ -import { BOUNTY_SOCIAL_PLATFORM_VALUES } from "@/lib/bounty/constants"; -import { SocialContent } from "@/lib/types"; -import { redis } from "@/lib/upstash"; -import { PlatformType } from "@dub/prisma/client"; -import { hashStringSHA256, isValidUrl } from "@dub/utils"; -import { waitUntil } from "@vercel/functions"; -import { scrapeCreatorsFetch } from "./client"; - -const CACHE_KEY_PREFIX = "socialContentCache"; -const CACHE_TTL = 60 * 60; - -interface GetSocialContentStatsParams { - platform: PlatformType; - url: string; -} - -const PLATFORM_CONTENT_TYPE: Record< - Exclude, - "post" | "video" | "tweet" -> = { - youtube: "video", - instagram: "post", - twitter: "tweet", - tiktok: "video", -}; - -const EMPTY_SOCIAL_CONTENT: SocialContent = { - publishedAt: null, - platformId: null, - handle: null, - likes: 0, - views: 0, - title: null, - description: null, - thumbnailUrl: null, -}; - -export async function getSocialContent({ - platform, - url, -}: GetSocialContentStatsParams): Promise { - url = url?.trim(); - - // Invalid URL - if (!url || !isValidUrl(url)) { - return EMPTY_SOCIAL_CONTENT; - } - - url = normalizeUrl(url); - - // Check cache first - const urlHash = await hashStringSHA256(url); - const cacheKey = `${CACHE_KEY_PREFIX}:${urlHash}`; - - const cachedResult = await redis.get(cacheKey); - - if (cachedResult) { - return cachedResult; - } - - const contentType = PLATFORM_CONTENT_TYPE[platform]; - const version = platform === "tiktok" ? "v2" : "v1"; - - const { data, error } = await scrapeCreatorsFetch( - "/:version/:platform/:contentType", - { - params: { - version, - platform, - contentType, - }, - query: { - url, - }, - }, - ); - - if (error) { - // Post not found - // Cache empty result, so that we don't keep trying to scrape the same post. - if (error.status === 404) { - waitUntil( - redis.set(cacheKey, EMPTY_SOCIAL_CONTENT, { - ex: CACHE_TTL * 24 * 30, - }), - ); - } - - // We don't cache other errors because they are likely to be transient. - return EMPTY_SOCIAL_CONTENT; - } - - let result: SocialContent; - - switch (data.platform) { - case "youtube": { - result = { - publishedAt: new Date(data.publishDateText), - handle: data.channel.handle, - platformId: data.channel.id, - views: data.viewCountInt, - likes: data.likeCountInt, - title: data.title ?? null, - description: data.description ?? null, - thumbnailUrl: data.thumbnailUrl ?? null, - }; - break; - } - - case "instagram": { - const thumbnailUrl = data.display_url ?? data.thumbnail_src ?? null; - const carouselEdges = data.edge_sidecar_to_children?.edges ?? []; - - const thumbnailUrls = - carouselEdges.length > 0 - ? carouselEdges - .map( - (edge: { - node: { display_url?: string; thumbnail_src?: string }; - }) => edge.node.display_url ?? edge.node.thumbnail_src ?? "", - ) - .filter(Boolean) - : undefined; - - let mediaType: "image" | "video" | "carousel" | undefined; - - if (data.__typename === "GraphVideo") { - mediaType = "video"; - } else if ( - data.__typename === "GraphSidecar" || - (thumbnailUrls !== undefined && thumbnailUrls.length > 0) - ) { - mediaType = "carousel"; - } else if (data.__typename === "GraphImage") { - mediaType = "image"; - } else if (thumbnailUrls === undefined && data.video_view_count > 0) { - mediaType = "video"; - } - - result = { - publishedAt: new Date(data.taken_at_timestamp * 1000), - handle: data.owner.username, - platformId: null, - views: data.video_view_count, - likes: data.edge_media_preview_like.count, - title: null, - description: data.edge_media_to_caption?.edges?.[0]?.node?.text ?? null, - thumbnailUrl, - mediaType, - thumbnailUrls, - }; - break; - } - - case "twitter": { - result = { - publishedAt: new Date(data.legacy.created_at), - handle: data.core.user_results.result.core.screen_name, - platformId: null, - views: data.views.count, - likes: data.legacy.favorite_count, - title: null, - description: data.legacy.full_text ?? null, - thumbnailUrl: null, - }; - break; - } - - case "tiktok": { - result = { - publishedAt: new Date(data.create_time_utc), - handle: data.author.unique_id, - platformId: null, - views: data.statistics.play_count, - likes: data.statistics.digg_count, - title: null, - description: data.desc ?? null, - thumbnailUrl: data.video?.cover?.url_list?.[0] ?? null, - }; - break; - } - - default: - result = { - publishedAt: null, - handle: null, - platformId: null, - views: 0, - likes: 0, - title: null, - description: null, - thumbnailUrl: null, - }; - } - - if (BOUNTY_SOCIAL_PLATFORM_VALUES.includes(data.platform)) { - waitUntil(redis.set(cacheKey, result, { ex: CACHE_TTL })); - } - - return result; -} - -function normalizeUrl(raw: string) { - const url = new URL(raw); - - // Lowercase hostname - url.hostname = url.hostname.toLowerCase(); - - // Remove tracking params - [ - "utm_source", - "utm_medium", - "utm_campaign", - "si", - "feature", - "igshid", - "t", - ].forEach((p) => url.searchParams.delete(p)); - - // Remove trailing slash - url.pathname = url.pathname.replace(/\/$/, ""); - - return url.toString(); -} diff --git a/apps/web/lib/api/scrape-creators/get-social-profile.ts b/apps/web/lib/api/scrape-creators/get-social-profile.ts deleted file mode 100644 index 1ea893b3837..00000000000 --- a/apps/web/lib/api/scrape-creators/get-social-profile.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { PartnerPlatform, PlatformType } from "@dub/prisma/client"; -import { scrapeCreatorsFetch } from "./client"; - -type SocialProfile = Pick< - PartnerPlatform, - "platformId" | "subscribers" | "posts" | "views" | "avatarUrl" -> & { - description: string | null; -}; - -interface GetSocialProfileParams { - platform: PlatformType; - handle: string; -} - -export class AccountNotFoundError extends Error { - constructor(message: string) { - super(message); - this.name = "AccountNotFoundError"; - } -} - -export async function getSocialProfile({ - platform, - handle, -}: GetSocialProfileParams) { - const { data, error } = await scrapeCreatorsFetch( - "/v1/:platform/:handleType", - { - params: { - platform, - handleType: platform === "youtube" ? "channel" : "profile", - }, - query: { - handle, - }, - }, - ); - - if (error) { - throw new Error( - "We were unable to retrieve your social media profile. Please try again.", - ); - } - - // Check if account doesn't exist - if (data.platform === "account_not_found") { - throw new AccountNotFoundError( - (data as { message?: string }).message || "Account doesn't exist", - ); - } - - let socialProfile: SocialProfile = { - description: null, - platformId: null, - subscribers: BigInt(0), - posts: BigInt(0), - views: BigInt(0), - avatarUrl: null, - }; - - switch (data.platform) { - case "youtube": { - const largestAvatar = data.avatar.image.sources.sort( - (a, b) => b.width - a.width, - )[0]; - - socialProfile.description = data.description; - socialProfile.platformId = data.channelId; - socialProfile.subscribers = BigInt(data.subscriberCount); - socialProfile.posts = BigInt(data.videoCount); - socialProfile.views = BigInt(data.viewCount); - socialProfile.avatarUrl = largestAvatar?.url ?? null; - break; - } - - case "instagram": { - socialProfile.description = data.data.user.biography; - socialProfile.subscribers = BigInt(data.data.user.edge_followed_by.count); - socialProfile.posts = BigInt( - data.data.user.edge_owner_to_timeline_media.count, - ); - socialProfile.avatarUrl = data.data.user.profile_pic_url; - break; - } - - case "tiktok": { - socialProfile.description = data.user.signature; - socialProfile.platformId = data.user.id; - socialProfile.subscribers = BigInt(data.stats.followerCount); - socialProfile.posts = BigInt(data.stats.videoCount); - socialProfile.avatarUrl = data.user.avatarThumb; - break; - } - - case "twitter": { - socialProfile.description = data.legacy.description; - socialProfile.platformId = data.rest_id; - socialProfile.subscribers = BigInt(data.legacy.followers_count); - socialProfile.posts = BigInt(data.legacy.statuses_count); - socialProfile.avatarUrl = data.avatar.image_url; - break; - } - } - - return socialProfile; -} diff --git a/apps/web/lib/api/scrape-creators/schema.ts b/apps/web/lib/api/scrape-creators/schema.ts deleted file mode 100644 index 355d8f107fd..00000000000 --- a/apps/web/lib/api/scrape-creators/schema.ts +++ /dev/null @@ -1,315 +0,0 @@ -import * as z from "zod/v4"; - -export const socialProfileSchema = z.preprocess( - (data: any) => { - if (typeof data === "object" && data !== null) { - // Check for "account doesn't exist" response - if ( - "message" in data && - typeof data.message === "string" && - (data.message.toLowerCase().includes("doesn't exist") || - data.message.toLowerCase().includes("does not exist") || - data.message.toLowerCase().includes("not found")) - ) { - return { - ...data, - platform: "account_not_found", - }; - } - - // YouTube detection - if ("description" in data && "channelId" in data) { - return { - ...data, - platform: "youtube", - }; - } - - // Instagram detection - if ("data" in data && typeof data.data === "object" && data.data?.user) { - return { - ...data, - platform: "instagram", - }; - } - - // TikTok detection - if ("user" in data && "stats" in data) { - return { - ...data, - platform: "tiktok", - }; - } - - // Twitter detection: check for rest_id and legacy fields (more reliable than is_blue_verified) - if ("rest_id" in data && "legacy" in data) { - return { - ...data, - platform: "twitter", - }; - } - } - - return data; - }, - - z.discriminatedUnion("platform", [ - z.object({ - platform: z.literal("account_not_found"), - handle: z.string().optional(), - message: z.string().optional(), - }), - - z.object({ - platform: z.literal("youtube"), - description: z.string(), - channelId: z.string(), - videoCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - subscriberCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - viewCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - avatar: z.object({ - image: z.object({ - sources: z.array( - z.object({ - url: z.url(), - width: z.number(), - height: z.number(), - }), - ), - }), - }), - }), - - z.object({ - platform: z.literal("instagram"), - data: z.object({ - user: z.object({ - biography: z.string(), - edge_followed_by: z.object({ - count: z - .number() - .nullish() - .transform((val) => val ?? 0), - }), - edge_owner_to_timeline_media: z.object({ - count: z - .number() - .nullish() - .transform((val) => val ?? 0), - }), - profile_pic_url: z.url().nullish().default(null), - }), - }), - }), - - z.object({ - platform: z.literal("tiktok"), - user: z.object({ - id: z.string(), - signature: z.string(), - uniqueId: z.string(), - avatarThumb: z.url().nullish().default(null), - }), - stats: z.object({ - followerCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - videoCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - heartCount: z - .number() - .nullish() - .transform((val) => val ?? 0), - }), - }), - - z.object({ - platform: z.literal("twitter"), - rest_id: z.string(), - legacy: z.object({ - description: z.string(), - followers_count: z - .number() - .nullish() - .transform((val) => val ?? 0), - statuses_count: z - .number() - .nullish() - .transform((val) => val ?? 0), - }), - avatar: z.object({ - image_url: z.url().nullish().default(null), - }), - }), - ]), -); - -export const socialContentSchema = z.preprocess( - (data: any) => { - if (typeof data === "object" && data !== null) { - // YouTube detection - if ("viewCountInt" in data && "likeCountInt" in data) { - return { - ...data, - platform: "youtube", - }; - } - - // Instagram detection - if ("data" in data && "xdt_shortcode_media" in data.data) { - return { - ...data.data.xdt_shortcode_media, - platform: "instagram", - }; - } - - // Twitter detection - if ("__typename" in data && data.__typename === "Tweet") { - return { - ...data, - platform: "twitter", - }; - } - - // TikTok detection - if ("aweme_detail" in data) { - return { - ...data.aweme_detail, - platform: "tiktok", - }; - } - } - - return data; - }, - z.discriminatedUnion("platform", [ - z.object({ - platform: z.literal("youtube"), - publishDateText: z.string(), - channel: z.object({ - id: z.string(), - handle: z.string(), - }), - viewCountInt: z - .number() - .nullable() - .transform((val) => val ?? 0), - likeCountInt: z - .number() - .nullable() - .transform((val) => val ?? 0), - title: z.string().nullish(), - description: z.string().nullish(), - thumbnailUrl: z.string().nullish(), - }), - - z.object({ - platform: z.literal("instagram"), - taken_at_timestamp: z.number(), - owner: z.object({ - username: z.string(), - }), - video_view_count: z - .number() - .nullish() - .transform((val) => val ?? 0), - edge_media_preview_like: z.object({ - count: z - .number() - .nullable() - .transform((val) => val ?? 0), - }), - edge_media_to_caption: z - .object({ - edges: z.array( - z.object({ - node: z.object({ text: z.string() }), - }), - ), - }) - .optional(), - display_url: z.string().optional(), - thumbnail_src: z.string().optional(), - __typename: z.string().optional(), - edge_sidecar_to_children: z - .object({ - edges: z.array( - z.object({ - node: z.object({ - display_url: z.string().optional(), - thumbnail_src: z.string().optional(), - }), - }), - ), - }) - .optional(), - }), - - z.object({ - platform: z.literal("twitter"), - core: z.object({ - user_results: z.object({ - result: z.object({ - core: z.object({ - screen_name: z.string(), - }), - }), - }), - }), - views: z.object({ - count: z - .string() - .nullable() - .transform((val) => (val == null ? 0 : Number(val))), - }), - legacy: z.object({ - created_at: z.string(), - favorite_count: z - .number() - .nullable() - .transform((val) => val ?? 0), - full_text: z.string().optional(), - }), - }), - - z.object({ - platform: z.literal("tiktok"), - create_time_utc: z.string(), - author: z.object({ - unique_id: z.string(), - }), - statistics: z.object({ - play_count: z - .number() - .nullable() - .transform((val) => val ?? 0), - digg_count: z - .number() - .nullable() - .transform((val) => val ?? 0), - }), - desc: z.string().optional(), - video: z - .object({ - cover: z - .object({ - url_list: z.array(z.string()), - }) - .optional(), - }) - .optional(), - }), - ]), -); diff --git a/apps/web/lib/bounty/api/detect-bounty-submission-fraud.ts b/apps/web/lib/bounty/api/detect-bounty-submission-fraud.ts new file mode 100644 index 00000000000..cc50ca526e1 --- /dev/null +++ b/apps/web/lib/bounty/api/detect-bounty-submission-fraud.ts @@ -0,0 +1,187 @@ +import { PartnerPlatform } from "@dub/prisma/client"; + +// Thresholds for fraud detection signals +const SPIKE_MULTIPLIER = 5; +const ENGAGEMENT_RATE_MULTIPLIER = 3; +const LOW_FOLLOWER_THRESHOLD = 500; +const HIGH_ENGAGEMENT_THRESHOLD = 10_000; + +type FraudRiskLevel = "low" | "medium" | "high"; + +type FraudFlag = + | "engagementSpike" + | "lowFollowerHighEngagement" + | "engagementRateAnomaly"; + +const FRAUD_FLAG_SEVERITY: Record = { + engagementSpike: "high", + lowFollowerHighEngagement: "high", + engagementRateAnomaly: "medium", +}; + +type PartnerPlatformBaseline = Pick< + PartnerPlatform, + | "medianViews" + | "medianLikes" + | "medianComments" + | "medianEngagementRate" + | "subscribers" +>; + +interface DetectBountyFraudInput { + socialMetricCount: number; + bountyMetric: "views" | "likes"; + partnerPlatform: PartnerPlatformBaseline | null; +} + +interface BountyFraudResult { + fraudRiskLevel: FraudRiskLevel | null; + fraudFlags: FraudFlag[]; +} + +/** + * Detects potential botting/fraud signals on a bounty submission by comparing + * the submission's engagement metrics against the partner's historical baselines. + * + * - engagementSpike: metrics far exceed partner's median + * - engagementRateAnomaly: engagement rate significantly above baseline + * - lowFollowerHighEngagement: disproportionate reach vs audience size + */ +export function detectBountySubmissionFraud({ + socialMetricCount, + bountyMetric, + partnerPlatform, +}: DetectBountyFraudInput): BountyFraudResult { + console.log("[detectBountySubmissionFraud] input", { + socialMetricCount, + bountyMetric, + partnerPlatform, + }); + + // New/unverified accounts with no historical posts synced + if (!partnerPlatform) { + return { + fraudRiskLevel: null, + fraudFlags: [], + }; + } + + // Ensure the required baseline fields are present for the given metric + const hasRequiredBaseline = + bountyMetric === "views" + ? partnerPlatform.medianViews !== null && + partnerPlatform.subscribers !== null + : partnerPlatform.medianLikes !== null && + partnerPlatform.subscribers !== null; + + if (!hasRequiredBaseline) { + return { + fraudRiskLevel: null, + fraudFlags: [], + }; + } + + const flags: FraudFlag[] = []; + + const median = + bountyMetric === "views" + ? Number(partnerPlatform.medianViews) + : Number(partnerPlatform.medianLikes); + + // Signal: engagementSpike + // Submission metrics far exceed the partner's historical median + if (median > 0 && socialMetricCount > median * SPIKE_MULTIPLIER) { + console.log("[detectBountySubmissionFraud] engagementSpike detected", { + ratio: socialMetricCount / median, + }); + + flags.push("engagementSpike"); + } + + // Signal: engagementRateAnomaly + // When tracking views: check if the ratio of likes-to-views implied by the + // submission is significantly higher than the partner's baseline engagement rate + if ( + bountyMetric === "views" && + partnerPlatform.medianEngagementRate !== null && + partnerPlatform.medianEngagementRate > 0 && + partnerPlatform.medianLikes !== null + ) { + const medianLikes = Number(partnerPlatform.medianLikes); + const medianViews = Number(partnerPlatform.medianViews); + + if (medianViews > 0 && medianLikes > 0) { + // Expected likes at this view count based on baseline engagement rate + const expectedLikes = + socialMetricCount * partnerPlatform.medianEngagementRate; + + // If views are growing but the view count implies an engagement pattern + // that would require an abnormally high rate, flag it + const currentRatio = socialMetricCount / medianViews; + + if ( + currentRatio > ENGAGEMENT_RATE_MULTIPLIER && + expectedLikes > medianLikes * ENGAGEMENT_RATE_MULTIPLIER + ) { + console.log( + "[detectBountySubmissionFraud] engagementRateAnomaly detected", + { + currentRatio, + expectedLikes, + medianLikes, + medianEngagementRate: partnerPlatform.medianEngagementRate, + }, + ); + + flags.push("engagementRateAnomaly"); + } + } + } + + // Signal: lowFollowerHighEngagement + // Disproportionate reach relative to audience size + if ( + partnerPlatform.subscribers !== null && + Number(partnerPlatform.subscribers) < LOW_FOLLOWER_THRESHOLD && + socialMetricCount > HIGH_ENGAGEMENT_THRESHOLD + ) { + console.log( + "[detectBountySubmissionFraud] lowFollowerHighEngagement detected", + { + subscribers: Number(partnerPlatform.subscribers), + socialMetricCount, + }, + ); + + flags.push("lowFollowerHighEngagement"); + } + + return { + fraudRiskLevel: getHighestSeverity(flags), + fraudFlags: flags, + }; +} + +function getHighestSeverity(flags: FraudFlag[]): FraudRiskLevel | null { + if (flags.length === 0) { + return null; + } + + const severityRank: Record = { + low: 0, + medium: 1, + high: 2, + }; + + let highest: FraudRiskLevel = "low"; + + for (const flag of flags) { + const severity = FRAUD_FLAG_SEVERITY[flag]; + + if (severityRank[severity] > severityRank[highest]) { + highest = severity; + } + } + + return highest; +} diff --git a/apps/web/lib/bounty/api/get-social-metrics-updates.ts b/apps/web/lib/bounty/api/get-social-metrics-updates.ts index dc8be280a2e..e2f9f16b137 100644 --- a/apps/web/lib/bounty/api/get-social-metrics-updates.ts +++ b/apps/web/lib/bounty/api/get-social-metrics-updates.ts @@ -1,5 +1,5 @@ -import { getSocialContent } from "@/lib/api/scrape-creators/get-social-content"; import { resolveBountyDetails } from "@/lib/bounty/utils"; +import { getSocialContent } from "@/lib/social-platforms/get-social-content"; import { Bounty, BountySubmission } from "@dub/prisma/client"; export type SocialMetricsUpdate = Pick< diff --git a/apps/web/lib/cron/enqueue-batch-jobs.ts b/apps/web/lib/cron/enqueue-batch-jobs.ts index 6bb58e50f67..10653a1074d 100644 --- a/apps/web/lib/cron/enqueue-batch-jobs.ts +++ b/apps/web/lib/cron/enqueue-batch-jobs.ts @@ -7,7 +7,8 @@ type EnqueueBatchJobsProps = PublishBatchRequest & { | "ban-partner" | "send-partner-summary" | "create-discount-code" - | "sync-bounty-social-metrics"; + | "sync-bounty-social-metrics" + | "sync-social-engagement"; }; // Generic helper to enqueue a batch of QStash jobs. diff --git a/apps/web/lib/cron/with-cron.ts b/apps/web/lib/cron/with-cron.ts index 5e5c43697c6..efacaf40136 100644 --- a/apps/web/lib/cron/with-cron.ts +++ b/apps/web/lib/cron/with-cron.ts @@ -57,7 +57,14 @@ export const withCron = (handler: WithCronHandler) => { console.error(error); const errorMessage = - error instanceof Error ? error.message : String(error); + error instanceof Error + ? JSON.stringify({ + ...error, + name: error.name, + message: error.message, + stack: error.stack, + }) + : String(error); // Send error to Axiom logger.error(errorMessage, error); diff --git a/apps/web/lib/social-platforms/base-adapter.ts b/apps/web/lib/social-platforms/base-adapter.ts new file mode 100644 index 00000000000..bf4f82b726f --- /dev/null +++ b/apps/web/lib/social-platforms/base-adapter.ts @@ -0,0 +1,100 @@ +import { SocialContent } from "@/lib/types"; +import { PartnerPlatform, PlatformType } from "@dub/prisma/client"; + +export type SocialProfile = Pick< + PartnerPlatform, + "platformId" | "subscribers" | "posts" | "views" | "avatarUrl" +> & { + description: string | null; +}; + +export interface DailyEngagement { + date: Date; + totalPosts: number; + totalImpressions: number; + totalLikes: number; + totalComments: number; + engagementRate: number; +} + +export interface PostEngagement { + postId: string; + publishedAt: Date; + title: string | null; + views: number; + likes: number; + comments: number; + engagementRate: number; +} + +export interface FetchPostsParams { + platformId: string; + identifier: string; + startTime: Date; + endTime: Date; +} + +export abstract class BasePlatformAdapter { + abstract platform: PlatformType; + + abstract fetchPosts(params: FetchPostsParams): Promise; + + abstract fetchPost(url: string): Promise; + + abstract fetchProfile(handle: string): Promise; + + /** + * Groups posts by calendar day (UTC) and aggregates metrics. + * Engagement rate is an impression-weighted average across posts. + */ + calculateDailyEngagement(posts: PostEngagement[]): DailyEngagement[] { + if (posts.length === 0) { + return []; + } + + const dayMap = new Map(); + + for (const post of posts) { + const dayKey = post.publishedAt.toISOString().slice(0, 10); // "YYYY-MM-DD" + const existing = dayMap.get(dayKey); + + if (existing) { + existing.push(post); + } else { + dayMap.set(dayKey, [post]); + } + } + + const results: DailyEngagement[] = []; + + for (const [dayKey, dayPosts] of dayMap) { + let totalImpressions = 0; + let totalLikes = 0; + let totalComments = 0; + let totalEngagements = 0; + + for (const post of dayPosts) { + totalImpressions += post.views; + totalLikes += post.likes; + totalComments += post.comments; + // engagementRate * views = total engagements (including platform-specific + // signals like retweets), so weighted sum preserves the per-post formula + totalEngagements += post.engagementRate * post.views; + } + + const engagementRate = + totalImpressions > 0 ? totalEngagements / totalImpressions : 0; + + results.push({ + date: new Date(`${dayKey}T00:00:00.000Z`), + totalPosts: dayPosts.length, + totalImpressions, + totalLikes, + totalComments, + engagementRate, + }); + } + + return results.sort((a, b) => a.date.getTime() - b.date.getTime()); + } +} diff --git a/apps/web/lib/social-platforms/get-social-content.ts b/apps/web/lib/social-platforms/get-social-content.ts new file mode 100644 index 00000000000..0be23c38c25 --- /dev/null +++ b/apps/web/lib/social-platforms/get-social-content.ts @@ -0,0 +1,107 @@ +import { BOUNTY_SOCIAL_PLATFORM_VALUES } from "@/lib/bounty/constants"; +import { getPlatformAdapter } from "@/lib/social-platforms"; +import { ContentNotFoundError } from "@/lib/social-platforms/scrape-creators"; +import { BountySocialPlatform, SocialContent } from "@/lib/types"; +import { redis } from "@/lib/upstash"; +import { PlatformType } from "@dub/prisma/client"; +import { hashStringSHA256, isValidUrl } from "@dub/utils"; +import { waitUntil } from "@vercel/functions"; + +const CACHE_KEY_PREFIX = "socialContentCache"; +const CACHE_TTL = 60 * 60; + +interface GetSocialContentStatsParams { + platform: PlatformType; + url: string; +} + +const EMPTY_SOCIAL_CONTENT: SocialContent = { + publishedAt: null, + platformId: null, + handle: null, + likes: 0, + views: 0, + title: null, + description: null, + thumbnailUrl: null, +}; + +export async function getSocialContent({ + platform, + url, +}: GetSocialContentStatsParams): Promise { + url = url?.trim(); + + // Invalid URL + if (!url || !isValidUrl(url)) { + return EMPTY_SOCIAL_CONTENT; + } + + url = normalizeUrl(url); + + // Check cache first + const urlHash = await hashStringSHA256(url); + const cacheKey = `${CACHE_KEY_PREFIX}:${urlHash}`; + + const cachedResult = await redis.get(cacheKey); + + if (cachedResult) { + return cachedResult; + } + + const platformAdapter = getPlatformAdapter(platform); + + if (!platformAdapter) { + return EMPTY_SOCIAL_CONTENT; + } + + let result: SocialContent; + + try { + result = await platformAdapter.fetchPost(url); + } catch (error) { + // Post not found + // Cache empty result, so that we don't keep trying to scrape the same post. + if (error instanceof ContentNotFoundError && error.status === 404) { + waitUntil( + redis.set(cacheKey, EMPTY_SOCIAL_CONTENT, { + ex: CACHE_TTL * 24 * 30, + }), + ); + } + + // We don't cache other errors because they are likely to be transient. + return EMPTY_SOCIAL_CONTENT; + } + + if ( + BOUNTY_SOCIAL_PLATFORM_VALUES.includes(platform as BountySocialPlatform) + ) { + waitUntil(redis.set(cacheKey, result, { ex: CACHE_TTL })); + } + + return result; +} + +function normalizeUrl(raw: string) { + const url = new URL(raw); + + // Lowercase hostname + url.hostname = url.hostname.toLowerCase(); + + // Remove tracking params + [ + "utm_source", + "utm_medium", + "utm_campaign", + "si", + "feature", + "igshid", + "t", + ].forEach((p) => url.searchParams.delete(p)); + + // Remove trailing slash + url.pathname = url.pathname.replace(/\/$/, ""); + + return url.toString(); +} diff --git a/apps/web/lib/social-platforms/get-social-profile.ts b/apps/web/lib/social-platforms/get-social-profile.ts new file mode 100644 index 00000000000..184cad47be5 --- /dev/null +++ b/apps/web/lib/social-platforms/get-social-profile.ts @@ -0,0 +1,21 @@ +import { getPlatformAdapter } from "@/lib/social-platforms"; +import { PlatformType } from "@dub/prisma/client"; +export { AccountNotFoundError } from "@/lib/social-platforms/scrape-creators"; + +interface GetSocialProfileParams { + platform: PlatformType; + handle: string; +} + +export async function getSocialProfile({ + platform, + handle, +}: GetSocialProfileParams) { + const platformAdapter = getPlatformAdapter(platform); + + if (!platformAdapter) { + throw new Error("Unsupported platform"); + } + + return platformAdapter.fetchProfile(handle); +} diff --git a/apps/web/lib/social-platforms/index.ts b/apps/web/lib/social-platforms/index.ts new file mode 100644 index 00000000000..25b419ac5e5 --- /dev/null +++ b/apps/web/lib/social-platforms/index.ts @@ -0,0 +1,19 @@ +import { PlatformType } from "@dub/prisma/client"; +import { BasePlatformAdapter } from "./base-adapter"; +import { InstagramAdapter } from "./instagram-adapter"; +import { TikTokAdapter } from "./tiktok-adapter"; +import { XAdapter } from "./x-adapter"; +import { YouTubeAdapter } from "./youtube-adapter"; + +const PLATFORM_ADAPTERS: Partial> = { + twitter: new XAdapter(), + youtube: new YouTubeAdapter(), + instagram: new InstagramAdapter(), + tiktok: new TikTokAdapter(), +}; + +export function getPlatformAdapter( + platform: PlatformType, +): BasePlatformAdapter | null { + return PLATFORM_ADAPTERS[platform] ?? null; +} diff --git a/apps/web/lib/social-platforms/instagram-adapter.ts b/apps/web/lib/social-platforms/instagram-adapter.ts new file mode 100644 index 00000000000..c0b8e54789d --- /dev/null +++ b/apps/web/lib/social-platforms/instagram-adapter.ts @@ -0,0 +1,215 @@ +import { SocialContent } from "@/lib/types"; +import { PlatformType } from "@dub/prisma/client"; +import * as z from "zod/v4"; +import { + BasePlatformAdapter, + type FetchPostsParams, + type PostEngagement, + type SocialProfile, +} from "./base-adapter"; +import { + AccountNotFoundError, + ContentNotFoundError, + isAccountNotFound, + scrapeCreatorsFetch, +} from "./scrape-creators"; + +const instagramContentSchema = z.object({ + taken_at_timestamp: z.number(), + owner: z.object({ + username: z.string(), + }), + video_view_count: z + .number() + .nullish() + .transform((val) => val ?? 0), + edge_media_preview_like: z.object({ + count: z + .number() + .nullable() + .transform((val) => val ?? 0), + }), + edge_media_to_caption: z + .object({ + edges: z.array( + z.object({ + node: z.object({ text: z.string() }), + }), + ), + }) + .optional(), + display_url: z.string().optional(), + thumbnail_src: z.string().optional(), + __typename: z.string().optional(), + edge_sidecar_to_children: z + .object({ + edges: z.array( + z.object({ + node: z.object({ + display_url: z.string().optional(), + thumbnail_src: z.string().optional(), + }), + }), + ), + }) + .optional(), +}); + +type InstagramContent = z.infer; + +const instagramProfileSchema = z.object({ + data: z.object({ + user: z.object({ + biography: z.string(), + edge_followed_by: z.object({ + count: z + .number() + .nullish() + .transform((val) => val ?? 0), + }), + edge_owner_to_timeline_media: z.object({ + count: z + .number() + .nullish() + .transform((val) => val ?? 0), + }), + profile_pic_url: z.url().nullish().default(null), + }), + }), +}); + +export class InstagramAdapter extends BasePlatformAdapter { + platform: PlatformType = "instagram"; + + async fetchProfile(handle: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/v1/:platform/:handleType", + { + params: { + platform: "instagram", + handleType: "profile", + }, + query: { + handle, + }, + }, + ); + + if (error) { + throw new Error( + "We were unable to retrieve your social media profile. Please try again.", + ); + } + + if (isAccountNotFound(raw)) { + throw new AccountNotFoundError( + (raw as any).message || "Account doesn't exist", + ); + } + + const data = instagramProfileSchema.parse(raw); + + return { + description: data.data.user.biography, + platformId: null, + subscribers: BigInt(data.data.user.edge_followed_by.count), + posts: BigInt(data.data.user.edge_owner_to_timeline_media.count), + views: BigInt(0), + avatarUrl: data.data.user.profile_pic_url, + }; + } + + async fetchPost(url: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/:version/:platform/:contentType", + { + params: { + version: "v1", + platform: "instagram", + contentType: "post", + }, + query: { + url, + }, + }, + ); + + if (error) { + throw new ContentNotFoundError( + error.status ?? 500, + error.statusText ?? "Unknown error", + ); + } + + // Instagram response wraps content in data.xdt_shortcode_media + const unwrapped = + typeof raw === "object" && + raw !== null && + "data" in raw && + typeof (raw as any).data === "object" && + "xdt_shortcode_media" in (raw as any).data + ? (raw as any).data.xdt_shortcode_media + : raw; + + const data = instagramContentSchema.parse(unwrapped); + + const thumbnailUrl = data.display_url ?? data.thumbnail_src ?? null; + const carouselEdges = data.edge_sidecar_to_children?.edges ?? []; + + const thumbnailUrls = + carouselEdges.length > 0 + ? carouselEdges + .map( + (edge: { + node: { display_url?: string; thumbnail_src?: string }; + }) => edge.node.display_url ?? edge.node.thumbnail_src ?? "", + ) + .filter(Boolean) + : undefined; + + const mediaType = this.detectMediaType(data, thumbnailUrls); + + return { + publishedAt: new Date(data.taken_at_timestamp * 1000), + handle: data.owner.username, + platformId: null, + views: data.video_view_count, + likes: data.edge_media_preview_like.count, + title: null, + description: data.edge_media_to_caption?.edges?.[0]?.node?.text ?? null, + thumbnailUrl, + mediaType, + thumbnailUrls, + }; + } + + private detectMediaType( + data: InstagramContent, + thumbnailUrls: string[] | undefined, + ): "image" | "video" | "carousel" | undefined { + if (data.__typename === "GraphVideo") { + return "video"; + } + + if ( + data.__typename === "GraphSidecar" || + (thumbnailUrls !== undefined && thumbnailUrls.length > 0) + ) { + return "carousel"; + } + + if (data.__typename === "GraphImage") { + return "image"; + } + + if (thumbnailUrls === undefined && data.video_view_count > 0) { + return "video"; + } + + return undefined; + } + + async fetchPosts(_params: FetchPostsParams): Promise { + return []; + } +} diff --git a/apps/web/lib/social-platforms/rate-limiter.ts b/apps/web/lib/social-platforms/rate-limiter.ts new file mode 100644 index 00000000000..dd3be922c2c --- /dev/null +++ b/apps/web/lib/social-platforms/rate-limiter.ts @@ -0,0 +1,32 @@ +import { redis } from "@/lib/upstash/redis"; +import { Ratelimit } from "@upstash/ratelimit"; + +// X API Pro tier: 300 requests per 15 minutes +// Use 280 to leave headroom +const xApiRateLimiter = new Ratelimit({ + redis, + limiter: Ratelimit.slidingWindow(280, "15 m"), + analytics: true, + prefix: "x-api-rate-limit", + timeout: 1000, +}); + +// YouTube Data API v3: 10,000 units/day default quota +// Budget 5,000 units/day for engagement sync (reserve rest for channel stats cron + headroom) +// playlistItems.list = 1 unit, videos.list = 1 unit → ~2-3 units per partner +// see: https://developers.google.com/youtube/v3/guides/quota_and_compliance_audits +const youtubeApiQuotaLimiter = new Ratelimit({ + redis, + limiter: Ratelimit.fixedWindow(5000, "1 d"), + analytics: true, + prefix: "yt-api-quota", + timeout: 1000, +}); + +export async function checkXApiRateLimit() { + return xApiRateLimiter.limit("global"); +} + +export async function checkYouTubeApiQuota(cost: number) { + return youtubeApiQuotaLimiter.limit("global", { rate: cost }); +} diff --git a/apps/web/lib/social-platforms/scrape-creators.ts b/apps/web/lib/social-platforms/scrape-creators.ts new file mode 100644 index 00000000000..ac372006e00 --- /dev/null +++ b/apps/web/lib/social-platforms/scrape-creators.ts @@ -0,0 +1,78 @@ +import { createFetch, createSchema } from "@better-fetch/fetch"; +import * as z from "zod/v4"; + +export class AccountNotFoundError extends Error { + constructor(message: string) { + super(message); + this.name = "AccountNotFoundError"; + } +} + +export class ContentNotFoundError extends Error { + status: number; + + constructor(status: number, message: string) { + super(message); + this.name = "ContentNotFoundError"; + this.status = status; + } +} + +export function isAccountNotFound(data: unknown): boolean { + if (typeof data !== "object" || data === null) { + return false; + } + + if (!("message" in data) || typeof (data as any).message !== "string") { + return false; + } + + const message = (data as any).message.toLowerCase(); + + return ( + message.includes("doesn't exist") || + message.includes("does not exist") || + message.includes("not found") + ); +} + +export const scrapeCreatorsFetch = createFetch({ + baseURL: "https://api.scrapecreators.com", + retry: { + type: "linear", + attempts: 1, + delay: 3000, + }, + headers: { + "x-api-key": process.env.SCRAPECREATORS_API_KEY!, + }, + schema: createSchema({ + "/v1/:platform/:handleType": { + method: "get", + params: z.object({ + platform: z.string(), + handleType: z.enum(["channel", "profile"]), + }), + query: z.object({ + handle: z.string(), + }), + output: z.unknown(), + }, + + "/:version/:platform/:contentType": { + method: "get", + params: z.object({ + version: z.enum(["v1", "v2"]), + platform: z.string(), + contentType: z.enum(["post", "video", "tweet"]), + }), + query: z.object({ + url: z.string(), + }), + output: z.unknown(), + }, + }), + onError: ({ error }) => { + console.error("[ScrapeCreators] Error", error); + }, +}); diff --git a/apps/web/lib/social-platforms/tiktok-adapter.ts b/apps/web/lib/social-platforms/tiktok-adapter.ts new file mode 100644 index 00000000000..56bad2aadd3 --- /dev/null +++ b/apps/web/lib/social-platforms/tiktok-adapter.ts @@ -0,0 +1,153 @@ +import { SocialContent } from "@/lib/types"; +import { PlatformType } from "@dub/prisma/client"; +import * as z from "zod/v4"; +import { + BasePlatformAdapter, + type FetchPostsParams, + type PostEngagement, + type SocialProfile, +} from "./base-adapter"; +import { + AccountNotFoundError, + ContentNotFoundError, + isAccountNotFound, + scrapeCreatorsFetch, +} from "./scrape-creators"; + +const tiktokContentSchema = z.object({ + create_time_utc: z.string(), + author: z.object({ + unique_id: z.string(), + }), + statistics: z.object({ + play_count: z + .number() + .nullable() + .transform((val) => val ?? 0), + digg_count: z + .number() + .nullable() + .transform((val) => val ?? 0), + }), + desc: z.string().optional(), + video: z + .object({ + cover: z + .object({ + url_list: z.array(z.string()), + }) + .optional(), + }) + .optional(), +}); + +const tiktokProfileSchema = z.object({ + user: z.object({ + id: z.string(), + signature: z.string(), + uniqueId: z.string(), + avatarThumb: z.url().nullish().default(null), + }), + stats: z.object({ + followerCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + videoCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + heartCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + }), +}); + +export class TikTokAdapter extends BasePlatformAdapter { + platform: PlatformType = "tiktok"; + + async fetchProfile(handle: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/v1/:platform/:handleType", + { + params: { + platform: "tiktok", + handleType: "profile", + }, + query: { + handle, + }, + }, + ); + + if (error) { + throw new Error( + "We were unable to retrieve your social media profile. Please try again.", + ); + } + + if (isAccountNotFound(raw)) { + throw new AccountNotFoundError( + (raw as any).message || "Account doesn't exist", + ); + } + + const data = tiktokProfileSchema.parse(raw); + + return { + description: data.user.signature, + platformId: data.user.id, + subscribers: BigInt(data.stats.followerCount), + posts: BigInt(data.stats.videoCount), + views: BigInt(0), + avatarUrl: data.user.avatarThumb, + }; + } + + async fetchPost(url: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/:version/:platform/:contentType", + { + params: { + version: "v2", + platform: "tiktok", + contentType: "video", + }, + query: { + url, + }, + }, + ); + + if (error) { + throw new ContentNotFoundError( + error.status ?? 500, + error.statusText ?? "Unknown error", + ); + } + + // TikTok response wraps content in aweme_detail + const unwrapped = + typeof raw === "object" && raw !== null && "aweme_detail" in (raw as any) + ? (raw as any).aweme_detail + : raw; + + const data = tiktokContentSchema.parse(unwrapped); + + return { + publishedAt: new Date(data.create_time_utc), + handle: data.author.unique_id, + platformId: null, + views: data.statistics.play_count, + likes: data.statistics.digg_count, + title: null, + description: data.desc ?? null, + thumbnailUrl: data.video?.cover?.url_list?.[0] ?? null, + }; + } + + async fetchPosts(_params: FetchPostsParams): Promise { + return []; + } +} diff --git a/apps/web/lib/social-platforms/x-adapter.ts b/apps/web/lib/social-platforms/x-adapter.ts new file mode 100644 index 00000000000..7d99bfd294b --- /dev/null +++ b/apps/web/lib/social-platforms/x-adapter.ts @@ -0,0 +1,178 @@ +import { SocialContent } from "@/lib/types"; +import { PlatformType } from "@dub/prisma/client"; +import { + BasePlatformAdapter, + type FetchPostsParams, + type PostEngagement, + type SocialProfile, +} from "./base-adapter"; +import { checkXApiRateLimit } from "./rate-limiter"; +import { + AccountNotFoundError, + ContentNotFoundError, + isAccountNotFound, + scrapeCreatorsFetch, +} from "./scrape-creators"; +import { XApiError, XApiRateLimitError, xFetch } from "./x-client"; +import { type XTweet, xContentSchema, xProfileSchema } from "./x-schemas"; + +export class XAdapter extends BasePlatformAdapter { + platform: PlatformType = "twitter"; + + async fetchProfile(handle: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/v1/:platform/:handleType", + { + params: { + platform: "twitter", + handleType: "profile", + }, + query: { + handle, + }, + }, + ); + + if (error) { + throw new Error( + "We were unable to retrieve your social media profile. Please try again.", + ); + } + + if (isAccountNotFound(raw)) { + throw new AccountNotFoundError( + (raw as any).message || "Account doesn't exist", + ); + } + + const data = xProfileSchema.parse(raw); + + return { + description: data.legacy.description, + platformId: data.rest_id, + subscribers: BigInt(data.legacy.followers_count), + posts: BigInt(data.legacy.statuses_count), + views: BigInt(0), + avatarUrl: data.avatar.image_url, + }; + } + + async fetchPost(url: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/:version/:platform/:contentType", + { + params: { + version: "v1", + platform: "twitter", + contentType: "tweet", + }, + query: { + url, + }, + }, + ); + + if (error) { + throw new ContentNotFoundError( + error.status ?? 500, + error.statusText ?? "Unknown error", + ); + } + + const data = xContentSchema.parse(raw); + + return { + publishedAt: new Date(data.legacy.created_at), + handle: data.core.user_results.result.core.screen_name, + platformId: null, + views: data.views.count, + likes: data.legacy.favorite_count, + title: null, + description: data.legacy.full_text ?? null, + thumbnailUrl: null, + }; + } + + async fetchPosts({ + platformId, + startTime, + endTime, + }: FetchPostsParams): Promise { + const tweets = await this._getUserTweets({ + userId: platformId, + startTime, + endTime, + }); + + return tweets.map((tweet) => { + const m = tweet.public_metrics; + + // Engagement rate = (likes + retweets + replies + quotes) / impressions + const totalEngagements = + m.like_count + m.retweet_count + m.reply_count + m.quote_count; + const engagementRate = + m.impression_count > 0 ? totalEngagements / m.impression_count : 0; + + return { + postId: tweet.id, + publishedAt: new Date(tweet.created_at), + title: tweet.text, + views: m.impression_count, + likes: m.like_count, + comments: m.reply_count, + engagementRate, + }; + }); + } + + private async _getUserTweets({ + userId, + startTime, + endTime, + }: { + userId: string; + startTime: Date; + endTime: Date; + }): Promise { + const allTweets: XTweet[] = []; + let paginationToken: string | undefined; + + for (let page = 0; page < 5; page++) { + const { success } = await checkXApiRateLimit(); + + if (!success) { + throw new XApiRateLimitError("X API rate limit exceeded"); + } + + const { data, error } = await xFetch("/users/:userId/tweets", { + params: { + userId, + }, + query: { + "tweet.fields": "public_metrics,created_at,text", + exclude: "replies,retweets", + start_time: startTime.toISOString(), + end_time: endTime.toISOString(), + max_results: "100", + ...(paginationToken && { pagination_token: paginationToken }), + }, + }); + + if (error) { + throw new XApiError(error); + } + + if (data.data.length > 0) { + allTweets.push(...data.data); + } + + paginationToken = data.meta.next_token; + + if (!paginationToken) { + break; + } + } + + return allTweets; + } +} diff --git a/apps/web/lib/social-platforms/x-client.ts b/apps/web/lib/social-platforms/x-client.ts new file mode 100644 index 00000000000..cfbbf2182f8 --- /dev/null +++ b/apps/web/lib/social-platforms/x-client.ts @@ -0,0 +1,74 @@ +import { createFetch, createSchema } from "@better-fetch/fetch"; +import * as z from "zod/v4"; +import { + type XApiErrorResponse, + xApiErrorSchema, + xTweetsResponseSchema, +} from "./x-schemas"; + +export const xFetch = createFetch({ + baseURL: "https://api.x.com/2", + headers: { + Authorization: `Bearer ${process.env.X_API_BEARER_TOKEN}`, + }, + schema: createSchema( + { + "/users/:userId/tweets": { + method: "get", + params: z.object({ + userId: z.string(), + }), + query: z.object({ + "tweet.fields": z.string(), + exclude: z.string().optional(), + start_time: z.string(), + end_time: z.string(), + max_results: z.string(), + pagination_token: z.string().optional(), + }), + output: xTweetsResponseSchema, + }, + }, + { + strict: true, + }, + ), + defaultError: xApiErrorSchema, + onError: ({ error }) => { + console.error("[X API] Error", error); + }, +}); + +export class XApiError extends Error { + status: number; + statusText: string; + title?: string; + detail?: string; + type?: string; + errors?: XApiErrorResponse["errors"]; + + constructor( + error: XApiErrorResponse & { status: number; statusText: string }, + ) { + const message = + error.detail || + error.errors?.map((e) => e.message).join("; ") || + error.statusText; + + super(message); + this.name = "XApiError"; + this.status = error.status; + this.statusText = error.statusText; + this.title = error.title; + this.detail = error.detail; + this.type = error.type; + this.errors = error.errors; + } +} + +export class XApiRateLimitError extends Error { + constructor(message: string) { + super(message); + this.name = "XApiRateLimitError"; + } +} diff --git a/apps/web/lib/social-platforms/x-schemas.ts b/apps/web/lib/social-platforms/x-schemas.ts new file mode 100644 index 00000000000..48d80c8f709 --- /dev/null +++ b/apps/web/lib/social-platforms/x-schemas.ts @@ -0,0 +1,89 @@ +import * as z from "zod/v4"; + +export const xTweetSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.string(), + public_metrics: z.object({ + bookmark_count: z.number(), + impression_count: z.number(), + like_count: z.number(), + quote_count: z.number(), + reply_count: z.number(), + retweet_count: z.number(), + }), +}); + +export const xTweetsResponseSchema = z.object({ + data: z.array(xTweetSchema).optional().default([]), + meta: z + .object({ + result_count: z.number(), + next_token: z.string().optional(), + oldest_id: z.string().optional(), + newest_id: z.string().optional(), + }) + .optional() + .default({ result_count: 0 }), +}); + +export const xApiErrorSchema = z.object({ + errors: z + .array( + z.object({ + parameters: z.record(z.string(), z.unknown()).optional(), + message: z.string(), + }), + ) + .optional(), + title: z.string().optional(), + detail: z.string().optional(), + type: z.string().optional(), +}); + +export const xContentSchema = z.object({ + core: z.object({ + user_results: z.object({ + result: z.object({ + core: z.object({ + screen_name: z.string(), + }), + }), + }), + }), + views: z.object({ + count: z + .string() + .nullable() + .transform((val) => (val == null ? 0 : Number(val))), + }), + legacy: z.object({ + created_at: z.string(), + favorite_count: z + .number() + .nullable() + .transform((val) => val ?? 0), + full_text: z.string().optional(), + }), +}); + +export const xProfileSchema = z.object({ + rest_id: z.string(), + legacy: z.object({ + description: z.string(), + followers_count: z + .number() + .nullish() + .transform((val) => val ?? 0), + statuses_count: z + .number() + .nullish() + .transform((val) => val ?? 0), + }), + avatar: z.object({ + image_url: z.url().nullish().default(null), + }), +}); + +export type XTweet = z.infer; +export type XApiErrorResponse = z.infer; diff --git a/apps/web/lib/social-platforms/youtube-adapter.ts b/apps/web/lib/social-platforms/youtube-adapter.ts new file mode 100644 index 00000000000..e3faa8114c0 --- /dev/null +++ b/apps/web/lib/social-platforms/youtube-adapter.ts @@ -0,0 +1,237 @@ +import { SocialContent } from "@/lib/types"; +import { PlatformType } from "@dub/prisma/client"; +import { + BasePlatformAdapter, + type FetchPostsParams, + type PostEngagement, + type SocialProfile, +} from "./base-adapter"; +import { checkYouTubeApiQuota } from "./rate-limiter"; +import { + AccountNotFoundError, + ContentNotFoundError, + isAccountNotFound, + scrapeCreatorsFetch, +} from "./scrape-creators"; +import { + YouTubeApiError, + YouTubeApiQuotaExceededError, + ytFetch, +} from "./youtube-client"; +import { + type YouTubeVideo, + youtubeContentSchema, + youtubeProfileSchema, +} from "./youtube-schemas"; + +export class YouTubeAdapter extends BasePlatformAdapter { + platform: PlatformType = "youtube"; + + async fetchProfile(handle: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/v1/:platform/:handleType", + { + params: { + platform: "youtube", + handleType: "channel", + }, + query: { + handle, + }, + }, + ); + + if (error) { + throw new Error( + "We were unable to retrieve your social media profile. Please try again.", + ); + } + + if (isAccountNotFound(raw)) { + throw new AccountNotFoundError( + (raw as any).message || "Account doesn't exist", + ); + } + + const data = youtubeProfileSchema.parse(raw); + + const largestAvatar = data.avatar.image.sources.sort( + (a, b) => b.width - a.width, + )[0]; + + return { + description: data.description, + platformId: data.channelId, + subscribers: BigInt(data.subscriberCount), + posts: BigInt(data.videoCount), + views: BigInt(data.viewCount), + avatarUrl: largestAvatar?.url ?? null, + }; + } + + async fetchPost(url: string): Promise { + const { data: raw, error } = await scrapeCreatorsFetch( + "/:version/:platform/:contentType", + { + params: { + version: "v1", + platform: "youtube", + contentType: "video", + }, + query: { + url, + }, + }, + ); + + if (error) { + throw new ContentNotFoundError( + error.status ?? 500, + error.statusText ?? "Unknown error", + ); + } + + const data = youtubeContentSchema.parse(raw); + + return { + publishedAt: new Date(data.publishDateText), + handle: data.channel.handle, + platformId: data.channel.id, + views: data.viewCountInt, + likes: data.likeCountInt, + title: data.title ?? null, + description: data.description ?? null, + thumbnailUrl: data.thumbnailUrl ?? null, + }; + } + + async fetchPosts({ + platformId, + startTime, + endTime, + }: FetchPostsParams): Promise { + const videoIds = await this._getChannelVideos( + platformId, + startTime, + endTime, + ); + + if (videoIds.length === 0) { + return []; + } + + const videos = await this._getVideoStatistics(videoIds); + + return videos.map((video) => { + const views = video.statistics.viewCount; + const likes = video.statistics.likeCount; + const comments = video.statistics.commentCount; + + const engagementRate = views > 0 ? (likes + comments) / views : 0; + + return { + postId: video.id, + publishedAt: new Date(video.snippet.publishedAt), + title: video.snippet.title, + views, + likes, + comments, + engagementRate, + }; + }); + } + + private async _getChannelVideos( + channelId: string, + startTime: Date, + endTime: Date, + ): Promise { + // Derive the auto-generated uploads playlist ID from the channel ID + // Every YouTube channel has one: UC... → UU... + const uploadsPlaylistId = "UU" + channelId.slice(2); + + const allVideoIds: string[] = []; + let pageToken: string | undefined; + + // The uploads playlist returns newest-first in practice (not guaranteed + // by the API docs). We paginate until we pass startTime, then stop. + // Cap at 10 pages (500 videos) — the playlist API doesn't support + // server-side time filtering, so we scan client-side. Most syncs + // (2-day window) finish in 1-2 pages due to the early exit. + for (let page = 0; page < 10; page++) { + const { success } = await checkYouTubeApiQuota(1); + + if (!success) { + throw new YouTubeApiQuotaExceededError(); + } + + const { data, error } = await ytFetch("/playlistItems", { + query: { + part: "contentDetails", + playlistId: uploadsPlaylistId, + maxResults: "50", + ...(pageToken && { pageToken }), + }, + }); + + if (error) { + throw new YouTubeApiError(error); + } + + let reachedOlderThanStart = false; + + for (const item of data.items) { + const publishedAt = new Date(item.contentDetails.videoPublishedAt); + + if (publishedAt < startTime) { + reachedOlderThanStart = true; + break; + } + + if (publishedAt < endTime) { + allVideoIds.push(item.contentDetails.videoId); + } + } + + if (reachedOlderThanStart || !data.nextPageToken) { + break; + } + + pageToken = data.nextPageToken; + } + + return allVideoIds; + } + + private async _getVideoStatistics( + videoIds: string[], + ): Promise { + const allVideos: YouTubeVideo[] = []; + + // Batch in chunks of 50 (YouTube API max per request) + for (let i = 0; i < videoIds.length; i += 50) { + const batch = videoIds.slice(i, i + 50); + + const { success } = await checkYouTubeApiQuota(1); + + if (!success) { + throw new YouTubeApiQuotaExceededError(); + } + + const { data, error } = await ytFetch("/videos", { + query: { + part: "snippet,statistics", + id: batch.join(","), + }, + }); + + if (error) { + throw new YouTubeApiError(error); + } + + allVideos.push(...data.items); + } + + return allVideos; + } +} diff --git a/apps/web/lib/social-platforms/youtube-client.ts b/apps/web/lib/social-platforms/youtube-client.ts new file mode 100644 index 00000000000..8f28cbeceb3 --- /dev/null +++ b/apps/web/lib/social-platforms/youtube-client.ts @@ -0,0 +1,72 @@ +import { createFetch, createSchema } from "@better-fetch/fetch"; +import * as z from "zod/v4"; +import { + type YouTubeApiErrorResponse, + youtubeApiErrorSchema, + youtubePlaylistItemsResponseSchema, + youtubeVideosResponseSchema, +} from "./youtube-schemas"; + +export const ytFetch = createFetch({ + baseURL: "https://www.googleapis.com/youtube/v3", + headers: { + "X-Goog-Api-Key": process.env.YOUTUBE_API_KEY!, + }, + schema: createSchema( + { + "/playlistItems": { + method: "get", + query: z.object({ + part: z.string(), + playlistId: z.string(), + maxResults: z.string(), + pageToken: z.string().optional(), + }), + output: youtubePlaylistItemsResponseSchema, + }, + "/videos": { + method: "get", + query: z.object({ + part: z.string(), + id: z.string(), + }), + output: youtubeVideosResponseSchema, + }, + }, + { + strict: true, + }, + ), + defaultError: youtubeApiErrorSchema, + onError: ({ error }) => { + console.error("[YouTube API] Error", error); + }, +}); + +export class YouTubeApiError extends Error { + status: number; + statusText: string; + detail?: string; + + constructor( + error: YouTubeApiErrorResponse & { status: number; statusText: string }, + ) { + const message = + error.error?.message || + error.error?.errors?.map((e) => e.message).join("; ") || + error.statusText; + + super(message); + this.name = "YouTubeApiError"; + this.status = error.status; + this.statusText = error.statusText; + this.detail = error.error?.message; + } +} + +export class YouTubeApiQuotaExceededError extends Error { + constructor() { + super("YouTube API daily quota exceeded"); + this.name = "YouTubeApiQuotaExceededError"; + } +} diff --git a/apps/web/lib/social-platforms/youtube-schemas.ts b/apps/web/lib/social-platforms/youtube-schemas.ts new file mode 100644 index 00000000000..1fe85afd726 --- /dev/null +++ b/apps/web/lib/social-platforms/youtube-schemas.ts @@ -0,0 +1,108 @@ +import * as z from "zod/v4"; + +export const youtubeContentSchema = z.object({ + publishDateText: z.string(), + channel: z.object({ + id: z.string(), + handle: z.string(), + }), + viewCountInt: z + .number() + .nullable() + .transform((val) => val ?? 0), + likeCountInt: z + .number() + .nullable() + .transform((val) => val ?? 0), + title: z.string().nullish(), + description: z.string().nullish(), + thumbnailUrl: z.string().nullish(), +}); + +export const youtubeProfileSchema = z.object({ + description: z.string(), + channelId: z.string(), + videoCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + subscriberCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + viewCount: z + .number() + .nullish() + .transform((val) => val ?? 0), + avatar: z.object({ + image: z.object({ + sources: z.array( + z.object({ + url: z.url(), + width: z.number(), + height: z.number(), + }), + ), + }), + }), +}); + +// YouTube Data API v3 response schemas +const youtubePlaylistItemSchema = z.object({ + contentDetails: z.object({ + videoId: z.string(), + videoPublishedAt: z.string(), + }), +}); + +export const youtubePlaylistItemsResponseSchema = z.object({ + items: z.array(youtubePlaylistItemSchema).default([]), + nextPageToken: z.string().optional(), +}); + +export const youtubeVideoSchema = z.object({ + id: z.string(), + snippet: z.object({ + publishedAt: z.string(), + title: z.string(), + }), + statistics: z.object({ + viewCount: z + .string() + .nullish() + .transform((val) => (val ? parseInt(val, 10) : 0)), + likeCount: z + .string() + .nullish() + .transform((val) => (val ? parseInt(val, 10) : 0)), + commentCount: z + .string() + .nullish() + .transform((val) => (val ? parseInt(val, 10) : 0)), + }), +}); + +export const youtubeVideosResponseSchema = z.object({ + items: z.array(youtubeVideoSchema).default([]), +}); + +export const youtubeApiErrorSchema = z.object({ + error: z + .object({ + code: z.number().optional(), + message: z.string().optional(), + errors: z + .array( + z.object({ + message: z.string().optional(), + domain: z.string().optional(), + reason: z.string().optional(), + }), + ) + .optional(), + }) + .optional(), +}); + +export type YouTubeVideo = z.infer; +export type YouTubeApiErrorResponse = z.infer; diff --git a/apps/web/lib/zod/schemas/bounties.ts b/apps/web/lib/zod/schemas/bounties.ts index e21dd35dd4d..079c0b4e3ca 100644 --- a/apps/web/lib/zod/schemas/bounties.ts +++ b/apps/web/lib/zod/schemas/bounties.ts @@ -227,6 +227,14 @@ export const BountySubmissionSchema = z.object({ .string() .nullable() .meta({ description: "The note for rejecting the submission" }), + fraudRiskLevel: z + .string() + .nullable() + .meta({ description: "The fraud risk level of the submission" }), + fraudFlags: z.array(z.string()).nullable().meta({ + description: + "The fraud flags triggered for the submission. Null if fraud detection has not run.", + }), }); export const BountySubmissionExtendedSchema = BountySubmissionSchema.extend({ diff --git a/apps/web/scripts/migrations/backfill-twitter-platform-ids.ts b/apps/web/scripts/migrations/backfill-twitter-platform-ids.ts new file mode 100644 index 00000000000..1e8511e63f6 --- /dev/null +++ b/apps/web/scripts/migrations/backfill-twitter-platform-ids.ts @@ -0,0 +1,96 @@ +import "dotenv-flow/config"; + +import { + AccountNotFoundError, + getSocialProfile, +} from "@/lib/social-platforms/get-social-profile"; +import { prisma } from "@dub/prisma"; + +const BATCH_SIZE = 10; + +async function main() { + let startingAfter: string | undefined = undefined; + let totalUpdated = 0; + let totalSkipped = 0; + + while (true) { + const partnerPlatforms = await prisma.partnerPlatform.findMany({ + where: { + type: "twitter", + verifiedAt: { not: null }, + platformId: null, + }, + take: BATCH_SIZE, + orderBy: { + id: "asc", + }, + ...(startingAfter + ? { + skip: 1, + cursor: { + id: startingAfter, + }, + } + : {}), + }); + + if (partnerPlatforms.length === 0) { + break; + } + + const promises = partnerPlatforms.map(async (pp) => { + const socialProfile = await getSocialProfile({ + platform: "twitter", + handle: pp.identifier, + }); + + if (!socialProfile.platformId) { + console.warn(`No platformId returned for @${pp.identifier}, skipping`); + return null; + } + + await prisma.partnerPlatform.update({ + where: { + id: pp.id, + }, + data: { + platformId: socialProfile.platformId, + }, + }); + + console.log( + `Updated platformId for @${pp.identifier} (${socialProfile.platformId})`, + ); + return pp.identifier; + }); + + const results = await Promise.allSettled(promises); + + for (const result of results) { + if (result.status === "fulfilled" && result.value !== null) { + totalUpdated++; + } else if (result.status === "rejected") { + totalSkipped++; + const error = result.reason; + if (error instanceof AccountNotFoundError) { + console.warn(`Account not found, skipping`); + } else { + console.error(`Failed to backfill:`, error); + } + } else { + totalSkipped++; + } + } + + startingAfter = partnerPlatforms[partnerPlatforms.length - 1].id; + + // Respect ScrapeCreators rate limits + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + + console.log( + `Backfill complete. Updated: ${totalUpdated}, Skipped: ${totalSkipped}`, + ); +} + +main(); diff --git a/apps/web/vercel.json b/apps/web/vercel.json index 29850ca9664..f813dec3f87 100644 --- a/apps/web/vercel.json +++ b/apps/web/vercel.json @@ -63,6 +63,10 @@ { "path": "/api/cron/fraud/summary", "schedule": "0 16 * * *" + }, + { + "path": "/api/cron/queue-sync-social-engagement", + "schedule": "0 7 * * *" } ], "functions": { diff --git a/packages/email/src/templates/unresolved-fraud-events-summary.tsx b/packages/email/src/templates/unresolved-fraud-events-summary.tsx index fb788cea945..2a7d7ce6f46 100644 --- a/packages/email/src/templates/unresolved-fraud-events-summary.tsx +++ b/packages/email/src/templates/unresolved-fraud-events-summary.tsx @@ -110,14 +110,14 @@ export default function UnresolvedFraudEventsSummary({ - {group.partner.name} diff --git a/packages/prisma/schema/bounty.prisma b/packages/prisma/schema/bounty.prisma index ec480bab037..77103be37e8 100644 --- a/packages/prisma/schema/bounty.prisma +++ b/packages/prisma/schema/bounty.prisma @@ -86,6 +86,8 @@ model BountySubmission { rejectionNote String? @db.Text files Json? urls Json? + fraudRiskLevel String? + fraudFlags Json? @db.Json socialMetricsLastSyncedAt DateTime? completedAt DateTime? reviewedAt DateTime? diff --git a/packages/prisma/schema/platform.prisma b/packages/prisma/schema/platform.prisma index 7903a43a915..0742dfde46a 100644 --- a/packages/prisma/schema/platform.prisma +++ b/packages/prisma/schema/platform.prisma @@ -8,23 +8,67 @@ enum PlatformType { } model PartnerPlatform { - id String @id @default(cuid()) - partnerId String - type PlatformType - identifier String // The unique identifier for the platform (e.g., username for social platforms, domain for websites) - platformId String? // Platform-specific immutable ID (e.g., YouTube channel ID) - avatarUrl String? @db.Text - subscribers BigInt @default(0) // Subscribers/followers for social platforms, also for newsletter subscribers - posts BigInt @default(0) - views BigInt @default(0) // Video views for YouTube, impressions for social platforms, pageviews for websites - metadata Json? - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - verifiedAt DateTime? - lastCheckedAt DateTime? - - partner Partner @relation(fields: [partnerId], references: [id], onDelete: Cascade) + id String @id @default(cuid()) + partnerId String + type PlatformType + identifier String // The unique identifier for the platform (e.g., username for social platforms, domain for websites) + platformId String? // Platform-specific immutable ID (e.g., YouTube channel ID) + avatarUrl String? @db.Text + subscribers BigInt @default(0) // Subscribers/followers for social platforms, also for newsletter subscribers + posts BigInt @default(0) + views BigInt @default(0) // Video views for YouTube, impressions for social platforms, pageviews for websites + medianViews BigInt? + medianLikes BigInt? + medianComments BigInt? + medianEngagementRate Float? + avgEngagementRate Float? // 30-day rolling average engagement rate + metadata Json? + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + verifiedAt DateTime? + lastCheckedAt DateTime? + + partner Partner @relation(fields: [partnerId], references: [id], onDelete: Cascade) + platformEngagements PartnerPlatformEngagement[] + platformPosts PartnerPlatformPost[] @@unique([partnerId, type]) @@index(type) } + +model PartnerPlatformPost { + id String @id @default(cuid()) + partnerPlatformId String + postId String // Tweet ID, YouTube video ID, etc. + publishedAt DateTime + title String? @db.Text // Video title (YT), tweet text preview (X) + views BigInt @default(0) // impressions (X), views (YT) + likes BigInt @default(0) + comments BigInt @default(0) // replies (X), comments (YT) + engagementRate Float @default(0) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + partnerPlatform PartnerPlatform @relation(fields: [partnerPlatformId], references: [id], onDelete: Cascade) + + @@unique([partnerPlatformId, postId]) + @@index(publishedAt) +} + +model PartnerPlatformEngagement { + id String @id @default(cuid()) + partnerPlatformId String + date DateTime @db.Date + totalPosts Int @default(0) + totalImpressions BigInt @default(0) + totalLikes BigInt @default(0) + totalComments BigInt @default(0) // replies on X, comments on IG/YT/TT + engagementRate Float @default(0) // platform-specific formula + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + partnerPlatform PartnerPlatform @relation(fields: [partnerPlatformId], references: [id], onDelete: Cascade) + + @@unique([partnerPlatformId, date]) + @@index(date) +}