Merge pull request #1 from movie-web/dev

Add initial POC of Uber Proxy
This commit is contained in:
mrjvs
2023-12-20 16:26:07 +01:00
committed by GitHub
12 changed files with 1151 additions and 2109 deletions

View File

@@ -1,2 +1,5 @@
# provider-api # provider-api
Our provider package exposed as an HTTP API Our provider package exposed as an HTTP API. This is not the recommended way to deploy movie-web or use our providers. Instead, use the provider package or simple-proxy.
> [!IMPORTANT]
> This only works on Cloudflare due to Cloudflare-specific logic and build processes.

View File

@@ -2,30 +2,30 @@
"name": "providers-api", "name": "providers-api",
"version": "1.0.0", "version": "1.0.0",
"private": true, "private": true,
"type": "module",
"scripts": { "scripts": {
"prepare": "nitropack prepare", "build": "wrangler deploy --dry-run --outdir dist",
"dev": "nitropack dev", "dev": "wrangler dev src/index.ts",
"build": "nitropack build", "deploy": "wrangler deploy --minify src/index.ts",
"build:cloudflare": "NITRO_PRESET=cloudflare npm run build",
"build:aws": "NITRO_PRESET=aws_lambda npm run build",
"build:node": "NITRO_PRESET=node-server npm run build",
"start": "node .output/server/index.mjs",
"lint": "eslint --ext .ts src/",
"lint:fix": "eslint --fix --ext .ts src/",
"preinstall": "npx only-allow pnpm" "preinstall": "npx only-allow pnpm"
}, },
"dependencies": { "dependencies": {
"@movie-web/providers": "^1.1.5", "@movie-web/providers": "^1.1.5",
"h3": "^1.9.0", "@tsndr/cloudflare-worker-jwt": "^2.3.2",
"nitropack": "latest" "@types/jsonwebtoken": "^9.0.5",
"esbuild": "^0.19.10",
"hono": "^3.11.8",
"zod": "^3.22.4"
}, },
"devDependencies": { "devDependencies": {
"@cloudflare/workers-types": "^4.20231121.0",
"@typescript-eslint/eslint-plugin": "^6.14.0", "@typescript-eslint/eslint-plugin": "^6.14.0",
"@typescript-eslint/parser": "^6.14.0", "@typescript-eslint/parser": "^6.14.0",
"eslint": "^8.56.0", "eslint": "^8.56.0",
"eslint-config-airbnb-base": "^15.0.0", "eslint-config-airbnb-base": "^15.0.0",
"eslint-config-prettier": "^9.1.0", "eslint-config-prettier": "^9.1.0",
"eslint-import-resolver-typescript": "^3.6.1", "eslint-import-resolver-typescript": "^3.6.1",
"eslint-plugin-prettier": "^5.0.1" "eslint-plugin-prettier": "^5.0.1",
"wrangler": "^3.21.0"
} }
} }

2662
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

272
src/index.ts Normal file
View File

@@ -0,0 +1,272 @@
import { Context, Env, Hono } from 'hono';
import { streamSSE } from 'hono/streaming';
import { cors } from 'hono/cors';
import {
ScrapeMedia,
makeProviders,
makeStandardFetcher,
targets,
} from '@movie-web/providers';
import { ZodError, z } from 'zod';
import { embedSchema, scrapeAllSchema, sourceSchema } from '@/schema';
import { validateTurnstile } from '@/turnstile';
// hono does not export the streaming API type, so it is derived from the
// first argument of the callback that `streamSSE` takes as its second parameter.
type SSEStreamCallback = Parameters<typeof streamSSE>[1];
type SSEStreamingApi = Parameters<SSEStreamCallback>[0];

// Fetcher built on the global `fetch`, handed to the providers library.
const fetcher = makeStandardFetcher(fetch);

// Provider controls used by every scrape route in this module.
const providers = makeProviders({
  fetcher,
  target: targets.BROWSER,
});

// The Hono application; routes and middleware are registered on it below.
const app = new Hono();
/**
 * Reports whether Turnstile validation is switched on.
 *
 * @param context - Request context whose `env` bindings are inspected.
 * @returns `true` only when the `TURNSTILE_ENABLED` binding is exactly the string "true".
 */
function isTurnstileEnabled(context: Context<Env>): boolean {
  const flag = context.env?.TURNSTILE_ENABLED;
  return flag === 'true';
}
// CORS middleware applied to every route. The allow-list comes from the
// comma-separated `CORS_ALLOWED` binding and is matched against the hostname
// of the request's Origin header.
app.use('*', (context, next) => {
  // Entries are trimmed and lower-cased so that values such as
  // "a.com, B.com" still match: `URL.hostname` is always lower-case and never
  // contains surrounding whitespace. Empty entries are dropped.
  const allowedCorsHosts = (
    (context.env?.CORS_ALLOWED as string | undefined) ?? ''
  )
    .split(',')
    .map((host) => host.trim().toLowerCase())
    .filter((host) => host.length > 0);

  return cors({
    origin: (origin) => {
      try {
        const { hostname } = new URL(origin);
        return allowedCorsHosts.includes(hostname) ? origin : '';
      } catch {
        // If the Origin URL is not valid, return empty allowed origin
        return '';
      }
    },
  })(context, next);
});
// Module-level counter used as the SSE `id` field; it is incremented on every
// event written through `writeSSEEvent`, regardless of which stream it is for.
let eventId = 0;

/**
 * Writes one server-sent event to `stream`.
 *
 * @param stream - The SSE stream to write to.
 * @param event - The SSE event name.
 * @param data - Payload to send. `undefined`, `null` and the empty string are
 *   sent as an empty data field; everything else (including falsy values such
 *   as `0` and `false`) is JSON-encoded.
 * @returns The promise returned by `stream.writeSSE`.
 */
async function writeSSEEvent(
  stream: SSEStreamingApi,
  event: string,
  data: unknown,
) {
  // Only genuinely "absent" payloads map to an empty data field. A plain
  // truthiness check would also discard 0 and false.
  const isAbsent = data === undefined || data === null || data === '';
  return await stream.writeSSE({
    event,
    data: isAbsent ? '' : JSON.stringify(data),
    id: String(eventId++),
  });
}
// GET /scrape
// Optionally validates Turnstile, validates the query against
// `scrapeAllSchema`, then runs every provider and streams progress over SSE.
app.get('/scrape', async (context) => {
  const query = context.req.query();

  // Set only when Turnstile is enabled and validation returned a JWT.
  let issuedJwt: string | undefined = undefined;
  if (isTurnstileEnabled(context)) {
    const verdict = await validateTurnstile(context);
    if (!verdict.success) {
      context.status(401);
      const codes = verdict.errorCodes.join(', ');
      return context.text(`Turnstile invalid, error codes: ${codes}`);
    }
    issuedJwt = verdict.jwtToken;
  }

  let media: ScrapeMedia;
  try {
    media = scrapeAllSchema.parse(query);
  } catch (err) {
    if (!(err instanceof ZodError)) {
      context.status(500);
      return context.text('An error has occurred!');
    }
    // Schema violations are reported back to the client as formatted JSON.
    context.status(400);
    return context.json(err.format());
  }

  return streamSSE(context, async (stream) => {
    // Builds a provider event callback that relays the payload as an SSE
    // event of the same name (the write is intentionally not awaited).
    const forward = (eventName: string) => (payload: unknown) => {
      writeSSEEvent(stream, eventName, payload);
    };

    if (issuedJwt) {
      await writeSSEEvent(stream, 'token', issuedJwt);
    }

    try {
      const result = await providers.runAll({
        media,
        events: {
          discoverEmbeds: forward('discoverEmbeds'),
          init: forward('init'),
          start: forward('start'),
          update: forward('update'),
        },
      });

      if (result) {
        await writeSSEEvent(stream, 'completed', result);
      } else {
        await writeSSEEvent(stream, 'noOutput', '');
      }
      return await stream.close();
    } catch (err: any) {
      const { name, message, stack } = err;
      await writeSSEEvent(stream, 'error', { name, message, stack });
      return await stream.close();
    }
  });
});
// GET /scrape/embed
// Optionally validates Turnstile, validates the query against `embedSchema`,
// then runs a single embed scraper and streams progress over SSE.
app.get('/scrape/embed', async (context) => {
  const query = context.req.query();

  // Set only when Turnstile is enabled and validation returned a JWT.
  let issuedJwt: string | undefined = undefined;
  if (isTurnstileEnabled(context)) {
    const verdict = await validateTurnstile(context);
    if (!verdict.success) {
      context.status(401);
      const codes = verdict.errorCodes.join(', ');
      return context.text(`Turnstile invalid, error codes: ${codes}`);
    }
    issuedJwt = verdict.jwtToken;
  }

  let embedInput: z.infer<typeof embedSchema>;
  try {
    embedInput = embedSchema.parse(query);
  } catch (err) {
    if (!(err instanceof ZodError)) {
      context.status(500);
      return context.text('An error has occurred!');
    }
    // Schema violations are reported back to the client as formatted JSON.
    context.status(400);
    return context.json(err.format());
  }

  return streamSSE(context, async (stream) => {
    if (issuedJwt) {
      await writeSSEEvent(stream, 'token', issuedJwt);
    }

    try {
      const { id, url } = embedInput;
      const result = await providers.runEmbedScraper({
        id,
        url,
        events: {
          // Relay scraper progress; the write is intentionally not awaited.
          update: (payload) => {
            writeSSEEvent(stream, 'update', payload);
          },
        },
      });

      if (result) {
        await writeSSEEvent(stream, 'completed', result);
      } else {
        await writeSSEEvent(stream, 'noOutput', '');
      }
      return await stream.close();
    } catch (err: any) {
      const { name, message, stack } = err;
      await writeSSEEvent(stream, 'error', { name, message, stack });
      return await stream.close();
    }
  });
});
// GET /scrape/source
// Optionally validates Turnstile, validates the query against `sourceSchema`,
// then runs a single source scraper and streams progress over SSE.
app.get('/scrape/source', async (context) => {
  const query = context.req.query();

  // Set only when Turnstile is enabled and validation returned a JWT.
  let issuedJwt: string | undefined = undefined;
  if (isTurnstileEnabled(context)) {
    const verdict = await validateTurnstile(context);
    if (!verdict.success) {
      context.status(401);
      const codes = verdict.errorCodes.join(', ');
      return context.text(`Turnstile invalid, error codes: ${codes}`);
    }
    issuedJwt = verdict.jwtToken;
  }

  let sourceInput: z.infer<typeof sourceSchema>;
  try {
    sourceInput = sourceSchema.parse(query);
  } catch (err) {
    if (!(err instanceof ZodError)) {
      context.status(500);
      return context.text('An error has occurred!');
    }
    // Schema violations are reported back to the client as formatted JSON.
    context.status(400);
    return context.json(err.format());
  }

  return streamSSE(context, async (stream) => {
    if (issuedJwt) {
      await writeSSEEvent(stream, 'token', issuedJwt);
    }

    try {
      const result = await providers.runSourceScraper({
        id: sourceInput.id,
        media: sourceInput,
        events: {
          // Relay scraper progress; the write is intentionally not awaited.
          update: (payload) => {
            writeSSEEvent(stream, 'update', payload);
          },
        },
      });

      if (result) {
        await writeSSEEvent(stream, 'completed', result);
      } else {
        await writeSSEEvent(stream, 'noOutput', '');
      }
      return await stream.close();
    } catch (err: any) {
      const { name, message, stack } = err;
      await writeSSEEvent(stream, 'error', { name, message, stack });
      return await stream.close();
    }
  });
});
// GET /metadata
// Responds with a two-element JSON array: the embed list first, then the
// source list, both as reported by the providers instance.
app.get('/metadata', async (context) => {
  const embeds = providers.listEmbeds();
  const sources = providers.listSources();
  return context.json([embeds, sources]);
});

export default app;

View File

@@ -1,39 +0,0 @@
import { getBodyBuffer } from '@/utils/body';
import {
getProxyHeaders,
getAfterResponseHeaders,
cleanupHeadersBeforeProxy,
} from '@/utils/headers';
/**
 * Proxy handler: forwards the incoming request to the URL given in the
 * `destination` query parameter and copies selected response headers back.
 */
export default defineEventHandler(async (event) => {
  // Answer CORS preflight requests directly.
  if (isPreflightRequest(event)) {
    return handleCors(event, {});
  }

  // The target URL is mandatory; reject the request when it is missing or empty.
  const { destination } = getQuery<{ destination?: string }>(event);
  if (!destination) {
    return await sendJson({
      event,
      status: 400,
      data: {
        error: 'destination query parameter invalid',
      },
    });
  }

  // Read the request body first, then strip the blacklisted headers from the
  // event before the proxy headers are derived and the request is sent.
  const requestBody = await getBodyBuffer(event);
  cleanupHeadersBeforeProxy(event);

  await proxyRequest(event, destination, {
    fetchOptions: {
      redirect: 'follow',
      headers: getProxyHeaders(event.headers),
      body: requestBody,
    },
    onResponse(responseEvent, upstream) {
      setResponseHeaders(
        responseEvent,
        getAfterResponseHeaders(upstream.headers, upstream.url),
      );
    },
  });
});

52
src/schema.ts Normal file
View File

@@ -0,0 +1,52 @@
import { z } from 'zod';
/** Schema for a TMDB id passed as a string: one or more digits and nothing else. */
export const tmdbIdSchema = z.string().regex(/^\d+$/);
/**
 * Schema for the flat query parameters describing a movie or a show episode.
 *
 * The `type` field discriminates between the two shapes. Numeric fields are
 * coerced from their string form. Movie input is returned as parsed; show
 * input has its flat `episode*` / `season*` fields regrouped into nested
 * `episode` and `season` objects.
 */
export const scrapeAllSchema = z
  .discriminatedUnion('type', [
    z.object({
      type: z.literal('movie'),
      title: z.string().min(1),
      releaseYear: z.coerce.number().int().gt(0),
      tmdbId: tmdbIdSchema,
    }),
    z.object({
      type: z.literal('show'),
      title: z.string().min(1),
      releaseYear: z.coerce.number().int().gt(0),
      tmdbId: tmdbIdSchema,
      episodeNumber: z.coerce.number().int(),
      episodeTmdbId: tmdbIdSchema,
      seasonNumber: z.coerce.number().int(),
      seasonTmdbId: tmdbIdSchema,
    }),
  ])
  .transform((query) => {
    // Strict equality: `type` is a string literal, so no coercion is wanted.
    if (query.type === 'movie') return query;

    // Shows: fold the flat episode/season fields into nested objects.
    return {
      type: query.type,
      title: query.title,
      releaseYear: query.releaseYear,
      tmdbId: query.tmdbId,
      episode: {
        number: query.episodeNumber,
        tmdbId: query.episodeTmdbId,
      },
      season: {
        number: query.seasonNumber,
        tmdbId: query.seasonTmdbId,
      },
    };
  });
/** Schema for embed scrape input: both `id` and `url` are required strings. */
export const embedSchema = z.object({
id: z.string(),
url: z.string(),
});
/**
 * Schema for source scrape input: everything `scrapeAllSchema` accepts,
 * intersected with an object carrying a required string `id`.
 */
export const sourceSchema = z.intersection(
  scrapeAllSchema,
  z.object({ id: z.string() }),
);

81
src/turnstile.ts Normal file
View File

@@ -0,0 +1,81 @@
import { Context, Env } from 'hono';
import jsonwebtoken from '@tsndr/cloudflare-worker-jwt';
/**
 * Validates the `token` query parameter of the current request.
 *
 * Two token forms are accepted:
 * - `jwt|<token>`: a JWT previously issued by this function. It must verify
 *   against `JWT_SECRET` (HS256) and its `ip` claim must equal the request's
 *   `CF-Connecting-IP` header.
 * - `turnstile|<response>`: a Turnstile response, checked against the
 *   Turnstile siteverify endpoint. On success a JWT valid for 10 minutes is
 *   issued, provided `JWT_SECRET` is configured.
 *
 * @param context - Request context providing the `TURNSTILE_SECRET` and
 *   `JWT_SECRET` bindings, the `token` query parameter and the
 *   `CF-Connecting-IP` header.
 * @returns `success` plus the collected `errorCodes` (always an array), and
 *   `jwtToken` when a new JWT was issued.
 */
export async function validateTurnstile(
  context: Context<Env>,
): Promise<{ success: boolean; errorCodes: string[]; jwtToken?: string }> {
  const jwtPrefix = 'jwt|';
  const turnstilePrefix = 'turnstile|';

  const turnstileSecret = context.env?.TURNSTILE_SECRET as string | undefined;
  const jwtSecret = (context.env?.JWT_SECRET as string | undefined) ?? '';
  const token = context.req.query('token') || '';
  const ip = context.req.header('CF-Connecting-IP') || '';

  if (token.startsWith(jwtPrefix)) {
    const rawJwt = token.slice(jwtPrefix.length);

    // Without a configured secret no JWT can be trusted; never verify against
    // an empty key.
    if (!jwtSecret) {
      return {
        success: false,
        errorCodes: ['jwt-invalid'],
      };
    }

    try {
      const isValid = await jsonwebtoken.verify(rawJwt, jwtSecret, {
        algorithm: 'HS256',
      });
      if (!isValid) {
        return {
          success: false,
          errorCodes: ['jwt-invalid'],
        };
      }

      // The token is bound to the IP it was issued for.
      const { payload } = jsonwebtoken.decode(rawJwt);
      if (!payload || payload.ip !== ip) {
        return {
          success: false,
          errorCodes: ['jwt-ip-invalid'],
        };
      }

      return {
        success: true,
        errorCodes: [],
      };
    } catch {
      // A token that cannot be parsed or verified is an invalid JWT. Report
      // it as such instead of falling through to 'invalid-token-type'.
      return {
        success: false,
        errorCodes: ['jwt-invalid'],
      };
    }
  }

  if (!token.startsWith(turnstilePrefix)) {
    return {
      success: false,
      errorCodes: ['invalid-token-type'],
    };
  }

  const formData = new FormData();
  formData.append('secret', turnstileSecret ?? '');
  formData.append('response', token.slice(turnstilePrefix.length));
  formData.append('remoteip', ip);

  const url = 'https://challenges.cloudflare.com/turnstile/v0/siteverify';
  const result = await fetch(url, {
    body: formData,
    method: 'POST',
  });
  const outcome = await result.json<any>();

  const success = outcome?.success === true;
  // Callers join this array, so guarantee one even if the response omits it.
  const reportedCodes = outcome?.['error-codes'];
  const errorCodes: string[] = Array.isArray(reportedCodes)
    ? reportedCodes
    : [];

  // Only issue a follow-up JWT when there is a real secret to sign it with.
  let jwt: string | undefined = undefined;
  if (success && jwtSecret) {
    jwt = await jsonwebtoken.sign(
      {
        ip,
        exp: Math.floor(Date.now() / 1000) + 60 * 10, // 10 Minutes
      },
      jwtSecret,
    );
  }

  return {
    success,
    errorCodes,
    jwtToken: jwt,
  };
}

View File

@@ -1,13 +0,0 @@
import { H3Event } from 'h3';
/**
 * Tells whether the event's HTTP method is one that is treated as carrying a
 * body (PUT, POST, PATCH or DELETE; compared case-insensitively).
 */
export function hasBody(event: H3Event) {
  switch (event.method.toUpperCase()) {
    case 'PUT':
    case 'POST':
    case 'PATCH':
    case 'DELETE':
      return true;
    default:
      return false;
  }
}
/**
 * Reads the raw request body for methods that `hasBody` accepts.
 *
 * @returns The result of `readRawBody(event, false)`, or `undefined` when the
 *   method is not one that carries a body.
 */
export async function getBodyBuffer(
  event: H3Event,
): Promise<Buffer | undefined> {
  if (hasBody(event)) {
    return await readRawBody(event, false);
  }
  return undefined;
}

View File

@@ -1,73 +0,0 @@
import { H3Event } from 'h3';
// Lower-case names of request headers that `cleanupHeadersBeforeProxy`
// deletes from the incoming event.
const blacklistedHeaders = [
'cf-connecting-ip',
'cf-worker',
'cf-ray',
'cf-visitor',
'cf-ew-via',
'x-forwarded-for',
'x-forwarded-host',
'x-forwarded-proto',
'forwarded',
'x-real-ip',
];
/**
 * Copies one header from `headers` to `outputHeaders` under a different name.
 * Nothing is written when `inputKey` is absent from `headers`.
 */
function copyHeader(
  headers: Headers,
  outputHeaders: Headers,
  inputKey: string,
  outputKey: string,
) {
  if (!headers.has(inputKey)) {
    return;
  }
  const value = headers.get(inputKey);
  outputHeaders.set(outputKey, value ?? '');
}
/**
 * Builds the header set sent with the proxied request.
 *
 * `X-Cookie`, `X-Referer` and `X-Origin` are copied (when present) to
 * `Cookie`, `Referer` and `Origin`, and a fixed `User-Agent` is always set.
 */
export function getProxyHeaders(headers: Headers): Headers {
  const proxied = new Headers();

  // [incoming header name, outgoing header name]
  const renames: [string, string][] = [
    ['X-Cookie', 'Cookie'],
    ['X-Referer', 'Referer'],
    ['X-Origin', 'Origin'],
  ];
  for (const [from, to] of renames) {
    copyHeader(headers, proxied, from, to);
  }

  proxied.set(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0',
  );
  return proxied;
}
/**
 * Builds the headers added to the proxy's response.
 *
 * @param headers - Headers of the upstream response.
 * @param finalUrl - URL the upstream response came from, reported back as
 *   `X-Final-Destination`.
 * @returns The fixed CORS headers, `Vary`, `X-Final-Destination`, and
 *   `X-Set-Cookie` mirroring the upstream `Set-Cookie` header when present.
 */
export function getAfterResponseHeaders(
  headers: Headers,
  finalUrl: string,
): Record<string, string> {
  const output: Record<string, string> = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Expose-Headers': '*',
    Vary: 'Origin',
    'X-Final-Destination': finalUrl,
  };

  // Previously this value was computed and then discarded, because a separate
  // object literal was returned; it is now part of the returned headers.
  const setCookie = headers.get('Set-Cookie');
  if (setCookie !== null) {
    output['X-Set-Cookie'] = setCookie;
  }

  return output;
}
/**
 * Deletes a header from the event's underlying Node request headers.
 * The key is lower-cased before lookup; nothing happens when the header is
 * absent or has a falsy value.
 */
export function removeHeadersFromEvent(event: H3Event, key: string) {
  const lowered = key.toLowerCase();
  const requestHeaders = event.node.req.headers;
  if (!requestHeaders[lowered]) {
    return;
  }
  delete requestHeaders[lowered];
}
/** Removes every header listed in `blacklistedHeaders` from the event's request. */
export function cleanupHeadersBeforeProxy(event: H3Event) {
  for (const headerName of blacklistedHeaders) {
    removeHeadersFromEvent(event, headerName);
  }
}

View File

@@ -1,10 +0,0 @@
import { H3Event, EventHandlerRequest } from 'h3';
/**
 * Sends `ops.data` as a pretty-printed (2-space) JSON response.
 *
 * @param ops.event - Event to respond on.
 * @param ops.data - Object serialized as the response body.
 * @param ops.status - Response status; 200 when omitted.
 */
export async function sendJson(ops: {
  event: H3Event<EventHandlerRequest>;
  data: Record<string, any>;
  status?: number;
}) {
  const statusCode = ops.status ?? 200;
  setResponseStatus(ops.event, statusCode);

  const payload = JSON.stringify(ops.data, null, 2);
  await send(ops.event, payload, 'application/json');
}

25
tsconfig.json Normal file
View File

@@ -0,0 +1,25 @@
{
"compilerOptions": {
"target": "ESNext",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"module": "ESNext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"baseUrl": "./src",
"paths": {
"@/*": ["./*"]
},
"types": [
"@cloudflare/workers-types"
],
},
}

4
wrangler.toml Normal file
View File

@@ -0,0 +1,4 @@
name = "providers-api"
main = "./src/index.ts"
workers_dev = true
compatibility_date = "2023-12-17"