From c26e135d745297a46b2c35882463205f5551f50a Mon Sep 17 00:00:00 2001 From: mrjvs Date: Tue, 26 Dec 2023 19:13:15 +0100 Subject: [PATCH] dev-cli with browser based fetching --- src/dev-cli/browser/.gitignore | 1 + src/dev-cli/browser/index.html | 11 +++++ src/dev-cli/browser/index.ts | 17 ++++++++ src/dev-cli/index.ts | 6 ++- src/dev-cli/scraper.ts | 77 +++++++++++++++++++++++++++++++++- 5 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 src/dev-cli/browser/.gitignore create mode 100644 src/dev-cli/browser/index.html create mode 100644 src/dev-cli/browser/index.ts diff --git a/src/dev-cli/browser/.gitignore b/src/dev-cli/browser/.gitignore new file mode 100644 index 0000000..1521c8b --- /dev/null +++ b/src/dev-cli/browser/.gitignore @@ -0,0 +1 @@ +dist diff --git a/src/dev-cli/browser/index.html b/src/dev-cli/browser/index.html new file mode 100644 index 0000000..7709f4b --- /dev/null +++ b/src/dev-cli/browser/index.html @@ -0,0 +1,11 @@ + + + + + + Scraper CLI + + + + + diff --git a/src/dev-cli/browser/index.ts b/src/dev-cli/browser/index.ts new file mode 100644 index 0000000..d1f6494 --- /dev/null +++ b/src/dev-cli/browser/index.ts @@ -0,0 +1,17 @@ +import { makeProviders, makeSimpleProxyFetcher, makeStandardFetcher, targets } from '../../../lib'; + +(window as any).scrape = (proxyUrl: string, type: 'source' | 'embed', input: any) => { + const providers = makeProviders({ + fetcher: makeStandardFetcher(fetch), + target: targets.BROWSER, + proxiedFetcher: makeSimpleProxyFetcher(proxyUrl, fetch), + }); + if (type === 'source') { + return providers.runSourceScraper(input); + } + if (type === 'embed') { + return providers.runEmbedScraper(input); + } + + throw new Error('Invalid input type'); +}; diff --git a/src/dev-cli/index.ts b/src/dev-cli/index.ts index 1689a37..bd95599 100644 --- a/src/dev-cli/index.ts +++ b/src/dev-cli/index.ts @@ -62,7 +62,7 @@ async function runQuestions() { { type: 'select', name: 'fetcher', - message: 'Select a fetcher', + 
message: 'Select a fetcher mode', choices: [ { message: 'Native', @@ -72,6 +72,10 @@ async function runQuestions() { message: 'Node fetch', name: 'node-fetch', }, + { + message: 'Browser', + name: 'browser', + }, ], }, { diff --git a/src/dev-cli/scraper.ts b/src/dev-cli/scraper.ts index c72e36e..882d321 100644 --- a/src/dev-cli/scraper.ts +++ b/src/dev-cli/scraper.ts @@ -1,18 +1,91 @@ /* eslint import/no-extraneous-dependencies: ["error", {"devDependencies": true}] */ -import Spinnies from 'spinnies'; +import { existsSync } from 'fs'; +import { join } from 'path'; +import puppeteer, { Browser } from 'puppeteer'; +import Spinnies from 'spinnies'; +import { PreviewServer, build, preview } from 'vite'; + +import { getConfig } from '@/dev-cli/config'; import { logDeepObject } from '@/dev-cli/logging'; import { getMovieMediaDetails, getShowMediaDetails } from '@/dev-cli/tmdb'; import { CommandLineArguments } from '@/dev-cli/validate'; import { MetaOutput, ProviderMakerOptions, makeProviders } from '..'; -async function runActualScraping( +async function runBrowserScraping( providerOptions: ProviderMakerOptions, source: MetaOutput, options: CommandLineArguments, ) { + if (!existsSync(join(__dirname, '../../lib/index.mjs'))) + throw new Error('Please compile before running cli in browser mode'); + const config = getConfig(); + if (!config.proxyUrl) + throw new Error('Simple proxy url must be set in the environment (MOVIE_WEB_PROXY_URL) for browser mode to work'); + + const root = join(__dirname, 'browser'); + let server: PreviewServer | undefined; + let browser: Browser | undefined; + try { + // setup browser + await build({ + root, + }); + server = await preview({ + root, + }); + browser = await puppeteer.launch({ + headless: 'new', + args: ['--no-sandbox', '--disable-setuid-sandbox'], + }); + const page = await browser.newPage(); + await page.goto(server.resolvedUrls.local[0]); + await page.waitForFunction('!!window.scrape', { timeout: 5000 }); + + // get input media + 
let input: any; + if (source.type === 'embed') { + input = { + url: options.url, + id: source.id, + }; + } else if (source.type === 'source') { + let media; + if (options.type === 'movie') { + media = await getMovieMediaDetails(options.tmdbId); + } else { + media = await getShowMediaDetails(options.tmdbId, options.season, options.episode); + } + input = { + media, + id: source.id, + }; + } else { + throw new Error('Wrong source input type'); + } + + return await page.evaluate( + async (proxy, type, inp) => { + return (window as any).scrape(proxy, type, inp); + }, + config.proxyUrl, + source.type, + input, + ); + } finally { + server?.httpServer.close(); + await browser?.close(); + } +} + +async function runActualScraping( + providerOptions: ProviderMakerOptions, + source: MetaOutput, + options: CommandLineArguments, +): Promise<any> { + if (options.fetcher === 'browser') return runBrowserScraping(providerOptions, source, options); const providers = makeProviders(providerOptions); if (source.type === 'embed') {