dev-cli with browser based fetching

This commit is contained in:
mrjvs
2023-12-26 19:13:15 +01:00
parent 75d4b9edcb
commit c26e135d74
5 changed files with 109 additions and 3 deletions

1
src/dev-cli/browser/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
dist

View File

@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Scraper CLI</title>
</head>
<body>
<!-- The page has no markup of its own; it only loads ./index.ts as an ES module. -->
<script src="./index.ts" type="module"></script>
</body>
</html>

View File

@@ -0,0 +1,17 @@
import { makeProviders, makeSimpleProxyFetcher, makeStandardFetcher, targets } from '../../../lib';
/**
 * Browser-side entry point, exposed as `window.scrape`.
 *
 * Builds a provider set that targets the browser, using the page's own
 * `fetch` for direct requests and a simple-proxy fetcher for proxied ones,
 * then runs the requested scraper.
 *
 * @param proxyUrl - URL handed to `makeSimpleProxyFetcher`.
 * @param type - Which scraper to run: `'source'` or `'embed'`.
 * @param input - Options forwarded unchanged to the selected scraper.
 * @returns Whatever the selected scraper returns.
 * @throws Error when `type` is neither `'source'` nor `'embed'`.
 */
(window as any).scrape = (proxyUrl: string, type: 'source' | 'embed', input: any) => {
  const providers = makeProviders({
    fetcher: makeStandardFetcher(fetch),
    target: targets.BROWSER,
    proxiedFetcher: makeSimpleProxyFetcher(proxyUrl, fetch),
  });

  if (type === 'source') {
    return providers.runSourceScraper(input);
  }
  if (type === 'embed') {
    return providers.runEmbedScraper(input);
  }

  // Reachable at runtime despite the union type: callers cross an untyped boundary.
  throw new Error('Invalid input type');
};

View File

@@ -62,7 +62,7 @@ async function runQuestions() {
{ {
type: 'select', type: 'select',
name: 'fetcher', name: 'fetcher',
message: 'Select a fetcher', message: 'Select a fetcher mode',
choices: [ choices: [
{ {
message: 'Native', message: 'Native',
@@ -72,6 +72,10 @@ async function runQuestions() {
message: 'Node fetch', message: 'Node fetch',
name: 'node-fetch', name: 'node-fetch',
}, },
{
message: 'Browser',
name: 'browser',
},
], ],
}, },
{ {

View File

@@ -1,18 +1,91 @@
/* eslint import/no-extraneous-dependencies: ["error", {"devDependencies": true}] */ /* eslint import/no-extraneous-dependencies: ["error", {"devDependencies": true}] */
import Spinnies from 'spinnies'; import { existsSync } from 'fs';
import { join } from 'path';
import puppeteer, { Browser } from 'puppeteer';
import Spinnies from 'spinnies';
import { PreviewServer, build, preview } from 'vite';
import { getConfig } from '@/dev-cli/config';
import { logDeepObject } from '@/dev-cli/logging'; import { logDeepObject } from '@/dev-cli/logging';
import { getMovieMediaDetails, getShowMediaDetails } from '@/dev-cli/tmdb'; import { getMovieMediaDetails, getShowMediaDetails } from '@/dev-cli/tmdb';
import { CommandLineArguments } from '@/dev-cli/validate'; import { CommandLineArguments } from '@/dev-cli/validate';
import { MetaOutput, ProviderMakerOptions, makeProviders } from '..'; import { MetaOutput, ProviderMakerOptions, makeProviders } from '..';
/**
 * Run a single source or embed scraper inside a headless browser.
 *
 * Builds the `browser` directory next to this file with Vite, serves it with
 * Vite's preview server, opens it in Puppeteer, waits for the page to define
 * `window.scrape`, and calls that function with the configured proxy URL and
 * the scraper input.
 *
 * @param providerOptions - Not used in browser mode; the page builds its own
 *   providers. Kept so the signature matches `runActualScraping`.
 * @param source - Metadata of the scraper to run; `type` selects source or embed.
 * @param options - Parsed command line arguments (url, TMDB id, season, episode).
 * @returns The value resolved by `window.scrape` inside the page.
 * @throws Error when the library has not been compiled, the proxy URL is not
 *   configured, the preview server reports no local URL, or `source.type` is
 *   neither `'embed'` nor `'source'`.
 */
async function runBrowserScraping(
  providerOptions: ProviderMakerOptions,
  source: MetaOutput,
  options: CommandLineArguments,
) {
  if (!existsSync(join(__dirname, '../../lib/index.mjs')))
    throw new Error('Please compile before running cli in browser mode');

  const config = getConfig();
  if (!config.proxyUrl)
    throw new Error('Simple proxy url must be set in the environment (MOVIE_WEB_PROXY_URL) for browser mode to work');

  const root = join(__dirname, 'browser');
  let server: PreviewServer | undefined;
  let browser: Browser | undefined;

  try {
    // setup browser
    await build({
      root,
    });
    server = await preview({
      root,
    });

    // Fail with a clear message instead of handing `undefined` to page.goto
    // when the preview server has no local address to report.
    const pageUrl = server.resolvedUrls?.local[0];
    if (!pageUrl) throw new Error('Vite preview server did not report a local URL');

    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage();
    await page.goto(pageUrl);
    await page.waitForFunction('!!window.scrape', { timeout: 5000 });

    // get input media
    let input: any;
    if (source.type === 'embed') {
      input = {
        url: options.url,
        id: source.id,
      };
    } else if (source.type === 'source') {
      let media;
      if (options.type === 'movie') {
        media = await getMovieMediaDetails(options.tmdbId);
      } else {
        media = await getShowMediaDetails(options.tmdbId, options.season, options.episode);
      }
      input = {
        media,
        id: source.id,
      };
    } else {
      throw new Error('Wrong source input type');
    }

    // The callback is executed inside the page, so everything it needs is
    // passed as explicit arguments.
    return await page.evaluate(
      async (proxy, type, inp) => {
        return (window as any).scrape(proxy, type, inp);
      },
      config.proxyUrl,
      source.type,
      input,
    );
  } finally {
    // Always release the preview server and the browser, even on failure.
    server?.httpServer.close();
    await browser?.close();
  }
}
async function runActualScraping(
providerOptions: ProviderMakerOptions,
source: MetaOutput,
options: CommandLineArguments,
): Promise<any> {
if (options.fetcher === 'browser') return runBrowserScraping(providerOptions, source, options);
const providers = makeProviders(providerOptions); const providers = makeProviders(providerOptions);
if (source.type === 'embed') { if (source.type === 'embed') {