Merge branch 'pr/14' into pr-14-v2

This commit is contained in:
Jorrin
2023-12-25 22:52:49 +01:00
19 changed files with 340 additions and 4 deletions

View File

@@ -18,6 +18,8 @@ module.exports = {
}, },
plugins: ['@typescript-eslint', 'import', 'prettier'], plugins: ['@typescript-eslint', 'import', 'prettier'],
rules: { rules: {
'no-plusplus': 'off',
'no-bitwise': 'off',
'no-underscore-dangle': 'off', 'no-underscore-dangle': 'off',
'@typescript-eslint/no-explicit-any': 'off', '@typescript-eslint/no-explicit-any': 'off',
'no-console': 'off', 'no-console': 'off',

View File

@@ -26,6 +26,7 @@ The following CLI Mode arguments are available
| `--season` | `-s` | Season number. Only used if type is `show` | `0` | | `--season` | `-s` | Season number. Only used if type is `show` | `0` |
| `--episode` | `-e` | Episode number. Only used if type is `show` | `0` | | `--episode` | `-e` | Episode number. Only used if type is `show` | `0` |
| `--url` | `-u` | URL to a video embed. Only used if source is an embed | | | `--url` | `-u` | URL to a video embed. Only used if source is an embed | |
| `--headers` | `-h` | Optional headers to send while scraping | |
| `--help` | `-h` | Shows help for the command arguments | | | `--help` | `-h` | Shows help for the command arguments | |
Example testing the FlixHQ source on the movie "Spirited Away" Example testing the FlixHQ source on the movie "Spirited Away"

View File

@@ -39,6 +39,7 @@ type CommandLineArguments = {
season: string; season: string;
episode: string; episode: string;
url: string; url: string;
headers?: Record<string, string>;
}; };
const TMDB_API_KEY = process.env.MOVIE_WEB_TMDB_API_KEY ?? ''; const TMDB_API_KEY = process.env.MOVIE_WEB_TMDB_API_KEY ?? '';
@@ -185,6 +186,7 @@ async function runScraper(providers: ProviderControls, source: MetaOutput, optio
const result = await providers.runEmbedScraper({ const result = await providers.runEmbedScraper({
url: options.url, url: options.url,
id: source.id, id: source.id,
headers: options.headers,
}); });
spinnies.succeed('scrape', { text: 'Done!' }); spinnies.succeed('scrape', { text: 'Done!' });
logDeepObject(result); logDeepObject(result);
@@ -279,6 +281,10 @@ async function processOptions(options: CommandLineArguments) {
} }
} }
if (typeof options.headers === 'string') {
options.headers = JSON.parse(options.headers);
}
let fetcher; let fetcher;
if (options.fetcher === 'native') { if (options.fetcher === 'native') {
@@ -409,7 +415,8 @@ async function runCommandLine() {
.option('-t, --type <type>', "Media type. Either 'movie' or 'show'. Only used if source is a provider", 'movie') .option('-t, --type <type>', "Media type. Either 'movie' or 'show'. Only used if source is a provider", 'movie')
.option('-s, --season <number>', "Season number. Only used if type is 'show'", '0') .option('-s, --season <number>', "Season number. Only used if type is 'show'", '0')
.option('-e, --episode <number>', "Episode number. Only used if type is 'show'", '0') .option('-e, --episode <number>', "Episode number. Only used if type is 'show'", '0')
.option('-u, --url <embed URL>', 'URL to a video embed. Only used if source is an embed', ''); .option('-u, --url <embed URL>', 'URL to a video embed. Only used if source is an embed', '')
.option('-h, --headers <JSON>', 'Optional headers to pass to scrapers. JSON encoded');
program.parse(); program.parse();

View File

@@ -34,6 +34,7 @@ export function makeFullFetcher(fetcher: Fetcher): UseableFetcher {
query: ops?.query ?? {}, query: ops?.query ?? {},
baseUrl: ops?.baseUrl ?? '', baseUrl: ops?.baseUrl ?? '',
body: ops?.body, body: ops?.body,
returnRaw: ops?.returnRaw ?? false,
}); });
}; };
} }

View File

@@ -17,6 +17,7 @@ export type FetchReply = {
text(): Promise<string>; text(): Promise<string>;
json(): Promise<any>; json(): Promise<any>;
headers: FetchHeaders; headers: FetchHeaders;
url: string;
}; };
export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>; export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>;

View File

@@ -17,6 +17,10 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
body: seralizedBody.body, body: seralizedBody.body,
}); });
if (ops.returnRaw) {
return res;
}
const isJson = res.headers.get('content-type')?.includes('application/json'); const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) return res.json(); if (isJson) return res.json();
return res.text(); return res.text();

View File

@@ -4,8 +4,9 @@ export type FetcherOptions = {
baseUrl?: string; baseUrl?: string;
headers?: Record<string, string>; headers?: Record<string, string>;
query?: Record<string, string>; query?: Record<string, string>;
method?: 'GET' | 'POST'; method?: 'HEAD' | 'GET' | 'POST';
body?: Record<string, any> | string | FormData | URLSearchParams; body?: Record<string, any> | string | FormData | URLSearchParams;
returnRaw?: boolean;
}; };
export type DefaultedFetcherOptions = { export type DefaultedFetcherOptions = {
@@ -13,7 +14,8 @@ export type DefaultedFetcherOptions = {
body?: Record<string, any> | string | FormData; body?: Record<string, any> | string | FormData;
headers: Record<string, string>; headers: Record<string, string>;
query: Record<string, string>; query: Record<string, string>;
method: 'GET' | 'POST'; method: 'HEAD' | 'GET' | 'POST';
returnRaw: boolean;
}; };
export type Fetcher<T = any> = { export type Fetcher<T = any> = {

View File

@@ -57,6 +57,9 @@ export interface EmbedRunnerOptions {
// id of the embed scraper you want to scrape from // id of the embed scraper you want to scrape from
id: string; id: string;
// optional headers for the embed scraper to use
headers?: Record<string, string>;
} }
export interface ProviderControls { export interface ProviderControls {

View File

@@ -66,6 +66,7 @@ export type IndividualEmbedRunnerOptions = {
url: string; url: string;
id: string; id: string;
events?: IndividualScraperEvents; events?: IndividualScraperEvents;
headers?: Record<string, string>;
}; };
export async function scrapeIndividualEmbed( export async function scrapeIndividualEmbed(
@@ -79,6 +80,7 @@ export async function scrapeIndividualEmbed(
fetcher: ops.fetcher, fetcher: ops.fetcher,
proxiedFetcher: ops.proxiedFetcher, proxiedFetcher: ops.proxiedFetcher,
url: ops.url, url: ops.url,
headers: ops.headers,
progress(val) { progress(val) {
ops.events?.update?.({ ops.events?.update?.({
id: embedScraper.id, id: embedScraper.id,

View File

@@ -2,15 +2,18 @@ import { Embed, Sourcerer } from '@/providers/base';
import { febBoxScraper } from '@/providers/embeds/febBox'; import { febBoxScraper } from '@/providers/embeds/febBox';
import { mixdropScraper } from '@/providers/embeds/mixdrop'; import { mixdropScraper } from '@/providers/embeds/mixdrop';
import { mp4uploadScraper } from '@/providers/embeds/mp4upload'; import { mp4uploadScraper } from '@/providers/embeds/mp4upload';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { streamsbScraper } from '@/providers/embeds/streamsb'; import { streamsbScraper } from '@/providers/embeds/streamsb';
import { upcloudScraper } from '@/providers/embeds/upcloud'; import { upcloudScraper } from '@/providers/embeds/upcloud';
import { upstreamScraper } from '@/providers/embeds/upstream'; import { upstreamScraper } from '@/providers/embeds/upstream';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { flixhqScraper } from '@/providers/sources/flixhq/index'; import { flixhqScraper } from '@/providers/sources/flixhq/index';
import { goMoviesScraper } from '@/providers/sources/gomovies/index'; import { goMoviesScraper } from '@/providers/sources/gomovies/index';
import { kissAsianScraper } from '@/providers/sources/kissasian/index'; import { kissAsianScraper } from '@/providers/sources/kissasian/index';
import { lookmovieScraper } from '@/providers/sources/lookmovie'; import { lookmovieScraper } from '@/providers/sources/lookmovie';
import { remotestreamScraper } from '@/providers/sources/remotestream'; import { remotestreamScraper } from '@/providers/sources/remotestream';
import { superStreamScraper } from '@/providers/sources/superstream/index'; import { superStreamScraper } from '@/providers/sources/superstream/index';
import { vidsrcScraper } from '@/providers/sources/vidsrc';
import { zoechipScraper } from '@/providers/sources/zoechip'; import { zoechipScraper } from '@/providers/sources/zoechip';
import { smashyStreamDScraper } from './embeds/smashystream/dued'; import { smashyStreamDScraper } from './embeds/smashystream/dued';
@@ -27,6 +30,7 @@ export function gatherAllSources(): Array<Sourcerer> {
superStreamScraper, superStreamScraper,
goMoviesScraper, goMoviesScraper,
zoechipScraper, zoechipScraper,
vidsrcScraper,
lookmovieScraper, lookmovieScraper,
showBoxScraper, showBoxScraper,
smashyStreamScraper, smashyStreamScraper,
@@ -40,8 +44,10 @@ export function gatherAllEmbeds(): Array<Embed> {
mp4uploadScraper, mp4uploadScraper,
streamsbScraper, streamsbScraper,
upstreamScraper, upstreamScraper,
febBoxScraper,
mixdropScraper, mixdropScraper,
vidsrcembedScraper,
streambucketScraper,
febBoxScraper,
smashyStreamFScraper, smashyStreamFScraper,
smashyStreamDScraper, smashyStreamDScraper,
]; ];

View File

@@ -5,6 +5,7 @@ import { EmbedScrapeContext, MovieScrapeContext, ShowScrapeContext } from '@/uti
export type SourcererEmbed = { export type SourcererEmbed = {
embedId: string; embedId: string;
url: string; url: string;
headers?: Record<string, string>;
}; };
export type SourcererOutput = { export type SourcererOutput = {

View File

@@ -0,0 +1,98 @@
import { flags } from '@/main/targets';
import { makeEmbed } from '@/providers/base';
// StreamBucket makes use of https://github.com/nicxlau/hunter-php-javascript-obfuscator
const hunterRegex = /eval\(function\(h,u,n,t,e,r\).*?\("(.*?)",\d*?,"(.*?)",(\d*?),(\d*?),\d*?\)\)/;
const linkRegex = /file:"(.*?)"/;

/**
 * Simplified re-implementation of the "h,u,n,t,e,r" obfuscation decoder.
 * The payload is plain chunked + mask encoding: chunks are separated by a
 * character taken from `mask`, each chunk is a base-`delimiterOffset` number
 * whose digits are indices into `mask`, and every decoded char code is
 * shifted down by `charCodeOffset`. The same scheme has also been seen
 * on some sites under the name "p,l,a,y,e,r".
 */
function decodeHunter(encoded: string, mask: string, charCodeOffset: number, delimiterOffset: number) {
  // The character at this offset in the mask separates the chunks; the same
  // offset doubles as the numeric base for the per-digit calculation below.
  const separator = mask[delimiterOffset];

  const output: string[] = [];
  for (const chunk of encoded.split(separator)) {
    // Skip empty fragments produced by leading/trailing/doubled separators.
    if (!chunk) continue;

    // Digits are most-significant first, so walk the chunk from the end,
    // accumulating maskIndex * base^position.
    let codePoint = 0;
    for (let position = 0; position < chunk.length; position++) {
      const digit = mask.indexOf(chunk[chunk.length - 1 - position]);
      codePoint += digit * delimiterOffset ** position;
    }

    // The real character code is stored with a fixed offset applied.
    output.push(String.fromCharCode(codePoint - charCodeOffset));
  }

  return output.join('');
}
export const streambucketScraper = makeEmbed({
  id: 'streambucket',
  name: 'StreamBucket',
  rank: 196,
  // TODO - Disabled until ctx.fetcher and ctx.proxiedFetcher don't trigger bot detection
  disabled: true,
  async scrape(ctx) {
    // Using the context fetchers make the site return just the string "No bots please!"?
    // TODO - Fix this. Native fetch does not trigger this. No idea why right now
    const response = await fetch(ctx.url);
    const html = await response.text();

    // This is different than the above mentioned bot detection
    if (html.includes('captcha-checkbox')) {
      // TODO - This doesn't use recaptcha, just really basic "image match". Maybe could automate?
      throw new Error('StreamBucket got captchaed');
    }

    // Pull the obfuscated "hunter" payload and its decode parameters out of
    // the page's inline JavaScript.
    const hunterMatch = html.match(hunterRegex);
    if (!hunterMatch) {
      throw new Error('Failed to find StreamBucket hunter JavaScript');
    }

    const [, encoded, mask, rawCharCodeOffset, rawDelimiterOffset] = hunterMatch;
    const charCodeOffset = Number(rawCharCodeOffset);
    const delimiterOffset = Number(rawDelimiterOffset);

    if (Number.isNaN(charCodeOffset)) {
      throw new Error('StreamBucket hunter JavaScript charCodeOffset is not a valid number');
    }
    if (Number.isNaN(delimiterOffset)) {
      throw new Error('StreamBucket hunter JavaScript delimiterOffset is not a valid number');
    }

    // The decoded script contains the HLS playlist in a file:"…" assignment.
    const decoded = decodeHunter(encoded, mask, charCodeOffset, delimiterOffset);
    const linkMatch = decoded.match(linkRegex);
    if (!linkMatch) {
      throw new Error('Failed to find StreamBucket HLS link');
    }

    return {
      stream: {
        type: 'hls',
        playlist: linkMatch[1],
        flags: [flags.NO_CORS],
        captions: [],
      },
    };
  },
});

View File

@@ -0,0 +1,35 @@
import { makeEmbed } from '@/providers/base';
// The player setup script assigns the (encoded) playlist via file:"…"
const hlsURLRegex = /file:"(.*?)"/;

export const vidsrcembedScraper = makeEmbed({
  id: 'vidsrcembed', // VidSrc is both a source and an embed host
  name: 'VidSrc',
  rank: 197,
  async scrape(ctx) {
    // The embed only works when the original referer is forwarded along.
    const referer = ctx.headers?.referer || ctx.headers?.Referer;
    if (!referer) {
      throw new Error('VidSrc embeds require the referer header to be set');
    }

    const html = await ctx.proxiedFetcher<string>(ctx.url, {
      headers: ctx.headers,
    });

    // Strip the "//…=" junk sequences and the "#2" marker mixed into the
    // matched value before base64-decoding it.
    const encodedPlaylist = html
      .match(hlsURLRegex)?.[1]
      ?.replace(/(\/\/\S+?=)/g, '')
      .replace('#2', '');
    if (!encodedPlaylist) throw new Error('Unable to find HLS playlist');

    const playlistUrl = atob(encodedPlaylist);
    if (!playlistUrl.includes('.m3u8')) throw new Error('Unable to find HLS playlist');

    return {
      stream: {
        type: 'hls',
        playlist: playlistUrl,
        flags: [],
        captions: [],
      },
    };
  },
});

View File

@@ -0,0 +1,2 @@
// Main VidSrc site; hosts the embed pages that list the available servers.
export const vidsrcBase = 'https://vidsrc.me';
// VidSrc's "rcp" host; serves the per-server redirect pages at /rcp/{hash}.
export const vidsrcRCPBase = 'https://rcp.vidsrc.me';

View File

@@ -0,0 +1,13 @@
import { flags } from '@/main/targets';
import { makeSourcerer } from '@/providers/base';
import { scrapeMovie } from '@/providers/sources/vidsrc/scrape-movie';
import { scrapeShow } from '@/providers/sources/vidsrc/scrape-show';
// VidSrc source: resolves movies and shows to embed links via vidsrc.me's
// hash + redirect system.
export const vidsrcScraper = makeSourcerer({
  id: 'vidsrc',
  name: 'VidSrc',
  rank: 120,
  flags: [flags.NO_CORS],
  scrapeMovie,
  scrapeShow,
});

View File

@@ -0,0 +1,8 @@
import { getVidSrcMovieSources } from '@/providers/sources/vidsrc/scrape';
import { MovieScrapeContext } from '@/utils/context';
/** Scrape VidSrc for a movie; embeds come straight from the TMDB-id embed page. */
export async function scrapeMovie(ctx: MovieScrapeContext) {
  const embeds = await getVidSrcMovieSources(ctx);
  return { embeds };
}

View File

@@ -0,0 +1,8 @@
import { getVidSrcShowSources } from '@/providers/sources/vidsrc/scrape';
import { ShowScrapeContext } from '@/utils/context';
/** Scrape VidSrc for a single show episode. */
export async function scrapeShow(ctx: ShowScrapeContext) {
  const embeds = await getVidSrcShowSources(ctx);
  return { embeds };
}

View File

@@ -0,0 +1,141 @@
import { load } from 'cheerio';
import { FetchReply } from '@/fetchers/fetch';
import { SourcererEmbed } from '@/providers/base';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { vidsrcBase, vidsrcRCPBase } from '@/providers/sources/vidsrc/common';
import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
/**
 * Decodes VidSrc's obfuscated iframe src.
 *
 * `encoded` is a hex string; each byte is XORed with the corresponding
 * (cycled) character of `seed` to recover the plaintext URL.
 *
 * Implemented with plain string operations instead of `Buffer` so it also
 * works in browsers, where Node's `Buffer` global is unavailable — the rest
 * of this provider package (e.g. the vidsrc embed's `atob` usage) runs in
 * browser environments too.
 */
function decodeSrc(encoded: string, seed: string) {
  let decoded = '';
  for (let i = 0; i < encoded.length; i += 2) {
    // Parse one hex byte, then XOR it against the seed character for this
    // position (seed repeats when shorter than the payload).
    const byte = parseInt(encoded.slice(i, i + 2), 16);
    decoded += String.fromCharCode(byte ^ seed.charCodeAt((i / 2) % seed.length));
  }
  return decoded;
}
/**
 * Resolves every VidSrc "server" on the page at `startingURL` into a
 * concrete embed entry (embedId + real embed URL, plus headers when the
 * embed needs them). Hosts that cannot be scraped are skipped; an unknown
 * host is treated as an error so new servers get noticed.
 */
async function getVidSrcEmbeds(ctx: MovieScrapeContext | ShowScrapeContext, startingURL: string) {
  // VidSrc works by using hashes and a redirect system.
  // The hashes are stored in the html, and VidSrc will
  // make requests to their servers with the hash. This
  // will trigger a 302 response with a Location header
  // sending the user to the correct embed. To get the
  // real embed links, we must do the same. Slow, but
  // required
  const embeds: SourcererEmbed[] = [];

  let html = await ctx.proxiedFetcher<string>(startingURL, {
    baseUrl: vidsrcBase,
  });

  let $ = load(html);

  // Each ".server" element carries the data-hash VidSrc uses to look up its
  // redirect page; collect them all (dropping elements without the attribute).
  const sourceHashes = $('.server[data-hash]')
    .toArray()
    .map((el) => $(el).attr('data-hash'))
    .filter((hash) => hash !== undefined);

  // Deliberately sequential (see the note above): each hash needs its own
  // round-trip through the rcp host before the next one is requested.
  for (const hash of sourceHashes) {
    html = await ctx.proxiedFetcher<string>(`/rcp/${hash}`, {
      baseUrl: vidsrcRCPBase,
      headers: {
        referer: `${vidsrcBase}${startingURL}`,
      },
    });

    $ = load(html);

    // The iframe src is hex-encoded in #hidden's data-h, XOR-keyed by the
    // seed stored in body's data-i (see decodeSrc).
    const encoded = $('#hidden').attr('data-h');
    const seed = $('body').attr('data-i');

    if (!encoded || !seed) {
      throw new Error('Failed to find encoded iframe src');
    }

    let redirectURL = decodeSrc(encoded, seed);
    // Protocol-relative URLs ("//host/...") need a scheme before fetching.
    if (redirectURL.startsWith('//')) {
      redirectURL = `https:${redirectURL}`;
    }

    // Return the raw fetch response here.
    // When a Location header is sent, fetch
    // will silently follow it. The "url" inside
    // the Response is the final requested URL,
    // which is the real embeds URL
    const { url: embedURL } = await ctx.proxiedFetcher<FetchReply>(redirectURL, {
      returnRaw: true,
      method: 'HEAD', // We don't care about the actual response body here
      headers: {
        referer: `${vidsrcRCPBase}/rcp/${hash}`,
      },
    });

    // Filled in per-host below; an empty embedId means "skip this server".
    const embed: SourcererEmbed = {
      embedId: '',
      url: embedURL,
    };

    const parsedUrl = new URL(embedURL);

    switch (parsedUrl.host) {
      case 'vidsrc.stream':
        embed.embedId = vidsrcembedScraper.id;
        // The vidsrc embed scraper requires the rcp page as referer.
        embed.headers = {
          referer: `${vidsrcRCPBase}/rcp/${hash}`,
        };
        break;
      case 'streambucket.net':
        embed.embedId = streambucketScraper.id;
        break;
      case '2embed.cc':
      case 'www.2embed.cc':
        // Just ignore this. This embed just sources from other embeds we can scrape as a 'source'
        break;
      case 'player-cdn.com':
        // Just ignore this. This embed streams video over a custom WebSocket connection
        break;
      default:
        throw new Error(`Failed to find VidSrc embed source for ${embedURL}`);
    }

    // Since some embeds are ignored on purpose, check if a valid one was found
    if (embed.embedId !== '') {
      embeds.push(embed);
    }
  }

  return embeds;
}
/** Movies map directly to the TMDB-id embed page; no episode lookup needed. */
export async function getVidSrcMovieSources(ctx: MovieScrapeContext) {
  const movieEmbedPath = `/embed/${ctx.media.tmdbId}`;
  return getVidSrcEmbeds(ctx, movieEmbedPath);
}
export async function getVidSrcShowSources(ctx: ShowScrapeContext) {
  // VidSrc will always default to season 1 episode 1 no matter what embed
  // URL is used. The response lists ALL of the show's episodes, in order,
  // across every season — the real embed URL has to be parsed out of it.
  const episodeListHtml = await ctx.proxiedFetcher<string>(`/embed/${ctx.media.tmdbId}`, {
    baseUrl: vidsrcBase,
  });

  const $page = load(episodeListHtml);

  // Locate the entry matching the requested season + episode numbers.
  const episodeSelector = `.ep[data-s="${ctx.media.season.number}"][data-e="${ctx.media.episode.number}"]`;
  const episode = $page(episodeSelector).first();
  if (episode.length === 0) {
    throw new Error('failed to find episode element');
  }

  const startingURL = episode.attr('data-iframe');
  if (!startingURL) {
    throw new Error('failed to find episode starting URL');
  }

  return getVidSrcEmbeds(ctx, startingURL);
}

View File

@@ -9,6 +9,7 @@ export type ScrapeContext = {
export type EmbedInput = { export type EmbedInput = {
url: string; url: string;
headers?: Record<string, string>;
}; };
export type EmbedScrapeContext = EmbedInput & ScrapeContext; export type EmbedScrapeContext = EmbedInput & ScrapeContext;