Merge branch 'pr/14' into pr-14-v2

This commit is contained in:
Jorrin
2023-12-25 22:52:49 +01:00
19 changed files with 340 additions and 4 deletions

View File

@@ -18,6 +18,8 @@ module.exports = {
}, },
plugins: ['@typescript-eslint', 'import', 'prettier'], plugins: ['@typescript-eslint', 'import', 'prettier'],
rules: { rules: {
'no-plusplus': 'off',
'no-bitwise': 'off',
'no-underscore-dangle': 'off', 'no-underscore-dangle': 'off',
'@typescript-eslint/no-explicit-any': 'off', '@typescript-eslint/no-explicit-any': 'off',
'no-console': 'off', 'no-console': 'off',

View File

@@ -26,6 +26,7 @@ The following CLI Mode arguments are available
| `--season` | `-s` | Season number. Only used if type is `show` | `0` | | `--season` | `-s` | Season number. Only used if type is `show` | `0` |
| `--episode` | `-e` | Episode number. Only used if type is `show` | `0` | | `--episode` | `-e` | Episode number. Only used if type is `show` | `0` |
| `--url` | `-u` | URL to a video embed. Only used if source is an embed | | | `--url` | `-u` | URL to a video embed. Only used if source is an embed | |
| `--headers` | `-h` | Optional headers to send while scraping | |
| `--help` | `-h` | Shows help for the command arguments | | | `--help` | `-h` | Shows help for the command arguments | |
Example testing the FlixHQ source on the movie "Spirited Away" Example testing the FlixHQ source on the movie "Spirited Away"

View File

@@ -39,6 +39,7 @@ type CommandLineArguments = {
season: string; season: string;
episode: string; episode: string;
url: string; url: string;
headers?: Record<string, string>;
}; };
const TMDB_API_KEY = process.env.MOVIE_WEB_TMDB_API_KEY ?? ''; const TMDB_API_KEY = process.env.MOVIE_WEB_TMDB_API_KEY ?? '';
@@ -185,6 +186,7 @@ async function runScraper(providers: ProviderControls, source: MetaOutput, optio
const result = await providers.runEmbedScraper({ const result = await providers.runEmbedScraper({
url: options.url, url: options.url,
id: source.id, id: source.id,
headers: options.headers,
}); });
spinnies.succeed('scrape', { text: 'Done!' }); spinnies.succeed('scrape', { text: 'Done!' });
logDeepObject(result); logDeepObject(result);
@@ -279,6 +281,10 @@ async function processOptions(options: CommandLineArguments) {
} }
} }
if (typeof options.headers === 'string') {
options.headers = JSON.parse(options.headers);
}
let fetcher; let fetcher;
if (options.fetcher === 'native') { if (options.fetcher === 'native') {
@@ -409,7 +415,8 @@ async function runCommandLine() {
.option('-t, --type <type>', "Media type. Either 'movie' or 'show'. Only used if source is a provider", 'movie') .option('-t, --type <type>', "Media type. Either 'movie' or 'show'. Only used if source is a provider", 'movie')
.option('-s, --season <number>', "Season number. Only used if type is 'show'", '0') .option('-s, --season <number>', "Season number. Only used if type is 'show'", '0')
.option('-e, --episode <number>', "Episode number. Only used if type is 'show'", '0') .option('-e, --episode <number>', "Episode number. Only used if type is 'show'", '0')
.option('-u, --url <embed URL>', 'URL to a video embed. Only used if source is an embed', ''); .option('-u, --url <embed URL>', 'URL to a video embed. Only used if source is an embed', '')
.option('-h, --headers <JSON>', 'Optional headers to pass to scrapers. JSON encoded');
program.parse(); program.parse();

View File

@@ -34,6 +34,7 @@ export function makeFullFetcher(fetcher: Fetcher): UseableFetcher {
query: ops?.query ?? {}, query: ops?.query ?? {},
baseUrl: ops?.baseUrl ?? '', baseUrl: ops?.baseUrl ?? '',
body: ops?.body, body: ops?.body,
returnRaw: ops?.returnRaw ?? false,
}); });
}; };
} }

View File

@@ -17,6 +17,7 @@ export type FetchReply = {
text(): Promise<string>; text(): Promise<string>;
json(): Promise<any>; json(): Promise<any>;
headers: FetchHeaders; headers: FetchHeaders;
url: string;
}; };
export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>; export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>;

View File

@@ -17,6 +17,10 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
body: seralizedBody.body, body: seralizedBody.body,
}); });
if (ops.returnRaw) {
return res;
}
const isJson = res.headers.get('content-type')?.includes('application/json'); const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) return res.json(); if (isJson) return res.json();
return res.text(); return res.text();

View File

@@ -4,8 +4,9 @@ export type FetcherOptions = {
baseUrl?: string; baseUrl?: string;
headers?: Record<string, string>; headers?: Record<string, string>;
query?: Record<string, string>; query?: Record<string, string>;
method?: 'GET' | 'POST'; method?: 'HEAD' | 'GET' | 'POST';
body?: Record<string, any> | string | FormData | URLSearchParams; body?: Record<string, any> | string | FormData | URLSearchParams;
returnRaw?: boolean;
}; };
export type DefaultedFetcherOptions = { export type DefaultedFetcherOptions = {
@@ -13,7 +14,8 @@ export type DefaultedFetcherOptions = {
body?: Record<string, any> | string | FormData; body?: Record<string, any> | string | FormData;
headers: Record<string, string>; headers: Record<string, string>;
query: Record<string, string>; query: Record<string, string>;
method: 'GET' | 'POST'; method: 'HEAD' | 'GET' | 'POST';
returnRaw: boolean;
}; };
export type Fetcher<T = any> = { export type Fetcher<T = any> = {

View File

@@ -57,6 +57,9 @@ export interface EmbedRunnerOptions {
// id of the embed scraper you want to scrape from // id of the embed scraper you want to scrape from
id: string; id: string;
// optional headers for the embed scraper to use
headers?: Record<string, string>;
} }
export interface ProviderControls { export interface ProviderControls {

View File

@@ -66,6 +66,7 @@ export type IndividualEmbedRunnerOptions = {
url: string; url: string;
id: string; id: string;
events?: IndividualScraperEvents; events?: IndividualScraperEvents;
headers?: Record<string, string>;
}; };
export async function scrapeIndividualEmbed( export async function scrapeIndividualEmbed(
@@ -79,6 +80,7 @@ export async function scrapeIndividualEmbed(
fetcher: ops.fetcher, fetcher: ops.fetcher,
proxiedFetcher: ops.proxiedFetcher, proxiedFetcher: ops.proxiedFetcher,
url: ops.url, url: ops.url,
headers: ops.headers,
progress(val) { progress(val) {
ops.events?.update?.({ ops.events?.update?.({
id: embedScraper.id, id: embedScraper.id,

View File

@@ -2,15 +2,18 @@ import { Embed, Sourcerer } from '@/providers/base';
import { febBoxScraper } from '@/providers/embeds/febBox'; import { febBoxScraper } from '@/providers/embeds/febBox';
import { mixdropScraper } from '@/providers/embeds/mixdrop'; import { mixdropScraper } from '@/providers/embeds/mixdrop';
import { mp4uploadScraper } from '@/providers/embeds/mp4upload'; import { mp4uploadScraper } from '@/providers/embeds/mp4upload';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { streamsbScraper } from '@/providers/embeds/streamsb'; import { streamsbScraper } from '@/providers/embeds/streamsb';
import { upcloudScraper } from '@/providers/embeds/upcloud'; import { upcloudScraper } from '@/providers/embeds/upcloud';
import { upstreamScraper } from '@/providers/embeds/upstream'; import { upstreamScraper } from '@/providers/embeds/upstream';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { flixhqScraper } from '@/providers/sources/flixhq/index'; import { flixhqScraper } from '@/providers/sources/flixhq/index';
import { goMoviesScraper } from '@/providers/sources/gomovies/index'; import { goMoviesScraper } from '@/providers/sources/gomovies/index';
import { kissAsianScraper } from '@/providers/sources/kissasian/index'; import { kissAsianScraper } from '@/providers/sources/kissasian/index';
import { lookmovieScraper } from '@/providers/sources/lookmovie'; import { lookmovieScraper } from '@/providers/sources/lookmovie';
import { remotestreamScraper } from '@/providers/sources/remotestream'; import { remotestreamScraper } from '@/providers/sources/remotestream';
import { superStreamScraper } from '@/providers/sources/superstream/index'; import { superStreamScraper } from '@/providers/sources/superstream/index';
import { vidsrcScraper } from '@/providers/sources/vidsrc';
import { zoechipScraper } from '@/providers/sources/zoechip'; import { zoechipScraper } from '@/providers/sources/zoechip';
import { smashyStreamDScraper } from './embeds/smashystream/dued'; import { smashyStreamDScraper } from './embeds/smashystream/dued';
@@ -27,6 +30,7 @@ export function gatherAllSources(): Array<Sourcerer> {
superStreamScraper, superStreamScraper,
goMoviesScraper, goMoviesScraper,
zoechipScraper, zoechipScraper,
vidsrcScraper,
lookmovieScraper, lookmovieScraper,
showBoxScraper, showBoxScraper,
smashyStreamScraper, smashyStreamScraper,
@@ -40,8 +44,10 @@ export function gatherAllEmbeds(): Array<Embed> {
mp4uploadScraper, mp4uploadScraper,
streamsbScraper, streamsbScraper,
upstreamScraper, upstreamScraper,
febBoxScraper,
mixdropScraper, mixdropScraper,
vidsrcembedScraper,
streambucketScraper,
febBoxScraper,
smashyStreamFScraper, smashyStreamFScraper,
smashyStreamDScraper, smashyStreamDScraper,
]; ];

View File

@@ -5,6 +5,7 @@ import { EmbedScrapeContext, MovieScrapeContext, ShowScrapeContext } from '@/uti
export type SourcererEmbed = { export type SourcererEmbed = {
embedId: string; embedId: string;
url: string; url: string;
headers?: Record<string, string>;
}; };
export type SourcererOutput = { export type SourcererOutput = {

View File

@@ -0,0 +1,98 @@
import { flags } from '@/main/targets';
import { makeEmbed } from '@/providers/base';
// StreamBucket makes use of https://github.com/nicxlau/hunter-php-javascript-obfuscator
const hunterRegex = /eval\(function\(h,u,n,t,e,r\).*?\("(.*?)",\d*?,"(.*?)",(\d*?),(\d*?),\d*?\)\)/;
const linkRegex = /file:"(.*?)"/;

/**
 * Simplified re-implementation of the "h,u,n,t,e,r" obfuscation decoder.
 * The payload is plain chunked + mask encoding: chunks are separated by a
 * character taken from `mask`, each chunk is a base-`delimiterOffset` number
 * whose digits are indices into `mask`, and every decoded char code is
 * shifted down by `charCodeOffset`. The same scheme has also been seen
 * on some sites under the name "p,l,a,y,e,r".
 */
function decodeHunter(encoded: string, mask: string, charCodeOffset: number, delimiterOffset: number) {
  // The character at this offset in the mask separates the chunks; the same
  // offset doubles as the numeric base for the per-digit calculation below.
  const separator = mask[delimiterOffset];

  const output: string[] = [];
  for (const chunk of encoded.split(separator)) {
    // Skip empty fragments produced by leading/trailing/doubled separators.
    if (!chunk) continue;

    // Digits are most-significant first, so walk the chunk from the end,
    // accumulating maskIndex * base^position.
    let codePoint = 0;
    for (let position = 0; position < chunk.length; position++) {
      const digit = mask.indexOf(chunk[chunk.length - 1 - position]);
      codePoint += digit * delimiterOffset ** position;
    }

    // The real character code is stored with a fixed offset applied.
    output.push(String.fromCharCode(codePoint - charCodeOffset));
  }

  return output.join('');
}
export const streambucketScraper = makeEmbed({
  id: 'streambucket',
  name: 'StreamBucket',
  rank: 196,
  // TODO - Disabled until ctx.fetcher and ctx.proxiedFetcher don't trigger bot detection
  disabled: true,
  async scrape(ctx) {
    // Using the context fetchers make the site return just the string "No bots please!"?
    // TODO - Fix this. Native fetch does not trigger this. No idea why right now
    const response = await fetch(ctx.url);
    const html = await response.text();

    // This is different than the above mentioned bot detection
    if (html.includes('captcha-checkbox')) {
      // TODO - This doesn't use recaptcha, just really basic "image match". Maybe could automate?
      throw new Error('StreamBucket got captchaed');
    }

    // Pull the obfuscated "hunter" payload and its decode parameters out of
    // the page's inline JavaScript.
    const hunterMatch = html.match(hunterRegex);
    if (!hunterMatch) {
      throw new Error('Failed to find StreamBucket hunter JavaScript');
    }

    const [, encoded, mask, rawCharCodeOffset, rawDelimiterOffset] = hunterMatch;
    const charCodeOffset = Number(rawCharCodeOffset);
    const delimiterOffset = Number(rawDelimiterOffset);

    if (Number.isNaN(charCodeOffset)) {
      throw new Error('StreamBucket hunter JavaScript charCodeOffset is not a valid number');
    }
    if (Number.isNaN(delimiterOffset)) {
      throw new Error('StreamBucket hunter JavaScript delimiterOffset is not a valid number');
    }

    // The decoded script contains the HLS playlist in a file:"…" assignment.
    const decoded = decodeHunter(encoded, mask, charCodeOffset, delimiterOffset);
    const linkMatch = decoded.match(linkRegex);
    if (!linkMatch) {
      throw new Error('Failed to find StreamBucket HLS link');
    }

    return {
      stream: {
        type: 'hls',
        playlist: linkMatch[1],
        flags: [flags.NO_CORS],
        captions: [],
      },
    };
  },
});

View File

@@ -0,0 +1,35 @@
import { makeEmbed } from '@/providers/base';
// The player setup script assigns the (encoded) playlist via file:"…"
const hlsURLRegex = /file:"(.*?)"/;

export const vidsrcembedScraper = makeEmbed({
  id: 'vidsrcembed', // VidSrc is both a source and an embed host
  name: 'VidSrc',
  rank: 197,
  async scrape(ctx) {
    // The embed only works when the original referer is forwarded along.
    const referer = ctx.headers?.referer || ctx.headers?.Referer;
    if (!referer) {
      throw new Error('VidSrc embeds require the referer header to be set');
    }

    const html = await ctx.proxiedFetcher<string>(ctx.url, {
      headers: ctx.headers,
    });

    // Strip the "//…=" junk sequences and the "#2" marker mixed into the
    // matched value before base64-decoding it.
    const encodedPlaylist = html
      .match(hlsURLRegex)?.[1]
      ?.replace(/(\/\/\S+?=)/g, '')
      .replace('#2', '');
    if (!encodedPlaylist) throw new Error('Unable to find HLS playlist');

    const playlistUrl = atob(encodedPlaylist);
    if (!playlistUrl.includes('.m3u8')) throw new Error('Unable to find HLS playlist');

    return {
      stream: {
        type: 'hls',
        playlist: playlistUrl,
        flags: [],
        captions: [],
      },
    };
  },
});

View File

@@ -0,0 +1,2 @@
// Main VidSrc site; hosts the embed pages that list the available servers.
export const vidsrcBase = 'https://vidsrc.me';
// VidSrc's "rcp" host; serves the per-server redirect pages at /rcp/{hash}.
export const vidsrcRCPBase = 'https://rcp.vidsrc.me';

View File

@@ -0,0 +1,13 @@
import { flags } from '@/main/targets';
import { makeSourcerer } from '@/providers/base';
import { scrapeMovie } from '@/providers/sources/vidsrc/scrape-movie';
import { scrapeShow } from '@/providers/sources/vidsrc/scrape-show';
// VidSrc source: resolves movies and shows to embed links via vidsrc.me's
// hash + redirect system.
export const vidsrcScraper = makeSourcerer({
  id: 'vidsrc',
  name: 'VidSrc',
  rank: 120,
  flags: [flags.NO_CORS],
  scrapeMovie,
  scrapeShow,
});

View File

@@ -0,0 +1,8 @@
import { getVidSrcMovieSources } from '@/providers/sources/vidsrc/scrape';
import { MovieScrapeContext } from '@/utils/context';
/** Scrape VidSrc for a movie; embeds come straight from the TMDB-id embed page. */
export async function scrapeMovie(ctx: MovieScrapeContext) {
  const embeds = await getVidSrcMovieSources(ctx);
  return { embeds };
}

View File

@@ -0,0 +1,8 @@
import { getVidSrcShowSources } from '@/providers/sources/vidsrc/scrape';
import { ShowScrapeContext } from '@/utils/context';
/** Scrape VidSrc for a single show episode. */
export async function scrapeShow(ctx: ShowScrapeContext) {
  const embeds = await getVidSrcShowSources(ctx);
  return { embeds };
}

View File

@@ -0,0 +1,141 @@
import { load } from 'cheerio';
import { FetchReply } from '@/fetchers/fetch';
import { SourcererEmbed } from '@/providers/base';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { vidsrcBase, vidsrcRCPBase } from '@/providers/sources/vidsrc/common';
import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
/**
 * Decodes VidSrc's obfuscated iframe src.
 *
 * `encoded` is a hex string; each byte is XORed with the corresponding
 * (cycled) character of `seed` to recover the plaintext URL.
 *
 * Implemented with plain string operations instead of `Buffer` so it also
 * works in browsers, where Node's `Buffer` global is unavailable — the rest
 * of this provider package (e.g. the vidsrc embed's `atob` usage) runs in
 * browser environments too.
 */
function decodeSrc(encoded: string, seed: string) {
  let decoded = '';
  for (let i = 0; i < encoded.length; i += 2) {
    // Parse one hex byte, then XOR it against the seed character for this
    // position (seed repeats when shorter than the payload).
    const byte = parseInt(encoded.slice(i, i + 2), 16);
    decoded += String.fromCharCode(byte ^ seed.charCodeAt((i / 2) % seed.length));
  }
  return decoded;
}
/**
 * Resolves every VidSrc "server" on the page at `startingURL` into a
 * concrete embed entry (embedId + real embed URL, plus headers when the
 * embed needs them). Hosts that cannot be scraped are skipped; an unknown
 * host is treated as an error so new servers get noticed.
 */
async function getVidSrcEmbeds(ctx: MovieScrapeContext | ShowScrapeContext, startingURL: string) {
  // VidSrc works by using hashes and a redirect system.
  // The hashes are stored in the html, and VidSrc will
  // make requests to their servers with the hash. This
  // will trigger a 302 response with a Location header
  // sending the user to the correct embed. To get the
  // real embed links, we must do the same. Slow, but
  // required
  const embeds: SourcererEmbed[] = [];

  let html = await ctx.proxiedFetcher<string>(startingURL, {
    baseUrl: vidsrcBase,
  });

  let $ = load(html);

  // Each ".server" element carries the data-hash VidSrc uses to look up its
  // redirect page; collect them all (dropping elements without the attribute).
  const sourceHashes = $('.server[data-hash]')
    .toArray()
    .map((el) => $(el).attr('data-hash'))
    .filter((hash) => hash !== undefined);

  // Deliberately sequential (see the note above): each hash needs its own
  // round-trip through the rcp host before the next one is requested.
  for (const hash of sourceHashes) {
    html = await ctx.proxiedFetcher<string>(`/rcp/${hash}`, {
      baseUrl: vidsrcRCPBase,
      headers: {
        referer: `${vidsrcBase}${startingURL}`,
      },
    });

    $ = load(html);

    // The iframe src is hex-encoded in #hidden's data-h, XOR-keyed by the
    // seed stored in body's data-i (see decodeSrc).
    const encoded = $('#hidden').attr('data-h');
    const seed = $('body').attr('data-i');

    if (!encoded || !seed) {
      throw new Error('Failed to find encoded iframe src');
    }

    let redirectURL = decodeSrc(encoded, seed);
    // Protocol-relative URLs ("//host/...") need a scheme before fetching.
    if (redirectURL.startsWith('//')) {
      redirectURL = `https:${redirectURL}`;
    }

    // Return the raw fetch response here.
    // When a Location header is sent, fetch
    // will silently follow it. The "url" inside
    // the Response is the final requested URL,
    // which is the real embeds URL
    const { url: embedURL } = await ctx.proxiedFetcher<FetchReply>(redirectURL, {
      returnRaw: true,
      method: 'HEAD', // We don't care about the actual response body here
      headers: {
        referer: `${vidsrcRCPBase}/rcp/${hash}`,
      },
    });

    // Filled in per-host below; an empty embedId means "skip this server".
    const embed: SourcererEmbed = {
      embedId: '',
      url: embedURL,
    };

    const parsedUrl = new URL(embedURL);

    switch (parsedUrl.host) {
      case 'vidsrc.stream':
        embed.embedId = vidsrcembedScraper.id;
        // The vidsrc embed scraper requires the rcp page as referer.
        embed.headers = {
          referer: `${vidsrcRCPBase}/rcp/${hash}`,
        };
        break;
      case 'streambucket.net':
        embed.embedId = streambucketScraper.id;
        break;
      case '2embed.cc':
      case 'www.2embed.cc':
        // Just ignore this. This embed just sources from other embeds we can scrape as a 'source'
        break;
      case 'player-cdn.com':
        // Just ignore this. This embed streams video over a custom WebSocket connection
        break;
      default:
        throw new Error(`Failed to find VidSrc embed source for ${embedURL}`);
    }

    // Since some embeds are ignored on purpose, check if a valid one was found
    if (embed.embedId !== '') {
      embeds.push(embed);
    }
  }

  return embeds;
}
/** Movies map directly to the TMDB-id embed page; no episode lookup needed. */
export async function getVidSrcMovieSources(ctx: MovieScrapeContext) {
  const movieEmbedPath = `/embed/${ctx.media.tmdbId}`;
  return getVidSrcEmbeds(ctx, movieEmbedPath);
}
export async function getVidSrcShowSources(ctx: ShowScrapeContext) {
  // VidSrc will always default to season 1 episode 1 no matter what embed
  // URL is used. The response lists ALL of the show's episodes, in order,
  // across every season — the real embed URL has to be parsed out of it.
  const episodeListHtml = await ctx.proxiedFetcher<string>(`/embed/${ctx.media.tmdbId}`, {
    baseUrl: vidsrcBase,
  });

  const $page = load(episodeListHtml);

  // Locate the entry matching the requested season + episode numbers.
  const episodeSelector = `.ep[data-s="${ctx.media.season.number}"][data-e="${ctx.media.episode.number}"]`;
  const episode = $page(episodeSelector).first();
  if (episode.length === 0) {
    throw new Error('failed to find episode element');
  }

  const startingURL = episode.attr('data-iframe');
  if (!startingURL) {
    throw new Error('failed to find episode starting URL');
  }

  return getVidSrcEmbeds(ctx, startingURL);
}

View File

@@ -9,6 +9,7 @@ export type ScrapeContext = {
export type EmbedInput = { export type EmbedInput = {
url: string; url: string;
headers?: Record<string, string>;
}; };
export type EmbedScrapeContext = EmbedInput & ScrapeContext; export type EmbedScrapeContext = EmbedInput & ScrapeContext;