diff --git a/.eslintrc.js b/.eslintrc.js
index 0e7322b..927283f 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -18,6 +18,8 @@ module.exports = {
   },
   plugins: ['@typescript-eslint', 'import', 'prettier'],
   rules: {
+    'no-plusplus': 'off',
+    'no-bitwise': 'off',
     'no-underscore-dangle': 'off',
     '@typescript-eslint/no-explicit-any': 'off',
     'no-console': 'off',
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index d0f0ca6..7458772 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1 @@
-* @movie-web/core
-
-.github @binaryoverload
+* @movie-web/project-leads
diff --git a/package.json b/package.json
index 890aa50..f7d1e18 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@movie-web/providers",
-  "version": "2.0.2",
+  "version": "2.0.3",
   "description": "Package that contains all the providers of movie-web",
   "main": "./lib/index.umd.js",
   "types": "./lib/index.d.ts",
diff --git a/src/dev-cli/scraper.ts b/src/dev-cli/scraper.ts
index 882d321..39f75f6 100644
--- a/src/dev-cli/scraper.ts
+++ b/src/dev-cli/scraper.ts
@@ -41,6 +41,7 @@ async function runBrowserScraping(
     args: ['--no-sandbox', '--disable-setuid-sandbox'],
   });
   const page = await browser.newPage();
+  page.on('console', (message) => console.log(`${message.type().slice(0, 3).toUpperCase()} ${message.text()}`));

   await page.goto(server.resolvedUrls.local[0]);
   await page.waitForFunction('!!window.scrape', { timeout: 5000 });
diff --git a/src/dev-cli/validate.ts b/src/dev-cli/validate.ts
index b600454..dd1f638 100644
--- a/src/dev-cli/validate.ts
+++ b/src/dev-cli/validate.ts
@@ -81,6 +81,7 @@ export async function processOptions(sources: Array, options:
   const providerOptions: ProviderMakerOptions = {
     fetcher,
     target: targets.ANY,
+    consistentIpForRequests: true,
  };

   return {
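Note on the added `consistentIpForRequests: true`: it is an ordinary ProviderMakerOptions field, so library consumers can opt in the same way the dev CLI does above. A minimal sketch, assuming the package's usual makeProviders/makeStandardFetcher entry points; the snippet is illustrative and not part of this diff:

import { makeProviders, makeStandardFetcher, targets } from '@movie-web/providers';

// Keep all requests of a single scrape on the same IP,
// which sources flagged as IP_LOCKED rely on.
const providers = makeProviders({
  fetcher: makeStandardFetcher(fetch),
  target: targets.ANY,
  consistentIpForRequests: true,
});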
diff --git a/src/entrypoint/providers.ts b/src/entrypoint/providers.ts
index b306417..e456eb0 100644
--- a/src/entrypoint/providers.ts
+++ b/src/entrypoint/providers.ts
@@ -2,9 +2,9 @@ import { gatherAllEmbeds, gatherAllSources } from '@/providers/all';
 import { Embed, Sourcerer } from '@/providers/base';

 export function getBuiltinSources(): Sourcerer[] {
-  return gatherAllSources();
+  return gatherAllSources().filter((v) => !v.disabled);
 }

 export function getBuiltinEmbeds(): Embed[] {
-  return gatherAllEmbeds();
+  return gatherAllEmbeds().filter((v) => !v.disabled);
 }
diff --git a/src/fetchers/types.ts b/src/fetchers/types.ts
index 7daa5df..f5dbe06 100644
--- a/src/fetchers/types.ts
+++ b/src/fetchers/types.ts
@@ -4,7 +4,7 @@ export type FetcherOptions = {
   baseUrl?: string;
   headers?: Record;
   query?: Record;
-  method?: 'GET' | 'POST';
+  method?: 'HEAD' | 'GET' | 'POST';
   readHeaders?: string[];
   body?: Record | string | FormData | URLSearchParams;
 };
@@ -17,7 +17,7 @@ export type DefaultedFetcherOptions = {
   headers: Record;
   query: Record;
   readHeaders: string[];
-  method: 'GET' | 'POST';
+  method: 'HEAD' | 'GET' | 'POST';
 };

 export type FetcherResponse = {
diff --git a/src/providers/all.ts b/src/providers/all.ts
index d1e7885..c2eb771 100644
--- a/src/providers/all.ts
+++ b/src/providers/all.ts
@@ -3,15 +3,18 @@ import { febboxHlsScraper } from '@/providers/embeds/febbox/hls';
 import { febboxMp4Scraper } from '@/providers/embeds/febbox/mp4';
 import { mixdropScraper } from '@/providers/embeds/mixdrop';
 import { mp4uploadScraper } from '@/providers/embeds/mp4upload';
+import { streambucketScraper } from '@/providers/embeds/streambucket';
 import { streamsbScraper } from '@/providers/embeds/streamsb';
 import { upcloudScraper } from '@/providers/embeds/upcloud';
 import { upstreamScraper } from '@/providers/embeds/upstream';
+import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
 import { flixhqScraper } from '@/providers/sources/flixhq/index';
 import { goMoviesScraper } from '@/providers/sources/gomovies/index';
 import { kissAsianScraper } from '@/providers/sources/kissasian/index';
 import { lookmovieScraper } from '@/providers/sources/lookmovie';
 import { remotestreamScraper } from '@/providers/sources/remotestream';
 import { showboxScraper } from '@/providers/sources/showbox/index';
+import { vidsrcScraper } from '@/providers/sources/vidsrc/index';
 import { zoechipScraper } from '@/providers/sources/zoechip';

 import { smashyStreamDScraper } from './embeds/smashystream/dued';
@@ -27,6 +30,7 @@ export function gatherAllSources(): Array {
     showboxScraper,
     goMoviesScraper,
     zoechipScraper,
+    vidsrcScraper,
     lookmovieScraper,
     smashyStreamScraper,
   ];
@@ -42,6 +46,8 @@ export function gatherAllEmbeds(): Array {
     febboxMp4Scraper,
     febboxHlsScraper,
     mixdropScraper,
+    vidsrcembedScraper,
+    streambucketScraper,
     smashyStreamFScraper,
     smashyStreamDScraper,
   ];
diff --git a/src/providers/embeds/febbox/hls.ts b/src/providers/embeds/febbox/hls.ts
index 2a3a03f..792c112 100644
--- a/src/providers/embeds/febbox/hls.ts
+++ b/src/providers/embeds/febbox/hls.ts
@@ -15,6 +15,7 @@ export const febboxHlsScraper = makeEmbed({
   id: 'febbox-hls',
   name: 'Febbox (HLS)',
   rank: 160,
+  disabled: true,
   async scrape(ctx) {
     const { type, id, season, episode } = parseInputUrl(ctx.url);
     const sharelinkResult = await ctx.proxiedFetcher<{
diff --git a/src/providers/embeds/streambucket.ts b/src/providers/embeds/streambucket.ts
new file mode 100644
index 0000000..9e21a93
--- /dev/null
+++ b/src/providers/embeds/streambucket.ts
@@ -0,0 +1,101 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeEmbed } from '@/providers/base';
+
+// StreamBucket makes use of https://github.com/nicxlau/hunter-php-javascript-obfuscator
+
+const hunterRegex = /eval\(function\(h,u,n,t,e,r\).*?\("(.*?)",\d*?,"(.*?)",(\d*?),(\d*?),\d*?\)\)/;
+const linkRegex = /file:"(.*?)"/;
+
+// This is a much simpler and more optimized version of the "h,u,n,t,e,r"
+// obfuscation algorithm. It's just basic chunked+mask encoding.
+// I have seen this same encoding used on some sites under the name
+// "p,l,a,y,e,r" as well
+function decodeHunter(encoded: string, mask: string, charCodeOffset: number, delimiterOffset: number) {
+  // The encoded string is made up of 'n' number of chunks.
+  // Each chunk is separated by a delimiter inside the mask.
+  // This offset is also used as the exponentiation base in
+  // the charCode calculations
+  const delimiter = mask[delimiterOffset];
+
+  // Split the 'encoded' string into chunks using the delimiter,
+  // and filter out any empty chunks.
+  const chunks = encoded.split(delimiter).filter((chunk) => chunk);
+
+  // Decode each chunk and concatenate the results to form the final 'decoded' string.
+  const decoded = chunks
+    .map((chunk) => {
+      // Chunks are in reverse order. 'reduceRight' removes the
+      // need to 'reverse' the array first
+      const charCode = chunk.split('').reduceRight((c, value, index) => {
+        // Calculate the character code for each character in the chunk.
+        // This involves finding the index of 'value' in the 'mask' and
+        // multiplying it by (delimiterOffset^position).
+        return c + mask.indexOf(value) * delimiterOffset ** (chunk.length - 1 - index);
+      }, 0);
+
+      // The actual character code is offset by the given amount
+      return String.fromCharCode(charCode - charCodeOffset);
+    })
+    .join('');
+
+  return decoded;
+}
+
+export const streambucketScraper = makeEmbed({
+  id: 'streambucket',
+  name: 'StreamBucket',
+  rank: 196,
+  // TODO - Disabled until ctx.fetcher and ctx.proxiedFetcher don't trigger bot detection
+  disabled: true,
+  async scrape(ctx) {
+    // Using the context fetchers makes the site return just the string "No bots please!"?
+    // TODO - Fix this. Native fetch does not trigger this. No idea why right now
+    const response = await fetch(ctx.url);
+    const html = await response.text();
+
+    // This is different from the above-mentioned bot detection
+    if (html.includes('captcha-checkbox')) {
+      // TODO - This doesn't use recaptcha, just really basic "image match". Maybe could automate?
+      throw new Error('StreamBucket got captchaed');
+    }
+
+    let regexResult = html.match(hunterRegex);
+
+    if (!regexResult) {
+      throw new Error('Failed to find StreamBucket hunter JavaScript');
+    }
+
+    const encoded = regexResult[1];
+    const mask = regexResult[2];
+    const charCodeOffset = Number(regexResult[3]);
+    const delimiterOffset = Number(regexResult[4]);
+
+    if (Number.isNaN(charCodeOffset)) {
+      throw new Error('StreamBucket hunter JavaScript charCodeOffset is not a valid number');
+    }
+
+    if (Number.isNaN(delimiterOffset)) {
+      throw new Error('StreamBucket hunter JavaScript delimiterOffset is not a valid number');
+    }
+
+    const decoded = decodeHunter(encoded, mask, charCodeOffset, delimiterOffset);
+
+    regexResult = decoded.match(linkRegex);
+
+    if (!regexResult) {
+      throw new Error('Failed to find StreamBucket HLS link');
+    }
+
+    return {
+      stream: [
+        {
+          id: 'primary',
+          type: 'hls',
+          playlist: regexResult[1],
+          flags: [flags.CORS_ALLOWED],
+          captions: [],
+        },
+      ],
+    };
+  },
+});
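To make the hunter decoding above concrete, here is a tiny hand-constructed call. The values are illustrative only; on real pages the four arguments come from the hunterRegex capture groups and are much longer:

// 'abcdef'[4] === 'e' is the chunk delimiter, 4 is also the numeric base,
// and every char code was shifted up by 2 (the charCodeOffset) before encoding.
decodeHunter('baccebccde', 'abcdef', 2, 4); // => 'Hi'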
diff --git a/src/providers/embeds/vidsrc.ts b/src/providers/embeds/vidsrc.ts
new file mode 100644
index 0000000..cd47e21
--- /dev/null
+++ b/src/providers/embeds/vidsrc.ts
@@ -0,0 +1,55 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeEmbed } from '@/providers/base';
+
+const hlsURLRegex = /file:"(.*?)"/;
+const setPassRegex = /var pass_path = "(.*set_pass\.php.*)";/;
+
+export const vidsrcembedScraper = makeEmbed({
+  id: 'vidsrcembed', // VidSrc is both a source and an embed host
+  name: 'VidSrc',
+  rank: 197,
+  async scrape(ctx) {
+    const html = await ctx.proxiedFetcher(ctx.url, {
+      headers: {
+        referer: ctx.url,
+      },
+    });
+
+    const match = html
+      .match(hlsURLRegex)?.[1]
+      ?.replace(/(\/\/\S+?=)/g, '')
+      .replace('#2', '');
+    if (!match) throw new Error('Unable to find HLS playlist');
+    const finalUrl = atob(match);
+
+    if (!finalUrl.includes('.m3u8')) throw new Error('Unable to find HLS playlist');
+
+    let setPassLink = html.match(setPassRegex)?.[1];
+    if (!setPassLink) throw new Error('Unable to find set_pass.php link');
+
+    if (setPassLink.startsWith('//')) {
+      setPassLink = `https:${setPassLink}`;
+    }
+
+    // VidSrc uses a password endpoint to temporarily whitelist the user's IP. It is called on an interval by the player.
+    // It currently has no effect on the player itself; the content plays fine without it.
+    // In the future we might have to introduce hooks for the frontend to call this endpoint.
+    await ctx.proxiedFetcher(setPassLink, {
+      headers: {
+        referer: ctx.url,
+      },
+    });
+
+    return {
+      stream: [
+        {
+          id: 'primary',
+          type: 'hls',
+          playlist: finalUrl,
+          flags: [flags.CORS_ALLOWED],
+          captions: [],
+        },
+      ],
+    };
+  },
+});
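The playlist extraction above strips '//…=' junk runs out of the captured file:"..." value and base64-decodes the remainder. A toy example with a made-up payload (the junk format on real pages varies):

const raw = 'aHR0cHM6//junk1=Ly9hLmJj//junk2=L21hc3Rlci5tM3U4';
const cleaned = raw.replace(/(\/\/\S+?=)/g, '').replace('#2', '');
atob(cleaned); // => 'https://a.bc/master.m3u8'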
diff --git a/src/providers/sources/lookmovie/index.ts b/src/providers/sources/lookmovie/index.ts
index 8611373..73226dc 100644
--- a/src/providers/sources/lookmovie/index.ts
+++ b/src/providers/sources/lookmovie/index.ts
@@ -33,6 +33,7 @@ export const lookmovieScraper = makeSourcerer({
   id: 'lookmovie',
   name: 'LookMovie',
   rank: 1,
+  disabled: true,
   flags: [flags.IP_LOCKED],
   scrapeShow: universalScraper,
   scrapeMovie: universalScraper,
diff --git a/src/providers/sources/showbox/index.ts b/src/providers/sources/showbox/index.ts
index 267a6ef..d6c4887 100644
--- a/src/providers/sources/showbox/index.ts
+++ b/src/providers/sources/showbox/index.ts
@@ -1,6 +1,5 @@
 import { flags } from '@/entrypoint/utils/targets';
 import { SourcererOutput, makeSourcerer } from '@/providers/base';
-import { febboxHlsScraper } from '@/providers/embeds/febbox/hls';
 import { febboxMp4Scraper } from '@/providers/embeds/febbox/mp4';
 import { compareTitle } from '@/utils/compare';
 import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
@@ -31,10 +30,6 @@ async function comboScraper(ctx: ShowScrapeContext | MovieScrapeContext): Promis

   return {
     embeds: [
-      {
-        embedId: febboxHlsScraper.id,
-        url: `/${ctx.media.type}/${id}/${season}/${episode}`,
-      },
       {
         embedId: febboxMp4Scraper.id,
         url: `/${ctx.media.type}/${id}/${season}/${episode}`,
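With `disabled: true` now set on lookmovie above (and on febbox-hls and streambucket earlier in this diff), the new filters in src/entrypoint/providers.ts keep these providers registered but drop them from the default lists. A small sketch, assuming getBuiltinSources/getBuiltinEmbeds are re-exported from the package root as their entrypoint/ location suggests:

import { getBuiltinSources, getBuiltinEmbeds } from '@movie-web/providers';

const sourceIds = getBuiltinSources().map((s) => s.id); // no longer contains 'lookmovie'
const embedIds = getBuiltinEmbeds().map((e) => e.id); // no longer contains 'febbox-hls' or 'streambucket'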
diff --git a/src/providers/sources/vidsrc/common.ts b/src/providers/sources/vidsrc/common.ts
new file mode 100644
index 0000000..4ccc93c
--- /dev/null
+++ b/src/providers/sources/vidsrc/common.ts
@@ -0,0 +1,2 @@
+export const vidsrcBase = 'https://vidsrc.me';
+export const vidsrcRCPBase = 'https://rcp.vidsrc.me';
diff --git a/src/providers/sources/vidsrc/index.ts b/src/providers/sources/vidsrc/index.ts
new file mode 100644
index 0000000..6331a05
--- /dev/null
+++ b/src/providers/sources/vidsrc/index.ts
@@ -0,0 +1,13 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeSourcerer } from '@/providers/base';
+import { scrapeMovie } from '@/providers/sources/vidsrc/scrape-movie';
+import { scrapeShow } from '@/providers/sources/vidsrc/scrape-show';
+
+export const vidsrcScraper = makeSourcerer({
+  id: 'vidsrc',
+  name: 'VidSrc',
+  rank: 120,
+  flags: [flags.CORS_ALLOWED],
+  scrapeMovie,
+  scrapeShow,
+});
diff --git a/src/providers/sources/vidsrc/scrape-movie.ts b/src/providers/sources/vidsrc/scrape-movie.ts
new file mode 100644
index 0000000..585eb31
--- /dev/null
+++ b/src/providers/sources/vidsrc/scrape-movie.ts
@@ -0,0 +1,8 @@
+import { getVidSrcMovieSources } from '@/providers/sources/vidsrc/scrape';
+import { MovieScrapeContext } from '@/utils/context';
+
+export async function scrapeMovie(ctx: MovieScrapeContext) {
+  return {
+    embeds: await getVidSrcMovieSources(ctx),
+  };
+}
diff --git a/src/providers/sources/vidsrc/scrape-show.ts b/src/providers/sources/vidsrc/scrape-show.ts
new file mode 100644
index 0000000..ff5d2a4
--- /dev/null
+++ b/src/providers/sources/vidsrc/scrape-show.ts
@@ -0,0 +1,8 @@
+import { getVidSrcShowSources } from '@/providers/sources/vidsrc/scrape';
+import { ShowScrapeContext } from '@/utils/context';
+
+export async function scrapeShow(ctx: ShowScrapeContext) {
+  return {
+    embeds: await getVidSrcShowSources(ctx),
+  };
+}
diff --git a/src/providers/sources/vidsrc/scrape.ts b/src/providers/sources/vidsrc/scrape.ts
new file mode 100644
index 0000000..81dceff
--- /dev/null
+++ b/src/providers/sources/vidsrc/scrape.ts
@@ -0,0 +1,133 @@
+import { load } from 'cheerio';
+
+import { SourcererEmbed } from '@/providers/base';
+import { streambucketScraper } from '@/providers/embeds/streambucket';
+import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
+import { vidsrcBase, vidsrcRCPBase } from '@/providers/sources/vidsrc/common';
+import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
+
+function decodeSrc(encoded: string, seed: string) {
+  let decoded = '';
+  const seedLength = seed.length;
+
+  for (let i = 0; i < encoded.length; i += 2) {
+    const byte = parseInt(encoded.substr(i, 2), 16);
+    const seedChar = seed.charCodeAt((i / 2) % seedLength);
+    decoded += String.fromCharCode(byte ^ seedChar);
+  }
+
+  return decoded;
+}
+
+async function getVidSrcEmbeds(ctx: MovieScrapeContext | ShowScrapeContext, startingURL: string) {
+  // VidSrc works by using hashes and a redirect system.
+  // The hashes are stored in the html, and VidSrc will
+  // make requests to their servers with the hash. This
+  // will trigger a 302 response with a Location header
+  // sending the user to the correct embed. To get the
+  // real embed links, we must do the same. Slow, but
+  // required
+
+  const embeds: SourcererEmbed[] = [];
+
+  let html = await ctx.proxiedFetcher(startingURL, {
+    baseUrl: vidsrcBase,
+  });
+
+  let $ = load(html);
+
+  const sourceHashes = $('.server[data-hash]')
+    .toArray()
+    .map((el) => $(el).attr('data-hash'))
+    .filter((hash) => hash !== undefined);
+
+  for (const hash of sourceHashes) {
+    html = await ctx.proxiedFetcher(`/rcp/${hash}`, {
+      baseUrl: vidsrcRCPBase,
+      headers: {
+        referer: vidsrcBase,
+      },
+    });
+
+    $ = load(html);
+    const encoded = $('#hidden').attr('data-h');
+    const seed = $('body').attr('data-i');
+
+    if (!encoded || !seed) {
+      throw new Error('Failed to find encoded iframe src');
+    }
+
+    let redirectURL = decodeSrc(encoded, seed);
+    if (redirectURL.startsWith('//')) {
+      redirectURL = `https:${redirectURL}`;
+    }
+
+    const { finalUrl } = await ctx.proxiedFetcher.full(redirectURL, {
+      method: 'HEAD',
+      headers: {
+        referer: vidsrcBase,
+      },
+    });
+
+    const embed: SourcererEmbed = {
+      embedId: '',
+      url: finalUrl,
+    };
+
+    const parsedUrl = new URL(finalUrl);
+
+    switch (parsedUrl.host) {
+      case 'vidsrc.stream':
+        embed.embedId = vidsrcembedScraper.id;
+        break;
+      case 'streambucket.net':
+        embed.embedId = streambucketScraper.id;
+        break;
+      case '2embed.cc':
+      case 'www.2embed.cc':
+        // Just ignore this. This embed just sources from other embeds we can scrape as a 'source'
+        break;
+      case 'player-cdn.com':
+        // Just ignore this. This embed streams video over a custom WebSocket connection
+        break;
+      default:
+        throw new Error(`Failed to find VidSrc embed source for ${finalUrl}`);
+    }
+
+    // Since some embeds are ignored on purpose, check if a valid one was found
+    if (embed.embedId !== '') {
+      embeds.push(embed);
+    }
+  }
+
+  return embeds;
+}
+
+export async function getVidSrcMovieSources(ctx: MovieScrapeContext) {
+  return getVidSrcEmbeds(ctx, `/embed/${ctx.media.tmdbId}`);
+}
+
+export async function getVidSrcShowSources(ctx: ShowScrapeContext) {
+  // VidSrc will always default to season 1 episode 1
+  // no matter what embed URL is used. It sends back
+  // a list of ALL the show's episodes, in order, for
+  // all seasons. To get the real embed URL, we have to
+  // parse it from the response.
+  const html = await ctx.proxiedFetcher(`/embed/${ctx.media.tmdbId}`, {
+    baseUrl: vidsrcBase,
+  });
+
+  const $ = load(html);
+
+  const episodeElement = $(`.ep[data-s="${ctx.media.season.number}"][data-e="${ctx.media.episode.number}"]`).first();
+  if (episodeElement.length === 0) {
+    throw new Error('failed to find episode element');
+  }
+
+  const startingURL = episodeElement.attr('data-iframe');
+  if (!startingURL) {
+    throw new Error('failed to find episode starting URL');
+  }
+
+  return getVidSrcEmbeds(ctx, startingURL);
+}
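decodeSrc near the top of scrape.ts is a rolling XOR over hex byte pairs, seeded by the page's data-i attribute. A tiny hand-made call with toy values (real pages carry a protocol-relative embed URL and a longer seed):

// Each hex pair from data-h is XORed with the seed character at (byteIndex % seed.length).
decodeSrc('4e4d170b05', 'ab'); // => '//vid', which getVidSrcEmbeds then prefixes with 'https:'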