diff --git a/README.md b/README.md index f9b5661..b5fa78a 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,7 @@ Feel free to use for your own projects. features: - scrape popular streaming websites - - works in both browser and NodeJS server - - choose between all streams or non-protected stream (for browser use) + - works in both browser and server-side > This package is still WIP @@ -18,12 +17,8 @@ features: > TODO functionality: running individual scrapers -> TODO functionality: running all scrapers - > TODO functionality: choose environment (for browser, for native) -> TODO functionality: show which types are supported for scraper in meta - > TODO content: add all scrapers/providers > TODO tests: add tests diff --git a/src/fetchers/common.ts b/src/fetchers/common.ts index 6ce7c87..e0c1780 100644 --- a/src/fetchers/common.ts +++ b/src/fetchers/common.ts @@ -1,4 +1,4 @@ -import { FetcherOptions } from '@/fetchers/types'; +import { Fetcher, FetcherOptions, UseableFetcher } from '@/fetchers/types'; // make url with query params and base url used correctly export function makeFullUrl(url: string, ops?: FetcherOptions): string { @@ -13,3 +13,15 @@ export function makeFullUrl(url: string, ops?: FetcherOptions): string { return parsedUrl.toString(); } + +export function makeFullFetcher(fetcher: Fetcher): UseableFetcher { + return (url, ops) => { + return fetcher(url, { + headers: ops?.headers ?? {}, + method: ops?.method ?? 'GET', + query: ops?.query ?? {}, + baseUrl: ops?.baseUrl ?? 
'', + body: ops?.body, + }); + }; +} diff --git a/src/index.ts b/src/index.ts index 9025534..f6b45ed 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1,9 @@ +export type { RunOutput } from '@/main/runner'; +export type { MetaOutput } from '@/main/meta'; +export type { FullScraperEvents } from '@/main/events'; +export type { MediaTypes, ShowMedia, ScrapeMedia, MovieMedia } from '@/main/media'; +export type { ProviderBuilderOptions, ProviderControls, RunnerOptions } from '@/main/builder'; + +export { NotFoundError } from '@/utils/errors'; +export { makeProviders } from '@/main/builder'; export { makeStandardFetcher } from '@/fetchers/standardFetch'; -export { ProviderBuilderOptions, ProviderControls, makeProviders } from '@/main/builder'; diff --git a/src/main/builder.ts b/src/main/builder.ts index bb4d114..2cf50fe 100644 --- a/src/main/builder.ts +++ b/src/main/builder.ts @@ -1,3 +1,4 @@ +import { makeFullFetcher } from '@/fetchers/common'; import { Fetcher } from '@/fetchers/types'; import { FullScraperEvents } from '@/main/events'; import { ScrapeMedia } from '@/main/media'; @@ -16,9 +17,11 @@ export interface ProviderBuilderOptions { export interface RunnerOptions { // overwrite the order of sources to run. list of ids + // any omitted ids are added to the end in order of rank (highest first) sourceOrder?: string[]; // overwrite the order of embeds to run. list of ids + // any omitted ids are added to the end in order of rank (highest first) embedOrder?: string[]; // object of event functions @@ -46,13 +49,13 @@ export interface ProviderControls { export function makeProviders(ops: ProviderBuilderOptions): ProviderControls { const list = getProviders(); const providerRunnerOps = { - fetcher: ops.fetcher, - proxiedFetcher: ops.proxiedFetcher ?? ops.fetcher, + fetcher: makeFullFetcher(ops.fetcher), + proxiedFetcher: makeFullFetcher(ops.proxiedFetcher ?? 
ops.fetcher), }; return { runAll(runnerOps: RunnerOptions) { - return runAllProviders({ + return runAllProviders(list, { ...providerRunnerOps, ...runnerOps, }); diff --git a/src/main/meta.ts b/src/main/meta.ts index d2a05b4..cbd8297 100644 --- a/src/main/meta.ts +++ b/src/main/meta.ts @@ -1,3 +1,4 @@ +import { MediaTypes } from '@/main/media'; import { ProviderList } from '@/providers/all'; export type MetaOutput = { @@ -5,17 +6,24 @@ export type MetaOutput = { id: string; rank: number; name: string; + mediaTypes?: Array; }; export function getAllSourceMetaSorted(list: ProviderList): MetaOutput[] { return list.sources .sort((a, b) => b.rank - a.rank) - .map((v) => ({ - type: 'source', - id: v.id, - name: v.name, - rank: v.rank, - })); + .map((v) => { + const types: Array = []; + if (v.scrapeMovie) types.push('movie'); + if (v.scrapeShow) types.push('show'); + return { + type: 'source', + id: v.id, + rank: v.rank, + name: v.name, + mediaTypes: types, + }; + }); } export function getAllEmbedMetaSorted(_list: ProviderList): MetaOutput[] { diff --git a/src/main/runner.ts b/src/main/runner.ts index 75203af..e09ea00 100644 --- a/src/main/runner.ts +++ b/src/main/runner.ts @@ -1,7 +1,12 @@ -import { Fetcher } from '@/fetchers/types'; +import { UseableFetcher } from '@/fetchers/types'; import { FullScraperEvents } from '@/main/events'; import { ScrapeMedia } from '@/main/media'; +import { ProviderList } from '@/providers/all'; +import { EmbedOutput, SourcererOutput } from '@/providers/base'; import { Stream } from '@/providers/streams'; +import { ScrapeContext } from '@/utils/context'; +import { NotFoundError } from '@/utils/errors'; +import { reorderOnIdList } from '@/utils/list'; export type RunOutput = { sourceId: string; @@ -21,20 +26,132 @@ export type EmbedRunOutput = { }; export type ProviderRunnerOptions = { - fetcher: Fetcher; - proxiedFetcher: Fetcher; + fetcher: UseableFetcher; + proxiedFetcher: UseableFetcher; sourceOrder?: string[]; embedOrder?: string[]; 
events?: FullScraperEvents; media: ScrapeMedia; }; -export async function runAllProviders(_ops: ProviderRunnerOptions): Promise { - return { - sourceId: '123', - stream: { - type: 'file', - qualities: {}, +export async function runAllProviders(list: ProviderList, ops: ProviderRunnerOptions): Promise { + const sources = reorderOnIdList(ops.sourceOrder ?? [], list.sources).filter((v) => { + if (ops.media.type === 'movie') return !!v.scrapeMovie; + if (ops.media.type === 'show') return !!v.scrapeShow; + return false; + }); + const embeds = reorderOnIdList(ops.embedOrder ?? [], list.embeds); + const embedIds = embeds.map((v) => v.id); + + const contextBase: ScrapeContext = { + fetcher: ops.fetcher, + proxiedFetcher: ops.proxiedFetcher, + progress(val) { + ops.events?.update?.({ + percentage: val, + status: 'pending', + }); }, }; + + ops.events?.init?.({ + sourceIds: sources.map((v) => v.id), + }); + + for (const s of sources) { + ops.events?.start?.(s.id); + + // run source scrapers + let output: SourcererOutput | null = null; + try { + if (ops.media.type === 'movie' && s.scrapeMovie) + output = await s.scrapeMovie({ + ...contextBase, + media: ops.media, + }); + else if (ops.media.type === 'show' && s.scrapeShow) + output = await s.scrapeShow({ + ...contextBase, + media: ops.media, + }); + } catch (err) { + if (err instanceof NotFoundError) { + ops.events?.update?.({ + percentage: 100, + status: 'notfound', + }); + continue; + } + ops.events?.update?.({ + percentage: 100, + status: 'failure', + }); + // TODO log error + continue; + } + if (!output) throw new Error('Invalid media type'); + + // return the stream if the source produced one directly + if (output.stream) { + return { + sourceId: s.id, + stream: output.stream, + }; + } + + if (output.embeds.length > 0) { + ops.events?.discoverEmbeds?.({ + embeds: output.embeds.map((v, i) => ({ + id: [s.id, i].join('-'), + embedScraperId: v.embedId, + })), + sourceId: s.id, + }); + } + + // run embed scrapers on listed embeds 
(NOTE(review): the sort below is in place on output.embeds and runs after discoverEmbeds, so the index-based embed ids may no longer match - confirm) + const sortedEmbeds 
= output.embeds; + sortedEmbeds.sort((a, b) => embedIds.indexOf(a.embedId) - embedIds.indexOf(b.embedId)); + + for (const ind in sortedEmbeds) { + if (!Object.prototype.hasOwnProperty.call(sortedEmbeds, ind)) continue; + const e = sortedEmbeds[ind]; + const scraper = embeds.find((v) => v.id === e.embedId); + if (!scraper) throw new Error('Invalid embed returned'); + + // run embed scraper + const id = [s.id, ind].join('-'); + ops.events?.start?.(id); + let embedOutput: EmbedOutput; + try { + embedOutput = await scraper.scrape({ + ...contextBase, + url: e.url, + }); + } catch (err) { + if (err instanceof NotFoundError) { + ops.events?.update?.({ + percentage: 100, + status: 'notfound', + }); + continue; + } + ops.events?.update?.({ + percentage: 100, + status: 'failure', + }); + // TODO log error + continue; + } + + return { + sourceId: s.id, + embedId: scraper.id, + stream: embedOutput.stream, + }; + } + } + + // no source or embed returned a stream + return null; } diff --git a/src/providers/all.ts b/src/providers/all.ts index 9ad3156..0340111 100644 --- a/src/providers/all.ts +++ b/src/providers/all.ts @@ -1,24 +1,36 @@ -import { Sourcerer } from '@/providers/base'; +import { Embed, Sourcerer } from '@/providers/base'; import { hasDuplicates, isNotNull } from '@/utils/predicates'; function gatherAllSources(): Array { + // all sources are gathered here + return []; +} + +function gatherAllEmbeds(): Array { + // all embeds are gathered here return []; } export interface ProviderList { sources: Sourcerer[]; + embeds: Embed[]; } export function getProviders(): ProviderList { const sources = gatherAllSources().filter(isNotNull); + const embeds = gatherAllEmbeds().filter(isNotNull); + const combined = [...sources, ...embeds]; - const anyDuplicateId = hasDuplicates(sources.map((v) => v.id)); - const anyDuplicateRank = hasDuplicates(sources.map((v) => v.rank)); + const anyDuplicateId = hasDuplicates(combined.map((v) => v.id)); + const anyDuplicateSourceRank = 
hasDuplicates(sources.map((v) => v.rank)); + const anyDuplicateEmbedRank = hasDuplicates(embeds.map((v) => v.rank)); - if (anyDuplicateId) throw new Error('Duplicate id found in sources'); - if (anyDuplicateRank) throw new Error('Duplicate rank found in sources'); + if (anyDuplicateId) throw new Error('Duplicate id found in sources/embeds'); + if (anyDuplicateSourceRank) throw new Error('Duplicate rank found in sources'); + if (anyDuplicateEmbedRank) throw new Error('Duplicate rank found in embeds'); return { sources, + embeds, }; } diff --git a/src/providers/base.ts b/src/providers/base.ts index df414f2..68c32b0 100644 --- a/src/providers/base.ts +++ b/src/providers/base.ts @@ -16,7 +16,7 @@ export type Sourcerer = { rank: number; // the higher the number, the earlier it gets put on the queue disabled?: boolean; scrapeMovie?: (input: ScrapeContext & { media: MovieMedia }) => Promise; - scrapeShow: (input: ScrapeContext & { media: ShowMedia }) => Promise; + scrapeShow?: (input: ScrapeContext & { media: ShowMedia }) => Promise; }; export function makeSourcerer(state: Sourcerer): Sourcerer | null { @@ -25,7 +25,7 @@ export function makeSourcerer(state: Sourcerer): Sourcerer | null { } export type EmbedOutput = { - stream?: Stream; + stream: Stream; }; export type Embed = { diff --git a/src/utils/list.ts b/src/utils/list.ts new file mode 100644 index 0000000..75479c4 --- /dev/null +++ b/src/utils/list.ts @@ -0,0 +1,20 @@ +export function reorderOnIdList(order: string[], list: T): T { + const copy = [...list] as T; + copy.sort((a, b) => { + const aIndex = order.indexOf(a.id); + const bIndex = order.indexOf(b.id); + + // both in order list + if (aIndex >= 0 && bIndex >= 0) return aIndex - bIndex; + + // only one in order list + // negative means order [a,b] + // positive means order [b,a] + if (aIndex < 0) return 1; // A is not in the order list, so A goes later (NOTE(review): this also matches when both are missing, so the rank fallback at the end looks unreachable - confirm) + if 
(bIndex < 0) return -1; // B is not in the order list, so B goes later on the list + + // both not in list, sort on 
rank + return b.rank - a.rank; + }); + return copy; +}