From ffe7ae0985b2e7336f0203e32593a205680db266 Mon Sep 17 00:00:00 2001 From: mrjvs Date: Tue, 26 Dec 2023 15:43:52 +0100 Subject: [PATCH] More elaborate fetcher API's --- src/entrypoint/controls.ts | 6 +++--- src/fetchers/common.ts | 8 ++++++-- src/fetchers/fetch.ts | 5 +++++ src/fetchers/simpleProxy.ts | 21 ++++++++++++++++++++- src/fetchers/standardFetch.ts | 26 +++++++++++++++++++++++--- src/fetchers/types.ts | 19 ++++++++++++++++--- 6 files changed, 73 insertions(+), 12 deletions(-) diff --git a/src/entrypoint/controls.ts b/src/entrypoint/controls.ts index babcde4..5ff400b 100644 --- a/src/entrypoint/controls.ts +++ b/src/entrypoint/controls.ts @@ -2,7 +2,7 @@ import { FullScraperEvents, IndividualScraperEvents } from '@/entrypoint/utils/e import { ScrapeMedia } from '@/entrypoint/utils/media'; import { MetaOutput, getAllEmbedMetaSorted, getAllSourceMetaSorted, getSpecificId } from '@/entrypoint/utils/meta'; import { FeatureMap } from '@/entrypoint/utils/targets'; -import { makeFullFetcher } from '@/fetchers/common'; +import { makeFetcher } from '@/fetchers/common'; import { Fetcher } from '@/fetchers/types'; import { Embed, EmbedOutput, Sourcerer, SourcererOutput } from '@/providers/base'; import { scrapeIndividualEmbed, scrapeInvidualSource } from '@/runners/individualRunner'; @@ -83,8 +83,8 @@ export function makeControls(ops: ProviderControlsInput): ProviderControls { const providerRunnerOps = { features: ops.features, - fetcher: makeFullFetcher(ops.fetcher), - proxiedFetcher: makeFullFetcher(ops.proxiedFetcher ?? ops.fetcher), + fetcher: makeFetcher(ops.fetcher), + proxiedFetcher: makeFetcher(ops.proxiedFetcher ?? ops.fetcher), }; return { diff --git a/src/fetchers/common.ts b/src/fetchers/common.ts index e31b6d1..71956ba 100644 --- a/src/fetchers/common.ts +++ b/src/fetchers/common.ts @@ -26,14 +26,18 @@ export function makeFullUrl(url: string, ops?: FullUrlOptions): string { return parsedUrl.toString(); } -export function makeFullFetcher(fetcher: Fetcher): UseableFetcher { - return (url, ops) => { +export function makeFetcher(fetcher: Fetcher): UseableFetcher { + const newFetcher = (url: string, ops?: FetcherOptions) => { return fetcher(url, { headers: ops?.headers ?? {}, method: ops?.method ?? 'GET', query: ops?.query ?? {}, baseUrl: ops?.baseUrl ?? '', + readHeaders: ops?.readHeaders ?? [], body: ops?.body, }); }; + const output: UseableFetcher = async (url, ops) => (await newFetcher(url, ops)).body; + output.full = newFetcher; + return output; } diff --git a/src/fetchers/fetch.ts b/src/fetchers/fetch.ts index 1d419f0..d2156d0 100644 --- a/src/fetchers/fetch.ts +++ b/src/fetchers/fetch.ts @@ -11,12 +11,17 @@ export type FetchOps = { export type FetchHeaders = { get(key: string): string | null; + set(key: string, value: string): void; }; export type FetchReply = { text(): Promise; json(): Promise; + extraHeaders?: FetchHeaders; + extraUrl?: string; headers: FetchHeaders; + url: string; + status: number; }; export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise; diff --git a/src/fetchers/simpleProxy.ts b/src/fetchers/simpleProxy.ts index 07b048e..21ed5ca 100644 --- a/src/fetchers/simpleProxy.ts +++ b/src/fetchers/simpleProxy.ts @@ -9,9 +9,28 @@ const headerMap: Record = { origin: 'X-Origin', }; +const responseHeaderMap: Record = { + 'x-set-cookie': 'Set-Cookie', +}; + export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher { - const fetcher = makeStandardFetcher(f); const proxiedFetch: Fetcher = async (url, ops) => { + const fetcher = makeStandardFetcher(async (a, b) => { + const res = await f(a, b); + + // set extra headers that cant normally be accessed + res.extraHeaders = new Headers(); + Object.entries(responseHeaderMap).forEach((entry) => { + const value = res.headers.get(entry[0]); + if (!value) return; + res.extraHeaders?.set(entry[0].toLowerCase(), value); + }); + + // set correct final url + res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url; + return res; + }); + const fullUrl = makeFullUrl(url, ops); const headerEntries = Object.entries(ops.headers).map((entry) => { diff --git a/src/fetchers/standardFetch.ts b/src/fetchers/standardFetch.ts index dd84893..a17dd92 100644 --- a/src/fetchers/standardFetch.ts +++ b/src/fetchers/standardFetch.ts @@ -1,8 +1,20 @@ import { serializeBody } from '@/fetchers/body'; import { makeFullUrl } from '@/fetchers/common'; -import { FetchLike } from '@/fetchers/fetch'; +import { FetchLike, FetchReply } from '@/fetchers/fetch'; import { Fetcher } from '@/fetchers/types'; +function getHeaders(list: string[], res: FetchReply): Headers { + const output = new Headers(); + list.forEach((header) => { + const realHeader = header.toLowerCase(); + const value = res.headers.get(realHeader); + const extraValue = res.extraHeaders?.get(realHeader); + if (!value) return; + output.set(realHeader, extraValue ?? value); + }); + return output; +} + export function makeStandardFetcher(f: FetchLike): Fetcher { const normalFetch: Fetcher = async (url, ops) => { const fullUrl = makeFullUrl(url, ops); @@ -17,9 +29,17 @@ export function makeStandardFetcher(f: FetchLike): Fetcher { body: seralizedBody.body, }); + let body: any; const isJson = res.headers.get('content-type')?.includes('application/json'); - if (isJson) return res.json(); - return res.text(); + if (isJson) body = await res.json(); + else body = res.text(); + + return { + body, + finalUrl: res.extraUrl ?? res.url, + headers: getHeaders(ops.readHeaders, res), + statusCode: res.status, + }; }; return normalFetch; diff --git a/src/fetchers/types.ts b/src/fetchers/types.ts index 2d14748..4b9cdc0 100644 --- a/src/fetchers/types.ts +++ b/src/fetchers/types.ts @@ -5,22 +5,35 @@ export type FetcherOptions = { headers?: Record; query?: Record; method?: 'GET' | 'POST'; + readHeaders?: string[]; body?: Record | string | FormData | URLSearchParams; }; +// Version of the options that always has the defaults set +// This is to make making fetchers yourself easier export type DefaultedFetcherOptions = { baseUrl?: string; body?: Record | string | FormData; headers: Record; query: Record; + readHeaders: string[]; method: 'GET' | 'POST'; }; -export type Fetcher = { - (url: string, ops: DefaultedFetcherOptions): Promise; +export type FetcherResponse = { + statusCode: number; + headers: Headers; + finalUrl: string; + body: T; }; -// this feature has some quality of life features +// This is the version that will be inputted by library users +export type Fetcher = { + (url: string, ops: DefaultedFetcherOptions): Promise>; +}; + +// This is the version that scrapers will be interacting with export type UseableFetcher = { (url: string, ops?: FetcherOptions): Promise; + full: (url: string, ops?: FetcherOptions) => Promise>; };