More elaborate fetcher APIs

This commit is contained in:
mrjvs
2023-12-26 15:43:52 +01:00
parent 09ba0ebcc1
commit ffe7ae0985
6 changed files with 73 additions and 12 deletions

View File

@@ -2,7 +2,7 @@ import { FullScraperEvents, IndividualScraperEvents } from '@/entrypoint/utils/e
import { ScrapeMedia } from '@/entrypoint/utils/media';
import { MetaOutput, getAllEmbedMetaSorted, getAllSourceMetaSorted, getSpecificId } from '@/entrypoint/utils/meta';
import { FeatureMap } from '@/entrypoint/utils/targets';
import { makeFullFetcher } from '@/fetchers/common';
import { makeFetcher } from '@/fetchers/common';
import { Fetcher } from '@/fetchers/types';
import { Embed, EmbedOutput, Sourcerer, SourcererOutput } from '@/providers/base';
import { scrapeIndividualEmbed, scrapeInvidualSource } from '@/runners/individualRunner';
@@ -83,8 +83,8 @@ export function makeControls(ops: ProviderControlsInput): ProviderControls {
const providerRunnerOps = {
features: ops.features,
fetcher: makeFullFetcher(ops.fetcher),
proxiedFetcher: makeFullFetcher(ops.proxiedFetcher ?? ops.fetcher),
fetcher: makeFetcher(ops.fetcher),
proxiedFetcher: makeFetcher(ops.proxiedFetcher ?? ops.fetcher),
};
return {

View File

@@ -26,14 +26,18 @@ export function makeFullUrl(url: string, ops?: FullUrlOptions): string {
return parsedUrl.toString();
}
export function makeFullFetcher(fetcher: Fetcher): UseableFetcher {
return (url, ops) => {
/**
 * Turn a library-user supplied fetcher into the fetcher handed to scrapers.
 *
 * Every option is filled with its default before the underlying fetcher is
 * called. The returned function resolves to the response body only; its
 * `full` property resolves to the complete response object.
 *
 * @param fetcher - underlying fetcher that receives fully defaulted options
 * @returns callable fetcher with an attached `full` variant
 */
export function makeFetcher(fetcher: Fetcher): UseableFetcher {
  // Delegates to the underlying fetcher with all defaults applied.
  function fetchFull(url: string, ops?: FetcherOptions) {
    return fetcher(url, {
      headers: ops?.headers ?? {},
      method: ops?.method ?? 'GET',
      query: ops?.query ?? {},
      baseUrl: ops?.baseUrl ?? '',
      readHeaders: ops?.readHeaders ?? [],
      body: ops?.body,
    });
  }

  // Convenience form: same request, but only the body is handed back.
  const fetchBody = async (url: string, ops?: FetcherOptions) => {
    const response = await fetchFull(url, ops);
    return response.body;
  };

  return Object.assign(fetchBody, { full: fetchFull });
}

View File

@@ -11,12 +11,17 @@ export type FetchOps = {
// Minimal header container: only the lookup and assignment operations are required.
export type FetchHeaders = {
// Look up a header by name; the result may be null.
get(key: string): string | null;
// Store a value under the given header name.
set(key: string, value: string): void;
};
// Reply object produced by a FetchLike function.
export type FetchReply = {
// Body readers; both are asynchronous.
text(): Promise<string>;
json(): Promise<any>;
// Optional additional headers; when a requested header is also present here, this value is preferred.
extraHeaders?: FetchHeaders;
// Optional override for the final URL; used instead of `url` when set.
extraUrl?: string;
headers: FetchHeaders;
url: string;
status: number;
};
// A fetch-style function: takes a URL plus optional FetchOps and resolves to a FetchReply.
export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>;

View File

@@ -9,9 +9,28 @@ const headerMap: Record<string, string> = {
origin: 'X-Origin',
};
// Response header names read from the proxied reply (keys), paired with another header name (values).
// NOTE(review): presumably the value is the real header each key stands in for — confirm.
const responseHeaderMap: Record<string, string> = {
'x-set-cookie': 'Set-Cookie',
};
export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher {
const fetcher = makeStandardFetcher(f);
const proxiedFetch: Fetcher = async (url, ops) => {
// Standard fetcher around a wrapped fetch-like function that post-processes
// each reply before the standard fetcher reads it.
const fetcher = makeStandardFetcher(async (a, b) => {
  const res = await f(a, b);

  // Set extra headers that can't normally be accessed. Each one is read from
  // the reply under the map's key and stored under the map's value (the real
  // header name, lowercased), so a lookup by the real name finds it.
  const extraHeaders = new Headers();
  Object.entries(responseHeaderMap).forEach(([proxiedName, realName]) => {
    const value = res.headers.get(proxiedName);
    if (!value) return;
    extraHeaders.set(realName.toLowerCase(), value);
  });
  res.extraHeaders = extraHeaders;

  // set correct final url
  res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
  return res;
});
const fullUrl = makeFullUrl(url, ops);
const headerEntries = Object.entries(ops.headers).map((entry) => {

View File

@@ -1,8 +1,20 @@
import { serializeBody } from '@/fetchers/body';
import { makeFullUrl } from '@/fetchers/common';
import { FetchLike } from '@/fetchers/fetch';
import { FetchLike, FetchReply } from '@/fetchers/fetch';
import { Fetcher } from '@/fetchers/types';
/**
 * Collect the requested response headers into a fresh Headers object.
 *
 * Names are matched in lowercase. A value found in `res.extraHeaders` takes
 * priority over the one in `res.headers`, and a header that exists only in
 * `res.extraHeaders` is still returned.
 *
 * @param list - header names to read from the reply
 * @param res - reply whose headers (and optional extra headers) are read
 * @returns Headers holding only the requested names that had a non-empty value
 */
function getHeaders(list: string[], res: FetchReply): Headers {
  const output = new Headers();
  list.forEach((header) => {
    const realHeader = header.toLowerCase();
    // Resolve the effective value first, so a header present only in
    // extraHeaders is not discarded by the emptiness check below.
    const value = res.extraHeaders?.get(realHeader) ?? res.headers.get(realHeader);
    if (!value) return;
    output.set(realHeader, value);
  });
  return output;
}
export function makeStandardFetcher(f: FetchLike): Fetcher {
const normalFetch: Fetcher = async (url, ops) => {
const fullUrl = makeFullUrl(url, ops);
@@ -17,9 +29,17 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
body: seralizedBody.body,
});
let body: any;
const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) return res.json();
return res.text();
// JSON replies are parsed into objects; everything else is read as text.
// Both reads are awaited so `body` holds the resolved payload, not a pending Promise.
if (isJson) body = await res.json();
else body = await res.text();

return {
  body,
  // prefer the override URL on the reply when one is set
  finalUrl: res.extraUrl ?? res.url,
  // only the headers requested through readHeaders are exposed
  headers: getHeaders(ops.readHeaders, res),
  statusCode: res.status,
};
};
return normalFetch;

View File

@@ -5,22 +5,35 @@ export type FetcherOptions = {
headers?: Record<string, string>;
query?: Record<string, string>;
method?: 'GET' | 'POST';
readHeaders?: string[];
body?: Record<string, any> | string | FormData | URLSearchParams;
};
// Version of the options that always has the defaults set
// This is to make making fetchers yourself easier
// Only baseUrl and body stay optional; headers, query, readHeaders and method are always present.
// NOTE(review): FetcherOptions.body also lists URLSearchParams while this type does not — confirm whether that is intentional.
export type DefaultedFetcherOptions = {
baseUrl?: string;
body?: Record<string, any> | string | FormData;
headers: Record<string, string>;
query: Record<string, string>;
// Names of the response headers the caller wants to read back.
readHeaders: string[];
method: 'GET' | 'POST';
};
export type Fetcher<T = any> = {
(url: string, ops: DefaultedFetcherOptions): Promise<T>;
// Full result of a fetcher call; T is the type of the response body.
export type FetcherResponse<T = any> = {
// Status code of the reply.
statusCode: number;
// Response headers made available to the caller.
headers: Headers;
// URL the response is reported to have come from.
finalUrl: string;
// Response payload.
body: T;
};
// this feature has some quality of life features
// This is the version that will be inputted by library users
// It always receives fully defaulted options and resolves to the complete FetcherResponse.
export type Fetcher<T = any> = {
(url: string, ops: DefaultedFetcherOptions): Promise<FetcherResponse<T>>;
};
// This is the version that scrapers will be interacting with
export type UseableFetcher<T = any> = {
// Direct call: options are optional and the promise resolves to a value of type T.
(url: string, ops?: FetcherOptions): Promise<T>;
// `full`: same arguments, but resolves to the whole FetcherResponse (status code, headers, final URL and body).
full: (url: string, ops?: FetcherOptions) => Promise<FetcherResponse<T>>;
};