More elaborate fetcher APIs

This commit is contained in:
mrjvs
2023-12-26 15:43:52 +01:00
parent 09ba0ebcc1
commit ffe7ae0985
6 changed files with 73 additions and 12 deletions

View File

@@ -2,7 +2,7 @@ import { FullScraperEvents, IndividualScraperEvents } from '@/entrypoint/utils/e
import { ScrapeMedia } from '@/entrypoint/utils/media'; import { ScrapeMedia } from '@/entrypoint/utils/media';
import { MetaOutput, getAllEmbedMetaSorted, getAllSourceMetaSorted, getSpecificId } from '@/entrypoint/utils/meta'; import { MetaOutput, getAllEmbedMetaSorted, getAllSourceMetaSorted, getSpecificId } from '@/entrypoint/utils/meta';
import { FeatureMap } from '@/entrypoint/utils/targets'; import { FeatureMap } from '@/entrypoint/utils/targets';
import { makeFullFetcher } from '@/fetchers/common'; import { makeFetcher } from '@/fetchers/common';
import { Fetcher } from '@/fetchers/types'; import { Fetcher } from '@/fetchers/types';
import { Embed, EmbedOutput, Sourcerer, SourcererOutput } from '@/providers/base'; import { Embed, EmbedOutput, Sourcerer, SourcererOutput } from '@/providers/base';
import { scrapeIndividualEmbed, scrapeInvidualSource } from '@/runners/individualRunner'; import { scrapeIndividualEmbed, scrapeInvidualSource } from '@/runners/individualRunner';
@@ -83,8 +83,8 @@ export function makeControls(ops: ProviderControlsInput): ProviderControls {
const providerRunnerOps = { const providerRunnerOps = {
features: ops.features, features: ops.features,
fetcher: makeFullFetcher(ops.fetcher), fetcher: makeFetcher(ops.fetcher),
proxiedFetcher: makeFullFetcher(ops.proxiedFetcher ?? ops.fetcher), proxiedFetcher: makeFetcher(ops.proxiedFetcher ?? ops.fetcher),
}; };
return { return {

View File

@@ -26,14 +26,18 @@ export function makeFullUrl(url: string, ops?: FullUrlOptions): string {
return parsedUrl.toString(); return parsedUrl.toString();
} }
export function makeFullFetcher(fetcher: Fetcher): UseableFetcher { export function makeFetcher(fetcher: Fetcher): UseableFetcher {
return (url, ops) => { const newFetcher = (url: string, ops?: FetcherOptions) => {
return fetcher(url, { return fetcher(url, {
headers: ops?.headers ?? {}, headers: ops?.headers ?? {},
method: ops?.method ?? 'GET', method: ops?.method ?? 'GET',
query: ops?.query ?? {}, query: ops?.query ?? {},
baseUrl: ops?.baseUrl ?? '', baseUrl: ops?.baseUrl ?? '',
readHeaders: ops?.readHeaders ?? [],
body: ops?.body, body: ops?.body,
}); });
}; };
const output: UseableFetcher = async (url, ops) => (await newFetcher(url, ops)).body;
output.full = newFetcher;
return output;
} }

View File

@@ -11,12 +11,17 @@ export type FetchOps = {
export type FetchHeaders = { export type FetchHeaders = {
get(key: string): string | null; get(key: string): string | null;
set(key: string, value: string): void;
}; };
export type FetchReply = { export type FetchReply = {
text(): Promise<string>; text(): Promise<string>;
json(): Promise<any>; json(): Promise<any>;
extraHeaders?: FetchHeaders;
extraUrl?: string;
headers: FetchHeaders; headers: FetchHeaders;
url: string;
status: number;
}; };
export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>; export type FetchLike = (url: string, ops?: FetchOps | undefined) => Promise<FetchReply>;

View File

@@ -9,9 +9,28 @@ const headerMap: Record<string, string> = {
origin: 'X-Origin', origin: 'X-Origin',
}; };
// Response headers that the proxy fetcher copies into `extraHeaders`.
// Each key is the header name looked up on the proxied response.
// NOTE(review): the value is presumably the original header name that the
// key stands in for; the visible consumer only reads the keys, so confirm
// how the values are meant to be used.
const responseHeaderMap: Record<string, string> = {
'x-set-cookie': 'Set-Cookie',
};
export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher { export function makeSimpleProxyFetcher(proxyUrl: string, f: FetchLike): Fetcher {
const fetcher = makeStandardFetcher(f);
const proxiedFetch: Fetcher = async (url, ops) => { const proxiedFetch: Fetcher = async (url, ops) => {
const fetcher = makeStandardFetcher(async (a, b) => {
const res = await f(a, b);
// set extra headers that cant normally be accessed
res.extraHeaders = new Headers();
Object.entries(responseHeaderMap).forEach((entry) => {
const value = res.headers.get(entry[0]);
if (!value) return;
res.extraHeaders?.set(entry[0].toLowerCase(), value);
});
// set correct final url
res.extraUrl = res.headers.get('X-Final-Destination') ?? res.url;
return res;
});
const fullUrl = makeFullUrl(url, ops); const fullUrl = makeFullUrl(url, ops);
const headerEntries = Object.entries(ops.headers).map((entry) => { const headerEntries = Object.entries(ops.headers).map((entry) => {

View File

@@ -1,8 +1,20 @@
import { serializeBody } from '@/fetchers/body'; import { serializeBody } from '@/fetchers/body';
import { makeFullUrl } from '@/fetchers/common'; import { makeFullUrl } from '@/fetchers/common';
import { FetchLike } from '@/fetchers/fetch'; import { FetchLike, FetchReply } from '@/fetchers/fetch';
import { Fetcher } from '@/fetchers/types'; import { Fetcher } from '@/fetchers/types';
/**
 * Collects the requested response headers into a fresh `Headers` object.
 *
 * Header names are lowercased before they are looked up and before they are
 * stored in the result. For each name, a value found in `res.extraHeaders`
 * takes precedence over the one in `res.headers`. Names that have no
 * non-empty value in either place are left out of the result.
 *
 * @param list - Names of the headers to read from the reply.
 * @param res - Reply whose `headers` and optional `extraHeaders` are read.
 * @returns A new `Headers` instance holding only the requested headers that were found.
 */
function getHeaders(list: string[], res: FetchReply): Headers {
  const output = new Headers();
  for (const header of list) {
    const realHeader = header.toLowerCase();
    // Resolve the effective value first: a header that only exists in
    // `extraHeaders` must not be skipped just because the regular header
    // lookup came back empty.
    const value = res.extraHeaders?.get(realHeader) ?? res.headers.get(realHeader);
    if (!value) continue;
    output.set(realHeader, value);
  }
  return output;
}
export function makeStandardFetcher(f: FetchLike): Fetcher { export function makeStandardFetcher(f: FetchLike): Fetcher {
const normalFetch: Fetcher = async (url, ops) => { const normalFetch: Fetcher = async (url, ops) => {
const fullUrl = makeFullUrl(url, ops); const fullUrl = makeFullUrl(url, ops);
@@ -17,9 +29,17 @@ export function makeStandardFetcher(f: FetchLike): Fetcher {
body: seralizedBody.body, body: seralizedBody.body,
}); });
let body: any;
const isJson = res.headers.get('content-type')?.includes('application/json'); const isJson = res.headers.get('content-type')?.includes('application/json');
if (isJson) return res.json(); if (isJson) body = await res.json();
return res.text(); else body = res.text();
return {
body,
finalUrl: res.extraUrl ?? res.url,
headers: getHeaders(ops.readHeaders, res),
statusCode: res.status,
};
}; };
return normalFetch; return normalFetch;

View File

@@ -5,22 +5,35 @@ export type FetcherOptions = {
headers?: Record<string, string>; headers?: Record<string, string>;
query?: Record<string, string>; query?: Record<string, string>;
method?: 'GET' | 'POST'; method?: 'GET' | 'POST';
readHeaders?: string[];
body?: Record<string, any> | string | FormData | URLSearchParams; body?: Record<string, any> | string | FormData | URLSearchParams;
}; };
// Version of the options that always has the defaults set
// This is to make making fetchers yourself easier
export type DefaultedFetcherOptions = { export type DefaultedFetcherOptions = {
baseUrl?: string; baseUrl?: string;
body?: Record<string, any> | string | FormData; body?: Record<string, any> | string | FormData;
headers: Record<string, string>; headers: Record<string, string>;
query: Record<string, string>; query: Record<string, string>;
readHeaders: string[];
method: 'GET' | 'POST'; method: 'GET' | 'POST';
}; };
export type Fetcher<T = any> = { export type FetcherResponse<T = any> = {
(url: string, ops: DefaultedFetcherOptions): Promise<T>; statusCode: number;
headers: Headers;
finalUrl: string;
body: T;
}; };
// this feature has some quality of life features // This is the version that will be inputted by library users
export type Fetcher<T = any> = {
(url: string, ops: DefaultedFetcherOptions): Promise<FetcherResponse<T>>;
};
// This is the version that scrapers will be interacting with
export type UseableFetcher<T = any> = { export type UseableFetcher<T = any> = {
(url: string, ops?: FetcherOptions): Promise<T>; (url: string, ops?: FetcherOptions): Promise<T>;
full: (url: string, ops?: FetcherOptions) => Promise<FetcherResponse<T>>;
}; };