khanon deepseek diff
unknown
diff
4 months ago
22 kB
120
No Index
diff --git a/src/config.ts b/src/config.ts index db38ffa..f8af911 100644 --- a/src/config.ts +++ b/src/config.ts @@ -33,6 +33,10 @@ type Config = { * Comma-delimited list of Mistral AI API keys. */ mistralAIKey?: string; + /** + * Comma-delimited list of Deepseek API keys. + */ + deepseekKey?: string; /** * Comma-delimited list of AWS credentials. Each credential item should be a * colon-delimited list of access key, secret key, and AWS region. @@ -426,6 +430,7 @@ export const config: Config = { anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""), googleAIKey: getEnvWithDefault("GOOGLE_AI_KEY", ""), mistralAIKey: getEnvWithDefault("MISTRAL_AI_KEY", ""), + deepseekKey: getEnvWithDefault("DEEPSEEK_KEY", ""), awsCredentials: getEnvWithDefault("AWS_CREDENTIALS", ""), gcpCredentials: getEnvWithDefault("GCP_CREDENTIALS", ""), azureCredentials: getEnvWithDefault("AZURE_CREDENTIALS", ""), @@ -541,6 +546,7 @@ function generateSigningKey() { config.anthropicKey, config.googleAIKey, config.mistralAIKey, + config.deepseekKey, config.awsCredentials, config.gcpCredentials, config.azureCredentials, @@ -689,6 +695,7 @@ export const OMITTED_KEYS = [ "openaiKey", "anthropicKey", "googleAIKey", + "deepseekKey", "mistralAIKey", "awsCredentials", "gcpCredentials", diff --git a/src/info-page.ts b/src/info-page.ts index e55c810..e656e69 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -12,6 +12,7 @@ import { checkCsrfToken, injectCsrfToken } from "./shared/inject-csrf"; const INFO_PAGE_TTL = 2000; const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = { + deepseek: "Deepseek Chat", turbo: "GPT-4o Mini / 3.5 Turbo", gpt4: "GPT-4", "gpt4-32k": "GPT-4 32k", diff --git a/src/proxy/deepseek.ts b/src/proxy/deepseek.ts new file mode 100644 index 0000000..c741a3b --- /dev/null +++ b/src/proxy/deepseek.ts @@ -0,0 +1,42 @@ +import { Router } from "express"; +import { createPreprocessorMiddleware } from "./middleware/request"; +import { ipLimiter } from "./rate-limit"; +import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory"; +import { addKey, finalizeBody } from "./middleware/request"; +import { ProxyResHandlerWithBody } from "./middleware/response"; + +const deepseekResponseHandler: ProxyResHandlerWithBody = async ( + _proxyRes, + req, + res, + body +) => { + if (typeof body !== "object") { + throw new Error("Expected body to be an object"); + } + + let newBody = body; + + res.status(200).json({ ...newBody, proxy: body.proxy }); +}; + +const deepseekProxy = createQueuedProxyMiddleware({ + mutations: [addKey, finalizeBody], + target: "https://api.deepseek.com", + blockingResponseHandler: deepseekResponseHandler, +}); + +const deepseekRouter = Router(); + +deepseekRouter.post( + "/v1/chat/completions", + ipLimiter, + createPreprocessorMiddleware({ + inApi: "openai", + outApi: "openai", + service: "deepseek" + }), + deepseekProxy +); + +export const deepseek = deepseekRouter; diff --git a/src/proxy/middleware/request/mutators/add-key.ts b/src/proxy/middleware/request/mutators/add-key.ts index af37e35..17f2019 100644 --- a/src/proxy/middleware/request/mutators/add-key.ts +++ b/src/proxy/middleware/request/mutators/add-key.ts @@ -88,6 +88,9 @@ export const addKey: ProxyReqMutator = (manager) => { const azureKey = assignedKey.key; manager.setHeader("api-key", azureKey); break; + case "deepseek": + manager.setHeader("Authorization", `Bearer ${assignedKey.key}`); + break; case "aws": case "gcp": case "google-ai": diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts index e639b37..ce99b69 100644 --- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts +++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts @@ -96,6 +96,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => { modelMax = 200000; } else if (model.match(/^anthropic\.claude/)) { modelMax = 100000; + } else if (model.match(/^deepseek/)) { + modelMax = 64000; } else if (model.match(/tral/)) { // catches mistral, mixtral, codestral, mathstral, etc. mistral models have // no name convention and wildly different context windows so this is a diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index 932ec5c..a4c7ab7 100644 --- a/src/proxy/middleware/response/index.ts +++ b/src/proxy/middleware/response/index.ts @@ -246,6 +246,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( errorPayload.proxy_note = `The upstream API rejected the request. Check the error message for details.`; } break; + case "deepseek": + await handleDeepseekBadRequestError(req, errorPayload); + break; case "anthropic": case "aws": case "gcp": @@ -261,6 +264,12 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( // Key is invalid or was revoked keyPool.disable(req.key!, "revoked"); errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`; + } else if (statusCode === 402) { + // Deepseek specific - insufficient balance + if (service === "deepseek") { + keyPool.disable(req.key!, "quota"); + errorPayload.proxy_note = `Assigned key has insufficient balance. Please try again.`; + } } else if (statusCode === 403) { switch (service) { case "anthropic": @@ -328,6 +337,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( case "google-ai": await handleGoogleAIRateLimitError(req, errorPayload); break; + case "deepseek": + await handleDeepseekRateLimitError(req, errorPayload); + break; default: assertNever(service); } @@ -351,6 +363,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( case "aws": case "gcp": case "azure": + case "deepseek": errorPayload.proxy_note = `The key assigned to your prompt does not support the requested model.`; break; default: @@ -484,6 +497,23 @@ async function handleGcpRateLimitError( } } +async function handleDeepseekRateLimitError( + req: Request, + errorPayload: ProxiedErrorPayload +) { + keyPool.markRateLimited(req.key!); + await reenqueueRequest(req); + throw new RetryableError("Deepseek rate-limited request re-enqueued."); +} + +async function handleDeepseekBadRequestError( + req: Request, + errorPayload: ProxiedErrorPayload +) { + // Based on the checker code, a 400 response means the key is valid but there was some other error + errorPayload.proxy_note = `The API rejected the request. Check the error message for details.`; +} + async function handleOpenAIRateLimitError( req: Request, errorPayload: ProxiedErrorPayload @@ -723,6 +753,8 @@ const omittedHeaders = new Set<string>([ "set-cookie", "openai-organization", "x-request-id", + "x-ds-request-id", + "x-ds-trace-id", "cf-ray", ]); const copyHttpHeaders: ProxyResHandlerWithBody = async ( diff --git a/src/proxy/routes.ts b/src/proxy/routes.ts index 069f0ab..9b4bf97 100644 --- a/src/proxy/routes.ts +++ b/src/proxy/routes.ts @@ -10,6 +10,7 @@ import { googleAI } from "./google-ai"; import { mistralAI } from "./mistral-ai"; import { openai } from "./openai"; import { openaiImage } from "./openai-image"; +import { deepseek } from "./deepseek"; import { sendErrorToClient } from "./middleware/response/error-generator"; const proxyRouter = express.Router(); @@ -49,6 +50,7 @@ proxyRouter.use("/mistral-ai", addV1, mistralAI); proxyRouter.use("/aws", aws); proxyRouter.use("/gcp/claude", addV1, gcp); proxyRouter.use("/azure/openai", addV1, azure); +proxyRouter.use("/deepseek", addV1, deepseek); // Redirect browser requests to the homepage. proxyRouter.get("*", (req, res, next) => { diff --git a/src/service-info.ts b/src/service-info.ts index 5996e99..e8df550 100644 --- a/src/service-info.ts +++ b/src/service-info.ts @@ -19,6 +19,7 @@ import { MODEL_FAMILY_SERVICE, ModelFamily, OpenAIModelFamily, + DeepseekModelFamily, } from "./shared/models"; import { getCostSuffix, getTokenCostUsd, prettyTokens } from "./shared/stats"; import { getUniqueIps } from "./proxy/rate-limit"; @@ -96,6 +97,7 @@ export type ServiceInfo = { uptime: number; endpoints: { openai?: string; + deepseek?: string; anthropic?: string; "google-ai"?: string; "mistral-ai"?: string; @@ -117,7 +119,8 @@ export type ServiceInfo = { & { [f in GcpModelFamily]?: GcpInfo } & { [f in AzureOpenAIModelFamily]?: BaseFamilyInfo; } & { [f in GoogleAIModelFamily]?: BaseFamilyInfo } - & { [f in MistralAIModelFamily]?: BaseFamilyInfo }; + & { [f in MistralAIModelFamily]?: BaseFamilyInfo } + & { [f in DeepseekModelFamily]?: BaseFamilyInfo }; // https://stackoverflow.com/a/66661477 // type DeepKeyOf<T> = ( @@ -159,6 +162,9 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = { azure: `%BASE%/azure/openai`, "azure-image": `%BASE%/azure/openai`, }, + deepseek: { + deepseek: `%BASE%/deepseek`, + }, }; const familyStats = new Map<ModelAggregateKey, number>(); @@ -309,6 +315,7 @@ function addKeyToAggregates(k: KeyPoolKey) { addToService("aws__keys", k.service === "aws" ? 1 : 0); addToService("gcp__keys", k.service === "gcp" ? 1 : 0); addToService("azure__keys", k.service === "azure" ? 1 : 0); + addToService("deepseek__keys", k.service === "deepseek" ? 1 : 0); let sumTokens = 0; let sumCost = 0; @@ -376,6 +383,7 @@ function addKeyToAggregates(k: KeyPoolKey) { case "azure": case "google-ai": case "mistral-ai": + case "deepseek": k.modelFamilies.forEach(incrementGenericFamilyStats); break; default: diff --git a/src/shared/key-management/deepseek/checker.ts b/src/shared/key-management/deepseek/checker.ts new file mode 100644 index 0000000..d6fbb07 --- /dev/null +++ b/src/shared/key-management/deepseek/checker.ts @@ -0,0 +1,104 @@ +import { DeepseekKey } from "./provider"; +import { logger } from "../../../logger"; +import { assertNever } from "../../utils"; + +const CHECK_TIMEOUT = 10000; + +export class DeepseekKeyChecker { + private log = logger.child({ module: "key-checker", service: "deepseek" }); + + constructor(private readonly update: (hash: string, key: Partial<DeepseekKey>) => void) {} + + public async checkKey(key: DeepseekKey): Promise<void> { + try { + const result = await this.validateKey(key); + this.handleCheckResult(key, result); + } catch (error) { + this.log.warn( + { error, hash: key.hash }, + "Failed to check key status" + ); + } + } + + private async validateKey(key: DeepseekKey): Promise<"valid" | "invalid" | "quota"> { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), CHECK_TIMEOUT); + + try { + const response = await fetch("https://api.deepseek.com/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${key.key}`, + }, + body: JSON.stringify({ + model: "deepseek-chat", + messages: [{ role: "user", content: "hi" }], + max_tokens: 0, + }), + signal: controller.signal, + }); + + const rateLimit = { + limit: parseInt(response.headers.get("x-ratelimit-limit") || "200"), + remaining: parseInt(response.headers.get("x-ratelimit-remaining") || "199"), + }; + + switch (response.status) { + case 400: + this.log.debug( + { key: key.hash, rateLimit }, + "Key check successful, updating rate limit info" + ); + return "valid"; + case 401: + return "invalid"; + case 402: + return "quota"; + case 429: + this.log.warn({ key: key.hash }, "Key is rate limited"); + return "valid"; + default: + this.log.warn( + { status: response.status, hash: key.hash }, + "Unexpected status code while checking key" + ); + return "valid"; + } + } finally { + clearTimeout(timeout); + } + } + + private handleCheckResult( + key: DeepseekKey, + result: "valid" | "invalid" | "quota" + ): void { + switch (result) { + case "valid": + this.update(key.hash, { + isDisabled: false, + lastChecked: Date.now(), + }); + break; + case "invalid": + this.log.warn({ hash: key.hash }, "Key is invalid"); + this.update(key.hash, { + isDisabled: true, + isRevoked: true, + lastChecked: Date.now(), + }); + break; + case "quota": + this.log.warn({ hash: key.hash }, "Key has exceeded its quota"); + this.update(key.hash, { + isDisabled: true, + lastChecked: Date.now(), + }); + break; + default: + assertNever(result); + } + } +} diff --git a/src/shared/key-management/deepseek/provider.ts b/src/shared/key-management/deepseek/provider.ts new file mode 100644 index 0000000..5fc8521 --- /dev/null +++ b/src/shared/key-management/deepseek/provider.ts @@ -0,0 +1,152 @@ +import { Key, KeyProvider, createGenericGetLockoutPeriod } from ".."; +import { DeepseekKeyChecker } from "./checker"; +import { config } from "../../../config"; +import { logger } from "../../../logger"; +import { DeepseekModelFamily } from "../../models"; + +type DeepseekKeyUsage = { + "deepseekTokens": number; +}; + +export interface DeepseekKey extends Key, DeepseekKeyUsage { + readonly service: "deepseek"; + readonly modelFamilies: DeepseekModelFamily[]; +} + +export class DeepseekKeyProvider implements KeyProvider<DeepseekKey> { + readonly service = "deepseek"; + + private keys: DeepseekKey[] = []; + private checker?: DeepseekKeyChecker; + private log = logger.child({ module: "key-provider", service: this.service }); + + constructor() { + const keyConfig = config.deepseekKey?.trim(); + if (!keyConfig) { + return; + } + + const keys = keyConfig.split(",").map((k) => k.trim()); + for (const key of keys) { + if (!key) continue; + this.keys.push({ + key, + service: this.service, + modelFamilies: ["deepseek"], + isDisabled: false, + isRevoked: false, + promptCount: 0, + lastUsed: 0, + lastChecked: 0, + hash: this.hashKey(key), + rateLimitedAt: 0, + rateLimitedUntil: 0, + "deepseekTokens": 0, + }); + } + } + + private hashKey(key: string): string { + return require("crypto").createHash("sha256").update(key).digest("hex"); + } + + public init() { + if (this.keys.length === 0) return; + if (!config.checkKeys) { + this.log.warn( + "Key checking is disabled. Keys will not be verified." + ); + return; + } + this.checker = new DeepseekKeyChecker(this.update.bind(this)); + for (const key of this.keys) { + void this.checker.checkKey(key); + } + } + + public get(model: string): DeepseekKey { + const availableKeys = this.keys.filter((k) => !k.isDisabled); + if (availableKeys.length === 0) { + throw new Error("No Deepseek keys available"); + } + const key = availableKeys[Math.floor(Math.random() * availableKeys.length)]; + key.lastUsed = Date.now(); + this.throttle(key.hash); + return { ...key }; + } + + public list(): Omit<DeepseekKey, "key">[] { + return this.keys.map(({ key, ...rest }) => rest); + } + + public disable(key: DeepseekKey): void { + const found = this.keys.find((k) => k.hash === key.hash); + if (found) { + found.isDisabled = true; + } + } + + public update(hash: string, update: Partial<DeepseekKey>): void { + const key = this.keys.find((k) => k.hash === hash); + if (key) { + Object.assign(key, update); + } + } + + public available(): number { + return this.keys.filter((k) => !k.isDisabled).length; + } + + public incrementUsage(hash: string, model: string, tokens: number) { + const key = this.keys.find((k) => k.hash === hash); + if (!key) return; + key.promptCount++; + key[`deepseekTokens`] += tokens; + } + + + /** + * Upon being rate limited, a key will be locked out for this many milliseconds + * while we wait for other concurrent requests to finish. + */ + private static readonly RATE_LIMIT_LOCKOUT = 2000; + /** + * Upon assigning a key, we will wait this many milliseconds before allowing it + * to be used again. This is to prevent the queue from flooding a key with too + * many requests while we wait to learn whether previous ones succeeded. + */ + private static readonly KEY_REUSE_DELAY = 500; + + getLockoutPeriod = createGenericGetLockoutPeriod(() => this.keys); + + public markRateLimited(keyHash: string) { + this.log.debug({ key: keyHash }, "Key rate limited"); + const key = this.keys.find((k) => k.hash === keyHash)!; + const now = Date.now(); + key.rateLimitedAt = now; + key.rateLimitedUntil = now + DeepseekKeyProvider.RATE_LIMIT_LOCKOUT; + } + + public recheck(): void { + if (!this.checker || !config.checkKeys) return; + for (const key of this.keys) { + void this.checker.checkKey(key); + } + } + + /** + * Applies a short artificial delay to the key upon dequeueing, in order to + * prevent it from being immediately assigned to another request before the + * current one can be dispatched. + **/ + private throttle(hash: string) { + const now = Date.now(); + const key = this.keys.find((k) => k.hash === hash)!; + + const currentRateLimit = key.rateLimitedUntil; + const nextRateLimit = now + DeepseekKeyProvider.KEY_REUSE_DELAY; + + key.rateLimitedAt = now; + key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit); + } +} diff --git a/src/shared/key-management/index.ts b/src/shared/key-management/index.ts index 1dd58dc..53a2495 100644 --- a/src/shared/key-management/index.ts +++ b/src/shared/key-management/index.ts @@ -92,3 +92,4 @@ export { AzureOpenAIKey } from "./azure/provider"; export { GoogleAIKey } from "././google-ai/provider"; export { MistralAIKey } from "./mistral-ai/provider"; export { OpenAIKey } from "./openai/provider"; +export { DeepseekKey } from "./deepseek/provider"; diff --git a/src/shared/key-management/key-pool.ts b/src/shared/key-management/key-pool.ts index cab1eae..5c6db18 100644 --- a/src/shared/key-management/key-pool.ts +++ b/src/shared/key-management/key-pool.ts @@ -13,6 +13,7 @@ import { AwsBedrockKeyProvider } from "./aws/provider"; import { GcpKeyProvider, GcpKey } from "./gcp/provider"; import { AzureOpenAIKeyProvider } from "./azure/provider"; import { MistralAIKeyProvider } from "./mistral-ai/provider"; +import { DeepseekKeyProvider } from "./deepseek/provider"; type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate | Partial<GcpKey>; @@ -30,6 +31,7 @@ export class KeyPool { this.keyProviders.push(new AwsBedrockKeyProvider()); this.keyProviders.push(new GcpKeyProvider()); this.keyProviders.push(new AzureOpenAIKeyProvider()); + this.keyProviders.push(new DeepseekKeyProvider()); } public init() { @@ -129,7 +131,9 @@ export class KeyPool { } private getServiceForModel(model: string): LLMService { - if ( + if (model.startsWith("deepseek")) { + return "deepseek"; + } else if ( model.startsWith("gpt") || model.startsWith("text-embedding-ada") || model.startsWith("dall-e") diff --git a/src/shared/models.ts b/src/shared/models.ts index 2a013d6..60a40d8 100644 --- a/src/shared/models.ts +++ b/src/shared/models.ts @@ -14,7 +14,8 @@ export type LLMService = | "mistral-ai" | "aws" | "gcp" - | "azure"; + | "azure" + | "deepseek"; export type OpenAIModelFamily = | "turbo" @@ -39,6 +40,8 @@ export type AwsBedrockModelFamily = `aws-${ | MistralAIModelFamily}`; export type GcpModelFamily = "gcp-claude" | "gcp-claude-opus"; export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`; +export type DeepseekModelFamily = "deepseek"; + export type ModelFamily = | OpenAIModelFamily | AnthropicModelFamily @@ -46,11 +49,13 @@ export type ModelFamily = | MistralAIModelFamily | AwsBedrockModelFamily | GcpModelFamily - | AzureOpenAIModelFamily; + | AzureOpenAIModelFamily + | DeepseekModelFamily; export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>( arr: A & ([ModelFamily] extends [A[number]] ? unknown : never) ) => arr)([ + "deepseek", "turbo", "gpt4", "gpt4-32k", @@ -96,11 +101,13 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>( "aws", "gcp", "azure", + "deepseek", ] as const); export const MODEL_FAMILY_SERVICE: { [f in ModelFamily]: LLMService; } = { + deepseek: "deepseek", turbo: "openai", gpt4: "openai", "gpt4-turbo": "openai", @@ -272,7 +279,11 @@ export function getModelFamilyForRequest(req: Request): ModelFamily { case "openai": case "openai-text": case "openai-image": - modelFamily = getOpenAIModelFamily(model); + if (req.service === "deepseek") { + modelFamily = "deepseek"; + } else { + modelFamily = getOpenAIModelFamily(model); + } break; case "google-ai": modelFamily = getGoogleAIModelFamily(model);
Editor is loading...