diff --git a/src/config.ts b/src/config.ts
index db38ffa..f8af911 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -33,6 +33,10 @@ type Config = {
    * Comma-delimited list of Mistral AI API keys.
    */
   mistralAIKey?: string;
+  /**
+   * Comma-delimited list of Deepseek API keys.
+   */
+  deepseekKey?: string;
   /**
    * Comma-delimited list of AWS credentials. Each credential item should be a
    * colon-delimited list of access key, secret key, and AWS region.
@@ -426,6 +430,7 @@ export const config: Config = {
   anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""),
   googleAIKey: getEnvWithDefault("GOOGLE_AI_KEY", ""),
   mistralAIKey: getEnvWithDefault("MISTRAL_AI_KEY", ""),
+  deepseekKey: getEnvWithDefault("DEEPSEEK_KEY", ""),
   awsCredentials: getEnvWithDefault("AWS_CREDENTIALS", ""),
   gcpCredentials: getEnvWithDefault("GCP_CREDENTIALS", ""),
   azureCredentials: getEnvWithDefault("AZURE_CREDENTIALS", ""),
@@ -541,6 +546,7 @@ function generateSigningKey() {
     config.anthropicKey,
     config.googleAIKey,
     config.mistralAIKey,
+    config.deepseekKey,
     config.awsCredentials,
     config.gcpCredentials,
     config.azureCredentials,
@@ -689,6 +695,7 @@ export const OMITTED_KEYS = [
   "openaiKey",
   "anthropicKey",
   "googleAIKey",
   "mistralAIKey",
+  "deepseekKey",
   "awsCredentials",
   "gcpCredentials",
diff --git a/src/info-page.ts b/src/info-page.ts
index e55c810..e656e69 100644
--- a/src/info-page.ts
+++ b/src/info-page.ts
@@ -12,6 +12,7 @@ import { checkCsrfToken, injectCsrfToken } from "./shared/inject-csrf";
 
 const INFO_PAGE_TTL = 2000;
 const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
+  deepseek: "Deepseek Chat",
   turbo: "GPT-4o Mini / 3.5 Turbo",
   gpt4: "GPT-4",
   "gpt4-32k": "GPT-4 32k",
diff --git a/src/proxy/deepseek.ts b/src/proxy/deepseek.ts
new file mode 100644
index 0000000..c741a3b
--- /dev/null
+++ b/src/proxy/deepseek.ts
@@ -0,0 +1,48 @@
+import { Router } from "express";
+import {
+  addKey,
+  createPreprocessorMiddleware,
+  finalizeBody,
+} from "./middleware/request";
+import { ipLimiter } from "./rate-limit";
+import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
+import { ProxyResHandlerWithBody } from "./middleware/response";
+
+/**
+ * Passes the upstream response body through to the client unchanged,
+ * preserving any `proxy` annotations added by upstream middleware.
+ */
+const deepseekResponseHandler: ProxyResHandlerWithBody = async (
+  _proxyRes,
+  _req,
+  res,
+  body
+) => {
+  if (typeof body !== "object") {
+    throw new Error("Expected body to be an object");
+  }
+  res.status(200).json({ ...body, proxy: body.proxy });
+};
+
+const deepseekProxy = createQueuedProxyMiddleware({
+  mutations: [addKey, finalizeBody],
+  target: "https://api.deepseek.com",
+  blockingResponseHandler: deepseekResponseHandler,
+});
+
+const deepseekRouter = Router();
+
+// Deepseek's API is OpenAI-compatible, so requests are validated as OpenAI
+// chat completions and forwarded without transformation.
+deepseekRouter.post(
+  "/v1/chat/completions",
+  ipLimiter,
+  createPreprocessorMiddleware({
+    inApi: "openai",
+    outApi: "openai",
+    service: "deepseek",
+  }),
+  deepseekProxy
+);
+
+export const deepseek = deepseekRouter;
diff --git a/src/proxy/middleware/request/mutators/add-key.ts b/src/proxy/middleware/request/mutators/add-key.ts
index af37e35..17f2019 100644
--- a/src/proxy/middleware/request/mutators/add-key.ts
+++ b/src/proxy/middleware/request/mutators/add-key.ts
@@ -88,6 +88,9 @@ export const addKey: ProxyReqMutator = (manager) => {
       const azureKey = assignedKey.key;
       manager.setHeader("api-key", azureKey);
       break;
+    case "deepseek":
+      manager.setHeader("Authorization", `Bearer ${assignedKey.key}`);
+      break;
     case "aws":
     case "gcp":
     case "google-ai":
diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
index e639b37..ce99b69 100644
--- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts
+++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
@@ -96,6 +96,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     modelMax = 200000;
   } else if (model.match(/^anthropic\.claude/)) {
     modelMax = 100000;
+  } else if (model.match(/^deepseek/)) {
+    modelMax = 64000;
   } else if (model.match(/tral/)) {
     // catches mistral, mixtral, codestral, mathstral, etc. mistral models have
     // no name convention and wildly different context windows so this is a
diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index 932ec5c..a4c7ab7 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -246,6 +246,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
           errorPayload.proxy_note = `The upstream API rejected the request. Check the error message for details.`;
         }
         break;
+      case "deepseek":
+        await handleDeepseekBadRequestError(req, errorPayload);
+        break;
       case "anthropic":
       case "aws":
       case "gcp":
@@ -261,6 +264,12 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     // Key is invalid or was revoked
     keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
+  } else if (statusCode === 402) {
+    // Deepseek specific - insufficient balance
+    if (service === "deepseek") {
+      keyPool.disable(req.key!, "quota");
+      errorPayload.proxy_note = `Assigned key has insufficient balance. Please try again.`;
+    }
   } else if (statusCode === 403) {
     switch (service) {
       case "anthropic":
@@ -328,6 +337,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
       case "google-ai":
         await handleGoogleAIRateLimitError(req, errorPayload);
         break;
+      case "deepseek":
+        await handleDeepseekRateLimitError(req, errorPayload);
+        break;
       default:
         assertNever(service);
     }
@@ -351,6 +363,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
       case "aws":
       case "gcp":
       case "azure":
+      case "deepseek":
         errorPayload.proxy_note = `The key assigned to your prompt does not support the requested model.`;
         break;
       default:
@@ -484,6 +497,23 @@ async function handleGcpRateLimitError(
   }
 }
 
+async function handleDeepseekRateLimitError(
+  req: Request,
+  errorPayload: ProxiedErrorPayload
+) {
+  keyPool.markRateLimited(req.key!);
+  await reenqueueRequest(req);
+  throw new RetryableError("Deepseek rate-limited request re-enqueued.");
+}
+
+async function handleDeepseekBadRequestError(
+  req: Request,
+  errorPayload: ProxiedErrorPayload
+) {
+  // Based on the checker code, a 400 response means the key is valid but there was some other error
+  errorPayload.proxy_note = `The API rejected the request. Check the error message for details.`;
+}
+
 async function handleOpenAIRateLimitError(
   req: Request,
   errorPayload: ProxiedErrorPayload
@@ -723,6 +753,8 @@ const omittedHeaders = new Set<string>([
   "set-cookie",
   "openai-organization",
   "x-request-id",
+  "x-ds-request-id",
+  "x-ds-trace-id",
   "cf-ray",
 ]);
 const copyHttpHeaders: ProxyResHandlerWithBody = async (
diff --git a/src/proxy/routes.ts b/src/proxy/routes.ts
index 069f0ab..9b4bf97 100644
--- a/src/proxy/routes.ts
+++ b/src/proxy/routes.ts
@@ -10,6 +10,7 @@ import { googleAI } from "./google-ai";
 import { mistralAI } from "./mistral-ai";
 import { openai } from "./openai";
 import { openaiImage } from "./openai-image";
+import { deepseek } from "./deepseek";
 import { sendErrorToClient } from "./middleware/response/error-generator";
 
 const proxyRouter = express.Router();
@@ -49,6 +50,7 @@ proxyRouter.use("/mistral-ai", addV1, mistralAI);
 proxyRouter.use("/aws", aws);
 proxyRouter.use("/gcp/claude", addV1, gcp);
 proxyRouter.use("/azure/openai", addV1, azure);
+proxyRouter.use("/deepseek", addV1, deepseek);
 
 // Redirect browser requests to the homepage.
 proxyRouter.get("*", (req, res, next) => {
diff --git a/src/service-info.ts b/src/service-info.ts
index 5996e99..e8df550 100644
--- a/src/service-info.ts
+++ b/src/service-info.ts
@@ -19,6 +19,7 @@ import {
   MODEL_FAMILY_SERVICE,
   ModelFamily,
   OpenAIModelFamily,
+  DeepseekModelFamily,
 } from "./shared/models";
 import { getCostSuffix, getTokenCostUsd, prettyTokens } from "./shared/stats";
 import { getUniqueIps } from "./proxy/rate-limit";
@@ -96,6 +97,7 @@ export type ServiceInfo = {
   uptime: number;
   endpoints: {
     openai?: string;
+    deepseek?: string;
     anthropic?: string;
     "google-ai"?: string;
     "mistral-ai"?: string;
@@ -117,7 +119,8 @@ export type ServiceInfo = {
   & { [f in GcpModelFamily]?: GcpInfo }
   & { [f in AzureOpenAIModelFamily]?: BaseFamilyInfo; }
   & { [f in GoogleAIModelFamily]?: BaseFamilyInfo }
-  & { [f in MistralAIModelFamily]?: BaseFamilyInfo };
+  & { [f in MistralAIModelFamily]?: BaseFamilyInfo }
+  & { [f in DeepseekModelFamily]?: BaseFamilyInfo };
 
 // https://stackoverflow.com/a/66661477
 // type DeepKeyOf<T> = (
@@ -159,6 +162,9 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
     azure: `%BASE%/azure/openai`,
     "azure-image": `%BASE%/azure/openai`,
   },
+  deepseek: {
+    deepseek: `%BASE%/deepseek`,
+  },
 };
 
 const familyStats = new Map<ModelAggregateKey, number>();
@@ -309,6 +315,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
   addToService("aws__keys", k.service === "aws" ? 1 : 0);
   addToService("gcp__keys", k.service === "gcp" ? 1 : 0);
   addToService("azure__keys", k.service === "azure" ? 1 : 0);
+  addToService("deepseek__keys", k.service === "deepseek" ? 1 : 0);
 
   let sumTokens = 0;
   let sumCost = 0;
@@ -376,6 +383,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
     case "azure":
     case "google-ai":
     case "mistral-ai":
+    case "deepseek":
       k.modelFamilies.forEach(incrementGenericFamilyStats);
       break;
     default:
diff --git a/src/shared/key-management/deepseek/checker.ts b/src/shared/key-management/deepseek/checker.ts
new file mode 100644
index 0000000..d6fbb07
--- /dev/null
+++ b/src/shared/key-management/deepseek/checker.ts
@@ -0,0 +1,113 @@
+import { DeepseekKey } from "./provider";
+import { logger } from "../../../logger";
+import { assertNever } from "../../utils";
+
+/** Milliseconds to wait for the API before aborting a key check. */
+const CHECK_TIMEOUT = 10000;
+
+type CheckResult = "valid" | "invalid" | "quota";
+
+export class DeepseekKeyChecker {
+  private log = logger.child({ module: "key-checker", service: "deepseek" });
+
+  constructor(
+    private readonly update: (hash: string, key: Partial<DeepseekKey>) => void
+  ) {}
+
+  /** Probes the Deepseek API to determine whether `key` is usable. */
+  public async checkKey(key: DeepseekKey): Promise<void> {
+    try {
+      const result = await this.validateKey(key);
+      this.handleCheckResult(key, result);
+    } catch (error) {
+      this.log.warn({ error, hash: key.hash }, "Failed to check key status");
+    }
+  }
+
+  /**
+   * Sends a deliberately-invalid completion request (max_tokens: 0) so that a
+   * working key responds with 400 without incurring any token cost.
+   */
+  private async validateKey(key: DeepseekKey): Promise<CheckResult> {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), CHECK_TIMEOUT);
+
+    try {
+      const response = await fetch("https://api.deepseek.com/chat/completions", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${key.key}`,
+        },
+        body: JSON.stringify({
+          model: "deepseek-chat",
+          messages: [{ role: "user", content: "hi" }],
+          max_tokens: 0,
+        }),
+        signal: controller.signal,
+      });
+
+      const rateLimit = {
+        limit: parseInt(response.headers.get("x-ratelimit-limit") || "200", 10),
+        remaining: parseInt(
+          response.headers.get("x-ratelimit-remaining") || "199",
+          10
+        ),
+      };
+
+      switch (response.status) {
+        case 400:
+          // Expected: the probe request is rejected, but the key is accepted.
+          this.log.debug(
+            { key: key.hash, rateLimit },
+            "Key check successful, updating rate limit info"
+          );
+          return "valid";
+        case 401:
+          return "invalid";
+        case 402:
+          return "quota";
+        case 429:
+          this.log.warn({ key: key.hash }, "Key is rate limited");
+          return "valid";
+        default:
+          // Treat unknown statuses as valid so transient upstream issues
+          // don't permanently disable keys.
+          this.log.warn(
+            { status: response.status, hash: key.hash },
+            "Unexpected status code while checking key"
+          );
+          return "valid";
+      }
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+
+  /** Applies the outcome of a key check to the provider's key record. */
+  private handleCheckResult(key: DeepseekKey, result: CheckResult): void {
+    switch (result) {
+      case "valid":
+        this.update(key.hash, {
+          isDisabled: false,
+          lastChecked: Date.now(),
+        });
+        break;
+      case "invalid":
+        this.log.warn({ hash: key.hash }, "Key is invalid");
+        this.update(key.hash, {
+          isDisabled: true,
+          isRevoked: true,
+          lastChecked: Date.now(),
+        });
+        break;
+      case "quota":
+        this.log.warn({ hash: key.hash }, "Key has exceeded its quota");
+        this.update(key.hash, {
+          isDisabled: true,
+          lastChecked: Date.now(),
+        });
+        break;
+      default:
+        assertNever(result);
+    }
+  }
+}
diff --git a/src/shared/key-management/deepseek/provider.ts b/src/shared/key-management/deepseek/provider.ts
new file mode 100644
index 0000000..5fc8521
--- /dev/null
+++ b/src/shared/key-management/deepseek/provider.ts
@@ -0,0 +1,144 @@
+import crypto from "crypto";
+import { Key, KeyProvider, createGenericGetLockoutPeriod } from "..";
+import { DeepseekKeyChecker } from "./checker";
+import { config } from "../../../config";
+import { logger } from "../../../logger";
+import { DeepseekModelFamily } from "../../models";
+
+type DeepseekKeyUsage = {
+  /** Total tokens consumed via this key. */
+  deepseekTokens: number;
+};
+
+export interface DeepseekKey extends Key, DeepseekKeyUsage {
+  readonly service: "deepseek";
+  readonly modelFamilies: DeepseekModelFamily[];
+}
+
+export class DeepseekKeyProvider implements KeyProvider<DeepseekKey> {
+  readonly service = "deepseek";
+
+  /**
+   * Upon being rate limited, a key will be locked out for this many
+   * milliseconds while we wait for other concurrent requests to finish.
+   */
+  private static readonly RATE_LIMIT_LOCKOUT = 2000;
+  /**
+   * Upon assigning a key, we will wait this many milliseconds before allowing
+   * it to be used again. This is to prevent the queue from flooding a key
+   * with too many requests while we wait to learn whether previous ones
+   * succeeded.
+   */
+  private static readonly KEY_REUSE_DELAY = 500;
+
+  private keys: DeepseekKey[] = [];
+  private checker?: DeepseekKeyChecker;
+  private log = logger.child({ module: "key-provider", service: this.service });
+
+  constructor() {
+    const keyConfig = config.deepseekKey?.trim();
+    if (!keyConfig) return;
+
+    for (const key of keyConfig.split(",").map((k) => k.trim())) {
+      if (!key) continue;
+      this.keys.push({
+        key,
+        service: this.service,
+        modelFamilies: ["deepseek"],
+        isDisabled: false,
+        isRevoked: false,
+        promptCount: 0,
+        lastUsed: 0,
+        lastChecked: 0,
+        hash: this.hashKey(key),
+        rateLimitedAt: 0,
+        rateLimitedUntil: 0,
+        deepseekTokens: 0,
+      });
+    }
+  }
+
+  private hashKey(key: string): string {
+    return crypto.createHash("sha256").update(key).digest("hex");
+  }
+
+  public init() {
+    if (this.keys.length === 0) return;
+    if (!config.checkKeys) {
+      this.log.warn("Key checking is disabled. Keys will not be verified.");
+      return;
+    }
+    this.checker = new DeepseekKeyChecker(this.update.bind(this));
+    for (const key of this.keys) {
+      void this.checker.checkKey(key);
+    }
+  }
+
+  /** Returns a copy of a randomly-selected enabled key, throttling it briefly. */
+  public get(model: string): DeepseekKey {
+    const availableKeys = this.keys.filter((k) => !k.isDisabled);
+    if (availableKeys.length === 0) {
+      throw new Error("No Deepseek keys available");
+    }
+    const key = availableKeys[Math.floor(Math.random() * availableKeys.length)];
+    key.lastUsed = Date.now();
+    this.throttle(key.hash);
+    return { ...key };
+  }
+
+  public list(): Omit<DeepseekKey, "key">[] {
+    return this.keys.map(({ key, ...rest }) => rest);
+  }
+
+  public disable(key: DeepseekKey): void {
+    const found = this.keys.find((k) => k.hash === key.hash);
+    if (found) found.isDisabled = true;
+  }
+
+  public update(hash: string, update: Partial<DeepseekKey>): void {
+    const key = this.keys.find((k) => k.hash === hash);
+    if (key) Object.assign(key, update);
+  }
+
+  public available(): number {
+    return this.keys.filter((k) => !k.isDisabled).length;
+  }
+
+  public incrementUsage(hash: string, model: string, tokens: number) {
+    const key = this.keys.find((k) => k.hash === hash);
+    if (!key) return;
+    key.promptCount++;
+    key.deepseekTokens += tokens;
+  }
+
+  getLockoutPeriod = createGenericGetLockoutPeriod(() => this.keys);
+
+  public markRateLimited(keyHash: string) {
+    this.log.debug({ key: keyHash }, "Key rate limited");
+    const key = this.keys.find((k) => k.hash === keyHash)!;
+    const now = Date.now();
+    key.rateLimitedAt = now;
+    key.rateLimitedUntil = now + DeepseekKeyProvider.RATE_LIMIT_LOCKOUT;
+  }
+
+  public recheck(): void {
+    if (!this.checker || !config.checkKeys) return;
+    for (const key of this.keys) {
+      void this.checker.checkKey(key);
+    }
+  }
+
+  /**
+   * Applies a short artificial delay to the key upon dequeueing, in order to
+   * prevent it from being immediately assigned to another request before the
+   * current one can be dispatched.
+   */
+  private throttle(hash: string) {
+    const now = Date.now();
+    const key = this.keys.find((k) => k.hash === hash)!;
+    key.rateLimitedAt = now;
+    key.rateLimitedUntil = Math.max(
+      key.rateLimitedUntil,
+      now + DeepseekKeyProvider.KEY_REUSE_DELAY
+    );
+  }
+}
diff --git a/src/shared/key-management/index.ts b/src/shared/key-management/index.ts
index 1dd58dc..53a2495 100644
--- a/src/shared/key-management/index.ts
+++ b/src/shared/key-management/index.ts
@@ -92,3 +92,4 @@ export { AzureOpenAIKey } from "./azure/provider";
 export { GoogleAIKey } from "././google-ai/provider";
 export { MistralAIKey } from "./mistral-ai/provider";
 export { OpenAIKey } from "./openai/provider";
+export { DeepseekKey } from "./deepseek/provider";
diff --git a/src/shared/key-management/key-pool.ts b/src/shared/key-management/key-pool.ts
index cab1eae..5c6db18 100644
--- a/src/shared/key-management/key-pool.ts
+++ b/src/shared/key-management/key-pool.ts
@@ -13,6 +13,7 @@ import { AwsBedrockKeyProvider } from "./aws/provider";
 import { GcpKeyProvider, GcpKey } from "./gcp/provider";
 import { AzureOpenAIKeyProvider } from "./azure/provider";
 import { MistralAIKeyProvider } from "./mistral-ai/provider";
+import { DeepseekKeyProvider } from "./deepseek/provider";
 
 type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate | Partial<GcpKey>;
 
@@ -30,6 +31,7 @@ export class KeyPool {
     this.keyProviders.push(new AwsBedrockKeyProvider());
     this.keyProviders.push(new GcpKeyProvider());
     this.keyProviders.push(new AzureOpenAIKeyProvider());
+    this.keyProviders.push(new DeepseekKeyProvider());
   }
 
   public init() {
@@ -129,7 +131,9 @@ export class KeyPool {
   }
 
   private getServiceForModel(model: string): LLMService {
-    if (
+    if (model.startsWith("deepseek")) {
+      return "deepseek";
+    } else if (
       model.startsWith("gpt") ||
       model.startsWith("text-embedding-ada") ||
       model.startsWith("dall-e")
diff --git a/src/shared/models.ts b/src/shared/models.ts
index 2a013d6..60a40d8 100644
--- a/src/shared/models.ts
+++ b/src/shared/models.ts
@@ -14,7 +14,8 @@ export type LLMService =
   | "mistral-ai"
   | "aws"
   | "gcp"
-  | "azure";
+  | "azure"
+  | "deepseek";
 
 export type OpenAIModelFamily =
   | "turbo"
@@ -39,6 +40,8 @@ export type AwsBedrockModelFamily = `aws-${
   | MistralAIModelFamily}`;
 export type GcpModelFamily = "gcp-claude" | "gcp-claude-opus";
 export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`;
+export type DeepseekModelFamily = "deepseek";
+
 export type ModelFamily =
   | OpenAIModelFamily
   | AnthropicModelFamily
@@ -46,11 +49,13 @@ export type ModelFamily =
   | MistralAIModelFamily
   | AwsBedrockModelFamily
   | GcpModelFamily
-  | AzureOpenAIModelFamily;
+  | AzureOpenAIModelFamily
+  | DeepseekModelFamily;
 
 export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   arr: A & ([ModelFamily] extends [A[number]] ? unknown : never)
 ) => arr)([
+  "deepseek",
   "turbo",
   "gpt4",
   "gpt4-32k",
@@ -96,11 +101,13 @@ export const LLM_SERVICES = (<A extends readonly LLMService[]>(
   "aws",
   "gcp",
   "azure",
+  "deepseek",
 ] as const);
 
 export const MODEL_FAMILY_SERVICE: {
   [f in ModelFamily]: LLMService;
 } = {
+  deepseek: "deepseek",
   turbo: "openai",
   gpt4: "openai",
   "gpt4-turbo": "openai",
@@ -272,7 +279,11 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
       case "openai":
       case "openai-text":
       case "openai-image":
-        modelFamily = getOpenAIModelFamily(model);
+        if (req.service === "deepseek") {
+          modelFamily = "deepseek";
+        } else {
+          modelFamily = getOpenAIModelFamily(model);
+        }
         break;
       case "google-ai":
         modelFamily = getGoogleAIModelFamily(model);