Compare commits

...

3 Commits

Author SHA1 Message Date
Soulter 4c957ffe35 fix: improve logging for invalid embedding_dimensions configuration 2026-03-17 17:51:38 +08:00
jnMetaCode 41a7a660c8 fix: handle invalid dimensions config and align get_dim return
- Add try-except around int() conversion in _embedding_kwargs to
  gracefully handle invalid embedding_dimensions config values
- Update get_dim() to return 0 when embedding_dimensions is not
  explicitly configured, so callers know dimensions weren't specified
  and can handle it accordingly
- Both methods now share consistent logic for reading the config

Signed-off-by: JiangNan <1394485448@qq.com>
2026-03-16 18:06:26 +08:00
jiangnan 44c8c63899 fix: only pass dimensions param when explicitly configured
Models like bge-m3 don't support the dimensions parameter in the
embedding API, causing HTTP 400 errors. Previously dimensions was
always sent with a default value of 1024, even when the user never
configured it. Now dimensions is only included in the request when
embedding_dimensions is explicitly set in provider config.

Closes #6421

Signed-off-by: JiangNan <1394485448@qq.com>
2026-03-16 17:42:35 +08:00
@@ -40,25 +40,46 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
async def get_embedding(self, text: str) -> list[float]:
"""获取文本的嵌入"""
kwargs = self._embedding_kwargs()
embedding = await self.client.embeddings.create(
input=text,
model=self.model,
dimensions=self.get_dim(),
**kwargs,
)
return embedding.data[0].embedding
async def get_embeddings(self, text: list[str]) -> list[list[float]]:
"""批量获取文本的嵌入"""
kwargs = self._embedding_kwargs()
embeddings = await self.client.embeddings.create(
input=text,
model=self.model,
dimensions=self.get_dim(),
**kwargs,
)
return [item.embedding for item in embeddings.data]
def _embedding_kwargs(self) -> dict:
"""构建嵌入请求的可选参数"""
kwargs = {}
if "embedding_dimensions" in self.provider_config:
try:
kwargs["dimensions"] = int(self.provider_config["embedding_dimensions"])
except (ValueError, TypeError):
logger.warning(
f"embedding_dimensions in embedding configs is not a valid integer: '{self.provider_config['embedding_dimensions']}', ignored."
)
return kwargs
def get_dim(self) -> int:
"""获取向量的维度"""
return int(self.provider_config.get("embedding_dimensions", 1024))
if "embedding_dimensions" in self.provider_config:
try:
return int(self.provider_config["embedding_dimensions"])
except (ValueError, TypeError):
logger.warning(
f"embedding_dimensions in embedding configs is not a valid integer: '{self.provider_config['embedding_dimensions']}', ignored."
)
return 0
async def terminate(self):
if self.client: