8 changes: 6 additions & 2 deletions aieng-eval-agents/aieng/agent_evals/async_client_manager.py
@@ -8,7 +8,7 @@

 from aieng.agent_evals.configs import Configs
 from langfuse import Langfuse
-from openai import AsyncOpenAI
+from langfuse.openai import AsyncOpenAI


 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
@@ -91,7 +91,11 @@ def openai_client(self) -> AsyncOpenAI:
         if self._openai_client is None:
             api_key = self.configs.openai_api_key.get_secret_value()

-            self._openai_client = AsyncOpenAI(api_key=api_key, base_url=self.configs.openai_base_url)
+            self._openai_client = AsyncOpenAI(
+                api_key=api_key,
+                base_url=self.configs.openai_base_url,
+                max_retries=0,  # Using custom retry logic (tenacity) elsewhere
+            )
             self._initialized = True
         return self._openai_client

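The `max_retries=0` argument above relies on retries being handled outside the SDK. A minimal sketch of what a tenacity-based wrapper could look like, assuming transient OpenAI errors are the ones worth retrying; the function name, exception list, and backoff numbers are illustrative, not taken from this PR:

```python
# Hypothetical sketch; the PR only references "custom retry logic (tenacity)" without showing it.
from openai import APIConnectionError, APITimeoutError, RateLimitError
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential


@retry(
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)),
    wait=wait_exponential(multiplier=1, min=1, max=30),  # exponential backoff between attempts
    stop=stop_after_attempt(5),  # give up after five attempts
    reraise=True,  # re-raise the last exception instead of wrapping it in RetryError
)
async def chat_completion_with_retries(client, **kwargs):
    # tenacity re-invokes this coroutine on the listed exceptions, which is why the
    # AsyncOpenAI client above is constructed with max_retries=0.
    return await client.chat.completions.create(**kwargs)
```

Setting `max_retries=0` avoids stacking the SDK's built-in retry loop on top of an outer one, which would multiply attempts and backoff delays on the same request.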
@@ -7,11 +7,15 @@
 """

 from .llm_judge import DEFAULT_LLM_JUDGE_RUBRIC, LLMJudgeMetric, LLMJudgeResponse, create_llm_as_judge_evaluator
+from .trace_groundedness import TraceGroundednessClaim, TraceGroundednessResponse, create_trace_groundedness_evaluator


 __all__ = [
     "DEFAULT_LLM_JUDGE_RUBRIC",
     "LLMJudgeMetric",
     "LLMJudgeResponse",
+    "TraceGroundednessClaim",
+    "TraceGroundednessResponse",
     "create_llm_as_judge_evaluator",
+    "create_trace_groundedness_evaluator",
 ]
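The four added exports make the groundedness evaluator importable alongside the existing judge. A usage sketch, with the package path inferred from the `aieng.agent_evals` prefix in the first file and therefore an assumption:

```python
# Assumed import path; only the exported names come from the diff above.
from aieng.agent_evals.evaluators import (
    TraceGroundednessClaim,
    TraceGroundednessResponse,
    create_trace_groundedness_evaluator,
)
```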
@@ -124,14 +124,14 @@ def build_error_evaluation(*, name: str, error: Exception, prefix: str) -> Evalu
     )


-def render_system_prompt_with_optional_rubric(*, system_prompt_template: str, rubric_text: str | None) -> str:
+def render_system_prompt_with_optional_rubric(*, system_prompt_template: str, rubric: str | None) -> str:
     """Render system prompt and inject rubric text when available.

     Parameters
     ----------
     system_prompt_template : str
         Base system prompt template.
-    rubric_text : str | None
+    rubric : str | None
         Rubric content in markdown format.

     Returns
@@ -140,8 +140,8 @@ def render_system_prompt_with_optional_rubric(*, system_prompt_template: str, ru
         Rendered system prompt with rubric inserted or appended.
     """
     rubric_section = ""
-    if rubric_text:
-        rubric_section = f"# Rubric\n{rubric_text.strip()}"
+    if rubric:
+        rubric_section = f"# Rubric\n{rubric.strip()}"

     if "{rubric_section}" in system_prompt_template:
         return system_prompt_template.format(rubric_section=rubric_section)
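Based only on the branch visible in this hunk, a usage sketch of the renamed `rubric` keyword; the template and rubric strings are invented for illustration:

```python
# Illustrative call; strings are made-up examples, not values from the repository.
prompt = render_system_prompt_with_optional_rubric(
    system_prompt_template="You are a strict evaluator.\n\n{rubric_section}",
    rubric="- Award 1 only when every claim is supported by the trace.",
)
# Because the template contains {rubric_section}, the rubric is injected in place:
# "You are a strict evaluator.\n\n# Rubric\n- Award 1 only when every claim is supported by the trace."
```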
@@ -193,9 +193,9 @@ def create_llm_as_judge_evaluator(

     # Load and render rubric text into the system prompt
     rubric_source = rubric_markdown if rubric_markdown is not None else DEFAULT_LLM_JUDGE_RUBRIC
-    rubric_text = load_markdown(rubric_source)
+    rubric = load_markdown(rubric_source)
     rendered_system_prompt = render_system_prompt_with_optional_rubric(
-        system_prompt_template=system_prompt_template, rubric_text=rubric_text
+        system_prompt_template=system_prompt_template, rubric=rubric
     )

     # Metric name to use when the judge call fails