diff --git a/package.json b/package.json
index d5845f6..470d5cf 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "web-codegen-scorer",
-  "version": "0.0.54",
+  "version": "0.0.55",
   "scripts": {
     "build-runner": "tsc",
     "release-build": "tsx ./scripts/release-build.ts",
diff --git a/runner/configuration/constants.ts b/runner/configuration/constants.ts
index 19c555c..f74c5bf 100644
--- a/runner/configuration/constants.ts
+++ b/runner/configuration/constants.ts
@@ -17,6 +17,9 @@ export const DEFAULT_MODEL_NAME = 'gemini-2.5-pro'; // slower than `flash`, but
  */
 export const DEFAULT_AUTORATER_MODEL_NAME = 'gemini-2.5-flash'; // use less expensive model
 
+/** Model used for AI summarization by default. */
+export const DEFAULT_SUMMARY_MODEL = 'gemini-2.5-flash-lite';
+
 /** Name of the root folder where we store LLM-generated code for debugging */
 export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
 
diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts
index ab7ef39..fefd21c 100644
--- a/runner/configuration/environment-config.ts
+++ b/runner/configuration/environment-config.ts
@@ -108,6 +108,7 @@ export const environmentConfigSchema = z.object({
     z.object({
       name: z.string(),
       path: z.string(),
+      model: z.string().optional(),
       reportsFilter: z
         .enum([ReportContextFilter.AllReports, ReportContextFilter.NonPerfectReports])
         .optional(),
diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts
index c6abbc3..fb46334 100644
--- a/runner/configuration/environment.ts
+++ b/runner/configuration/environment.ts
@@ -18,6 +18,7 @@ import {EnvironmentConfig} from './environment-config.js';
 import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
 import {renderPromptTemplate} from './prompt-templating.js';
 import {getSha256Hash} from '../utils/hashing.js';
+import {DEFAULT_SUMMARY_MODEL} from './constants.js';
 
 interface CategoryConfig {
   name: string;
@@ -27,6 +28,7 @@ interface AnalysisPrompt {
   name: string;
   prompt: string;
+  model: string;
   reportsFilter: ReportContextFilter;
   ratingsFilter: RatingContextFilter;
 }
 
@@ -463,12 +465,13 @@ private resolveAnalysisPrompts(config: EnvironmentConfig): AnalysisPrompt[] {
     const result: AnalysisPrompt[] = [];
 
-    config.analysisPrompts?.forEach(({name, path, reportsFilter, ratingsFilter}) => {
+    config.analysisPrompts?.forEach(({name, path, model, reportsFilter, ratingsFilter}) => {
       const prompt = this.renderEnvironmentPrompt(path).result;
 
       result.push({
         name,
         prompt,
+        model: model || DEFAULT_SUMMARY_MODEL,
        reportsFilter: reportsFilter ?? ReportContextFilter.NonPerfectReports,
        ratingsFilter: ratingsFilter ?? RatingContextFilter.NonPerfectRatings,
      });
 
diff --git a/runner/orchestration/generate-summary.ts b/runner/orchestration/generate-summary.ts
index 27ba1d0..38da6c5 100644
--- a/runner/orchestration/generate-summary.ts
+++ b/runner/orchestration/generate-summary.ts
@@ -12,7 +12,7 @@ import {AssessmentResult, CompletionStats, RunSummary} from '../shared-interface
 export async function prepareSummary(
   generateAiSummaryLlm: GenkitRunner | null,
   abortSignal: AbortSignal,
-  model: string,
+  evalRunModel: string,
   env: Environment,
   assessments: AssessmentResult[],
   completionStats: CompletionStats,
@@ -75,7 +75,7 @@
       abortSignal,
       assessments,
       [],
-      model,
+      config.model,
       {
         reportContextFilter: config.reportsFilter,
         ratingContextFilter: config.ratingsFilter,
@@ -101,7 +101,7 @@
   const executorInfo = await env.executor.getExecutorInfo?.();
 
   return {
-    model,
+    model: evalRunModel,
    environmentId: env.id,
    displayName: env.displayName,
    framework: {
diff --git a/runner/reporting/report-ai-summary.ts b/runner/reporting/report-ai-summary.ts
index 872f79e..b6918fc 100644
--- a/runner/reporting/report-ai-summary.ts
+++ b/runner/reporting/report-ai-summary.ts
@@ -1,4 +1,5 @@
 import {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
+import {DEFAULT_SUMMARY_MODEL} from '../configuration/constants.js';
 import {AssessmentResult, ReportContextFilter, RatingContextFilter} from '../shared-interfaces.js';
 import {chatWithReportAI} from './report-ai-chat.js';
 
@@ -7,7 +8,7 @@ export async function summarizeReportWithAI(
   abortSignal: AbortSignal,
   assessments: AssessmentResult[],
 ) {
-  const model = 'gemini-2.5-flash-lite';
+  const model = DEFAULT_SUMMARY_MODEL;
 
   if (!llm.getSupportedModels().includes(model)) {
     throw new Error(`Unable to generate AI summary due to unsupported model: ${model}`);
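
Note: with this change, each `analysisPrompts` entry in an environment config can pin its own summarization model through the new optional `model` field; `resolveAnalysisPrompts` falls back to `DEFAULT_SUMMARY_MODEL` ('gemini-2.5-flash-lite') when the field is omitted. A minimal sketch of a config excerpt using the override follows; the prompt names, file paths, and default-export shape are hypothetical, not taken from this repo:

    // Hypothetical environment config excerpt; only the `model` field is new.
    export default {
      // ...other environment fields elided...
      analysisPrompts: [
        {
          name: 'common-failures',              // hypothetical prompt name
          path: './prompts/common-failures.md', // hypothetical prompt file
          model: 'gemini-2.5-flash',            // per-prompt override of DEFAULT_SUMMARY_MODEL
        },
        {
          name: 'build-errors',                 // hypothetical; `model` omitted, so
          path: './prompts/build-errors.md',    // 'gemini-2.5-flash-lite' is used
        },
      ],
    };

The rename in `prepareSummary` keeps the two models distinct: `evalRunModel` is the model under evaluation and is what gets reported in the run summary, while `config.model` is the model that executes each analysis prompt.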