2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
 {
   "name": "web-codegen-scorer",
-  "version": "0.0.54",
+  "version": "0.0.55",
   "scripts": {
     "build-runner": "tsc",
     "release-build": "tsx ./scripts/release-build.ts",
3 changes: 3 additions & 0 deletions runner/configuration/constants.ts
@@ -17,6 +17,9 @@ export const DEFAULT_MODEL_NAME = 'gemini-2.5-pro'; // slower than `flash`, but
  */
 export const DEFAULT_AUTORATER_MODEL_NAME = 'gemini-2.5-flash'; // use less expensive model
 
+/** Model used for AI summarization by default. */
+export const DEFAULT_SUMMARY_MODEL = 'gemini-2.5-flash-lite';
+
 /** Name of the root folder where we store LLM-generated code for debugging */
 export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
 
1 change: 1 addition & 0 deletions runner/configuration/environment-config.ts
@@ -108,6 +108,7 @@ export const environmentConfigSchema = z.object({
     z.object({
       name: z.string(),
       path: z.string(),
+      model: z.string().optional(),
       reportsFilter: z
         .enum([ReportContextFilter.AllReports, ReportContextFilter.NonPerfectReports])
         .optional(),
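
Reviewer note: a minimal sketch of how the new optional `model` field might be used in an environment config. The file name, prompt names, paths, and the `export default` shape below are hypothetical; the fallback behavior comes from `resolveAnalysisPrompts` in this PR.

```ts
// environment.config.ts (hypothetical example)
export default {
  // ...other environment options...
  analysisPrompts: [
    {
      name: 'regression-themes',
      path: './prompts/regression-themes.md',
      model: 'gemini-2.5-flash', // new: overrides the summarization model for this prompt
    },
    {
      name: 'coverage-gaps',
      path: './prompts/coverage-gaps.md',
      // `model` omitted: resolves to DEFAULT_SUMMARY_MODEL ('gemini-2.5-flash-lite')
    },
  ],
};
```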
5 changes: 4 additions & 1 deletion runner/configuration/environment.ts
@@ -18,6 +18,7 @@ import {EnvironmentConfig} from './environment-config.js';
 import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
 import {renderPromptTemplate} from './prompt-templating.js';
 import {getSha256Hash} from '../utils/hashing.js';
+import {DEFAULT_SUMMARY_MODEL} from './constants.js';
 
 interface CategoryConfig {
   name: string;
@@ -27,6 +28,7 @@ interface CategoryConfig {
 interface AnalysisPrompt {
   name: string;
   prompt: string;
+  model: string;
   reportsFilter: ReportContextFilter;
   ratingsFilter: RatingContextFilter;
 }
@@ -463,12 +465,13 @@ export class Environment {
   private resolveAnalysisPrompts(config: EnvironmentConfig): AnalysisPrompt[] {
     const result: AnalysisPrompt[] = [];
 
-    config.analysisPrompts?.forEach(({name, path, reportsFilter, ratingsFilter}) => {
+    config.analysisPrompts?.forEach(({name, path, model, reportsFilter, ratingsFilter}) => {
       const prompt = this.renderEnvironmentPrompt(path).result;
 
       result.push({
         name,
         prompt,
+        model: model || DEFAULT_SUMMARY_MODEL,
         reportsFilter: reportsFilter ?? ReportContextFilter.NonPerfectReports,
         ratingsFilter: ratingsFilter ?? RatingContextFilter.NonPerfectRatings,
       });
6 changes: 3 additions & 3 deletions runner/orchestration/generate-summary.ts
@@ -12,7 +12,7 @@ import {AssessmentResult, CompletionStats, RunSummary} from '../shared-interfaces.js';
 export async function prepareSummary(
   generateAiSummaryLlm: GenkitRunner | null,
   abortSignal: AbortSignal,
-  model: string,
+  evalRunModel: string,
   env: Environment,
   assessments: AssessmentResult[],
   completionStats: CompletionStats,
@@ -75,7 +75,7 @@
     abortSignal,
     assessments,
     [],
-    model,
+    config.model,
     {
       reportContextFilter: config.reportsFilter,
       ratingContextFilter: config.ratingsFilter,
@@ -101,7 +101,7 @@
   const executorInfo = await env.executor.getExecutorInfo?.();
 
   return {
-    model,
+    model: evalRunModel,
     environmentId: env.id,
     displayName: env.displayName,
     framework: {
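
Reviewer note: the rename disambiguates two models that previously shared the name `model` inside `prepareSummary`. A hedged sketch of the distinction at a call site (only the parameters visible in this diff are shown; the full signature has more arguments):

```ts
// evalRunModel: the model whose generated code the eval run assessed; it is only
// echoed back into the returned summary metadata (`model: evalRunModel`).
// config.model: the per-analysis-prompt summarization model resolved in
// Environment.resolveAnalysisPrompts(), used when running each analysis prompt.
const summary = await prepareSummary(
  generateAiSummaryLlm,  // GenkitRunner | null
  abortSignal,
  'gemini-2.5-pro',      // evalRunModel (hypothetical value): model under evaluation
  env,
  assessments,
  completionStats,
  // ...remaining arguments elided...
);
```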
3 changes: 2 additions & 1 deletion runner/reporting/report-ai-summary.ts
@@ -1,4 +1,5 @@
 import {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
+import {DEFAULT_SUMMARY_MODEL} from '../configuration/constants.js';
 import {AssessmentResult, ReportContextFilter, RatingContextFilter} from '../shared-interfaces.js';
 import {chatWithReportAI} from './report-ai-chat.js';
 
@@ -7,7 +8,7 @@ export async function summarizeReportWithAI(
   abortSignal: AbortSignal,
   assessments: AssessmentResult[],
 ) {
-  const model = 'gemini-2.5-flash-lite';
+  const model = DEFAULT_SUMMARY_MODEL;
 
   if (!llm.getSupportedModels().includes(model)) {
     throw new Error(`Unable to generate AI summary due to unsupported model: ${model}`);