@@ -258,7 +258,7 @@ struct BenchmarkManifest {
258258 benchmarks : Vec < BenchmarkManifestCase > ,
259259}
260260
261- #[ derive( Debug , Deserialize ) ]
261+ #[ derive( Debug , Clone , Deserialize ) ]
262262struct BenchmarkManifestCase {
263263 name : Option < String > ,
264264 command : String ,
@@ -273,6 +273,43 @@ struct BenchmarkManifestCase {
273273 file : Option < String > ,
274274 language : Option < String > ,
275275 duration_secs : Option < u64 > ,
276+ matrix : Option < BenchmarkManifestMatrix > ,
277+ }
278+
279+ #[ derive( Debug , Clone , Deserialize , Default ) ]
280+ struct BenchmarkManifestMatrix {
281+ model : Option < Vec < String > > ,
282+ iterations : Option < Vec < u32 > > ,
283+ concurrent : Option < Vec < u32 > > ,
284+ warmup : Option < Vec < bool > > ,
285+ prompt : Option < Vec < String > > ,
286+ system : Option < Vec < String > > ,
287+ max_tokens : Option < Vec < usize > > ,
288+ text : Option < Vec < String > > ,
289+ file : Option < Vec < String > > ,
290+ language : Option < Vec < String > > ,
291+ duration_secs : Option < Vec < u64 > > ,
292+ }
293+
294+ #[ derive( Debug , Clone ) ]
295+ struct MatrixDimension {
296+ key : & ' static str ,
297+ values : Vec < MatrixValue > ,
298+ }
299+
/// A single value on a matrix axis, tagged with the case field it overrides.
#[derive(Clone, Debug)]
enum MatrixValue {
    /// Override for the case's `model`.
    Model(String),
    /// Override for the case's `iterations`.
    Iterations(u32),
    /// Override for the case's `concurrent`.
    Concurrent(u32),
    /// Override for the case's `warmup`.
    Warmup(bool),
    /// Override for the case's `prompt`.
    Prompt(String),
    /// Override for the case's `system`.
    System(String),
    /// Override for the case's `max_tokens`.
    MaxTokens(usize),
    /// Override for the case's `text`.
    Text(String),
    /// Override for the case's `file`.
    File(String),
    /// Override for the case's `language`.
    Language(String),
    /// Override for the case's `duration_secs`.
    DurationSecs(u64),
}
277314
278315#[ derive( Debug , Serialize ) ]
@@ -530,6 +567,198 @@ fn format_case_list(cases: &[String]) -> String {
530567 }
531568}
532569
570+ impl MatrixValue {
571+ fn apply ( & self , case : & mut BenchmarkManifestCase ) {
572+ match self {
573+ MatrixValue :: Model ( value) => case. model = Some ( value. clone ( ) ) ,
574+ MatrixValue :: Iterations ( value) => case. iterations = Some ( * value) ,
575+ MatrixValue :: Concurrent ( value) => case. concurrent = Some ( * value) ,
576+ MatrixValue :: Warmup ( value) => case. warmup = Some ( * value) ,
577+ MatrixValue :: Prompt ( value) => case. prompt = Some ( value. clone ( ) ) ,
578+ MatrixValue :: System ( value) => case. system = Some ( value. clone ( ) ) ,
579+ MatrixValue :: MaxTokens ( value) => case. max_tokens = Some ( * value) ,
580+ MatrixValue :: Text ( value) => case. text = Some ( value. clone ( ) ) ,
581+ MatrixValue :: File ( value) => case. file = Some ( value. clone ( ) ) ,
582+ MatrixValue :: Language ( value) => case. language = Some ( value. clone ( ) ) ,
583+ MatrixValue :: DurationSecs ( value) => case. duration_secs = Some ( * value) ,
584+ }
585+ }
586+
587+ fn label_value ( & self ) -> String {
588+ match self {
589+ MatrixValue :: Model ( value)
590+ | MatrixValue :: Prompt ( value)
591+ | MatrixValue :: System ( value)
592+ | MatrixValue :: Text ( value)
593+ | MatrixValue :: File ( value)
594+ | MatrixValue :: Language ( value) => matrix_label_string ( value) ,
595+ MatrixValue :: Iterations ( value) => value. to_string ( ) ,
596+ MatrixValue :: Concurrent ( value) => value. to_string ( ) ,
597+ MatrixValue :: MaxTokens ( value) => value. to_string ( ) ,
598+ MatrixValue :: DurationSecs ( value) => value. to_string ( ) ,
599+ MatrixValue :: Warmup ( value) => value. to_string ( ) ,
600+ }
601+ }
602+ }
603+
604+ impl BenchmarkManifestMatrix {
605+ fn dimensions ( & self ) -> Result < Vec < MatrixDimension > > {
606+ let mut dimensions = Vec :: new ( ) ;
607+ add_matrix_dimension ( & mut dimensions, "model" , & self . model , MatrixValue :: Model ) ?;
608+ add_matrix_dimension (
609+ & mut dimensions,
610+ "iterations" ,
611+ & self . iterations ,
612+ MatrixValue :: Iterations ,
613+ ) ?;
614+ add_matrix_dimension (
615+ & mut dimensions,
616+ "concurrent" ,
617+ & self . concurrent ,
618+ MatrixValue :: Concurrent ,
619+ ) ?;
620+ add_matrix_dimension ( & mut dimensions, "warmup" , & self . warmup , MatrixValue :: Warmup ) ?;
621+ add_matrix_dimension ( & mut dimensions, "prompt" , & self . prompt , MatrixValue :: Prompt ) ?;
622+ add_matrix_dimension ( & mut dimensions, "system" , & self . system , MatrixValue :: System ) ?;
623+ add_matrix_dimension (
624+ & mut dimensions,
625+ "max_tokens" ,
626+ & self . max_tokens ,
627+ MatrixValue :: MaxTokens ,
628+ ) ?;
629+ add_matrix_dimension ( & mut dimensions, "text" , & self . text , MatrixValue :: Text ) ?;
630+ add_matrix_dimension ( & mut dimensions, "file" , & self . file , MatrixValue :: File ) ?;
631+ add_matrix_dimension (
632+ & mut dimensions,
633+ "language" ,
634+ & self . language ,
635+ MatrixValue :: Language ,
636+ ) ?;
637+ add_matrix_dimension (
638+ & mut dimensions,
639+ "duration_secs" ,
640+ & self . duration_secs ,
641+ MatrixValue :: DurationSecs ,
642+ ) ?;
643+ if dimensions. is_empty ( ) {
644+ return Err ( CliError :: InvalidInput (
645+ "Benchmark matrix must include at least one non-empty field" . to_string ( ) ,
646+ ) ) ;
647+ }
648+ Ok ( dimensions)
649+ }
650+ }
651+
652+ fn add_matrix_dimension < T , F > (
653+ dimensions : & mut Vec < MatrixDimension > ,
654+ key : & ' static str ,
655+ values : & Option < Vec < T > > ,
656+ map : F ,
657+ ) -> Result < ( ) >
658+ where
659+ T : Clone ,
660+ F : Fn ( T ) -> MatrixValue ,
661+ {
662+ let Some ( values) = values else {
663+ return Ok ( ( ) ) ;
664+ } ;
665+ if values. is_empty ( ) {
666+ return Err ( CliError :: InvalidInput ( format ! (
667+ "Benchmark matrix field `{key}` must contain at least one value"
668+ ) ) ) ;
669+ }
670+ dimensions. push ( MatrixDimension {
671+ key,
672+ values : values. iter ( ) . cloned ( ) . map ( map) . collect ( ) ,
673+ } ) ;
674+ Ok ( ( ) )
675+ }
676+
677+ fn matrix_label_string ( value : & str ) -> String {
678+ let normalized = value. split_whitespace ( ) . collect :: < Vec < _ > > ( ) . join ( " " ) ;
679+ if normalized. chars ( ) . count ( ) <= 32 {
680+ normalized
681+ } else {
682+ format ! (
683+ "{}~{:016x}" ,
684+ normalized. chars( ) . take( 32 ) . collect:: <String >( ) ,
685+ stable_label_hash( & normalized)
686+ )
687+ }
688+ }
689+
/// Hashes `value` with 64-bit FNV-1a over its UTF-8 bytes.
///
/// The result depends only on the input bytes, so labels built from it are
/// the same on every run.
fn stable_label_hash(value: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    value.bytes().fold(FNV_OFFSET_BASIS, |hash, byte| {
        // FNV-1a order: xor the byte in first, then multiply by the prime.
        (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
698+
699+ fn expand_manifest_cases ( manifest : & BenchmarkManifest ) -> Result < Vec < BenchmarkManifestCase > > {
700+ let mut expanded = Vec :: new ( ) ;
701+ for case in & manifest. benchmarks {
702+ expanded. extend ( expand_manifest_case ( case) ?) ;
703+ }
704+ reject_duplicate_manifest_case_names ( & expanded) ?;
705+ Ok ( expanded)
706+ }
707+
708+ fn expand_manifest_case ( case : & BenchmarkManifestCase ) -> Result < Vec < BenchmarkManifestCase > > {
709+ let Some ( matrix) = case. matrix . as_ref ( ) else {
710+ return Ok ( vec ! [ case. clone( ) ] ) ;
711+ } ;
712+ let dimensions = matrix. dimensions ( ) ?;
713+ let mut expanded = vec ! [ ( case. clone( ) , Vec :: <String >:: new( ) ) ] ;
714+
715+ for dimension in dimensions {
716+ let mut next = Vec :: new ( ) ;
717+ for ( base, labels) in expanded {
718+ for value in & dimension. values {
719+ let mut case = base. clone ( ) ;
720+ value. apply ( & mut case) ;
721+ let mut labels = labels. clone ( ) ;
722+ labels. push ( format ! ( "{}={}" , dimension. key, value. label_value( ) ) ) ;
723+ next. push ( ( case, labels) ) ;
724+ }
725+ }
726+ expanded = next;
727+ }
728+
729+ Ok ( expanded
730+ . into_iter ( )
731+ . map ( |( mut case, labels) | {
732+ case. matrix = None ;
733+ case. name = Some ( match case. name . as_deref ( ) {
734+ Some ( name) => format ! ( "{name}[{}]" , labels. join( "," ) ) ,
735+ None => format ! (
736+ "{}[{}]" ,
737+ case. command. to_ascii_lowercase( ) ,
738+ labels. join( "," )
739+ ) ,
740+ } ) ;
741+ case
742+ } )
743+ . collect ( ) )
744+ }
745+
746+ fn reject_duplicate_manifest_case_names ( cases : & [ BenchmarkManifestCase ] ) -> Result < ( ) > {
747+ let mut names = BTreeSet :: new ( ) ;
748+ for ( index, case) in cases. iter ( ) . enumerate ( ) {
749+ let name = case
750+ . name
751+ . clone ( )
752+ . unwrap_or_else ( || format ! ( "case-{}" , index + 1 ) ) ;
753+ if !names. insert ( name. clone ( ) ) {
754+ return Err ( CliError :: InvalidInput ( format ! (
755+ "Benchmark manifest expands to duplicate case name `{name}`"
756+ ) ) ) ;
757+ }
758+ }
759+ Ok ( ( ) )
760+ }
761+
533762async fn read_json_report ( path : & Path ) -> Result < serde_json:: Value > {
534763 let text = tokio:: fs:: read_to_string ( path)
535764 . await
@@ -654,6 +883,7 @@ async fn bench_manifest(
654883 "Benchmark manifest must include at least one [[benchmarks]] entry" . to_string ( ) ,
655884 ) ) ;
656885 }
886+ let benchmark_cases = expand_manifest_cases ( & manifest) ?;
657887
658888 let suite_server = manifest. server . as_deref ( ) . unwrap_or ( server) . to_string ( ) ;
659889 let started_at = Utc :: now ( ) ;
@@ -664,18 +894,18 @@ async fn bench_manifest(
664894 if options. interactive ( ) {
665895 theme. step (
666896 1 ,
667- manifest . benchmarks . len ( ) ,
897+ benchmark_cases . len ( ) ,
668898 & format ! ( "Running benchmark manifest {}" , manifest_path. display( ) ) ,
669899 ) ;
670900 }
671901
672- for ( index, case) in manifest . benchmarks . iter ( ) . enumerate ( ) {
902+ for ( index, case) in benchmark_cases . iter ( ) . enumerate ( ) {
673903 if options. interactive ( ) {
674904 let label = case. name . as_deref ( ) . unwrap_or ( case. command . as_str ( ) ) ;
675905 theme. info ( & format ! (
676906 "Case {}/{}: {}" ,
677907 index + 1 ,
678- manifest . benchmarks . len( ) ,
908+ benchmark_cases . len( ) ,
679909 label
680910 ) ) ;
681911 }
@@ -2454,4 +2684,59 @@ mod tests {
24542684 let err = report_entry_map ( entries, "Current" ) . expect_err ( "duplicates should fail" ) ;
24552685 assert ! ( format!( "{err}" ) . contains( "duplicate benchmark case name `duplicate`" ) ) ;
24562686 }
2687+
/// A two-by-two matrix yields four named cases, first dimension slowest.
#[test]
fn manifest_matrix_expands_cartesian_cases() {
    let source = r#"
[[benchmarks]]
name = "chat-short"
command = "chat"
prompt = "hello"
iterations = 1

[benchmarks.matrix]
model = ["m1", "m2"]
concurrent = [1, 2]
"#;
    let manifest: BenchmarkManifest = toml::from_str(source).expect("manifest should parse");

    let cases = expand_manifest_cases(&manifest).expect("matrix should expand");

    let mut names = Vec::new();
    for case in &cases {
        names.push(case.name.as_deref().expect("expanded cases are named"));
    }
    assert_eq!(
        names,
        [
            "chat-short[model=m1,concurrent=1]",
            "chat-short[model=m1,concurrent=2]",
            "chat-short[model=m2,concurrent=1]",
            "chat-short[model=m2,concurrent=2]",
        ]
    );

    // Spot-check that the overrides landed on the first and last cases.
    let (first, last) = (&cases[0], &cases[3]);
    assert_eq!(first.model.as_deref(), Some("m1"));
    assert_eq!(first.concurrent, Some(1));
    assert_eq!(last.model.as_deref(), Some("m2"));
    assert_eq!(last.concurrent, Some(2));
}
2724+
/// Repeating a value inside one dimension produces a duplicate-name error.
#[test]
fn manifest_matrix_rejects_duplicate_expanded_names() {
    let source = r#"
[[benchmarks]]
name = "chat-short"
command = "chat"

[benchmarks.matrix]
concurrent = [1, 1]
"#;
    let manifest: BenchmarkManifest = toml::from_str(source).expect("manifest should parse");

    let message = expand_manifest_cases(&manifest)
        .expect_err("duplicate matrix names should fail")
        .to_string();
    assert!(message.contains("duplicate case name `chat-short[concurrent=1]`"));
}
24572742}
0 commit comments