From c7425123d519ce9ab2c5322a17654fabb333a320 Mon Sep 17 00:00:00 2001 From: Windy Phung Date: Mon, 9 Feb 2026 11:35:14 +0100 Subject: [PATCH 1/5] Save diversity and complexity bin in the program --- openevolve/database.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openevolve/database.py b/openevolve/database.py index d39792c0ce..6dd1e9f190 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -847,6 +847,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]: # Use code length as complexity measure complexity = len(program.code) bin_idx = self._calculate_complexity_bin(complexity) + program.complexity = bin_idx # Store complexity bin in program coords.append(bin_idx) elif dim == "diversity": # Use cached diversity calculation with reference set @@ -855,6 +856,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]: else: diversity = self._get_cached_diversity(program) bin_idx = self._calculate_diversity_bin(diversity) + program.diversity = bin_idx # Store diversity bin in program coords.append(bin_idx) elif dim == "score": # Use average of numeric metrics From c53c195babcbd111eb81bbfbba0f9cd540ace183 Mon Sep 17 00:00:00 2001 From: Windy Phung Date: Mon, 9 Feb 2026 11:36:04 +0100 Subject: [PATCH 2/5] Use llm_feedback_weight for final score --- openevolve/evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index b1142ece50..5268020c3b 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -210,7 +210,7 @@ async def evaluate_program( accuracy = eval_result.metrics["combined_score"] # Combine with LLM average (70% accuracy, 30% LLM quality) eval_result.metrics["combined_score"] = ( - accuracy * 0.7 + llm_average * 0.3 + accuracy * (1-self.config.llm_feedback_weight) + llm_average * self.config.llm_feedback_weight ) # Store artifacts if enabled and present From 74f7b0f5e441ee713a236a8f038d39646867e306 Mon Sep 17 00:00:00 2001 From: Windy Phung Date: Mon, 9 Feb 2026 21:49:06 +0100 Subject: [PATCH 3/5] Update openevolve/evaluator.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- openevolve/evaluator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index 5268020c3b..5827019549 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -208,7 +208,8 @@ async def evaluate_program( if "combined_score" in eval_result.metrics: # Original combined_score is just accuracy accuracy = eval_result.metrics["combined_score"] - # Combine with LLM average (70% accuracy, 30% LLM quality) + # Combine accuracy with LLM average using dynamic weighting: + # (1 - llm_feedback_weight) * accuracy + llm_feedback_weight * LLM quality eval_result.metrics["combined_score"] = ( accuracy * (1-self.config.llm_feedback_weight) + llm_average * self.config.llm_feedback_weight ) From 7672b73a082dfffb99130d6ecfebe4677bbc1aeb Mon Sep 17 00:00:00 2001 From: Windy Phung Date: Tue, 3 Mar 2026 11:29:16 +0100 Subject: [PATCH 4/5] Adjust formatting --- openevolve/evaluator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index 5827019549..9addca39a6 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -208,10 +208,10 @@ async def evaluate_program( if "combined_score" in eval_result.metrics: # Original combined_score is just accuracy accuracy = eval_result.metrics["combined_score"] - # Combine accuracy with LLM average using dynamic weighting: - # (1 - llm_feedback_weight) * accuracy + llm_feedback_weight * LLM quality + # Combine with LLM average using dynamic weighting eval_result.metrics["combined_score"] = ( - accuracy * (1-self.config.llm_feedback_weight) + llm_average * self.config.llm_feedback_weight + accuracy * (1 - self.config.llm_feedback_weight) + + llm_average * self.config.llm_feedback_weight ) # Store artifacts if enabled and present From 15c6cb67a2d40cd66f3e9f2d5b7179a530f3e373 Mon Sep 17 00:00:00 2001 From: Windy Phung Date: Tue, 3 Mar 2026 11:29:39 +0100 Subject: [PATCH 5/5] Removed metrics prefix (e.g. performs well on) --- openevolve/prompts/defaults/fragments.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openevolve/prompts/defaults/fragments.json b/openevolve/prompts/defaults/fragments.json index b59410405a..7ba517dff6 100644 --- a/openevolve/prompts/defaults/fragments.json +++ b/openevolve/prompts/defaults/fragments.json @@ -22,8 +22,8 @@ "attempt_all_metrics_improved": "Improvement in all metrics", "attempt_all_metrics_regressed": "Regression in all metrics", "attempt_mixed_metrics": "Mixed results", - "top_program_metrics_prefix": "Performs well on", - "diverse_program_metrics_prefix": "Alternative approach to", + "top_program_metrics_prefix": "", + "diverse_program_metrics_prefix": "", "inspiration_type_diverse": "Diverse", "inspiration_type_migrant": "Migrant", "inspiration_type_random": "Random",