boundml.evaluation
class Evaluator:
    """
    Evaluates a set of solvers against a set of instances.
    Separates the configuration from the parallel execution logic.
    """

    def __init__(
            self,
            metrics: List[str],
            fail_on_error: bool = True,
            limit_gbytes: Optional[int] = None,
            reporter: Optional[BaseReporter] = None,
            callback: Callable[[str, int, int, int, np.ndarray], None] | None = None
    ):
        """
        Parameters
        ----------
        metrics : List[str]
            Names of the metrics queried on each solver after a solve (via ``solver[metric]``).
        fail_on_error : bool
            Whether to raise an exception when a solver fails.
            If False and an error occurs, the resulting metrics are all 0.
            Default is True.
        limit_gbytes : int | None
            Memory limit applied to the children processes in GB. If None, no limit is applied.
            When specified, if the child reaches the memory limit, it catches the exception and cancels the
            solving process. All the resulting metrics are 0.
            /!\\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is
            used, it will change the memory limit of the main process.
            Default None.
        reporter : Optional[BaseReporter]
            BaseReporter used to report the results during the evaluation.
            If None, a simple ConsoleReporter is built. It prints the results of the solvers on stdout.
        callback : Callable[[str, int, int, int, np.ndarray], None] | None
            Callback function called after an instance is solved by a solver. Takes as arguments the instance
            name, the instance index i, the solver index j, the seed index s, and the ndarray d containing all
            the results. d[i, j, s, :] contains all the metrics from the solving of instance i by solver j with
            the seed seeds[s].
        """
        self.metrics = metrics
        self.fail_on_error = fail_on_error
        self.limit_gbytes = limit_gbytes
        self.reporter = reporter if reporter is not None else ConsoleReporter()  # Default to a console reporter
        self.callback = callback

    @staticmethod
    def _monitor_memory(pid, limit_bytes, stop_event):
        """
        Watchdog loop run in a thread: polls the RSS of `pid` once per second and, when it exceeds
        `limit_bytes`, clamps RLIMIT_AS so the underlying solver's next allocation fails with MemoryError.
        Exits when `stop_event` is set or the watched process disappears.
        """
        process = psutil.Process(pid)
        while not stop_event.is_set():
            try:
                # Check strictly PHYSICAL memory (RSS)
                rss = process.memory_info().rss
                if rss > limit_bytes:
                    warnings.warn(
                        f"[{pid}] KILLED: Used {rss / 1024 ** 3:.2f} GB > Limit {limit_bytes / 1024 ** 3:.2f} GB")

                    # Setting the RLIMIT_AS now will force the underlying solver to crash.
                    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
                    break
            except psutil.NoSuchProcess:
                break
            time.sleep(1)

    @staticmethod
    def _solve(solver, prob_file_name, metrics, seed, fail_on_error, fail_on_memory_error):
        """
        Solve one instance with one solver/seed and return the list of metric values.

        On MemoryError: re-raise if `fail_on_memory_error`, otherwise warn and return all-zero metrics.
        On any other exception: re-raise if `fail_on_error`, otherwise warn and return all-zero metrics.
        """
        try:
            solver.set_seed(seed)
            solver.solve(prob_file_name)
            return [solver[metric] for metric in metrics]
        except MemoryError:
            if fail_on_memory_error:
                raise
            warnings.warn(f"Memory usage reached while solving {prob_file_name} with {solver}")
            return [0 for _ in metrics]
        except Exception as e:
            if fail_on_error:
                raise
            warnings.warn(f"Error while solving {prob_file_name} with {solver}: {e}")
            return [0 for _ in metrics]

    @staticmethod
    def _solve_wrapper(args):
        """
        Unpack one task tuple, optionally start the memory watchdog thread for the current process,
        run the solve, and return (i, j, s, metric_values, instance_name).

        When a memory limit is configured (limit_rss_bytes is not None), MemoryError is swallowed by
        `_solve` (all-zero metrics); without a limit, MemoryError propagates.
        """
        i, j, s, seed, solver, instance_path, metrics, instance_name, fail_on_error, limit_rss_bytes = args

        stop_event, watcher = None, None
        if limit_rss_bytes is not None:
            stop_event = threading.Event()
            watcher = threading.Thread(target=Evaluator._monitor_memory, args=(os.getpid(), limit_rss_bytes, stop_event))
            watcher.start()

        try:
            metrics_values = Evaluator._solve(solver, instance_path, metrics, seed, fail_on_error, limit_rss_bytes is None)
        finally:
            # Always stop the watchdog, even if the solve raised.
            if limit_rss_bytes is not None:
                stop_event.set()
                watcher.join()

        return i, j, s, metrics_values, instance_name

    def evaluate(
            self,
            solvers: List[Solver],
            instances: Instances,
            n_instances: int,
            seeds: List[int] = (0,),
            executor: Optional[concurrent.futures.Executor] = None,
            display_instance_names: bool = False
    ) -> SolverEvaluationResults:
        """
        Executes the evaluation.

        Parameters
        ----------
        solvers : List[Solver]
            List of solvers that will solve each instance
        instances : Instances
            Instances generator. Yields either pyscipopt Model or a str path.
        n_instances : int
            Number of instances to evaluate
        seeds : List[int]
            List of seeds used to solve an instance.
        executor : concurrent.futures.Executor | None
            A pool executor for parallel processing. If None, runs sequentially.
            Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor.
        display_instance_names : bool
            Whether to record and display instance names. Default is False.

        Returns
        -------
        Return a SolverEvaluationResults object which can be used to compute a report on the computed data.
        See SolverEvaluationReport for more details
        """
        names = []
        limit_rss_bytes = self.limit_gbytes * (1024 ** 3) if self.limit_gbytes is not None else None

        # data[i, j, s, k]: metric k of instance i solved by solver j with seed seeds[s]
        data = np.zeros((n_instances, len(solvers), len(seeds), len(self.metrics)))
        files = {}

        task_generator = TaskGenerator(
            solvers,
            iter(instances),
            n_instances,
            seeds,
            self.metrics,
            files,
            display_instance_names,
            self.fail_on_error,
            limit_rss_bytes
        )

        self.reporter.on_evaluation_start([str(s) for s in solvers], self.metrics)

        def _process_result(i, j, s, line, instance_name):
            # First result of an instance: open a new report line.
            if j == 0 and s == 0:
                names.append(instance_name)
                self.reporter.on_instance_start(instance_name)

            for k, d in enumerate(line):
                data[i, j, s, k] = d

            # Last seed of this solver: report the metrics averaged over seeds.
            if s == len(seeds) - 1:
                seed_block = data[i, j, :, :]
                mean_line = np.mean(seed_block, axis=0)
                self.reporter.on_solver_finish(mean_line)

            if self.callback is not None:
                self.callback(instance_name, i, j, s, data)

            # Last solver and last seed: the instance is fully processed.
            if j == len(solvers) - 1 and s == len(seeds) - 1:
                self.reporter.on_instance_end()
                if i in files:
                    files[i].close()

        # Execute tasks
        if executor is not None:
            # Map returns an iterator yielding results in the exact same order tasks were generated
            results_stream = executor.map(Evaluator._solve_wrapper, task_generator)
            for solve_res in results_stream:
                _process_result(*solve_res)
        else:
            # Sequential fallback
            for args in task_generator:
                solve_res = Evaluator._solve_wrapper(args)
                _process_result(*solve_res)

        res = SolverEvaluationResults(
            data,
            [str(s) for s in solvers],
            self.metrics,
            names if display_instance_names else None
        )

        self.reporter.on_evaluation_end(res, self.metrics, [str(s) for s in solvers])

        return res
Evaluates a set of solvers against a set of instances. Separates the configuration from the parallel execution logic.
83 def __init__( 84 self, 85 metrics: List[str], 86 fail_on_error: bool = True, 87 limit_gbytes: Optional[int] = None, 88 reporter: Optional[BaseReporter] = None, 89 callback: Callable[[str, int, int, int, np.ndarray], None] | None = None 90 ): 91 """ 92 Parameters 93 ---------- 94 fail_one_error : bool 95 Whether to raise an exception when a solver fails. 96 If True and an error occurs, the resulting metrics are all 0. 97 Default it False. 98 limit_gbytes : int | None 99 Memory limit applied to the children processes in GB. If None, no limit is applied. 100 When specified, if the child reach the memory limit, it catches the exception and cancel the solving process. 101 All the resulting metrics are 0. 102 /!\\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is used, 103 it will change the memory limit of the main process. 104 Default None. 105 reporter: Optional[BaseReporter] 106 BaseReporter used to report the results during the evalution. 107 If None, a simple ComsoleReporter is built. It prints the results of the solvers on stdout 108 callback: Callable[[str, int, int, int, np.ndarray], None] | None 109 Callback function called after an instance is solved by a solver. Take as argument the instance name, 110 the instance index, the solver index, the ndarray d containing all the results. d[i,j,s,:] contains all the 111 metrics from the solving of instances i by solver j with the seed seeds[s]. 112 """ 113 self.metrics = metrics 114 self.fail_on_error = fail_on_error 115 self.limit_gbytes = limit_gbytes 116 self.reporter = reporter if reporter is not None else ConsoleReporter() # Default to a console reporter 117 self.callback = callback
Parameters
- fail_on_error (bool): Whether to raise an exception when a solver fails. If False and an error occurs, the resulting metrics are all 0. Default is True.
- limit_gbytes (int | None): Memory limit applied to the children processes in GB. If None, no limit is applied. When specified, if the child reach the memory limit, it catches the exception and cancel the solving process. All the resulting metrics are 0. /!\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is used, it will change the memory limit of the main process. Default None.
- reporter (Optional[BaseReporter]): BaseReporter used to report the results during the evaluation. If None, a simple ConsoleReporter is built. It prints the results of the solvers on stdout.
- callback (Callable[[str, int, int, int, np.ndarray], None] | None): Callback function called after an instance is solved by a solver. Takes as arguments the instance name, the instance index, the solver index, the seed index, and the ndarray d containing all the results. d[i,j,s,:] contains all the metrics from the solving of instances i by solver j with the seed seeds[s].
174 def evaluate( 175 self, 176 solvers: List[Solver], 177 instances: Instances, 178 n_instances: int, 179 seeds: List[int] = (0,), 180 executor: Optional[concurrent.futures.Executor] = None, 181 display_instance_names: bool = False 182 ) -> SolverEvaluationResults: 183 """ 184 Executes the evaluation. 185 186 Parameters 187 ---------- 188 solvers : List[Solver] 189 List of solvers that will solve each instance 190 instances : Instances 191 Instances generator. Yields either pyscipopt Model or a str path. 192 n_instances : int 193 Number of instances to evaluate 194 seeds: List[int] 195 List of seeds used to solve an instance. 196 executor : concurrent.futures.Executor | None 197 A pool executor for parallel processing. If None, runs sequentially. 198 Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor. 199 display_instance_names : bool 200 Whether to record and display instance names. Default is False. 201 202 Returns 203 ------- 204 Return a SolverEvaluationResults object which can be used to compute a report on the computed data. 
205 See SolverEvaluationReport for more details 206 """ 207 names = [] 208 limit_rss_bytes = self.limit_gbytes * (1024 ** 3) if self.limit_gbytes is not None else None 209 210 data = np.zeros((n_instances, len(solvers), len(seeds), len(self.metrics))) 211 files = {} 212 213 task_generator = TaskGenerator( 214 solvers, 215 iter(instances), 216 n_instances, 217 seeds, 218 self.metrics, 219 files, 220 display_instance_names, 221 self.fail_on_error, 222 limit_rss_bytes 223 ) 224 225 self.reporter.on_evaluation_start([str(s) for s in solvers], self.metrics) 226 227 def _process_result(i, j, s, line, instance_name): 228 if j == 0 and s == 0: # new line 229 names.append(instance_name) 230 self.reporter.on_instance_start(instance_name) 231 232 for k, d in enumerate(line): 233 data[i, j, s, k] = d 234 235 if s == len(seeds) - 1: 236 l = data[i, j, :, :] 237 mean_line = np.mean(l, axis=0) 238 self.reporter.on_solver_finish(mean_line) 239 240 if self.callback is not None: 241 self.callback(instance_name, i, j, s, data) 242 243 if j == len(solvers) - 1 and s == len(seeds) - 1: 244 self.reporter.on_instance_end() 245 if i in files: 246 files[i].close() 247 248 # Execute tasks 249 if executor is not None: 250 # Map returns an iterator yielding results in the exact same order tasks were generated 251 results_stream = executor.map(Evaluator._solve_wrapper, task_generator) 252 for solve_res in results_stream: 253 _process_result(*solve_res) 254 else: 255 # Sequential fallback 256 for args in task_generator: 257 solve_res = Evaluator._solve_wrapper(args) 258 _process_result(*solve_res) 259 260 res = SolverEvaluationResults( 261 data, 262 [str(s) for s in solvers], 263 self.metrics, 264 names if display_instance_names else None 265 ) 266 267 self.reporter.on_evaluation_end(res, self.metrics, [str(s) for s in solvers]) 268 269 return res
Executes the evaluation.
Parameters
- solvers (List[Solver]): List of solvers that will solve each instance
- instances (Instances): Instances generator. Yields either pyscipopt Model or a str path.
- n_instances (int): Number of instances to evaluate
- seeds (List[int]): List of seeds used to solve an instance.
- executor (concurrent.futures.Executor | None): A pool executor for parallel processing. If None, runs sequentially. Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor.
- display_instance_names (bool): Whether to record and display instance names. Default is False.
Returns
- Return a SolverEvaluationResults object which can be used to compute a report on the computed data.
- See SolverEvaluationReport for more details
14class SolverEvaluationResults: 15 def __init__(self, raw_data: np.ndarray, solvers: List[str], metrics: List[str], names: List[str] | None = None): 16 self.data = raw_data 17 self.solvers = solvers 18 self.metrics = metrics 19 self.names = names 20 21 @property 22 def metric_index(self) -> dict: 23 """Returns a dictionary mapping metric names to their indices""" 24 return {metric: idx for idx, metric in enumerate(self.metrics)} 25 26 def get_metric_data(self, metric: str, std=False, count_zeros=False) -> np.ndarray: 27 """Get all data for a specific metric. Average over all the seeds (or std if std=True)""" 28 29 if metric == "names" and self.names: 30 return self.names 31 32 data = self.data[:, :, :, self.metric_index[metric]] 33 if not count_zeros: 34 mask = np.any(data.reshape(data.shape[0], -1) != 0, axis=1) 35 data = data[mask] 36 37 if std: 38 data = np.std(data, axis=2) 39 mean = self.get_metric_data(metric, False, count_zeros) 40 data = data / mean * 100 41 else: 42 data = np.median(data, axis=2) 43 return data 44 45 def aggregate(self, metric: str, aggregation_func: callable, std=False, count_zeros=False) -> np.ndarray: 46 """ 47 Apply aggregation function to a specific metric 48 Args: 49 metric: metric name to aggregate 50 aggregation_func: function to apply (e.g., np.sum, np.mean) 51 std: If True the aggregation is done on the std over the seeds. Else over the mean. 52 """ 53 res = np.array([aggregation_func(self.get_metric_data(metric, std, count_zeros)[:, i]) for i in range(len(self.solvers))]) 54 return np.nan_to_num(res) 55 56 def split_instances_over(self, metric: str, condition, require_all: bool = True): 57 """ 58 Splits instances into positives and negatives based on a condition. 59 60 Parameters 61 ---------- 62 metric : str 63 The metric to evaluate the condition against. 64 condition : callable 65 A function that returns a boolean array when applied to the metric data. 
66 require_all : bool, default True 67 If True, ALL solvers must meet the condition for an instance to be positive. 68 If False, AT LEAST ONE solver must meet the condition for an instance to be positive. 69 70 Returns 71 ------- 72 tuple 73 Two SolverEvaluationResults objects: (positives, negatives) 74 """ 75 assert metric in self.metrics, "Cannot make a split on a non-existing metric" 76 77 d = self.get_metric_data(metric, count_zeros=True) 78 79 # Apply the condition to the data 80 condition_met = np.apply_along_axis(condition, 1, d) 81 82 # Filter based on the require_all flag 83 if require_all: 84 indexes = np.where(np.all(condition_met, axis=1))[0] 85 else: 86 indexes = np.where(np.any(condition_met, axis=1))[0] 87 88 positives = self.data[indexes,] 89 negatives = np.delete(self.data, indexes, axis=0) 90 91 # For compatibility with older results 92 if not hasattr(self, 'names'): 93 self.names = None 94 95 if self.names is not None: 96 names_pos = [self.names[i] for i in indexes] 97 names_neg = [self.names[i] for i in range(len(self.names)) if i not in indexes] 98 else: 99 names_pos = None 100 names_neg = None 101 102 return ( 103 SolverEvaluationResults(positives, self.solvers, self.metrics, names_pos), 104 SolverEvaluationResults(negatives, self.solvers, self.metrics, names_neg), 105 ) 106 107 def remove_solver(self, solver: str): 108 index = self.solvers.index(solver) 109 self.data = np.delete(self.data, index, axis=1) 110 self.solvers.remove(solver) 111 112 def performance_profile(self, metric: str = "nnodes", ratios=np.arange(0, 1.00, .01), filename=None, plot=True, logx=True): 113 114 if filename: 115 backend = matplotlib.get_backend() 116 matplotlib.use('pgf') 117 118 n_instances = self.data.shape[0] 119 120 data = self.get_metric_data(metric) 121 min = np.min(data) 122 max = np.max(data) 123 124 xs = ratios * (max - min) + min 125 126 res = [] 127 for s, solver in enumerate(self.solvers): 128 ys = np.zeros(len(ratios)) 129 for i in range(n_instances): 
130 val = data[i, s] 131 indexes = np.where(val <= xs) 132 ys[indexes] += 1 133 134 ys /= n_instances 135 label = solver 136 137 if logx: 138 auc = np.trapezoid(ys, np.log(xs)) / np.log(max) 139 else: 140 auc = np.trapezoid(ys, xs) / max 141 142 res.append(auc) 143 if plot: 144 plt.plot(xs, ys, label=label) 145 146 if plot: 147 plt.legend() 148 plt.xlabel(metric) 149 plt.ylabel("frequency") 150 plt.title(f"Performance profile w.r.t. {metric}") 151 152 if logx: 153 plt.xscale("log") 154 155 if filename: 156 plt.savefig(filename) 157 matplotlib.use(backend) 158 159 else: 160 plt.show() 161 162 return np.array(res) 163 164 def compute_report(self, *aggregations: tuple[str, callable], **kwargs): 165 data = {"solver": [s for s in self.solvers]} 166 167 for i, aggregation in enumerate(aggregations): 168 data[aggregation[0]] = list(aggregation[1](self)) 169 170 return SolverEvaluationReport(data, **kwargs) 171 172 def combine_solvers(self, other): 173 """ 174 Combine the results of another SolverEvaluationResults. 175 The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances. 
176 Parameters 177 ---------- 178 other : SolverEvaluationResults 179 The result to combine 180 181 Returns 182 ------- 183 The new SolverEvaluationResults object 184 """ 185 assert self.metrics == other.metrics, "Both results have different metrics" 186 assert self.data.shape[0] == other.data.shape[0], "Both results have different number of instances" 187 assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions" 188 189 # For compatibility with older results 190 self_names = getattr(self, 'names', None) 191 other_names = getattr(other, 'names', None) 192 193 assert self_names == other_names, "Both results solved instances have different names" 194 195 solvers = self.solvers + other.solvers 196 data = np.hstack((self.data, other.data)) 197 198 return SolverEvaluationResults(data, solvers, self.metrics[:], self_names) 199 200 def combine_instances(self, other): 201 """ 202 Combine the results of another SolverEvaluationResults. 203 The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers. 204 Parameters 205 ---------- 206 other : SolverEvaluationResults 207 The result to combine 208 209 Returns 210 ------- 211 The new SolverEvaluationResults object 212 """ 213 assert self.metrics == other.metrics, "Both results have different metrics" 214 assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions" 215 assert self.solvers == other.solvers, "Both results have different solvers" 216 217 # For compatibility with older results 218 self_names = getattr(self, 'names', None) 219 other_names = getattr(other, 'names', None) 220 221 assert type(self_names) == type(other_names), \ 222 "Both results have different names structure. 
Once has names for the instances, not the other" 223 224 data = np.vstack((self.data, other.data)) 225 226 names = None 227 if self.names is not None: 228 names = self_names + other_names 229 230 return SolverEvaluationResults(data, self.solvers[:], self.metrics[:], names) 231 232 @staticmethod 233 def sg_metric(metric, s, std=False): 234 name = metric if not std else f"{metric} std (%)" 235 return (name, lambda evaluationResults: 236 evaluationResults.aggregate(metric, lambda values: shifted_geometric_mean(values, shift=s), std, count_zeros=False) 237 ) 238 239 @staticmethod 240 def nwins(metric, dir=1, count_if_not_optimal = False): 241 def get_wins(evaluationResults: SolverEvaluationResults): 242 data = evaluationResults.get_metric_data(metric, count_zeros=True) 243 gaps = evaluationResults.get_metric_data("gap", count_zeros=True) 244 res = [] 245 for i in range(len(evaluationResults.solvers)): 246 c = 0 247 for j in range(len(data[:, i])): 248 # Does not count as a win if the instance was not solved optimally. 
249 if count_if_not_optimal or gaps[j, i] == 0 or metric == "gap": 250 c += dir * data[j, i] <= dir * np.min(data[j, :]) 251 res.append(c) 252 return np.array(res) 253 254 return f"wins ({metric})", get_wins 255 256 @staticmethod 257 def nsolved(): 258 return ("nsolved", 259 lambda evaluationResults: evaluationResults.aggregate( 260 "gap", 261 lambda values: values.shape[0] - np.count_nonzero(values), 262 count_zeros=True 263 ) 264 ) 265 266 @staticmethod 267 def auc_score(metric, **kwargs): 268 return ("AUC", lambda evaluationResults: evaluationResults.performance_profile(metric, plot=False, **kwargs)) 269 270 def get_names(self): 271 return self.names 272 273 def __str__(self): 274 reporter = TextReporter() 275 res = "" 276 res += reporter.on_evaluation_start(self.solvers, self.metrics) 277 278 n_instances = self.data.shape[0] 279 n_solvers = len(self.solvers) 280 281 for i in range(n_instances): 282 instance_name = self.names[i] if self.names is not None else str(i) 283 res += reporter.on_instance_start(instance_name) 284 for j in range(n_solvers): 285 line = self.data[i, j, :, :] 286 line = np.mean(line, axis=0) 287 res += reporter.on_solver_finish(line) 288 289 res += reporter.on_instance_end() 290 291 res += reporter.on_evaluation_end(self, self.metrics, self.solvers) 292 293 return res
21 @property 22 def metric_index(self) -> dict: 23 """Returns a dictionary mapping metric names to their indices""" 24 return {metric: idx for idx, metric in enumerate(self.metrics)}
Returns a dictionary mapping metric names to their indices
26 def get_metric_data(self, metric: str, std=False, count_zeros=False) -> np.ndarray: 27 """Get all data for a specific metric. Average over all the seeds (or std if std=True)""" 28 29 if metric == "names" and self.names: 30 return self.names 31 32 data = self.data[:, :, :, self.metric_index[metric]] 33 if not count_zeros: 34 mask = np.any(data.reshape(data.shape[0], -1) != 0, axis=1) 35 data = data[mask] 36 37 if std: 38 data = np.std(data, axis=2) 39 mean = self.get_metric_data(metric, False, count_zeros) 40 data = data / mean * 100 41 else: 42 data = np.median(data, axis=2) 43 return data
Get all data for a specific metric. Median over all the seeds (or relative std in % if std=True)
45 def aggregate(self, metric: str, aggregation_func: callable, std=False, count_zeros=False) -> np.ndarray: 46 """ 47 Apply aggregation function to a specific metric 48 Args: 49 metric: metric name to aggregate 50 aggregation_func: function to apply (e.g., np.sum, np.mean) 51 std: If True the aggregation is done on the std over the seeds. Else over the mean. 52 """ 53 res = np.array([aggregation_func(self.get_metric_data(metric, std, count_zeros)[:, i]) for i in range(len(self.solvers))]) 54 return np.nan_to_num(res)
Apply aggregation function to a specific metric Args: metric: metric name to aggregate aggregation_func: function to apply (e.g., np.sum, np.mean) std: If True the aggregation is done on the std over the seeds. Else over the mean.
56 def split_instances_over(self, metric: str, condition, require_all: bool = True): 57 """ 58 Splits instances into positives and negatives based on a condition. 59 60 Parameters 61 ---------- 62 metric : str 63 The metric to evaluate the condition against. 64 condition : callable 65 A function that returns a boolean array when applied to the metric data. 66 require_all : bool, default True 67 If True, ALL solvers must meet the condition for an instance to be positive. 68 If False, AT LEAST ONE solver must meet the condition for an instance to be positive. 69 70 Returns 71 ------- 72 tuple 73 Two SolverEvaluationResults objects: (positives, negatives) 74 """ 75 assert metric in self.metrics, "Cannot make a split on a non-existing metric" 76 77 d = self.get_metric_data(metric, count_zeros=True) 78 79 # Apply the condition to the data 80 condition_met = np.apply_along_axis(condition, 1, d) 81 82 # Filter based on the require_all flag 83 if require_all: 84 indexes = np.where(np.all(condition_met, axis=1))[0] 85 else: 86 indexes = np.where(np.any(condition_met, axis=1))[0] 87 88 positives = self.data[indexes,] 89 negatives = np.delete(self.data, indexes, axis=0) 90 91 # For compatibility with older results 92 if not hasattr(self, 'names'): 93 self.names = None 94 95 if self.names is not None: 96 names_pos = [self.names[i] for i in indexes] 97 names_neg = [self.names[i] for i in range(len(self.names)) if i not in indexes] 98 else: 99 names_pos = None 100 names_neg = None 101 102 return ( 103 SolverEvaluationResults(positives, self.solvers, self.metrics, names_pos), 104 SolverEvaluationResults(negatives, self.solvers, self.metrics, names_neg), 105 )
Splits instances into positives and negatives based on a condition.
Parameters
- metric (str): The metric to evaluate the condition against.
- condition (callable): A function that returns a boolean array when applied to the metric data.
- require_all (bool, default True): If True, ALL solvers must meet the condition for an instance to be positive. If False, AT LEAST ONE solver must meet the condition for an instance to be positive.
Returns
- tuple: Two SolverEvaluationResults objects: (positives, negatives)
112 def performance_profile(self, metric: str = "nnodes", ratios=np.arange(0, 1.00, .01), filename=None, plot=True, logx=True): 113 114 if filename: 115 backend = matplotlib.get_backend() 116 matplotlib.use('pgf') 117 118 n_instances = self.data.shape[0] 119 120 data = self.get_metric_data(metric) 121 min = np.min(data) 122 max = np.max(data) 123 124 xs = ratios * (max - min) + min 125 126 res = [] 127 for s, solver in enumerate(self.solvers): 128 ys = np.zeros(len(ratios)) 129 for i in range(n_instances): 130 val = data[i, s] 131 indexes = np.where(val <= xs) 132 ys[indexes] += 1 133 134 ys /= n_instances 135 label = solver 136 137 if logx: 138 auc = np.trapezoid(ys, np.log(xs)) / np.log(max) 139 else: 140 auc = np.trapezoid(ys, xs) / max 141 142 res.append(auc) 143 if plot: 144 plt.plot(xs, ys, label=label) 145 146 if plot: 147 plt.legend() 148 plt.xlabel(metric) 149 plt.ylabel("frequency") 150 plt.title(f"Performance profile w.r.t. {metric}") 151 152 if logx: 153 plt.xscale("log") 154 155 if filename: 156 plt.savefig(filename) 157 matplotlib.use(backend) 158 159 else: 160 plt.show() 161 162 return np.array(res)
172 def combine_solvers(self, other): 173 """ 174 Combine the results of another SolverEvaluationResults. 175 The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances. 176 Parameters 177 ---------- 178 other : SolverEvaluationResults 179 The result to combine 180 181 Returns 182 ------- 183 The new SolverEvaluationResults object 184 """ 185 assert self.metrics == other.metrics, "Both results have different metrics" 186 assert self.data.shape[0] == other.data.shape[0], "Both results have different number of instances" 187 assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions" 188 189 # For compatibility with older results 190 self_names = getattr(self, 'names', None) 191 other_names = getattr(other, 'names', None) 192 193 assert self_names == other_names, "Both results solved instances have different names" 194 195 solvers = self.solvers + other.solvers 196 data = np.hstack((self.data, other.data)) 197 198 return SolverEvaluationResults(data, solvers, self.metrics[:], self_names)
Combine the results of another SolverEvaluationResults. The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances.
Parameters
- other (SolverEvaluationResults): The result to combine
Returns
- The new SolverEvaluationResults object
200 def combine_instances(self, other): 201 """ 202 Combine the results of another SolverEvaluationResults. 203 The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers. 204 Parameters 205 ---------- 206 other : SolverEvaluationResults 207 The result to combine 208 209 Returns 210 ------- 211 The new SolverEvaluationResults object 212 """ 213 assert self.metrics == other.metrics, "Both results have different metrics" 214 assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions" 215 assert self.solvers == other.solvers, "Both results have different solvers" 216 217 # For compatibility with older results 218 self_names = getattr(self, 'names', None) 219 other_names = getattr(other, 'names', None) 220 221 assert type(self_names) == type(other_names), \ 222 "Both results have different names structure. Once has names for the instances, not the other" 223 224 data = np.vstack((self.data, other.data)) 225 226 names = None 227 if self.names is not None: 228 names = self_names + other_names 229 230 return SolverEvaluationResults(data, self.solvers[:], self.metrics[:], names)
Combine the results of another SolverEvaluationResults. The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers.
Parameters
- other (SolverEvaluationResults): The result to combine
Returns
- The new SolverEvaluationResults object
239 @staticmethod 240 def nwins(metric, dir=1, count_if_not_optimal = False): 241 def get_wins(evaluationResults: SolverEvaluationResults): 242 data = evaluationResults.get_metric_data(metric, count_zeros=True) 243 gaps = evaluationResults.get_metric_data("gap", count_zeros=True) 244 res = [] 245 for i in range(len(evaluationResults.solvers)): 246 c = 0 247 for j in range(len(data[:, i])): 248 # Does not count as a win if the instance was not solved optimally. 249 if count_if_not_optimal or gaps[j, i] == 0 or metric == "gap": 250 c += dir * data[j, i] <= dir * np.min(data[j, :]) 251 res.append(c) 252 return np.array(res) 253 254 return f"wins ({metric})", get_wins
class SolverEvaluationReport:
    """
    Tabular report of solver aggregations, backed by a pandas DataFrame.
    Built either from a dict of columns (`data`, optionally grouped under `header`)
    or directly from an existing DataFrame (`df_`).
    """

    def __init__(self, data=None, header=None, df_=None):
        """
        Parameters
        ----------
        data : dict | None
            Mapping column name -> list of values; must contain a "solver" column.
        header : str | None
            Optional top-level label: columns become (header, name) pairs and the
            "solver" column is used as the index.
        df_ : pd.DataFrame | None
            Pre-built DataFrame; mutually exclusive with `data`.
        """
        assert (data is None) != (df_ is None), "Only one of data and df_ must be given"

        if df_ is not None:
            self.df = df_
            return

        if header is not None:
            # Two-level column labels (header, name); "solver" keeps an empty top level.
            data_ = {}
            for key in data:
                if key != "solver":
                    data_[(header, key)] = data[key]
                else:
                    data_[("", key)] = data[key]

        else:
            data_ = data

        self.df = pd.DataFrame(data_)
        if header is not None:
            self.df.set_index(("", "solver"), inplace=True)

    def __str__(self):
        return tabulate(self.df, headers="keys", tablefmt='grid', showindex=False)

    def to_latex(self, *args, **kwargs):
        """Render the report as a LaTeX table (delegates to pandas DataFrame.to_latex)."""
        return self.df.to_latex(index=False, *args, **kwargs)

    def __add__(self, other):
        """Concatenate two reports column-wise (same solvers) into a new report."""
        df2 = pd.concat(
            [self.df, other.df],
            axis=1
        )

        df2 = df2.reset_index().rename(columns={'index': ('', 'solver')})
        return SolverEvaluationReport(df_=df2)
299 def __init__(self, data=None, header=None, df_=None): 300 assert (data is None) != (df_ is None), "Only one of data and df_ must be given" 301 302 if df_ is not None: 303 self.df = df_ 304 return 305 306 if header is not None: 307 data_ = {} 308 for key in data: 309 if key != "solver": 310 data_[(header, key)] = data[key] 311 else: 312 data_[("", key)] = data[key] 313 314 else: 315 data_ = data 316 317 self.df = pd.DataFrame(data_) 318 if header is not None: 319 self.df.set_index(("","solver"), inplace=True)