Edit on GitHub

boundml.evaluation

1from .evaluation_tools import Evaluator
2from .solver_evaluation_results import SolverEvaluationResults, SolverEvaluationReport
3
4__all__ = ["Evaluator", "SolverEvaluationResults", "SolverEvaluationReport"]
class Evaluator:
 77class Evaluator:
 78    """
 79    Evaluates a set of solvers against a set of instances.
 80    Separates the configuration from the parallel execution logic.
 81    """
 82
 83    def __init__(
 84            self,
 85            metrics: List[str],
 86            fail_on_error: bool = True,
 87            limit_gbytes: Optional[int] = None,
 88            reporter: Optional[BaseReporter] = None,
 89            callback: Callable[[str, int, int, int, np.ndarray], None] | None = None
 90    ):
 91        """
 92        Parameters
 93        ----------
 94        fail_on_error : bool
 95            Whether to raise an exception when a solver fails.
 96            If False and an error occurs, the resulting metrics are all 0.
 97            Default is True.
 98        limit_gbytes : int | None
 99            Memory limit applied to the children processes in GB. If None, no limit is applied.
100            When specified, if the child reach the memory limit, it catches the exception and cancel the solving process.
101            All the resulting metrics are 0.
102            /!\\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is used,
103             it will change the memory limit of the main process.
104            Default None.
105        reporter: Optional[BaseReporter]
106            BaseReporter used to report the results during the evaluation.
107            If None, a simple ConsoleReporter is built. It prints the results of the solvers on stdout.
108        callback: Callable[[str, int, int, int, np.ndarray], None] | None
109            Callback function called after an instance is solved by a solver. Takes as arguments the instance name,
110            the instance index, the solver index, the seed index, and the ndarray d containing all the results.
111            d[i,j,s,:] contains all the metrics from the solving of instance i by solver j with the seed seeds[s].
112        """
113        self.metrics = metrics
114        self.fail_on_error = fail_on_error
115        self.limit_gbytes = limit_gbytes
116        self.reporter = reporter if reporter is not None else ConsoleReporter()  # Default to a console reporter
117        self.callback = callback
118
119    @staticmethod
120    def _monitor_memory(pid, limit_bytes, stop_event):
121        process = psutil.Process(pid)
122        while not stop_event.is_set():
123            try:
124                # Check strictly PHYSICAL memory (RSS)
125                rss = process.memory_info().rss
126                if rss > limit_bytes:
127                    warnings.warn(
128                        f"[{pid}] KILLED: Used {rss / 1024 ** 3:.2f} GB > Limit {limit_bytes / 1024 ** 3:.2f} GB")
129
130                    # Setting the RLIMIT_AS now will force the underlying solver to crash.
131                    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
132                    break
133            except psutil.NoSuchProcess:
134                break
135            time.sleep(1)
136
137    @staticmethod
138    def _solve(solver, prob_file_name, metrics, seed, fail_on_error, fail_on_memory_error):
139        try:
140            solver.set_seed(seed)
141            solver.solve(prob_file_name)
142            return [solver[metric] for metric in metrics]
143        except MemoryError as e:
144            print(fail_on_memory_error)
145            if fail_on_memory_error:
146                raise e
147            warnings.warn(f"Memory usage reached while solvign {prob_file_name} with {solver}")
148            return [0 for _ in metrics]
149        except Exception as e:
150            if fail_on_error:
151                raise e
152            warnings.warn(f"Error while solving {prob_file_name} with {solver}: {e}")
153            return [0 for _ in metrics]
154
155    @staticmethod
156    def _solve_wrapper(args):
157        i, j, s, seed, solver, instance_path, metrics, instance_name, fail_on_error, limit_rss_bytes = args
158
159        stop_event, watcher = None, None
160        if limit_rss_bytes is not None:
161            stop_event = threading.Event()
162            watcher = threading.Thread(target=Evaluator._monitor_memory, args=(os.getpid(), limit_rss_bytes, stop_event))
163            watcher.start()
164
165        try:
166            metrics_values = Evaluator._solve(solver, instance_path, metrics, seed, fail_on_error, limit_rss_bytes is None)
167        finally:
168            if limit_rss_bytes is not None:
169                stop_event.set()
170                watcher.join()
171
172        return i, j, s, metrics_values, instance_name
173
174    def evaluate(
175            self,
176            solvers: List[Solver],
177            instances: Instances,
178            n_instances: int,
179            seeds: List[int] = (0,),
180            executor: Optional[concurrent.futures.Executor] = None,
181            display_instance_names: bool = False
182    ) -> SolverEvaluationResults:
183        """
184        Executes the evaluation.
185
186        Parameters
187        ----------
188        solvers : List[Solver]
189            List of solvers that will solve each instance
190        instances : Instances
191            Instances generator. Yields either pyscipopt Model or a str path.
192        n_instances : int
193            Number of instances to evaluate
194        seeds: List[int]
195            List of seeds used to solve an instance.
196        executor : concurrent.futures.Executor | None
197            A pool executor for parallel processing. If None, runs sequentially.
198            Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor.
199        display_instance_names : bool
200            Whether to record and display instance names. Default is False.
201
202        Returns
203        -------
204        Return a SolverEvaluationResults object which can be used to compute a report on the computed data.
205        See SolverEvaluationReport for more details
206        """
207        names = []
208        limit_rss_bytes = self.limit_gbytes * (1024 ** 3) if self.limit_gbytes is not None else None
209
210        data = np.zeros((n_instances, len(solvers), len(seeds), len(self.metrics)))
211        files = {}
212
213        task_generator = TaskGenerator(
214            solvers,
215            iter(instances),
216            n_instances,
217            seeds,
218            self.metrics,
219            files,
220            display_instance_names,
221            self.fail_on_error,
222            limit_rss_bytes
223        )
224
225        self.reporter.on_evaluation_start([str(s) for s in solvers], self.metrics)
226
227        def _process_result(i, j, s, line, instance_name):
228            if j == 0 and s == 0:  # new line
229                names.append(instance_name)
230                self.reporter.on_instance_start(instance_name)
231
232            for k, d in enumerate(line):
233                data[i, j, s, k] = d
234
235            if s == len(seeds) - 1:
236                l = data[i, j, :, :]
237                mean_line = np.mean(l, axis=0)
238                self.reporter.on_solver_finish(mean_line)
239
240            if self.callback is not None:
241                self.callback(instance_name, i, j, s, data)
242
243            if j == len(solvers) - 1 and s == len(seeds) - 1:
244                self.reporter.on_instance_end()
245                if i in files:
246                    files[i].close()
247
248        # Execute tasks
249        if executor is not None:
250            # Map returns an iterator yielding results in the exact same order tasks were generated
251            results_stream = executor.map(Evaluator._solve_wrapper, task_generator)
252            for solve_res in results_stream:
253                _process_result(*solve_res)
254        else:
255            # Sequential fallback
256            for args in task_generator:
257                solve_res = Evaluator._solve_wrapper(args)
258                _process_result(*solve_res)
259
260        res = SolverEvaluationResults(
261            data,
262            [str(s) for s in solvers],
263            self.metrics,
264            names if display_instance_names else None
265        )
266
267        self.reporter.on_evaluation_end(res, self.metrics, [str(s) for s in solvers])
268
269        return res

Evaluates a set of solvers against a set of instances. Separates the configuration from the parallel execution logic.

Evaluator( metrics: List[str], fail_on_error: bool = True, limit_gbytes: Optional[int] = None, reporter: Optional[boundml.evaluation.reporters.BaseReporter] = None, callback: Optional[Callable[[str, int, int, int, numpy.ndarray], NoneType]] = None)
 83    def __init__(
 84            self,
 85            metrics: List[str],
 86            fail_on_error: bool = True,
 87            limit_gbytes: Optional[int] = None,
 88            reporter: Optional[BaseReporter] = None,
 89            callback: Callable[[str, int, int, int, np.ndarray], None] | None = None
 90    ):
 91        """
 92        Parameters
 93        ----------
 94        fail_on_error : bool
 95            Whether to raise an exception when a solver fails.
 96            If False and an error occurs, the resulting metrics are all 0.
 97            Default is True.
 98        limit_gbytes : int | None
 99            Memory limit applied to the children processes in GB. If None, no limit is applied.
100            When specified, if the child reach the memory limit, it catches the exception and cancel the solving process.
101            All the resulting metrics are 0.
102            /!\\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is used,
103             it will change the memory limit of the main process.
104            Default None.
105        reporter: Optional[BaseReporter]
106            BaseReporter used to report the results during the evaluation.
107            If None, a simple ConsoleReporter is built. It prints the results of the solvers on stdout.
108        callback: Callable[[str, int, int, int, np.ndarray], None] | None
109            Callback function called after an instance is solved by a solver. Takes as arguments the instance name,
110            the instance index, the solver index, the seed index, and the ndarray d containing all the results.
111            d[i,j,s,:] contains all the metrics from the solving of instance i by solver j with the seed seeds[s].
112        """
113        self.metrics = metrics
114        self.fail_on_error = fail_on_error
115        self.limit_gbytes = limit_gbytes
116        self.reporter = reporter if reporter is not None else ConsoleReporter()  # Default to a console reporter
117        self.callback = callback
Parameters
  • fail_on_error (bool): Whether to raise an exception when a solver fails. If False and an error occurs, the resulting metrics are all 0. Default is True.
  • limit_gbytes (int | None): Memory limit applied to the children processes in GB. If None, no limit is applied. When specified, if the child reaches the memory limit, it catches the exception and cancels the solving process. All the resulting metrics are 0. /!\ Unexpected behavior when no executor is given to the evaluate method. As no multiprocessing is used, it will change the memory limit of the main process. Default None.
  • reporter (Optional[BaseReporter]): BaseReporter used to report the results during the evaluation. If None, a simple ConsoleReporter is built. It prints the results of the solvers on stdout.
  • callback (Callable[[str, int, int, int, np.ndarray], None] | None): Callback function called after an instance is solved by a solver. Takes as arguments the instance name, the instance index, the solver index, the seed index, and the ndarray d containing all the results. d[i,j,s,:] contains all the metrics from the solving of instance i by solver j with the seed seeds[s].
metrics
fail_on_error
limit_gbytes
reporter
callback
def evaluate( self, solvers: List[boundml.solvers.Solver], instances: boundml.instances.Instances, n_instances: int, seeds: List[int] = (0,), executor: Optional[concurrent.futures._base.Executor] = None, display_instance_names: bool = False) -> SolverEvaluationResults:
174    def evaluate(
175            self,
176            solvers: List[Solver],
177            instances: Instances,
178            n_instances: int,
179            seeds: List[int] = (0,),
180            executor: Optional[concurrent.futures.Executor] = None,
181            display_instance_names: bool = False
182    ) -> SolverEvaluationResults:
183        """
184        Executes the evaluation.
185
186        Parameters
187        ----------
188        solvers : List[Solver]
189            List of solvers that will solve each instance
190        instances : Instances
191            Instances generator. Yields either pyscipopt Model or a str path.
192        n_instances : int
193            Number of instances to evaluate
194        seeds: List[int]
195            List of seeds used to solve an instance.
196        executor : concurrent.futures.Executor | None
197            A pool executor for parallel processing. If None, runs sequentially.
198            Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor.
199        display_instance_names : bool
200            Whether to record and display instance names. Default is False.
201
202        Returns
203        -------
204        Return a SolverEvaluationResults object which can be used to compute a report on the computed data.
205        See SolverEvaluationReport for more details
206        """
207        names = []
208        limit_rss_bytes = self.limit_gbytes * (1024 ** 3) if self.limit_gbytes is not None else None
209
210        data = np.zeros((n_instances, len(solvers), len(seeds), len(self.metrics)))
211        files = {}
212
213        task_generator = TaskGenerator(
214            solvers,
215            iter(instances),
216            n_instances,
217            seeds,
218            self.metrics,
219            files,
220            display_instance_names,
221            self.fail_on_error,
222            limit_rss_bytes
223        )
224
225        self.reporter.on_evaluation_start([str(s) for s in solvers], self.metrics)
226
227        def _process_result(i, j, s, line, instance_name):
228            if j == 0 and s == 0:  # new line
229                names.append(instance_name)
230                self.reporter.on_instance_start(instance_name)
231
232            for k, d in enumerate(line):
233                data[i, j, s, k] = d
234
235            if s == len(seeds) - 1:
236                l = data[i, j, :, :]
237                mean_line = np.mean(l, axis=0)
238                self.reporter.on_solver_finish(mean_line)
239
240            if self.callback is not None:
241                self.callback(instance_name, i, j, s, data)
242
243            if j == len(solvers) - 1 and s == len(seeds) - 1:
244                self.reporter.on_instance_end()
245                if i in files:
246                    files[i].close()
247
248        # Execute tasks
249        if executor is not None:
250            # Map returns an iterator yielding results in the exact same order tasks were generated
251            results_stream = executor.map(Evaluator._solve_wrapper, task_generator)
252            for solve_res in results_stream:
253                _process_result(*solve_res)
254        else:
255            # Sequential fallback
256            for args in task_generator:
257                solve_res = Evaluator._solve_wrapper(args)
258                _process_result(*solve_res)
259
260        res = SolverEvaluationResults(
261            data,
262            [str(s) for s in solvers],
263            self.metrics,
264            names if display_instance_names else None
265        )
266
267        self.reporter.on_evaluation_end(res, self.metrics, [str(s) for s in solvers])
268
269        return res

Executes the evaluation.

Parameters
  • solvers (List[Solver]): List of solvers that will solve each instance
  • instances (Instances): Instances generator. Yields either pyscipopt Model or a str path.
  • n_instances (int): Number of instances to evaluate
  • seeds (List[int]): List of seeds used to solve an instance.
  • executor (concurrent.futures.Executor | None): A pool executor for parallel processing. If None, runs sequentially. Compatible with ProcessPoolExecutor, ThreadPoolExecutor, or MPIPoolExecutor.
  • display_instance_names (bool): Whether to record and display instance names. Default is False.
Returns
  • Return a SolverEvaluationResults object which can be used to compute a report on the computed data.
  • See SolverEvaluationReport for more details
class SolverEvaluationResults:
 14class SolverEvaluationResults:
 15    def __init__(self, raw_data: np.ndarray, solvers: List[str], metrics: List[str], names: List[str] | None = None):
 16        self.data = raw_data
 17        self.solvers = solvers
 18        self.metrics = metrics
 19        self.names = names
 20
 21    @property
 22    def metric_index(self) -> dict:
 23        """Returns a dictionary mapping metric names to their indices"""
 24        return {metric: idx for idx, metric in enumerate(self.metrics)}
 25
 26    def get_metric_data(self, metric: str, std=False, count_zeros=False) -> np.ndarray:
 27        """Get all data for a specific metric. Median over the seeds (or relative std in % of the mean if std=True)"""
 28
 29        if metric == "names" and self.names:
 30            return self.names
 31
 32        data = self.data[:, :, :, self.metric_index[metric]]
 33        if not count_zeros:
 34            mask = np.any(data.reshape(data.shape[0], -1) != 0, axis=1)
 35            data = data[mask]
 36
 37        if std:
 38            data = np.std(data, axis=2)
 39            mean = self.get_metric_data(metric, False, count_zeros)
 40            data = data / mean * 100
 41        else:
 42            data = np.median(data, axis=2)
 43        return data
 44
 45    def aggregate(self, metric: str, aggregation_func: callable, std=False, count_zeros=False) -> np.ndarray:
 46        """
 47        Apply aggregation function to a specific metric
 48        Args:
 49            metric: metric name to aggregate
 50            aggregation_func: function to apply (e.g., np.sum, np.mean)
 51            std: If True the aggregation is done on the std over the seeds. Else over the mean.
 52        """
 53        res = np.array([aggregation_func(self.get_metric_data(metric, std, count_zeros)[:, i]) for i in range(len(self.solvers))])
 54        return np.nan_to_num(res)
 55
 56    def split_instances_over(self, metric: str, condition, require_all: bool = True):
 57        """
 58        Splits instances into positives and negatives based on a condition.
 59
 60        Parameters
 61        ----------
 62        metric : str
 63            The metric to evaluate the condition against.
 64        condition : callable
 65            A function that returns a boolean array when applied to the metric data.
 66        require_all : bool, default True
 67            If True, ALL solvers must meet the condition for an instance to be positive.
 68            If False, AT LEAST ONE solver must meet the condition for an instance to be positive.
 69
 70        Returns
 71        -------
 72        tuple
 73            Two SolverEvaluationResults objects: (positives, negatives)
 74        """
 75        assert metric in self.metrics, "Cannot make a split on a non-existing metric"
 76
 77        d = self.get_metric_data(metric, count_zeros=True)
 78
 79        # Apply the condition to the data
 80        condition_met = np.apply_along_axis(condition, 1, d)
 81
 82        # Filter based on the require_all flag
 83        if require_all:
 84            indexes = np.where(np.all(condition_met, axis=1))[0]
 85        else:
 86            indexes = np.where(np.any(condition_met, axis=1))[0]
 87
 88        positives = self.data[indexes,]
 89        negatives = np.delete(self.data, indexes, axis=0)
 90
 91        # For compatibility with older results
 92        if not hasattr(self, 'names'):
 93            self.names = None
 94
 95        if self.names is not None:
 96            names_pos = [self.names[i] for i in indexes]
 97            names_neg = [self.names[i] for i in range(len(self.names)) if i not in indexes]
 98        else:
 99            names_pos = None
100            names_neg = None
101
102        return (
103            SolverEvaluationResults(positives, self.solvers, self.metrics, names_pos),
104            SolverEvaluationResults(negatives, self.solvers, self.metrics, names_neg),
105        )
106
107    def remove_solver(self, solver: str):
108        index = self.solvers.index(solver)
109        self.data = np.delete(self.data, index, axis=1)
110        self.solvers.remove(solver)
111
112    def performance_profile(self, metric: str = "nnodes", ratios=np.arange(0, 1.00, .01), filename=None, plot=True, logx=True):
113
114        if filename:
115            backend = matplotlib.get_backend()
116            matplotlib.use('pgf')
117
118        n_instances = self.data.shape[0]
119
120        data = self.get_metric_data(metric)
121        min = np.min(data)
122        max = np.max(data)
123
124        xs = ratios * (max - min) + min
125
126        res = []
127        for s, solver in enumerate(self.solvers):
128            ys = np.zeros(len(ratios))
129            for i in range(n_instances):
130                val = data[i, s]
131                indexes = np.where(val <= xs)
132                ys[indexes] += 1
133
134            ys /= n_instances
135            label = solver
136
137            if logx:
138                auc = np.trapezoid(ys, np.log(xs)) / np.log(max)
139            else:
140                auc = np.trapezoid(ys, xs) / max
141
142            res.append(auc)
143            if plot:
144                plt.plot(xs, ys, label=label)
145
146        if plot:
147            plt.legend()
148            plt.xlabel(metric)
149            plt.ylabel("frequency")
150            plt.title(f"Performance profile w.r.t. {metric}")
151
152            if logx:
153                plt.xscale("log")
154
155            if filename:
156                plt.savefig(filename)
157                matplotlib.use(backend)
158
159            else:
160                plt.show()
161
162        return np.array(res)
163
164    def compute_report(self, *aggregations: tuple[str, callable], **kwargs):
165        data = {"solver": [s for s in self.solvers]}
166
167        for i, aggregation in enumerate(aggregations):
168            data[aggregation[0]] = list(aggregation[1](self))
169
170        return SolverEvaluationReport(data, **kwargs)
171
172    def combine_solvers(self, other):
173        """
174        Combine the results of another SolverEvaluationResults.
175        The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances.
176        Parameters
177        ----------
178        other : SolverEvaluationResults
179            The result to combine
180
181        Returns
182        -------
183        The new SolverEvaluationResults object
184        """
185        assert self.metrics == other.metrics, "Both results have different metrics"
186        assert self.data.shape[0] == other.data.shape[0], "Both results have different number of instances"
187        assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions"
188
189        # For compatibility with older results
190        self_names = getattr(self, 'names', None)
191        other_names = getattr(other, 'names', None)
192
193        assert self_names == other_names, "Both results solved instances have different names"
194
195        solvers = self.solvers + other.solvers
196        data = np.hstack((self.data, other.data))
197
198        return SolverEvaluationResults(data, solvers, self.metrics[:], self_names)
199
200    def combine_instances(self, other):
201        """
202        Combine the results of another SolverEvaluationResults.
203        The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers.
204        Parameters
205        ----------
206        other : SolverEvaluationResults
207            The result to combine
208
209        Returns
210        -------
211        The new SolverEvaluationResults object
212        """
213        assert self.metrics == other.metrics, "Both results have different metrics"
214        assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions"
215        assert self.solvers == other.solvers, "Both results have different solvers"
216
217        # For compatibility with older results
218        self_names = getattr(self, 'names', None)
219        other_names = getattr(other, 'names', None)
220
221        assert type(self_names) == type(other_names), \
222            "Both results have different names structure. Once has names for the instances, not the other"
223
224        data = np.vstack((self.data, other.data))
225
226        names = None
227        if self.names is not None:
228            names = self_names + other_names
229
230        return SolverEvaluationResults(data, self.solvers[:], self.metrics[:], names)
231
232    @staticmethod
233    def sg_metric(metric, s, std=False):
234        name = metric if not std else f"{metric} std (%)"
235        return (name, lambda evaluationResults:
236        evaluationResults.aggregate(metric, lambda values: shifted_geometric_mean(values, shift=s), std, count_zeros=False)
237                )
238
239    @staticmethod
240    def nwins(metric, dir=1, count_if_not_optimal = False):
241        def get_wins(evaluationResults: SolverEvaluationResults):
242            data = evaluationResults.get_metric_data(metric, count_zeros=True)
243            gaps = evaluationResults.get_metric_data("gap", count_zeros=True)
244            res = []
245            for i in range(len(evaluationResults.solvers)):
246                c = 0
247                for j in range(len(data[:, i])):
248                    # Does not count as a win if the instance was not solved optimally.
249                    if count_if_not_optimal or gaps[j, i] == 0 or metric == "gap":
250                        c += dir * data[j, i] <= dir * np.min(data[j, :])
251                res.append(c)
252            return np.array(res)
253
254        return f"wins ({metric})", get_wins
255
256    @staticmethod
257    def nsolved():
258        return ("nsolved",
259                lambda evaluationResults: evaluationResults.aggregate(
260                    "gap",
261                    lambda values: values.shape[0] - np.count_nonzero(values),
262                    count_zeros=True
263                )
264            )
265
266    @staticmethod
267    def auc_score(metric, **kwargs):
268        return ("AUC", lambda evaluationResults: evaluationResults.performance_profile(metric, plot=False, **kwargs))
269
270    def get_names(self):
271        return self.names
272
273    def __str__(self):
274        reporter = TextReporter()
275        res = ""
276        res += reporter.on_evaluation_start(self.solvers, self.metrics)
277
278        n_instances = self.data.shape[0]
279        n_solvers = len(self.solvers)
280
281        for i in range(n_instances):
282            instance_name = self.names[i] if self.names is not None else str(i)
283            res += reporter.on_instance_start(instance_name)
284            for j in range(n_solvers):
285                line = self.data[i, j, :, :]
286                line = np.mean(line, axis=0)
287                res += reporter.on_solver_finish(line)
288
289            res += reporter.on_instance_end()
290
291        res += reporter.on_evaluation_end(self, self.metrics, self.solvers)
292
293        return res
SolverEvaluationResults( raw_data: numpy.ndarray, solvers: List[str], metrics: List[str], names: Optional[List[str]] = None)
15    def __init__(self, raw_data: np.ndarray, solvers: List[str], metrics: List[str], names: List[str] | None = None):
16        self.data = raw_data
17        self.solvers = solvers
18        self.metrics = metrics
19        self.names = names
data
solvers
metrics
names
metric_index: dict
21    @property
22    def metric_index(self) -> dict:
23        """Returns a dictionary mapping metric names to their indices"""
24        return {metric: idx for idx, metric in enumerate(self.metrics)}

Returns a dictionary mapping metric names to their indices

def get_metric_data(self, metric: str, std=False, count_zeros=False) -> numpy.ndarray:
26    def get_metric_data(self, metric: str, std=False, count_zeros=False) -> np.ndarray:
27        """Get all data for a specific metric. Median over the seeds (or relative std in % of the mean if std=True)"""
28
29        if metric == "names" and self.names:
30            return self.names
31
32        data = self.data[:, :, :, self.metric_index[metric]]
33        if not count_zeros:
34            mask = np.any(data.reshape(data.shape[0], -1) != 0, axis=1)
35            data = data[mask]
36
37        if std:
38            data = np.std(data, axis=2)
39            mean = self.get_metric_data(metric, False, count_zeros)
40            data = data / mean * 100
41        else:
42            data = np.median(data, axis=2)
43        return data

Get all data for a specific metric. Median over the seeds (or relative std in % of the mean if std=True)

def aggregate( self, metric: str, aggregation_func: <built-in function callable>, std=False, count_zeros=False) -> numpy.ndarray:
45    def aggregate(self, metric: str, aggregation_func: callable, std=False, count_zeros=False) -> np.ndarray:
46        """
47        Apply aggregation function to a specific metric
48        Args:
49            metric: metric name to aggregate
50            aggregation_func: function to apply (e.g., np.sum, np.mean)
51            std: If True the aggregation is done on the std over the seeds. Else over the mean.
52        """
53        res = np.array([aggregation_func(self.get_metric_data(metric, std, count_zeros)[:, i]) for i in range(len(self.solvers))])
54        return np.nan_to_num(res)

Apply aggregation function to a specific metric Args: metric: metric name to aggregate aggregation_func: function to apply (e.g., np.sum, np.mean) std: If True the aggregation is done on the std over the seeds. Else over the mean.

def split_instances_over(self, metric: str, condition, require_all: bool = True):
def split_instances_over(self, metric: str, condition, require_all: bool = True):
    """
    Splits instances into positives and negatives based on a condition.

    Parameters
    ----------
    metric : str
        The metric to evaluate the condition against.
    condition : callable
        A function that returns a boolean array when applied to the metric data.
    require_all : bool, default True
        If True, ALL solvers must meet the condition for an instance to be positive.
        If False, AT LEAST ONE solver must meet the condition for an instance to be positive.

    Returns
    -------
    tuple
        Two SolverEvaluationResults objects: (positives, negatives)
    """
    assert metric in self.metrics, "Cannot make a split on a non-existing metric"

    # count_zeros=True keeps failed (all-zero) instances so row indices
    # stay aligned with self.data along axis 0.
    d = self.get_metric_data(metric, count_zeros=True)

    # Apply the condition to the data, once per instance row.
    # NOTE(review): assumes `condition` maps a row of per-solver values to a
    # boolean per solver — confirm against callers.
    condition_met = np.apply_along_axis(condition, 1, d)

    # Filter based on the require_all flag
    if require_all:
        indexes = np.where(np.all(condition_met, axis=1))[0]
    else:
        indexes = np.where(np.any(condition_met, axis=1))[0]

    positives = self.data[indexes,]
    negatives = np.delete(self.data, indexes, axis=0)

    # For compatibility with older results (pickled before `names` existed)
    if not hasattr(self, 'names'):
        self.names = None

    if self.names is not None:
        names_pos = [self.names[i] for i in indexes]
        names_neg = [self.names[i] for i in range(len(self.names)) if i not in indexes]
    else:
        names_pos = None
        names_neg = None

    return (
        SolverEvaluationResults(positives, self.solvers, self.metrics, names_pos),
        SolverEvaluationResults(negatives, self.solvers, self.metrics, names_neg),
    )

Splits instances into positives and negatives based on a condition.

Parameters
  • metric (str): The metric to evaluate the condition against.
  • condition (callable): A function that returns a boolean array when applied to the metric data.
  • require_all (bool, default True): If True, ALL solvers must meet the condition for an instance to be positive. If False, AT LEAST ONE solver must meet the condition for an instance to be positive.
Returns
  • tuple: Two SolverEvaluationResults objects: (positives, negatives)
def remove_solver(self, solver: str):
def remove_solver(self, solver: str):
    """Drop `solver` (looked up by name) from the data array and solver list."""
    position = self.solvers.index(solver)
    self.data = np.delete(self.data, position, axis=1)
    del self.solvers[position]
def performance_profile(self, metric: str = 'nnodes', ratios=np.arange(0, 1.0, 0.01), filename=None, plot=True, logx=True):
def performance_profile(self, metric: str = "nnodes", ratios=None, filename=None, plot=True, logx=True):
    """
    Compute (and optionally plot) a performance profile over `metric`.

    For each threshold x spanning [min, max] of the metric values, counts the
    fraction of instances each solver solved with a value <= x, then scores
    each solver by the area under that curve.

    Parameters
    ----------
    metric : str
        Metric the profile is computed on.
    ratios : array-like | None
        Relative positions of the thresholds in [min, max]. Defaults to
        np.arange(0, 1.00, .01). (None sentinel instead of a mutable
        array default; behavior is unchanged for callers passing nothing.)
    filename : str | None
        If given, the figure is exported with the pgf backend instead of shown.
    plot : bool
        If False, only the AUC scores are computed.
    logx : bool
        Use a log-scaled x axis (both for the plot and the AUC integration).

    Returns
    -------
    np.ndarray with one AUC score per solver.
    """
    if ratios is None:
        ratios = np.arange(0, 1.00, .01)

    if filename:
        # Temporarily switch to the pgf backend for file export.
        backend = matplotlib.get_backend()
        matplotlib.use('pgf')

    n_instances = self.data.shape[0]

    data = self.get_metric_data(metric)
    # Renamed from `min`/`max`: never shadow the builtins.
    lo = np.min(data)
    hi = np.max(data)

    xs = ratios * (hi - lo) + lo

    # np.trapezoid is the NumPy >= 2.0 name of np.trapz; support both so the
    # code does not hard-require NumPy 2.
    trapezoid = getattr(np, "trapezoid", None) or getattr(np, "trapz")

    res = []
    for s, solver in enumerate(self.solvers):
        ys = np.zeros(len(ratios))
        for i in range(n_instances):
            val = data[i, s]
            # Instance i counts as solved for every threshold >= its value.
            indexes = np.where(val <= xs)
            ys[indexes] += 1

        ys /= n_instances
        label = solver

        if logx:
            auc = trapezoid(ys, np.log(xs)) / np.log(hi)
        else:
            auc = trapezoid(ys, xs) / hi

        res.append(auc)
        if plot:
            plt.plot(xs, ys, label=label)

    if plot:
        plt.legend()
        plt.xlabel(metric)
        plt.ylabel("frequency")
        plt.title(f"Performance profile w.r.t. {metric}")

        if logx:
            plt.xscale("log")

        if filename:
            plt.savefig(filename)
            matplotlib.use(backend)

        else:
            plt.show()

    return np.array(res)
def compute_report(self, *aggregations: tuple[str, callable], **kwargs):
def compute_report(self, *aggregations: tuple[str, callable], **kwargs):
    """
    Build a SolverEvaluationReport from (column_name, aggregation_function) pairs.

    Parameters
    ----------
    aggregations : tuple[str, callable]
        Each pair is (column name, function taking this SolverEvaluationResults
        and returning one value per solver).
    kwargs
        Forwarded to the SolverEvaluationReport constructor (e.g. header).
    """
    data = {"solver": list(self.solvers)}

    # Unpack the pairs directly; the original enumerate() index was unused.
    for name, aggregation_func in aggregations:
        data[name] = list(aggregation_func(self))

    return SolverEvaluationReport(data, **kwargs)
def combine_solvers(self, other):
def combine_solvers(self, other):
    """
    Combine the results of another SolverEvaluationResults.
    The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances.
    Parameters
    ----------
    other : SolverEvaluationResults
        The result to combine

    Returns
    -------
    The new SolverEvaluationResults object
    """
    assert self.metrics == other.metrics, "Both results have different metrics"
    assert self.data.shape[0] == other.data.shape[0], "Both results have different number of instances"
    assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions"

    # getattr guards support results pickled before `names` existed.
    self_names = getattr(self, 'names', None)
    other_names = getattr(other, 'names', None)

    assert self_names == other_names, "Both results solved instances have different names"

    # Concatenate along the solver axis (axis 1) and merge the solver lists.
    merged_solvers = self.solvers + other.solvers
    merged_data = np.hstack((self.data, other.data))

    return SolverEvaluationResults(merged_data, merged_solvers, self.metrics[:], self_names)

Combine the results of another SolverEvaluationResults. The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same instances.

Parameters
  • other (SolverEvaluationResults): The result to combine
Returns
  • The new SolverEvaluationResults object
def combine_instances(self, other):
def combine_instances(self, other):
    """
    Combine the results of another SolverEvaluationResults.
    The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers.
    Parameters
    ----------
    other : SolverEvaluationResults
        The result to combine

    Returns
    -------
    The new SolverEvaluationResults object
    """
    assert self.metrics == other.metrics, "Both results have different metrics"
    assert self.data.shape[3] == other.data.shape[3], "Both results have different number of seeds repetitions"
    assert self.solvers == other.solvers, "Both results have different solvers"

    # For compatibility with older results that predate the `names` attribute
    self_names = getattr(self, 'names', None)
    other_names = getattr(other, 'names', None)

    assert type(self_names) == type(other_names), \
        "Both results have different names structure. One has names for the instances, not the other"

    # Stack along the instance axis (axis 0).
    data = np.vstack((self.data, other.data))

    names = None
    # Bug fix: use the getattr-guarded `self_names` here. The original read
    # `self.names`, which raised AttributeError for older results even though
    # the guard above was written precisely for them.
    if self_names is not None:
        names = self_names + other_names

    return SolverEvaluationResults(data, self.solvers[:], self.metrics[:], names)

Combine the results of another SolverEvaluationResults. The 2 SolverEvaluationResults (self and other) must have the same metrics and be for the same solvers.

Parameters
  • other (SolverEvaluationResults): The result to combine
Returns
  • The new SolverEvaluationResults object
@staticmethod
def sg_metric(metric, s, std=False):
232    @staticmethod
233    def sg_metric(metric, s, std=False):
234        name = metric if not std else f"{metric} std (%)"
235        return (name, lambda evaluationResults:
236        evaluationResults.aggregate(metric, lambda values: shifted_geometric_mean(values, shift=s), std, count_zeros=False)
237                )
@staticmethod
def nwins(metric, dir=1, count_if_not_optimal=False):
239    @staticmethod
240    def nwins(metric, dir=1, count_if_not_optimal = False):
241        def get_wins(evaluationResults: SolverEvaluationResults):
242            data = evaluationResults.get_metric_data(metric, count_zeros=True)
243            gaps = evaluationResults.get_metric_data("gap", count_zeros=True)
244            res = []
245            for i in range(len(evaluationResults.solvers)):
246                c = 0
247                for j in range(len(data[:, i])):
248                    # Does not count as a win if the instance was not solved optimally.
249                    if count_if_not_optimal or gaps[j, i] == 0 or metric == "gap":
250                        c += dir * data[j, i] <= dir * np.min(data[j, :])
251                res.append(c)
252            return np.array(res)
253
254        return f"wins ({metric})", get_wins
@staticmethod
def nsolved():
256    @staticmethod
257    def nsolved():
258        return ("nsolved",
259                lambda evaluationResults: evaluationResults.aggregate(
260                    "gap",
261                    lambda values: values.shape[0] - np.count_nonzero(values),
262                    count_zeros=True
263                )
264            )
@staticmethod
def auc_score(metric, **kwargs):
266    @staticmethod
267    def auc_score(metric, **kwargs):
268        return ("AUC", lambda evaluationResults: evaluationResults.performance_profile(metric, plot=False, **kwargs))
def get_names(self):
def get_names(self):
    """Return the stored list of solved-instance names (may be None)."""
    return self.names
class SolverEvaluationReport:
class SolverEvaluationReport:
    """
    Tabular report of per-solver aggregation results, backed by a pandas DataFrame.

    Exactly one of `data` or `df_` must be given. With `header`, every column
    except "solver" is grouped under it (MultiIndex columns) and the "solver"
    column becomes the index.
    """

    def __init__(self, data=None, header=None, df_=None):
        assert (data is None) != (df_ is None), "Only one of data and df_ must be given"

        if df_ is not None:
            # Wrap an already-built DataFrame (used by __add__).
            self.df = df_
            return

        if header is not None:
            # Group every column except "solver" under the given header.
            data_ = {}
            for key in data:
                if key != "solver":
                    data_[(header, key)] = data[key]
                else:
                    data_[("", key)] = data[key]
        else:
            data_ = data

        self.df = pd.DataFrame(data_)
        if header is not None:
            self.df.set_index(("", "solver"), inplace=True)

    def __str__(self):
        return tabulate(self.df, headers="keys", tablefmt='grid', showindex=False)

    def to_latex(self, *args, **kwargs):
        """Render the report as LaTeX. `index` defaults to False but may now
        be overridden by the caller (the original `to_latex(index=False,
        *args, **kwargs)` raised TypeError on a caller-supplied index)."""
        kwargs.setdefault("index", False)
        return self.df.to_latex(*args, **kwargs)

    def __add__(self, other):
        """Concatenate two reports column-wise into a new report."""
        # Removed leftover debug prints of both DataFrames.
        df2 = pd.concat(
            [self.df, other.df],
            axis=1
        )

        df2 = df2.reset_index().rename(columns={'index': ('', 'solver')})
        return SolverEvaluationReport(df_=df2)
SolverEvaluationReport(data=None, header=None, df_=None)
299    def __init__(self, data=None, header=None, df_=None):
300        assert (data is None) != (df_ is None), "Only one of data and df_ must be given"
301
302        if df_ is not None:
303            self.df = df_
304            return
305
306        if header is not None:
307            data_ = {}
308            for key in data:
309                if key != "solver":
310                    data_[(header, key)] = data[key]
311                else:
312                    data_[("", key)] = data[key]
313
314        else:
315            data_ = data
316
317        self.df = pd.DataFrame(data_)
318        if header is not None:
319            self.df.set_index(("","solver"), inplace=True)
df
def to_latex(self, *args, **kwargs):
324    def to_latex(self, *args, **kwargs):
325        return self.df.to_latex(index=False, *args, **kwargs)