boundml.ml
class BranchingDatasetGenerator:
Class to generate a dataset from branching decisions. Useful for later training a model to imitate a strategy.
BranchingDatasetGenerator(instances: boundml.instances.Instances, expert_strategy: boundml.components.ScoringBranchingStrategy, state_component_observer: boundml.components.Component, exploration_strategy: boundml.components.ScoringBranchingStrategy = Pseudocosts(), expert_probability: float = 0.1, seed=None, sample_counter: int = 0, episode_counter: int = 0, **kwargs)
```python
def __init__(self, instances: Instances, expert_strategy: ScoringBranchingStrategy,
             state_component_observer: Component,
             exploration_strategy: ScoringBranchingStrategy = Pseudocosts(),
             expert_probability: float = 0.1,
             seed=None, sample_counter: int = 0, episode_counter: int = 0, **kwargs):
    self.rng = np.random.default_rng(seed)

    # Query the expert with probability expert_probability; the second
    # predicate always matches, so the exploration strategy is the fallback.
    strategy = ConditionalBranchingComponent(
        (expert_strategy, lambda _: self.rng.random() < expert_probability),
        (exploration_strategy, lambda _: True),
    )

    self.storer = DatasetStorer(expert_strategy, strategy, state_component_observer, sample_counter)

    self.solver = ModularSolver(self.storer, **kwargs)

    self.instances = instances
    self.episode_counter = episode_counter

    # Skip the first episode_counter instances if not 0 (resume support)
    for _ in range(self.episode_counter):
        next(self.instances)
```
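The two predicates above implement an expert/exploration mix: the expert is queried with probability `expert_probability`, and the exploration strategy serves as the catch-all fallback. A minimal standalone sketch of that selection rule (plain Python for illustration, not boundml API):

```python
import numpy as np

rng = np.random.default_rng(0)
expert_probability = 0.1

def pick_strategy():
    # Mirrors the two predicates passed to ConditionalBranchingComponent:
    # the first matches with probability expert_probability, the second always.
    if rng.random() < expert_probability:
        return "expert"
    return "exploration"

counts = {"expert": 0, "exploration": 0}
for _ in range(10_000):
    counts[pick_strategy()] += 1
print(counts)  # roughly 1,000 expert picks vs 9,000 exploration picks
```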
def generate(self, folder_name: str, max_samples: int = -1, max_instances: int = -1, sample_prefix: str = '')
```python
def generate(self, folder_name: str, max_samples: int = -1, max_instances: int = -1, sample_prefix: str = ""):
    """
    Generate the dataset

    Parameters
    ----------
    folder_name : str
        Folder name to store the samples
    max_samples : int
        Maximum number of samples to generate
    max_instances : int
        Maximum number of instances used for the generation
    sample_prefix : str
        Prefix name of the generated samples. Useful if generation is done in parallel
    """
    Path(folder_name).mkdir(parents=True, exist_ok=True)
    assert max_samples > 0 or max_instances > 0, "One of these parameters must be > 0"
    self.storer.setup(folder_name, max_samples, sample_prefix)
    sample_counter = self.storer.sample_counter
    count = 0
    while (max_samples < 0 or sample_counter < max_samples) and (max_instances < 0 or count < max_instances):
        instance = next(self.instances)
        self.episode_counter += 1
        count += 1
        self.solver.solve_model(instance)

        sample_counter = self.storer.sample_counter
        print(f"Episode {self.episode_counter}, {sample_counter} samples collected so far")
```
Generate the dataset.

Parameters
- folder_name (str): Folder in which to store the samples
- max_samples (int): Maximum number of samples to generate (-1 for no limit)
- max_instances (int): Maximum number of instances used for the generation (-1 for no limit)
- sample_prefix (str): Prefix for the generated sample names. Useful when several generations run in parallel
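A usage sketch for the class; `my_instances`, `my_expert`, and `my_observer` are placeholders for whatever `Instances`, `ScoringBranchingStrategy`, and `Component` objects your setup provides, and the import path simply follows the module header above:

```python
from boundml.ml import BranchingDatasetGenerator

generator = BranchingDatasetGenerator(
    instances=my_instances,            # placeholder: any Instances iterator
    expert_strategy=my_expert,         # placeholder: strategy to imitate
    state_component_observer=my_observer,  # placeholder: state observer Component
    expert_probability=0.1,
    seed=42,
)
# Stop after 1000 samples, regardless of how many instances were consumed.
# The prefix keeps sample names from parallel workers distinct (illustrative).
generator.generate("samples/", max_samples=1000, sample_prefix="w0_")
```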
def load_policy(filename, try_use_gpu=False, **kwargs)
```python
def load_policy(filename, try_use_gpu=False, **kwargs):
    device = torch.device("cuda" if try_use_gpu and torch.cuda.is_available() else "cpu")
    policy = GNNPolicy(**kwargs).to(device)
    # Remap tensors saved on GPU back to CPU when CUDA is unavailable
    map_location = torch.device("cpu") if device.type == "cpu" else None
    policy.load_state_dict(torch.load(filename, map_location=map_location))
    return policy
```
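A hypothetical call, assuming a checkpoint produced by `train()` below:

```python
policy = load_policy("agent.pkl", try_use_gpu=True)
policy.eval()  # standard torch inference mode before using the policy at solve time
```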
def train(sample_folder: str, learning_rate: float, n_epochs: int, output='agent.pkl', policy=None, dataset=GraphDataset, patience=5, reduce_factor=0.2, n_epochs_without_improvement=None, **kwargs)
```python
def train(sample_folder: str, learning_rate: float, n_epochs: int, output="agent.pkl", policy=None,
          dataset=GraphDataset, patience=5, reduce_factor=0.2, n_epochs_without_improvement=None, **kwargs):

    if n_epochs_without_improvement is None:
        n_epochs_without_improvement = n_epochs

    # 80/20 split of the sample files into training and validation sets
    sample_files = get_sample_files(sample_folder)
    train_files = sample_files[: int(0.8 * len(sample_files))]
    valid_files = sample_files[int(0.8 * len(sample_files)):]

    train_data = dataset(train_files)
    train_loader = torch_geometric.loader.DataLoader(train_data, batch_size=32, shuffle=True)
    valid_data = dataset(valid_files)
    valid_loader = torch_geometric.loader.DataLoader(valid_data, batch_size=32, shuffle=False)

    if policy is None:
        policy = GNNPolicy(**kwargs)
    policy = policy.to(get_device())

    optimizer = torch.optim.Adam(policy.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=reduce_factor, patience=patience, threshold=1e-3)

    best_loss = float("inf")
    epochs_since_improvement = 0

    for epoch in range(n_epochs):
        lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch + 1}, lr {lr}")

        train_loss, train_acc = process(policy, train_loader, optimizer)
        print(f"Train loss: {train_loss:0.3f}, accuracy {train_acc:0.3f}")

        valid_loss, valid_acc = process(policy, valid_loader, None)
        print(f"Valid loss: {valid_loss:0.3f}, accuracy {valid_acc:0.3f}")

        # Reduce the learning rate when the validation loss plateaus
        scheduler.step(valid_loss)

        # The latest weights are saved after every epoch
        torch.save(policy.state_dict(), output)

        # Early stopping: give up after n_epochs_without_improvement stale epochs
        epochs_since_improvement += 1
        if valid_loss < best_loss:
            best_loss = valid_loss
            epochs_since_improvement = 0

        if epochs_since_improvement >= n_epochs_without_improvement:
            return policy
    return policy
```
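A hypothetical invocation on a folder produced by `BranchingDatasetGenerator.generate`; all hyperparameter values are illustrative:

```python
policy = train(
    "samples/",
    learning_rate=1e-3,
    n_epochs=100,
    output="agent.pkl",
    patience=5,                       # epochs of plateau before the LR is reduced
    n_epochs_without_improvement=20,  # early-stopping budget
)
```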
class BipartiteNodeData(torch_geometric.data.Data):
```python
class BipartiteNodeData(torch_geometric.data.Data):
    """
    This class encodes a node bipartite graph observation as returned by the `ecole.observation.NodeBipartite`
    observation function in a format understood by the pytorch geometric data handlers.
    """

    def __init__(
        self,
        constraint_features,
        edge_indices,
        edge_features,
        variable_features,
        tree_features,
        candidates,
        nb_candidates,
        candidate_choice,
        candidate_scores,
    ):
        super().__init__()
        self.constraint_features = constraint_features
        self.edge_index = edge_indices
        self.edge_attr = edge_features
        self.variable_features = variable_features
        self.tree_features = tree_features
        self.candidates = candidates
        if variable_features is not None:
            self.n_nodes = variable_features.size()[0]
        else:
            self.n_nodes = 0
        self.nb_candidates = nb_candidates
        self.candidate_choices = candidate_choice
        self.candidate_scores = candidate_scores

    def __inc__(self, key, value, store, *args, **kwargs):
        """
        We overload the pytorch geometric method that tells how to increment indices when concatenating graphs
        for those entries (edge index, candidates) for which this is not obvious.
        """
        if key == "edge_index":
            return torch.tensor(
                [[self.constraint_features.size(0)], [self.variable_features.size(0)]]
            )
        elif key == "candidates":
            return self.variable_features.size(0)
        else:
            return super().__inc__(key, value, *args, **kwargs)
```
This class encodes a node bipartite graph observation as returned by the ecole.observation.NodeBipartite
observation function in a format understood by the pytorch geometric data handlers.
BipartiteNodeData(constraint_features, edge_indices, edge_features, variable_features, tree_features, candidates, nb_candidates, candidate_choice, candidate_scores)
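To see why the `edge_index` increment matters, here is a small self-contained sketch of what happens when pytorch geometric appends a second bipartite graph to a batch:

```python
import torch

# Suppose graph A has 3 constraint nodes and 4 variable nodes. When graph B is
# appended to the batch, each of B's (constraint, variable) edge endpoints must
# be offset by A's node counts so they still point at B's own nodes.
edge_index_b = torch.tensor([[0, 1],    # constraint endpoints of B's edges
                             [2, 3]])   # variable endpoints of B's edges
offset = torch.tensor([[3], [4]])       # what __inc__ returns for "edge_index"
print(edge_index_b + offset)            # tensor([[3, 4], [6, 7]])
```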