boundml.ml
class BranchingDatasetGenerator:
class BranchingDatasetGenerator:
    """
    Class to generate a dataset from branching decisions. Useful for later training a model to imitate a strategy.
    """

    def __init__(self, instances: Instances, expert_strategy: ScoringBranchingStrategy,
                 state_component_observer: Component,
                 exploration_strategy: ScoringBranchingStrategy = Pseudocosts(), expert_probability: float = 0.1,
                 seed=None, sample_counter: int = 0, episode_counter: int = 0, **kwargs):
        self.rng = np.random.default_rng(seed)

        # Branch like the expert with probability expert_probability,
        # otherwise fall back on the exploration strategy
        strategy = ConditionalBranchingComponent(
            (expert_strategy, lambda _: self.rng.random() < expert_probability),
            (exploration_strategy, lambda _: True))

        self.storer = DatasetStorer(expert_strategy, strategy, state_component_observer, sample_counter)

        self.solver = ModularSolver(self.storer, **kwargs)

        self.instances = instances
        self.episode_counter = episode_counter

        # Skip the first episode_counter instances if not 0
        for _ in range(self.episode_counter):
            next(self.instances)

    def generate(self, folder_name: str, max_samples: int = -1, max_instances: int = -1, sample_prefix: str = ""):
        """
        Generate the dataset

        Parameters
        ----------
        folder_name : str
            Folder name to store the samples
        max_samples : int
            Maximum number of samples to generate
        max_instances : int
            Maximum number of instances used for the generation
        sample_prefix : str
            Prefix name of the generated samples. Useful when running several generations in parallel
        """
        Path(folder_name).mkdir(parents=True, exist_ok=True)
        assert max_samples > 0 or max_instances > 0, "One of these parameters must be > 0"
        self.storer.setup(folder_name, max_samples, sample_prefix)
        sample_counter = self.storer.sample_counter
        count = 0
        while (max_samples < 0 or sample_counter < max_samples) and (max_instances < 0 or count < max_instances):
            instance = next(self.instances)
            self.episode_counter += 1
            count += 1
            self.solver.solve_model(instance)

            sample_counter = self.storer.sample_counter
            print(f"Episode {self.episode_counter}, {sample_counter} samples collected so far")
Class to generate a dataset from branching decisions. Useful for later training a model to imitate a strategy.
BranchingDatasetGenerator(instances: boundml.instances.Instances, expert_strategy: boundml.components.ScoringBranchingStrategy, state_component_observer: boundml.components.Component, exploration_strategy: boundml.components.ScoringBranchingStrategy = Pseudocosts(), expert_probability: float = 0.1, seed=None, sample_counter: int = 0, episode_counter: int = 0, **kwargs)
def generate(self, folder_name: str, max_samples: int = -1, max_instances: int = -1, sample_prefix: str = '')
Generate the dataset
Parameters
- folder_name (str): Folder name to store the samples
- max_samples (int): Maximum number of samples to generate
- max_instances (int): Maximum number of instances used for the generation
- sample_prefix (str): Prefix name of the generated samples. Useful when running several generations in parallel
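A minimal usage sketch (the import path follows this module, boundml.ml). Here my_instances and observer are placeholders for a concrete Instances iterator and a state-observing Component, and Pseudocosts() merely stands in for a real expert strategy such as strong branching:

from boundml.components import Pseudocosts
from boundml.ml import BranchingDatasetGenerator

# Placeholders: my_instances (an Instances iterator) and observer (a Component
# that extracts the state to learn from) must be concrete boundml objects.
generator = BranchingDatasetGenerator(
    instances=my_instances,
    expert_strategy=Pseudocosts(),  # stand-in for the real expert to imitate
    state_component_observer=observer,
    expert_probability=0.1,         # query the expert on ~10% of decisions
    seed=42,
)
generator.generate("samples/", max_samples=10_000, sample_prefix="worker0_")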
def load_policy(filename, try_use_gpu=False, **kwargs):
def load_policy(filename, try_use_gpu=False, **kwargs):
    device = torch.device("cuda" if try_use_gpu and torch.cuda.is_available() else "cpu")
    policy = GNNPolicy(**kwargs).to(device)
    # Remap GPU-saved checkpoints onto the CPU when no GPU is used
    if device.type == "cpu":
        map_location = torch.device('cpu')
    else:
        map_location = None
    policy.load_state_dict(torch.load(filename, map_location=map_location))
    return policy
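A minimal loading sketch; "agent.pkl" is assumed to be a checkpoint written by train below, and any extra keyword arguments are forwarded to GNNPolicy:

# Load the trained parameters, falling back to the CPU if no GPU is available.
policy = load_policy("agent.pkl", try_use_gpu=True)
policy.eval()  # switch to inference mode before using the policy in a solver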
def train(sample_folder: str, learning_rate: float, n_epochs: int, output='agent.pkl', policy=None, dataset=GraphDataset, **kwargs)
def train(sample_folder: str, learning_rate: float, n_epochs: int, output="agent.pkl", policy=None,
          dataset=GraphDataset, **kwargs):
    # 80/20 split of the sample files into training and validation sets
    sample_files = get_sample_files(sample_folder)
    train_files = sample_files[: int(0.8 * len(sample_files))]
    valid_files = sample_files[int(0.8 * len(sample_files)):]

    train_data = dataset(train_files)
    train_loader = torch_geometric.loader.DataLoader(train_data, batch_size=32, shuffle=True)
    valid_data = dataset(valid_files)
    valid_loader = torch_geometric.loader.DataLoader(valid_data, batch_size=32, shuffle=False)

    if policy is None:
        policy = GNNPolicy(**kwargs)
    policy = policy.to(get_device())

    optimizer = torch.optim.Adam(policy.parameters(), lr=learning_rate)
    # Shrink the learning rate when the validation loss stops improving
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=5, threshold=1e-2)
    for epoch in range(n_epochs):
        lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch + 1}, lr {lr}")

        train_loss, train_acc = process(policy, train_loader, optimizer)
        print(f"Train loss: {train_loss:0.3f}, accuracy {train_acc:0.3f}")

        valid_loss, valid_acc = process(policy, valid_loader, None)
        print(f"Valid loss: {valid_loss:0.3f}, accuracy {valid_acc:0.3f}")

        scheduler.step(valid_loss)

    torch.save(policy.state_dict(), output)

    return policy
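A minimal training sketch, assuming samples/ holds samples produced by BranchingDatasetGenerator; the hyperparameters are illustrative, not tuned:

# Train a fresh GNNPolicy for 20 epochs and checkpoint it to agent.pkl.
policy = train(
    sample_folder="samples/",
    learning_rate=1e-3,
    n_epochs=20,
    output="agent.pkl",
)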
class BipartiteNodeData(torch_geometric.data.Data):
class BipartiteNodeData(torch_geometric.data.Data):
    """
    This class encodes a node bipartite graph observation as returned by the `ecole.observation.NodeBipartite`
    observation function in a format understood by the pytorch geometric data handlers.
    """

    def __init__(
        self,
        constraint_features,
        edge_indices,
        edge_features,
        variable_features,
        tree_features,
        candidates,
        nb_candidates,
        candidate_choice,
        candidate_scores,
    ):
        super().__init__()
        self.constraint_features = constraint_features
        self.edge_index = edge_indices
        self.edge_attr = edge_features
        self.variable_features = variable_features
        self.tree_features = tree_features
        self.candidates = candidates
        if variable_features is not None:
            self.n_nodes = variable_features.size()[0]
        else:
            self.n_nodes = 0
        self.nb_candidates = nb_candidates
        self.candidate_choices = candidate_choice
        self.candidate_scores = candidate_scores

    def __inc__(self, key, value, store, *args, **kwargs):
        """
        We overload the pytorch geometric method that tells how to increment indices when concatenating graphs
        for those entries (edge index, candidates) for which this is not obvious.
        """
        if key == "edge_index":
            return torch.tensor(
                [[self.constraint_features.size(0)], [self.variable_features.size(0)]]
            )
        elif key == "candidates":
            return self.variable_features.size(0)
        else:
            return super().__inc__(key, value, *args, **kwargs)
This class encodes a node bipartite graph observation, as returned by the ecole.observation.NodeBipartite
observation function, in a format understood by the PyTorch Geometric data handlers.
BipartiteNodeData(constraint_features, edge_indices, edge_features, variable_features, tree_features, candidates, nb_candidates, candidate_choice, candidate_scores)
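The __inc__ override is what makes batching correct: when PyTorch Geometric concatenates several graphs into one batch, the node indices stored in edge_index and candidates must be shifted by the number of constraint and variable nodes of the graphs that precede them. A self-contained sketch with made-up feature dimensions:

import torch
from torch_geometric.loader import DataLoader

def toy_graph():
    # Two constraints, three variables, two edges; all feature sizes are made up.
    return BipartiteNodeData(
        constraint_features=torch.rand(2, 5),
        edge_indices=torch.tensor([[0, 1], [0, 2]]),  # row 0: constraints, row 1: variables
        edge_features=torch.rand(2, 1),
        variable_features=torch.rand(3, 19),
        tree_features=torch.rand(1, 8),               # illustrative placeholder
        candidates=torch.tensor([0, 2]),              # branchable variable indices
        nb_candidates=2,
        candidate_choice=torch.tensor([0]),           # the expert chose candidates[0]
        candidate_scores=torch.rand(2),
    )

batch = next(iter(DataLoader([toy_graph(), toy_graph()], batch_size=2)))
# The second graph's candidates are shifted by its variable offset of 3:
print(batch.candidates)  # tensor([0, 2, 3, 5])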