Source code for KGBN.BMatrix

## Created by Tazein on 1/29/24
"""
Resources for loading boolean networks from files (strings)
"""

import numpy as np
import pandas as pd
import re
from itertools import product
import os

################## equations for simulation variables ##################

# Tokens that are literal constants rather than gene names; get_upstream_genes
# drops them when it collects the inputs of an equation.
BUILT_INS = {'0', '1', 'True', 'False'}

def get_equations(file = None, string = None):
    """
    Collect the equation lines of a network definition.

    Lines are stripped; blank lines and lines starting with '#' are skipped,
    and anything after a '#' on a line is discarded. Only lines containing
    '=' with a non-empty left-hand side are kept. When a gene appears on the
    left-hand side more than once, the first equation wins and a message
    listing the repeated genes is printed.

    Parameters:
    -----------
    file : str, optional
        Path of a text file holding one equation per line.
    string : str, optional
        Network definition with equations separated by newlines. Only used
        when `file` is None.

    Returns:
    --------
    list of str
        The retained equations, in input order.

    Raises:
    -------
    ValueError
        If both `file` and `string` are None.
    """
    if file is not None:
        with open(file, 'r') as handle:
            raw_lines = handle.readlines()
    elif string is not None:
        raw_lines = string.split('\n')
    else:
        raise ValueError("Either file or string must be provided")

    kept = []
    known_genes = set()
    repeated = []
    for raw in raw_lines:
        text = raw.strip()
        if not text or text.startswith('#'):
            continue
        if '#' in text:
            # Drop the inline comment, keep what precedes it
            text = text.split('#')[0].strip()
        if not text or '=' not in text:
            continue
        gene = text.split('=')[0].strip()
        if not gene:
            continue
        if gene in known_genes:
            repeated.append(gene)
        else:
            known_genes.add(gene)
            kept.append(text)

    if repeated:
        print(f"Duplicate genes found: {repeated}\nUsing the first occurrence of each gene.")
    return kept
def get_gene_dict(equations):
    """
    Map every gene on the left-hand side of an equation to a row index.

    Parameters:
    -----------
    equations : list of str
        Equations of the form 'gene = expression'.

    Returns:
    --------
    dict
        {gene name: index}, numbered from 0 in order of first appearance.
        A gene that appears several times keeps the index of its first
        occurrence.
    """
    # dict.fromkeys de-duplicates in O(n) while preserving insertion order
    # (the original list-membership test was quadratic).
    genes = dict.fromkeys(equation.split('=')[0].strip() for equation in equations)
    return {gene: i for i, gene in enumerate(genes)}
def get_upstream_genes(equations):
    """
    Return, for each equation, its input genes as a space-separated string.

    The right-hand side of each equation is tokenised by treating the Boolean
    operators and parentheses (! | & ( )) as separators. Tokens listed in
    BUILT_INS (constants) are dropped and repeated genes are listed once.

    Parameters:
    -----------
    equations : list of str
        Equations of the form 'gene = expression'.

    Returns:
    --------
    list of str
        One string per equation; genes appear in order of first occurrence
        in the expression. An equation without inputs yields ''.
    """
    # Operators become spaces (not deletions) so 'A&!B' still splits into A and B.
    operators_to_space = str.maketrans({c: ' ' for c in "!|&()"})

    upstream_genes = []
    for equation in equations:
        expression = equation.split('=')[1].strip()
        tokens = expression.translate(operators_to_space).split()
        # Ordered de-duplication: set() ordering of strings changes between
        # interpreter runs, which made the column order of the connectivity
        # matrix and truth table irreproducible.
        unique = dict.fromkeys(token for token in tokens if token not in BUILT_INS)
        upstream_genes.append(' '.join(unique))
    return upstream_genes
def get_connectivity_matrix(equations,upstream_genes,gene_dict):
    """
    Build the padded connectivity matrix of the network.

    Parameters:
    -----------
    equations : list of str
        Not used by this function; kept so existing callers keep working.
    upstream_genes : list of str
        One space-separated string of input gene names per row.
    gene_dict : dict
        {gene name: index} used to translate names into indices.

    Returns:
    --------
    numpy.ndarray of int
        Row i holds the indices of the genes named in upstream_genes[i],
        right-padded with -1 to the width of the longest row. Empty input
        gives an array of shape (0, 0).

    Raises:
    -------
    KeyError
        If an input gene is not a key of `gene_dict`.
    """
    rows = [[gene_dict[gene] for gene in names.split()] for names in upstream_genes]
    width = max((len(row) for row in rows), default=0)

    # Start from an all -1 matrix and overwrite the used prefix of each row;
    # this avoids the ragged object array and per-row np.pad.
    connectivity_matrix = np.full((len(rows), width), -1, dtype=int)
    for i, row in enumerate(rows):
        connectivity_matrix[i, :len(row)] = row
    return connectivity_matrix
def get_truth_table(equations,upstream_genes,show_functions=None):
    """
    Evaluate every equation over all combinations of its input genes.

    Parameters:
    -----------
    equations : list of str
        Equations of the form 'gene = expression' using ! | & and parentheses.
    upstream_genes : list of str
        upstream_genes[i] is the space-separated list of input genes of
        equations[i]; it fixes the order of the input bits.
    show_functions : bool, optional
        When given and not False, each translated expression is printed.

    Returns:
    --------
    numpy.ndarray of int
        Row i lists the output of equation i for each input combination, in
        the order produced by itertools.product([0, 1], ...) (first listed
        gene is the most significant bit). Rows are right-padded with -1 to
        the length of the longest row. Empty input gives shape (0, 0).
    """
    if show_functions is None:
        show_functions = False

    rows = []
    for i, equation in enumerate(equations):
        expression = equation.split('=')[1].strip()
        # Translate the network operators into Python's Boolean keywords
        function = expression.replace('!', ' not ').replace('|', ' or ').replace('&', ' and ')
        if show_functions is not False:
            print(function)

        variables = upstream_genes[i].split()
        # A gene that simply equals itself (e.g. FLT3 = FLT3) is copied through
        # without evaluating the expression.
        is_identity = len(variables) == 1 and variables[0] == function

        outputs = []
        for combo in product([0, 1], repeat=len(variables)):
            values = dict(zip(variables, combo))
            if is_identity:
                outputs.append(values[variables[0]])
            else:
                # NOTE(security): eval executes the expression as Python code;
                # only load network definitions from trusted sources.
                outputs.append(int(eval(function, values)))
        rows.append(outputs)

    width = max((len(row) for row in rows), default=0)
    truth_table = np.full((len(rows), width), -1, dtype=int)
    for i, row in enumerate(rows):
        truth_table[i, :len(row)] = row
    return truth_table
################## knocking in/out genes after creating variables but before simulation ################## ##can also use mutation_dict for perturbed_dict, just replace the file
def get_mutation_dict(file):
    """
    Read a 'gene = value' file into a dictionary.

    Parameters:
    -----------
    file : str
        Path of a text file with one 'gene = integer' entry per line,
        for example 'FLT3 = 1'. Blank lines are ignored.

    Returns:
    --------
    dict or None
        {gene name: int value}. If a non-blank line has no '=' sign, a
        message is printed and None is returned.

    Raises:
    -------
    ValueError
        If a line has more than one '=' or its value is not an integer.
    """
    mutation_dict = {}
    with open(file) as f:
        for line in f:
            if not line.strip():
                # A blank line (typically a trailing newline) is not a
                # formatting error; previously it aborted the whole read.
                continue
            if '=' not in line:
                print('There is a formatting error: ' + str(line) + '\nMake sure that it is formatted with an equal sign. For example: FLT3 = 1')
                return
            key, val = line.split("=")
            mutation_dict[key.strip()] = int(val.strip())
    return mutation_dict
def get_knocking_genes(profile, mutation_dict, connectivity_matrix, gene_dict, perturbed_genes=None, perturbed_dict=None):
    """
    Apply mutations and perturbations to a connectivity matrix and draw x0.

    A random 0/1 initial state is drawn first. Then, in this order:
    1. every gene in `mutation_dict` is set to the opposite of its mutated
       value (0 if the mutated value is > 0, otherwise 1);
    2. every gene named in `profile` has its connectivity row set to -1 and
       its state set to its value in `mutation_dict` (0 if absent);
    3. every gene in `perturbed_genes` has its connectivity row set to -1
       and its state set to its value in `perturbed_dict` (0 if absent).
    Names '' and 'NA' are skipped with a printed notice.

    Parameters:
    -----------
    profile : str or None
        Comma-separated names of the mutated genes. None means no mutation.
    mutation_dict : dict or None
        {gene name: mutated value}. None disables the mutation profile.
    connectivity_matrix : numpy.ndarray
        Connectivity matrix of the network; it is copied, not modified.
    gene_dict : dict
        {gene name: row index}.
    perturbed_genes : list of str or str, optional
        Perturbed genes, as a list or a comma-separated string.
    perturbed_dict : dict, optional
        {gene name: perturbed value}.

    Returns:
    --------
    tuple
        (mutated_connectivity_matrix, x0)

    Raises:
    -------
    KeyError
        If a named gene (other than '' or 'NA') is not in `gene_dict`.
    """
    ngenes = len(gene_dict)
    mutated_connectivity_matrix = connectivity_matrix.copy()
    x0 = np.random.randint(2, size=ngenes)  # Random initial state, redrawn on every call

    if perturbed_genes is None:
        perturbed_genes = []
    elif not isinstance(perturbed_genes, list):
        perturbed_genes = perturbed_genes.split(',')
    # Ordered de-duplication; names are stripped so 'A, B' is accepted
    perturbed_genes = list(dict.fromkeys(name.strip() for name in perturbed_genes))

    if perturbed_dict is None:
        perturbed_dict = {}

    # Without a profile or without mutation values there is nothing to knock.
    # (Previously profile=None left mutation_profile undefined and
    # mutation_dict=None crashed in the loop below.)
    if profile is None or mutation_dict is None:
        mutation_profile = []
    else:
        mutation_profile = list(dict.fromkeys(name.strip() for name in profile.split(',')))
    if mutation_dict is None:
        mutation_dict = {}

    # Start every gene that has a mutation value from its wild-type value
    for gene in mutation_dict:
        if gene == '' or gene == 'NA':
            print('no_mutation')
        else:
            x0[gene_dict[gene]] = 0 if mutation_dict[gene] > 0 else 1

    # Genes in the profile lose their regulators and take their mutated value
    for gene in mutation_profile:
        if gene == '' or gene == 'NA':
            print('no_mutation')
        else:
            mutated_connectivity_matrix[[gene_dict[gene]], :] = -1
            x0[gene_dict[gene]] = mutation_dict.get(gene, 0)

    # Perturbed genes are handled the same way, with their own value table
    for gene in perturbed_genes:
        if len(gene) == 0:
            print('no perturbed genes in the simulation')
        else:
            mutated_connectivity_matrix[[gene_dict[gene]], :] = -1
            x0[gene_dict[gene]] = perturbed_dict.get(gene, 0)

    return (mutated_connectivity_matrix, x0)
################## equations for calculating phenotype and network ################## #getting the equations uses the same function (equations(file))
def get_cal_upstream_genes(equations):
    """
    Return, for each scoring equation, the list of gene names it uses.

    Parameters:
    -----------
    equations : list of str
        Equations of the form 'name = expression' using ! | & and parentheses.

    Returns:
    --------
    list of list of str
        One list per equation with each gene named once, in order of first
        occurrence in the expression.
    """
    # Operators must become spaces rather than be deleted: deleting them
    # fused 'A&B' into the single bogus token 'AB'.
    operators_to_space = str.maketrans({c: ' ' for c in "!|&()"})

    cal_upstream_genes = []
    for equation in equations:
        expression = equation.split('=')[1].strip()
        tokens = expression.translate(operators_to_space).split()
        cal_upstream_genes.append(list(dict.fromkeys(tokens)))
    return cal_upstream_genes
def get_cal_functions(equations):
    """
    Turn Boolean scoring equations into arithmetic expressions.

    In each right-hand side, '!' becomes '-', '|' and '&' both become '+',
    parentheses are removed, and every negated term is wrapped in
    parentheses (for example 'A & !B' becomes 'A + (-B)').

    Parameters:
    -----------
    equations : list of str
        Equations of the form 'name = expression'.

    Returns:
    --------
    list of str
        One arithmetic expression per equation.
    """
    drop_parentheses = str.maketrans("", "", "()")

    cal_functions = []
    for equation in equations:
        expression = equation.split('=')[1].strip()
        expression = expression.replace('!', '-').replace('|', '+').replace('&', '+')
        expression = expression.translate(drop_parentheses)
        # Attach each minus sign to the term it negates, whatever the spacing
        expression = re.sub(r'-\s+', '-', expression)
        # Collapse whitespace runs left behind by the removed characters
        expression = re.sub(r'\s+', ' ', expression)
        # Parenthesise negated terms so the result reads unambiguously
        expression = re.sub(r'(-\w+)', r'(\1)', expression)
        cal_functions.append(expression)
    return cal_functions
# Assumes len(cal_functions) == len(scores_dict), i.e. one scoring function per score category
def get_calculating_scores(network_traj, cal_functions, cal_upstream_genes, gene_dict, cal_range=None, scores_dict=None, title=None):
    """
    Score a trajectory with each scoring function and combine the means.

    For every function i, the expression is evaluated on each row of
    `cal_range` and the resulting list is stored in scores_dict[title[i]].
    The means of the 'Apoptosis', 'Differentiation' and 'Proliferation'
    lists are then combined as
    Network = Proliferation - Differentiation - Apoptosis.

    Parameters:
    -----------
    network_traj : sequence of rows
        Trajectory; each row is indexed by the values of `gene_dict`.
    cal_functions : list of str
        Arithmetic expressions to evaluate.
    cal_upstream_genes : list of list of str
        cal_upstream_genes[i] names the genes used by cal_functions[i].
    gene_dict : dict
        {gene name: index into a trajectory row}.
    cal_range : sequence of rows, optional
        Rows to score. Defaults to the last 100000 rows of `network_traj`.
    scores_dict : dict, optional
        Dictionary receiving the per-row scores; it is updated in place.
    title : list of str, optional
        Key used for each function. Defaults to
        ["Apoptosis", "Differentiation", "Proliferation", "Network"].

    Returns:
    --------
    tuple
        (final_scores_dict, final_score) where final_scores_dict holds the
        three means plus 'Network', and final_score is the 'Network' value.
    """
    if scores_dict is None:
        scores_dict = {"Apoptosis": [], "Differentiation": [], "Proliferation": [], "Network": []}
    if cal_range is None:
        cal_range = network_traj[-100000:]
    if title is None:
        title = ["Apoptosis", "Differentiation", "Proliferation", "Network"]

    for idx, score_function in enumerate(cal_functions):
        variables = cal_upstream_genes[idx]
        per_row = []
        for row in cal_range:
            values = {gene: row[gene_dict[gene]] for gene in variables}
            # NOTE(security): eval runs the expression as Python code
            per_row.append(int(eval(score_function, values)))
        scores_dict[title[idx]] = per_row

    final_scores_dict = {
        'Apoptosis': np.mean(scores_dict['Apoptosis']),
        'Differentiation': np.mean(scores_dict['Differentiation']),
        'Proliferation': np.mean(scores_dict['Proliferation']),
    }
    final_score = (
        final_scores_dict['Proliferation']
        - final_scores_dict['Differentiation']
        - final_scores_dict['Apoptosis']
    )
    final_scores_dict['Network'] = final_score
    return (final_scores_dict, final_score)
################## load BN/PBN from files/strings ##################
def load_network_from_file(filename, initial_state=None):
    """
    Given a file representing a boolean network, this generates a BooleanNetwork object.

    Formatting: gene = equation
    - all genes must have their own equation in a line (sometimes the equation is just A = A)
    - each equation must have an equal sign and a space before and after it
    - If the equation is a constant value (0 or 1), meaning that the gene is set as mutated/perturbed

    Parameters:
    -----------
    filename : str
        Path to the file containing the network definition
    initial_state : array-like or dict, optional
        Initial values for each node. If array-like, order matches gene order in file.
        If dict, keys are gene names and values are initial states (0 or 1).
        If None, random initial values are used.

    Returns:
    --------
    BooleanNetwork
        The network built by load_network_from_string from the file's equations.
    """
    # Read and clean the file once, then hand the equations to the string
    # loader so both entry points share a single implementation. The cleaned
    # equations are already comment-free and unique, so re-parsing them in
    # load_network_from_string yields exactly the same list.
    equations = get_equations(filename)
    return load_network_from_string('\n'.join(equations), initial_state)
def load_network_from_string(network_string, initial_state=None):
    """
    Build a BooleanNetwork object from a network definition string.

    Formatting:
    - every gene needs its own equation (possibly just A = A)
    - each equation has an equal sign with a space before and after it
    - an equation that is the constant 0 or 1 marks the gene as
      mutated/perturbed: its state is fixed to that value and its
      connectivity row is set to -1

    Parameters:
    -----------
    network_string : str
        String containing the network definition
    initial_state : array-like or dict, optional
        Initial values for each node. If array-like, order matches gene order in string.
        If dict, keys are gene names and values are initial states (0 or 1);
        unlisted genes start at 0 and unknown names are ignored.
        If None, random initial values are used.

    Returns:
    --------
    BooleanNetwork
        Network built from the parsed equations.
    """
    from .booleanNetwork import BooleanNetwork

    equations = get_equations(string = network_string)
    gene_dict = get_gene_dict(equations)
    upstream_genes = get_upstream_genes(equations)
    connectivity_matrix = get_connectivity_matrix(equations, upstream_genes, gene_dict)
    truth_table = get_truth_table(equations, upstream_genes)
    ngenes = len(equations)

    if isinstance(initial_state, dict):
        x0 = np.zeros(ngenes, dtype=int)
        for name, state in initial_state.items():
            if name in gene_dict:
                x0[gene_dict[name]] = int(state)
        print(f'Initial state set from dictionary. Genes not specified default to 0.')
    elif initial_state is None:
        print('No initial state provided, using a random initial state')
        x0 = np.random.randint(2, size=ngenes)
    else:
        x0 = np.array(initial_state)

    # Constant equations pin the gene and detach it from any regulator
    for equation in equations:
        pieces = equation.split('=')
        gene, value = pieces[0].strip(), pieces[1].strip()
        if value in ('0', '1'):
            row = gene_dict[gene]
            x0[row] = int(value)
            connectivity_matrix[row, :] = -1

    network = BooleanNetwork(ngenes, connectivity_matrix, truth_table, x0,
                             nodeDict=gene_dict, equations=equations)
    print(f"Network loaded successfully. There are {ngenes} genes in the network.")
    return network
def load_pbn_from_file(filename, initial_state=None):
    """
    Given a file representing a probabilistic boolean network, this generates a ProbabilisticBN object.

    File format example::

        x1 = (x1 | x2 | x3) & (!x1 | x2 | x3), 0.6
        x1 = (x1 | x2 | x3) & (x1 | !x2 | !x3) & (!x1 | x2 | x3), 0.4
        x2 = (x1 | x2 | x3) & (x1 | !x2 | !x3) & (!x1 | !x2 | x3)
        x3 = (!x1 & x2 & x3) | (x1 & !x2 & x3) | (x1 & x2 & !x3) | (x1 & x2 & x3), 0.5
        x3 = (x1 & x2 & x3), 0.5

    Each line has format: node = boolean_function, probability
    The probability part can be omitted if there is only one function for that gene.
    The boolean_function can also be a constant value (0 or 1), meaning that the gene is set as mutated/perturbed

    Parameters:
    -----------
    filename : str
        Path to the file containing the PBN definition
    initial_state : array-like or dict, optional
        Initial values for each node. If array-like, order matches gene order in file.
        If dict, keys are gene names and values are initial states (0 or 1).
        If None, random initial values are used.

    Returns:
    --------
    ProbabilisticBN
        A probabilistic boolean network object
    """
    with open(filename, 'r') as file:
        network_string = file.read()
    # Delegating keeps the file and string loaders identical and gives this
    # entry point the dictionary form of initial_state that its documentation
    # promises (the former copy of the parser only accepted array-likes).
    return load_pbn_from_string(network_string, initial_state)
def load_pbn_from_string(network_string, initial_state=None):
    """
    Given a string representing a probabilistic boolean network, this generates a ProbabilisticBN object.

    Each line has the format 'node = boolean_function, probability'. The
    probability may be omitted when a gene has a single function. Blank
    lines, lines starting with '#', and text after '#' are ignored; an exact
    repeat of (gene, function, probability) is kept once.

    Parameters:
    -----------
    network_string : str
        String containing the PBN definition
    initial_state : array-like or dict, optional
        Initial values for each node. If array-like, order matches gene order in string.
        If dict, keys are gene names and values are initial states (0 or 1).
        If None, random initial values are used.

    Returns:
    --------
    ProbabilisticBN
        A probabilistic boolean network object

    Raises:
    -------
    ValueError
        If no equation is found, if a gene with several functions lacks a
        probability on one of them, or if a gene's probabilities do not sum
        to 1.
    """
    from .PBN import ProbabilisticBN

    gene_funcs = {}
    gene_probs = {}
    seen_gene_funcs = set()  # (gene, function, probability) triples already stored

    for raw_line in network_string.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if '#' in line:
            # Remove the inline comment
            line = line.split('#')[0].strip()
            if not line:
                continue

        # The text after the last comma, if any, is the probability
        if ',' in line:
            func_part, prob_part = line.rsplit(',', 1)
            probability = float(prob_part.strip())
        else:
            func_part = line
            probability = None  # becomes 1.0 below if it is the gene's only function

        if '=' not in func_part:
            continue
        gene, equation = func_part.split('=', 1)
        gene = gene.strip()
        equation = equation.strip()

        gene_func_key = (gene, equation, probability)
        if gene_func_key in seen_gene_funcs:
            continue
        seen_gene_funcs.add(gene_func_key)

        gene_funcs.setdefault(gene, []).append(equation)
        gene_probs.setdefault(gene, []).append(probability)

    if not gene_funcs:
        raise ValueError("No equations found in the network definition")

    # Validate and complete the probabilities
    for gene in gene_funcs:
        if len(gene_funcs[gene]) == 1:
            if gene_probs[gene][0] is None:
                gene_probs[gene][0] = 1.0
        else:
            if None in gene_probs[gene]:
                raise ValueError(f"Gene {gene} has multiple functions but not all have probabilities specified")
            prob_sum = sum(gene_probs[gene])
            if not np.isclose(prob_sum, 1.0, atol=1e-6):
                raise ValueError(f"Probabilities for gene {gene} sum to {prob_sum}, not 1.0")

    gene_dict = {gene: i for i, gene in enumerate(gene_funcs.keys())}
    ngenes = len(gene_dict)

    # Number of functions per gene
    nf = np.zeros(ngenes, dtype=int)
    for gene, funcs in gene_funcs.items():
        nf[gene_dict[gene]] = len(funcs)

    # Probability matrix, padded with -1 where a gene has fewer functions
    max_funcs = max(nf)
    cij = np.full((ngenes, max_funcs), -1.0)
    for gene, probs in gene_probs.items():
        idx = gene_dict[gene]
        for j, prob in enumerate(probs):
            cij[idx, j] = prob

    # One equation, and therefore one matrix row, per function
    all_equations = [f"{gene} = {func}" for gene, funcs in gene_funcs.items() for func in funcs]
    upstream_genes = get_upstream_genes(all_equations)
    connectivity_matrix = get_connectivity_matrix(all_equations, upstream_genes, gene_dict)
    truth_table = get_truth_table(all_equations, upstream_genes)

    # Initial state
    if initial_state is None:
        print('No initial state provided, using a random initial state')
        x0 = np.random.randint(2, size=ngenes)
    elif isinstance(initial_state, dict):
        x0 = np.zeros(ngenes, dtype=int)
        for gene, value in initial_state.items():
            if gene in gene_dict:
                x0[gene_dict[gene]] = int(value)
        print(f'Initial state set from dictionary. Genes not specified default to 0.')
    else:
        x0 = np.array(initial_state)

    # Constant functions (0 or 1): x0 is indexed by gene, but the connectivity
    # matrix has one row per function. Using the gene index for the matrix
    # (as before) blanked a different function's row whenever an earlier gene
    # had several functions.
    for function_row, equation in enumerate(all_equations):
        parts = equation.split('=')
        gene = parts[0].strip()
        value = parts[1].strip()
        if value == '0' or value == '1':
            x0[gene_dict[gene]] = int(value)
            connectivity_matrix[function_row, :] = -1

    network = ProbabilisticBN(ngenes, connectivity_matrix, nf, truth_table, cij, x0, nodeDict=gene_dict)
    network.gene_functions = gene_funcs  # Store the function strings
    network.equations = all_equations    # Store the expanded equations
    print(f"PBN loaded successfully. There are {ngenes} genes in the network.")
    return network
def load_network(source, initial_state=None, network_type='auto'):
    """
    Load a Boolean Network (BN) or Probabilistic Boolean Network (PBN) from a file or a string.

    `source` is treated as a file when it names an existing file, otherwise
    as the network definition itself. With network_type='auto' the network
    is loaded as a PBN when any equation has a comma outside parentheses on
    its right-hand side, and as a BN otherwise.

    Parameters:
    -----------
    source : str
        Either a file path to a network definition file, or a string
        containing the network definition.
    initial_state : array-like or dict, optional
        Initial values for each node. If array-like, order matches gene order in the network.
        If dict, keys are gene names and values are initial states (0 or 1).
        If None, random initial values are used.
    network_type : str, optional
        - 'auto' (default): detect BN vs PBN from the presence of probabilities
        - 'bn': force loading as Boolean Network
        - 'pbn': force loading as Probabilistic Boolean Network

    Returns:
    --------
    BooleanNetwork or ProbabilisticBN
        A network object of the appropriate type

    Raises:
    -------
    ValueError
        If `network_type` is not 'auto', 'bn' or 'pbn'.

    Examples:
    ---------
    Load a BN from file:

    >>> network = load_network('my_network.txt')

    Load a BN from string:

    >>> network_str = '''
    ... A = A
    ... B = A & C
    ... C = B | A
    ... '''
    >>> network = load_network(network_str)

    Load a PBN from file:

    >>> pbn = load_network('my_pbn.txt')

    Load a PBN from string with initial state:

    >>> pbn_str = '''
    ... x1 = (x1 | x2), 0.6
    ... x1 = (!x1 & x2), 0.4
    ... x2 = x1 & x2
    ... '''
    >>> pbn = load_network(pbn_str, initial_state={'x1': 1, 'x2': 0})

    Notes:
    ------
    - For BN format: Each line should have format 'gene = boolean_expression'
    - For PBN format: Each line should have format 'gene = boolean_expression, probability'
    - Probabilities can be omitted if there's only one function for a gene
    - Comments (lines starting with #) and inline comments (after #) are ignored
    - Constant values (0 or 1) are supported for genes
    """
    def has_top_level_comma(expression):
        # True when a comma occurs outside any parentheses
        depth = 0
        for char in expression:
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
            elif char == ',' and depth == 0:
                return True
        return False

    is_file = os.path.isfile(source)
    if is_file:
        with open(source, 'r') as handle:
            raw_lines = handle.readlines()
    else:
        raw_lines = source.strip().split('\n')

    if network_type == 'auto':
        is_pbn = False
        for raw in raw_lines:
            text = raw.strip()
            if not text or text.startswith('#'):
                continue
            text = text.split('#')[0].strip()  # discard any inline comment
            if '=' not in text:
                continue
            if has_top_level_comma(text.split('=', 1)[1].strip()):
                is_pbn = True
                break
        network_type = 'pbn' if is_pbn else 'bn'

    if network_type not in ('bn', 'pbn'):
        raise ValueError(f"Invalid network_type: {network_type}. Must be 'auto', 'bn', or 'pbn'")

    if network_type == 'bn':
        loader = load_network_from_file if is_file else load_network_from_string
    else:
        loader = load_pbn_from_file if is_file else load_pbn_from_string
    return loader(source, initial_state)
def _collect_renames(mapping, expand_complexes):
    """Split a rename mapping into (node_mapping, complex_nodes, single_node_mapping)."""
    if isinstance(mapping, str):
        mapping_df = pd.read_excel(mapping)
        pairs = []
        for _, row in mapping_df.iterrows():
            # Empty cells are read as NaN; such rows carry no rename
            if pd.isnull(row['Node']) or pd.isnull(row['NewName']):
                continue
            pairs.append((row['Node'], str(row['NewName'])))
    elif isinstance(mapping, dict):
        pairs = list(mapping.items())
    else:
        raise TypeError("mapping must be either a file path or a dictionary")

    node_mapping = {}
    complex_nodes = {}
    single_node_mapping = {}
    for original_node, new_name in pairs:
        original_node = str(original_node).strip()
        if not original_node:
            continue

        if isinstance(new_name, list):
            is_complex = True
            components = [str(comp).strip() for comp in new_name if str(comp).strip()]
        else:
            new_name = str(new_name).strip()
            if not new_name:
                continue
            is_complex = ',' in new_name
            components = [comp.strip() for comp in new_name.split(',') if comp.strip()]

        if is_complex:
            # Without expand_complexes a complex node keeps its original name
            if expand_complexes and components:
                node_mapping[original_node] = components
                complex_nodes[original_node] = components
        else:
            node_mapping[original_node] = [new_name]
            single_node_mapping[original_node] = new_name
    return node_mapping, complex_nodes, single_node_mapping


def _make_substituter(replacements):
    """Return a function replacing every whole-word key of `replacements` in one pass."""
    if not replacements:
        return lambda text: text
    # Longest names first so a longer name wins over one it contains
    names = sorted(replacements, key=len, reverse=True)
    pattern = re.compile(r'\b(?:' + '|'.join(re.escape(name) for name in names) + r')\b')
    return lambda text: pattern.sub(lambda match: replacements[match.group(0)], text)


def rename_nodes(network, mapping, expand_complexes=False):
    """
    Rename nodes in a boolean network based on a mapping.

    Parameters:
    -----------
    network : str
        Network definition as file path or string
    mapping : str or dict
        Excel file path with 'Node' and 'NewName' columns, or dictionary with
        original names as keys and new names as values. A new name given as a
        list, or as a string containing commas, describes a complex made of
        several components.
    expand_complexes : bool, optional
        Whether to expand complex nodes into their components. Default is False,
        in which case complex nodes keep their original name.

    Returns:
    --------
    str
        Updated network string with renamed nodes and optionally expanded
        complexes, one equation per line, sorted by left-hand side.

    Raises:
    -------
    TypeError
        If `mapping` is neither a string nor a dictionary.
    """
    if os.path.isfile(network):
        with open(network, 'r') as f:
            network_string = f.read()
    else:
        network_string = network

    node_mapping, complex_nodes, single_node_mapping = _collect_renames(mapping, expand_complexes)

    # Parse 'node = rule' lines; a later equation for the same node replaces
    # the earlier one. Comments are dropped so that a commented-out equation
    # is not mistaken for a real one.
    equation_dict = {}
    for line in network_string.strip().split('\n'):
        eq = line.split('#', 1)[0].strip()
        if '=' not in eq:
            continue
        left, right = eq.split('=', 1)
        node = left.strip()
        rule = right.strip()
        if node and rule:
            equation_dict[node] = rule

    # All replacements are applied simultaneously. Applying them one after
    # another re-renamed text produced by an earlier rule, so a mapping such
    # as {'A': 'B', 'B': 'X'} turned A into X.
    replacements = dict(single_node_mapping)
    for complex_node, components in complex_nodes.items():
        replacements[complex_node] = '(' + ' & '.join(components) + ')'
    substitute = _make_substituter(replacements)

    new_equations = []
    if expand_complexes:
        created = set()
        # 1. Mapped nodes: every target name receives the rewritten rule, unless
        #    that name was already created or already exists as an original node.
        for original_node, rule in equation_dict.items():
            if original_node not in node_mapping:
                continue
            new_rule = substitute(rule)
            for new_node in node_mapping[original_node]:
                if new_node and new_node not in created and new_node not in equation_dict:
                    new_equations.append(f"{new_node} = {new_rule}")
                    created.add(new_node)
        # 2. Unmapped nodes keep their name; only their rule is rewritten
        for original_node, rule in equation_dict.items():
            if original_node not in node_mapping:
                new_equations.append(f"{original_node} = {substitute(rule)}")
    else:
        # Simple rename - no complex expansion
        for original_node, rule in equation_dict.items():
            renamed_node = single_node_mapping.get(original_node, original_node)
            new_equations.append(f"{renamed_node} = {substitute(rule)}")

    # Order the equations alphabetically by left-hand side
    new_equations.sort(key=lambda x: x.split('=')[0])
    return '\n'.join(new_equations)