Complete cross-over changes
LisIva committed Mar 5, 2024
1 parent 5f3a0d8 commit 32ea5f9
Showing 10 changed files with 193 additions and 118 deletions.
3 changes: 1 addition & 2 deletions epde/interface/token_family.py
@@ -19,7 +19,6 @@
import seaborn as sns
import matplotlib.pyplot as plt

from symnet.initcoefficients import get_csym_tsym
from sympy import Symbol, Mul

def constancy_hard_equality(tensor, epsilon=1e-7):
@@ -445,7 +444,7 @@ def __init__(self, pool):
self.cross_distr = self.get_cross_distr(self.pool.custom_cross_prob)

@staticmethod
def get_cross_distr(custom_cross_prob, min_pr=0.15, max_pr=0.75):
def get_cross_distr(custom_cross_prob, min_pr=0.2, max_pr=0.7): # 0.15 0.75 for burg sindy 9 0.2-0.8 bo
mmf = 2.4
values = list(custom_cross_prob.values())

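Only the signature of get_cross_distr is visible in this hunk; its body stays collapsed, so the role of the new min_pr/max_pr defaults (0.2 and 0.7 instead of 0.15 and 0.75) is not shown. A minimal sketch of how such bounds could be applied to a per-token crossover distribution follows; the clamp-and-renormalise behaviour and the helper name are assumptions for illustration, not taken from the repository.

import numpy as np

def clamp_cross_distr(probs: dict, min_pr: float = 0.2, max_pr: float = 0.7) -> dict:
    # Hypothetical helper: clip each token's crossover probability into
    # [min_pr, max_pr] and renormalise so the values sum to one again.
    keys = list(probs)
    clipped = np.clip(np.array([probs[k] for k in keys], dtype=float), min_pr, max_pr)
    clipped /= clipped.sum()
    return dict(zip(keys, clipped.tolist()))

# Made-up token probabilities:
# clamp_cross_distr({'u': 0.05, 'du/dx1': 0.85, 'u*du/dx1': 0.10})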
101 changes: 71 additions & 30 deletions epde/operators/multiobjective/variation.py
@@ -103,19 +103,12 @@ def apply(self, objective : tuple, arguments : dict):

assert objective[0].vals.same_encoding(objective[1].vals)
offspring_1 = objective[0]; offspring_2 = objective[1]

if objective[0].crossover_dominator_count >= objective[1].crossover_dominator_count:
objective[0].incr_dominator_count()
else:
objective[1].incr_dominator_count()

eqs_keys = objective[0].vals.equation_keys; params_keys = objective[1].vals.params_keys
for eq_key in eqs_keys:
temp_eq_1, temp_eq_2 = self.suboperators['equation_crossover'].apply(objective = (objective[0].vals[eq_key],
objective[1].vals[eq_key]),
arguments = subop_args['equation_crossover'],
dominator_count=(objective[0].crossover_dominator_count,
objective[1].crossover_dominator_count))
arguments = subop_args['equation_crossover'])
# except TypeError:
# pass
objective[0].vals.replace_gene(gene_key = eq_key, value = temp_eq_1)
@@ -155,7 +148,7 @@ class EquationCrossover(CompoundOperator):
key = 'EquationCrossover'

@HistoryExtender(f'\n -> performing equation crossover', 'ba')
def apply(self, objective : tuple, arguments : dict, dominator_count : tuple):
def apply(self, objective : tuple, arguments : dict):
self_args, subop_args = self.parse_suboperator_args(arguments = arguments)

equation1_terms, equation2_terms = detect_similar_terms(objective[0], objective[1])
@@ -171,22 +164,80 @@ def apply(self, objective : tuple, arguments : dict, dominator_count : tuple):
check_uniqueness(temp_term_2, objective[1].structure[:i] + objective[1].structure[i+1:])):
objective[0].structure[i] = temp_term_1; objective[1].structure[i] = temp_term_2

for i in range(same_num + similar_num, len(objective[0].structure)):
if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure):
objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i],
objective[1].structure[i]),
arguments = subop_args['term_crossover'])

if same_num + similar_num < len(objective[0].structure):
start_idx = same_num + similar_num
eq1_distr = self.get_equation_cross_distr(objective[0], start_idx)
eq2_distr = self.get_equation_cross_distr(objective[1], start_idx)
for _ in range(len(eq1_distr)):
idx0 = np.random.choice(list(eq1_distr.keys()), p=list(eq1_distr.values()))
idx1 = np.random.choice(list(eq2_distr.keys()), p=list(eq2_distr.values()))

if check_uniqueness(objective[0].structure[idx0], objective[1].structure) and check_uniqueness(
objective[1].structure[idx1], objective[0].structure):

objective[0].structure[idx0], objective[1].structure[idx1], recalc_distr = self.suboperators['term_crossover'].apply(
objective=(objective[0].structure[idx0],objective[1].structure[idx1]),
arguments=subop_args['term_crossover'])

if recalc_distr:
eq1_distr = self.get_equation_cross_distr(objective[0], start_idx)
eq2_distr = self.get_equation_cross_distr(objective[1], start_idx)


# for i in range(same_num + similar_num, len(objective[0].structure)):
# if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure):
# objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i],
# objective[1].structure[i]),
# arguments = subop_args['term_crossover'])

return objective[0], objective[1]

def use_default_tags(self):
self._tags = {'crossover', 'gene level', 'contains suboperators', 'standard'}

@staticmethod
def to_symbolic(term):
if type(term.cache_label[0]) == tuple:
labels = []
for label in term.cache_label:
labels.append(str(label[0]))
symlabels = list(map(lambda token: Symbol(token), labels))
return Mul(*symlabels)
else:
return Symbol(str(term.cache_label[0]))

def get_equation_cross_distr(self, equation, start_idx):
importance_coeffs = {}
for i in range(start_idx, len(equation.structure)):
sym_term = self.to_symbolic(equation.structure[i])
importance_coeffs[sym_term] = equation.pool.custom_cross_prob.get(sym_term)
cross_distr = self.get_cross_distr(importance_coeffs, start_idx, len(equation.structure))
return cross_distr

@staticmethod
def get_cross_distr(custom_cross_prob, start_idx, end_idx_exclude):
mmf = 2.4
values = list(custom_cross_prob.values())
csym_arr = np.fabs(np.array(values))

if np.max(csym_arr) / np.min(csym_arr) > 2.6:
min_max_coeff = mmf * np.min(csym_arr) - np.max(csym_arr)
smoothing_factor = min_max_coeff / (min_max_coeff - (mmf - 1) * np.average(csym_arr))
uniform_csym = np.array([np.sum(csym_arr) / len(csym_arr)] * len(csym_arr))

smoothed_array = (1 - smoothing_factor) * csym_arr + smoothing_factor * uniform_csym
inv = 1 / smoothed_array
else:
inv = 1 / csym_arr
inv_norm = inv / np.sum(inv)

return dict(zip([i for i in range(start_idx, end_idx_exclude)], inv_norm.tolist()))
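Taken together, to_symbolic and get_cross_distr implement inverse-importance sampling over the terms that are candidates for crossover: each term's cache_label is mapped to the sympy key used in pool.custom_cross_prob (e.g. a two-factor term labelled u and du/dx1 maps to Mul(Symbol('u'), Symbol('du/dx1'))), and terms with small importance coefficients get a larger probability of being replaced. A self-contained sketch of the distribution logic follows; the 2.6 ratio threshold and the mmf = 2.4 smoothing constant are copied from the code above, while the importance values and position indices are made up.

import numpy as np

def cross_distr(importances, start_idx, end_idx_exclude, mmf=2.4, ratio_cap=2.6):
    # Sampling weights are the normalised inverses of |c_i|, so weakly
    # contributing terms are more likely to be picked for crossover.
    csym_arr = np.fabs(np.array(importances, dtype=float))
    if np.max(csym_arr) / np.min(csym_arr) > ratio_cap:
        # Blend towards the uniform array so that no single term dominates
        # the draw; mmf bounds the resulting max/min spread.
        min_max_coeff = mmf * np.min(csym_arr) - np.max(csym_arr)
        smoothing = min_max_coeff / (min_max_coeff - (mmf - 1) * np.average(csym_arr))
        uniform = np.full_like(csym_arr, csym_arr.sum() / len(csym_arr))
        csym_arr = (1 - smoothing) * csym_arr + smoothing * uniform
    inv = 1 / csym_arr
    inv /= inv.sum()
    return dict(zip(range(start_idx, end_idx_exclude), inv.tolist()))

# Three candidate terms at structure positions 1..3 with importances 0.2, 1.5, 4.0:
distr = cross_distr([0.2, 1.5, 4.0], 1, 4)
pos = np.random.choice(list(distr.keys()), p=list(distr.values()))  # position chosen for crossover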

class EquationExchangeCrossover(CompoundOperator):
key = 'EquationExchangeCrossover'

@HistoryExtender(f'\n -> performing equation exchange crossover', 'ba')
def apply(self, objective : tuple, arguments : dict, dominator_count : tuple):
def apply(self, objective : tuple, arguments : dict):
self_args, subop_args = self.parse_suboperator_args(arguments = arguments)

objective[0].structure, objective[1].structure = objective[1].structure, objective[0].structure
@@ -280,7 +331,7 @@ class TermCrossover(CompoundOperator):
"""
key = 'TermCrossover'

def apply(self, objective : tuple, arguments : dict, dominator_count : tuple):
def apply(self, objective : tuple, arguments : dict):
"""
Get the offspring terms, which are the same as the parents' ones but in swapped order if the crossover occurred, together with a flag indicating whether a swap took place.
@@ -296,21 +347,11 @@ def apply(self, objective : tuple, arguments : dict, dominator_count : tuple):
"""
self_args, subop_args = self.parse_suboperator_args(arguments = arguments)

idx = int(np.argmin(dominator_count))
cross_tokens = []
if type(objective[idx].cache_label[0]) == tuple:
for lbl_tuple in objective[idx].cache_label:
cross_tokens.append(Symbol(lbl_tuple[0]))
else:
cross_tokens.append(Symbol(objective[idx].cache_label[0]))
cross_prob = objective[0].pool.cross_prob_distr.get(Mul(*cross_tokens))

if (np.random.uniform(0, 1) <= cross_prob and
if (np.random.uniform(0, 1) <= self.params["crossover_probability"] and
objective[1].descr_variable_marker == objective[0].descr_variable_marker):
return objective[1], objective[0]
return objective[1], objective[0], True
else:
return objective[0], objective[1]
return objective[0], objective[1], False

def use_default_tags(self):
self._tags = {'crossover', 'term level', 'exploration', 'no suboperators', 'standard'}
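For context on the changed return contract, here is a toy stand-in for TermCrossover.apply: it swaps the two terms with the configured probability and reports whether a swap happened, which is what EquationCrossover above uses to decide whether to rebuild its sampling distributions. The descr_variable_marker compatibility check and the real term objects are omitted; crossover_probability is the parameter name used in the diff.

import numpy as np

def term_crossover(term_a, term_b, crossover_probability=0.3):
    # Toy sketch: swap with the given probability and flag whether the
    # equation-level crossover distributions need to be recomputed.
    if np.random.uniform(0, 1) <= crossover_probability:
        return term_b, term_a, True
    return term_a, term_b, False

# new_a, new_b, recalc = term_crossover('u * du/dx1', 'd^2u/dx2^2')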
1 change: 0 additions & 1 deletion epde/structure/main_structures.py
@@ -795,7 +795,6 @@ def __init__(self, pool: TFPool, metaparameters: dict):
self.tokens_for_eq = TFPool(pool.families_demand_equation, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term)
self.tokens_supp = TFPool(pool.families_equationless, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term)
self.moeadd_set = False

self.vars_to_describe = [token_family.ftype for token_family in self.tokens_for_eq.families] # Made list from set

def use_default_multiobjective_function(self):
20 changes: 6 additions & 14 deletions exscripts/experiment_burgers.py
@@ -8,7 +8,6 @@
import logging
import os
from pathlib import Path
from sympy import Mul, Symbol


def find_coeff_diff(res, coefficients: dict):
@@ -43,10 +42,10 @@ def coefficients_difference(terms_dict, coefficients):

def out_formatting(string):
string = string.replace("u{power: 1.0}", "u")
string = string.replace("d^2u/dx2^2{power: 1.0}", "d^2u/dx2^2")
string = string.replace("d^2u/dx1^2{power: 1.0}", "d^2u/dx1^2")
string = string.replace("d^2u/dx0^2{power: 1.0}", "d^2u/dx0^2")
string = string.replace("du/dx0{power: 1.0}", "du/dx0")
string = string.replace("du/dx1{power: 1.0}", "du/dx1")
string = string.replace("du/dx2{power: 1.0}", "du/dx2")
string = string.replace(" ", "")

ls_equal = string.split('=')
@@ -92,13 +91,6 @@ def hash_term(term):
x = np.linspace(-1000, 0, 101)
t = np.linspace(0, 1, 101)

distr = {Symbol('u') : 1,
Symbol('du/dx0') : 7,
Symbol('du/dx1') : 2,
Mul(Symbol('u'), Symbol('du/dx0')) : 1,
Mul(Symbol('u'), Symbol('du/dx1')) : 8,
Mul(Symbol('du/dx1'), Symbol('du/dx0')) : 3}

boundary = 10
dimensionality = u.ndim
grids = np.meshgrid(t, x, indexing='ij')
@@ -107,9 +99,9 @@ def hash_term(term):
write_csv = True
print_results = True
max_iter_number = 50
title = 'dfo0'
title = 'dfs0'

terms = [('du/dx0', ), ('du/dx1', 'u'), ('u',), ('du/dx1',), ('u', 'du/dx0'), ('du/dx0', 'du/dx1'),]
terms = [('du/dx1', ), ('du/dx2', 'u'), ('u',), ('du/dx2',), ('u', 'du/dx1'), ('du/dx1', 'du/dx2'),]
hashed_ls = [hash_term(term) for term in terms]
coefficients = dict(zip(hashed_ls, [-1., -1., 0., 0., 0., 0.]))
coefficients[1] = 0.
@@ -136,10 +128,10 @@ def hash_term(term):
population_error += 1
continue
end = time.time()
epde_search_obj.equations(only_print=True, num=2)
epde_search_obj.equation_search_results(only_print=True, num=2)
time1 = end-start

res = epde_search_obj.equations(only_print=False, num=2)
res = epde_search_obj.equation_search_results(only_print=False, num=2)
difference_ls = find_coeff_diff(res, coefficients)

if len(difference_ls) != 0:
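A quick illustration of the out_formatting helper changed above, re-creating only the replacement step that is visible in the hunk; the input equation string is invented but follows EPDE's textual form for discovered equations.

# Minimal re-creation of the visible part of out_formatting (illustrative input):
s = "-1.0 * u{power: 1.0} * du/dx2{power: 1.0} + 0.0 = du/dx1{power: 1.0}"
for token in ("u", "du/dx1", "du/dx2", "d^2u/dx1^2", "d^2u/dx2^2"):
    s = s.replace(token + "{power: 1.0}", token)
s = s.replace(" ", "")
print(s)                    # -1.0*u*du/dx2+0.0=du/dx1
ls_equal = s.split("=")     # out_formatting splits on '=' next; the rest of its body is collapsed in the diff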
54 changes: 27 additions & 27 deletions exscripts/experiment_burgers_sindy.py
@@ -34,7 +34,7 @@ def coefficients_difference(terms_dict, coefficients):
eq_found = 0
for term_hash in terms_dict.keys():
mae += abs(terms_dict.get(term_hash) - coefficients.get(term_hash))
if coefficients.get(term_hash) != 0.0 and abs(terms_dict.get(term_hash) - coefficients.get(term_hash)) < 0.15:
if coefficients.get(term_hash) != 0.0 and abs(terms_dict.get(term_hash) - coefficients.get(term_hash)) < 0.1:
eq_found += 1

mae /= len(terms_dict)
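A toy numeric check of the per-term comparison above (all values invented): with reference coefficients {h1: -1.0, h2: 0.1} and discovered values {h1: -0.93, h2: 0.25}, the absolute differences are 0.07 and 0.15, so under the tightened 0.1 tolerance only the first term counts toward eq_found, and the MAE accumulated over the two terms is (0.07 + 0.15) / 2 = 0.11.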
@@ -47,9 +47,9 @@ def coefficients_difference(terms_dict, coefficients):
def out_formatting(string):
string = string.replace("u{power: 1.0}", "u")
string = string.replace("d^2u/dx1^2{power: 1.0}", "d^2u/dx1^2")
string = string.replace("d^2u/dx0^2{power: 1.0}", "d^2u/dx0^2")
string = string.replace("du/dx0{power: 1.0}", "du/dx0")
string = string.replace("d^2u/dx2^2{power: 1.0}", "d^2u/dx2^2")
string = string.replace("du/dx1{power: 1.0}", "du/dx1")
string = string.replace("du/dx2{power: 1.0}", "du/dx2")
string = string.replace(" ", "")

ls_equal = string.split('=')
@@ -97,24 +97,23 @@ def hash_term(term):
grids = np.meshgrid(t, x, indexing='ij')

''' Parameters of the experiment '''
write_csv = False
write_csv = True
print_results = True
max_iter_number = 50
title = 'dfo0'
max_iter_number = 2000
title = f'dfs0_{max_iter_number}_simstart2'

terms = [('u',), ('du/dx0',), ('du/dx1',), ('d^2u/dx1^2',), ('u', 'du/dx0'), ('u', 'du/dx1'), ('u', 'd^2u/dx1^2'),
('du/dx0', 'du/dx1'), ('du/dx0', 'd^2u/dx1^2'), ('du/dx1', 'd^2u/dx1^2')]
terms = [('u',), ('du/dx1',), ('du/dx2',), ('d^2u/dx2^2',), ('u', 'du/dx1'), ('u', 'du/dx2'), ('u', 'd^2u/dx2^2'),
('du/dx1', 'du/dx2'), ('du/dx1', 'd^2u/dx2^2'), ('du/dx2', 'd^2u/dx2^2')]
cross_distr = {Symbol('u'): 2,
Symbol('du/dx0'): 10,
Symbol('du/dx1'): 2,
Symbol('d^2u/dx1^2'): 9,
Mul(Symbol('u'), Symbol('du/dx0')): 4,
Mul(Symbol('u'), Symbol('du/dx1')): 8,
Mul(Symbol('u'), Symbol('d^2u/dx1^2')): 2,
Mul(Symbol('du/dx0'), Symbol('du/dx1')): 3,
Mul(Symbol('du/dx0'), Symbol('d^2u/dx1^2')): 2,
Mul(Symbol('du/dx1'), Symbol('d^2u/dx1^2')): 3}

Symbol('du/dx1'): 10,
Symbol('du/dx2'): 2,
Symbol('d^2u/dx2^2'): 9,
Mul(Symbol('u'), Symbol('du/dx1')): 4,
Mul(Symbol('u'), Symbol('du/dx2')): 8,
Mul(Symbol('u'), Symbol('d^2u/dx2^2')): 2,
Mul(Symbol('du/dx1'), Symbol('du/dx2')): 3,
Mul(Symbol('du/dx1'), Symbol('d^2u/dx2^2')): 2,
Mul(Symbol('du/dx2'), Symbol('d^2u/dx2^2')): 3}
hashed_ls = [hash_term(term) for term in terms]
coefficients = dict(zip(hashed_ls, [0., -1., 0., 0.1, 0., -1., 0., 0., 0., 0.]))
coefficients[1] = 0.
@@ -126,6 +125,7 @@ def hash_term(term):
differences_ls_none = []
i = 0
population_error = 0
alg_time_start = time.time()
while i < max_iter_number:
epde_search_obj = epde_alg.EpdeSearch(use_solver=False, boundary=boundary,
dimensionality=dimensionality, coordinate_tensors=grids)
@@ -135,15 +135,16 @@
try:
epde_search_obj.fit(data=u, max_deriv_order=(1, 2),
equation_terms_max_number=3, equation_factors_max_number=2,
eq_sparsity_interval=(1e-08, 1e-1))
eq_sparsity_interval=(1e-08, 1e-1), custom_cross_prob=cross_distr)
except Exception as e:
logging.error(traceback.format_exc())
population_error += 1
continue
end = time.time()
epde_search_obj.equations(only_print=True, num=4)
epde_search_obj.equation_search_results(only_print=True, num=4)
time1 = end-start
res = epde_search_obj.equations(only_print=False, num=4)

res = epde_search_obj.equation_search_results(only_print=False, num=4)
difference_ls = find_coeff_diff(res, coefficients)

if len(difference_ls) != 0:
@@ -155,23 +156,22 @@

num_found_eq.append(len(difference_ls))
print('Overall time is:', time1)
print(f'Iteration processed: {i+1}/{max_iter_number}\n')
print(f'Iteration processed: {i+1}/{max_iter_number}')
print(f"Equations found: {len(difference_ls)}\n")
i += 1
time_ls.append(time1)

alg_time_end = time.time()
if write_csv:
arr = np.array([differences_ls_none, time_ls, num_found_eq])
arr = arr.T
df = pd.DataFrame(data=arr, columns=['MAE', 'time', 'number_found_eq'])
df.to_csv(os.path.join(Path().absolute().parent, "data_burg_sindy", f"{title}.csv"))

if print_results:
print('\nTime for every run, s:')
for item in time_ls:
print(f'{item: .4f}')

print()
print(f'\nAverage time, s: {sum(time_ls) / len(time_ls):.2f}')
print(f"Time for all runs, min: {(alg_time_end - alg_time_start) / 60: .2f}")
print(f"Time for all runs, hours: {(alg_time_end - alg_time_start) / 3600: .2f}")
print(f'Average MAE per eq: {sum(mean_diff_ls) / len(mean_diff_ls):.6f}')
print(f'Average minimum MAE per run: {sum(differences_ls) / max_iter_number:.6f}')
print(f'Average # of found eq per run: {sum(num_found_eq) / max_iter_number:.2f}')
(diffs for the remaining changed files were not loaded)
