From 5121800c66c20b3b2a4c2620773d00ae551845bf Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 24 Oct 2023 10:40:45 +0900
Subject: [PATCH 01/16] Fixed an error when there were many missing bool
 columns in the input data

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 .../preprocessing_templates/fillna-type-string.py.jinja         | 2 ++
 .../preprocessing_templates/fillna-type-string_predict.py.jinja | 1 +
 .../preprocessing_templates/fillna-type-string_train.py.jinja   | 1 +
 3 files changed, 4 insertions(+)

diff --git a/sapientml_core/templates/preprocessing_templates/fillna-type-string.py.jinja b/sapientml_core/templates/preprocessing_templates/fillna-type-string.py.jinja
index 5a8da92..5d0787b 100644
--- a/sapientml_core/templates/preprocessing_templates/fillna-type-string.py.jinja
+++ b/sapientml_core/templates/preprocessing_templates/fillna-type-string.py.jinja
@@ -9,6 +9,8 @@ simple_imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
 {% endif %}
 {% if cols_almost_missing_string %}
 STRING_ALMOST_MISSING_COLS = {{ cols_almost_missing_string }}
+{{ train_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ train_dataset }}[STRING_ALMOST_MISSING_COLS].astype(str)
+{{ test_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ test_dataset }}[STRING_ALMOST_MISSING_COLS].astype(str)
 {{ train_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ train_dataset }}[STRING_ALMOST_MISSING_COLS].fillna('')
 {{ test_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ test_dataset }}[STRING_ALMOST_MISSING_COLS].fillna('')
 {% endif %}
\ No newline at end of file
diff --git a/sapientml_core/templates/preprocessing_templates/fillna-type-string_predict.py.jinja b/sapientml_core/templates/preprocessing_templates/fillna-type-string_predict.py.jinja
index ef58ba7..472ec11 100644
--- a/sapientml_core/templates/preprocessing_templates/fillna-type-string_predict.py.jinja
+++ b/sapientml_core/templates/preprocessing_templates/fillna-type-string_predict.py.jinja
@@ -7,5 +7,6 @@ STRING_COLS_WITH_MISSING_VALUES = {{ columns }}
 {% endif %}
 {% if cols_almost_missing_string %}
 STRING_ALMOST_MISSING_COLS = {{ cols_almost_missing_string }}
+{{ test_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ test_dataset }}[STRING_ALMOST_MISSING_COLS].astype(str)
 {{ test_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ test_dataset }}[STRING_ALMOST_MISSING_COLS].fillna('')
 {% endif %}
\ No newline at end of file
diff --git a/sapientml_core/templates/preprocessing_templates/fillna-type-string_train.py.jinja b/sapientml_core/templates/preprocessing_templates/fillna-type-string_train.py.jinja
index 404804b..81d5621 100644
--- a/sapientml_core/templates/preprocessing_templates/fillna-type-string_train.py.jinja
+++ b/sapientml_core/templates/preprocessing_templates/fillna-type-string_train.py.jinja
@@ -11,5 +11,6 @@ with open('simpleimputer-string.pkl', 'wb') as f:
 {% endif %}
 {% if cols_almost_missing_string %}
 STRING_ALMOST_MISSING_COLS = {{ cols_almost_missing_string }}
+{{ train_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ train_dataset }}[STRING_ALMOST_MISSING_COLS].astype(str)
 {{ train_dataset }}[STRING_ALMOST_MISSING_COLS] = {{ train_dataset }}[STRING_ALMOST_MISSING_COLS].fillna('')
 {% endif %}
\ No newline at end of file

From b684d5c5ade4d1b8dd8c3492a46c4293f518d96c Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 7 Nov 2023 18:10:21 +0900
Subject: [PATCH 02/16] fix: Add action to return column names

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py                            | 10 +++++++++-
 .../other_templates/return_column_name.py.jinja        |  6 ++++++
 sapientml_core/templates/pipeline_test.py.jinja        |  9 ++++++---
 3 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 sapientml_core/templates/other_templates/return_column_name.py.jinja

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index 1a141e5..ec9488b 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -32,6 +32,7 @@
 from sapientml.params import Code, Dataset, PipelineResult, RunningResult, Task
 from sapientml.util.json_util import JSONEncoder
 from sapientml.util.logging import setup_logger
+from jinja2 import Environment, FileSystemLoader
 from tqdm import tqdm
 
 from . import internal_path
@@ -42,7 +43,7 @@
 from .training import project_corpus
 
 logger = setup_logger()
-
+env = Environment(loader=FileSystemLoader(f"{os.path.dirname(__file__)}/templates"), trim_blocks=True)
 
 def add_prefix(filename, prefix):
     """Add prefix to filename if prefix exists.
@@ -216,11 +217,18 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
         dataset, preprocess_block = self.preprocess.generate_code(dataset, task)
         code_block = loaddata_block + preprocess_block
         dataset, sapientml_results = self.generate_code(dataset, task)
+        tpl_return_column_name = env.get_template("other_templates/return_column_name.py.jinja")
+        code_return_column_name = tpl_return_column_name.render()
+
 
         result_pipelines: list[Code] = []
         for pipeline in sapientml_results:
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
+            if "cols_has_symbols" in pipeline.test:
+                addindex = pipeline.test.index("# OUTPUT PREDICTION")
+                pipeline.test = pipeline.test[:addindex-1] + code_return_column_name + pipeline.test[addindex-1:]
+
             pipeline.train = code_block.train + pipeline.train
             pipeline.predict = code_block.predict + pipeline.predict
             result_pipelines.append(pipeline)
diff --git a/sapientml_core/templates/other_templates/return_column_name.py.jinja b/sapientml_core/templates/other_templates/return_column_name.py.jinja
new file mode 100644
index 0000000..39fb405
--- /dev/null
+++ b/sapientml_core/templates/other_templates/return_column_name.py.jinja
@@ -0,0 +1,6 @@
+# Undo special symbols for column names
+feature_train = feature_train.rename(columns={TARGET_COLUMNS[0]: cols_has_symbols})
+target_train = target_train.rename(columns={TARGET_COLUMNS[0]: cols_has_symbols}) 
+TARGET_COLUMNS = cols_has_symbols
+
+
diff --git a/sapientml_core/templates/pipeline_test.py.jinja b/sapientml_core/templates/pipeline_test.py.jinja
index e2c6cbb..8be348f 100644
--- a/sapientml_core/templates/pipeline_test.py.jinja
+++ b/sapientml_core/templates/pipeline_test.py.jinja
@@ -64,14 +64,17 @@ if set(TARGET_COLUMNS).issubset(test_dataset.columns.tolist()):
 {{ pipeline_json['confusion_matrix']['code'] }}
 {% endfilter %}
 {% endif %}
+{% if 'permutation_importance' in pipeline_json %}
+
+{{ pipeline_json['permutation_importance']['code'] }}
+{% endif %}
+
+
 {% if 'output_prediction' in pipeline_json %}
 
 {{ pipeline_json['output_prediction']['code'] }}
 {% endif %}
-{% if 'permutation_importance' in pipeline_json %}
 
-{{ pipeline_json['permutation_importance']['code'] }}
-{% endif %}
 
 {% if 'shap' in pipeline_json %}
 

From 5572a5484dd6f616cd3bd75f3843df82ffb1fd19 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 14 Nov 2023 16:25:29 +0900
Subject: [PATCH 03/16] fix: Reflect the findings of the review

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py                                 | 6 +++---
 .../templates/other_templates/return_column_name.py.jinja   | 4 +---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index ec9488b..ca5557f 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -26,13 +26,13 @@
 from shutil import copyfile
 from typing import Tuple, Union
 
+from jinja2 import Environment, FileSystemLoader
 from sapientml.executor import PipelineExecutor
 from sapientml.generator import CodeBlockGenerator, PipelineGenerator
 from sapientml.macros import metric_lower_is_better
 from sapientml.params import Code, Dataset, PipelineResult, RunningResult, Task
 from sapientml.util.json_util import JSONEncoder
 from sapientml.util.logging import setup_logger
-from jinja2 import Environment, FileSystemLoader
 from tqdm import tqdm
 
 from . import internal_path
@@ -45,6 +45,7 @@
 logger = setup_logger()
 env = Environment(loader=FileSystemLoader(f"{os.path.dirname(__file__)}/templates"), trim_blocks=True)
 
+
 def add_prefix(filename, prefix):
     """Add prefix to filename if prefix exists.
 
@@ -220,14 +221,13 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
         tpl_return_column_name = env.get_template("other_templates/return_column_name.py.jinja")
         code_return_column_name = tpl_return_column_name.render()
 
-
         result_pipelines: list[Code] = []
         for pipeline in sapientml_results:
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
             if "cols_has_symbols" in pipeline.test:
                 addindex = pipeline.test.index("# OUTPUT PREDICTION")
-                pipeline.test = pipeline.test[:addindex-1] + code_return_column_name + pipeline.test[addindex-1:]
+                pipeline.test = pipeline.test[: addindex - 1] + code_return_column_name + pipeline.test[addindex - 1 :]
 
             pipeline.train = code_block.train + pipeline.train
             pipeline.predict = code_block.predict + pipeline.predict
diff --git a/sapientml_core/templates/other_templates/return_column_name.py.jinja b/sapientml_core/templates/other_templates/return_column_name.py.jinja
index 39fb405..4212954 100644
--- a/sapientml_core/templates/other_templates/return_column_name.py.jinja
+++ b/sapientml_core/templates/other_templates/return_column_name.py.jinja
@@ -1,6 +1,4 @@
-# Undo special symbols for column names
-feature_train = feature_train.rename(columns={TARGET_COLUMNS[0]: cols_has_symbols})
-target_train = target_train.rename(columns={TARGET_COLUMNS[0]: cols_has_symbols}) 
+# Undo special symbols for column names 
 TARGET_COLUMNS = cols_has_symbols
 
 

From 334f69031cf14f3ebfe3fb844cc5765b4318b52a Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Mon, 20 Nov 2023 16:37:01 +0900
Subject: [PATCH 04/16] fix: Fix an error with mixed target column types.

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 .../templates/other_templates/target_separation_test.py.jinja | 2 +-
 .../other_templates/target_separation_train.py.jinja          | 2 +-
 .../other_templates/target_separation_validation.py.jinja     | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sapientml_core/templates/other_templates/target_separation_test.py.jinja b/sapientml_core/templates/other_templates/target_separation_test.py.jinja
index 55e7101..caae186 100644
--- a/sapientml_core/templates/other_templates/target_separation_test.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_test.py.jinja
@@ -1,7 +1,7 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
 feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
-target_train = train_dataset[TARGET_COLUMNS].copy()
+target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
 if set(TARGET_COLUMNS).issubset(test_dataset.columns.tolist()):
     feature_test = test_dataset.drop(TARGET_COLUMNS, axis=1)
     target_test = test_dataset[TARGET_COLUMNS].copy()
diff --git a/sapientml_core/templates/other_templates/target_separation_train.py.jinja b/sapientml_core/templates/other_templates/target_separation_train.py.jinja
index 525f82f..6a24da1 100644
--- a/sapientml_core/templates/other_templates/target_separation_train.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_train.py.jinja
@@ -1,4 +1,4 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
 feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
-target_train = train_dataset[TARGET_COLUMNS].copy()
\ No newline at end of file
+target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
\ No newline at end of file
diff --git a/sapientml_core/templates/other_templates/target_separation_validation.py.jinja b/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
index 7b68025..1e0750f 100644
--- a/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
@@ -1,7 +1,7 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
-feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
-target_train = train_dataset[TARGET_COLUMNS].copy()
+feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1).astype(str)
+target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
 feature_test = test_dataset.drop(TARGET_COLUMNS, axis=1)
 target_test = test_dataset[TARGET_COLUMNS].copy()
 

From 83438c0304d1fa7efdd239ab7d6402f9c3a864f9 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 21 Nov 2023 09:32:26 +0900
Subject: [PATCH 05/16] fix: Revert changes to move modifications to another
 branch.

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 .../templates/other_templates/target_separation_test.py.jinja | 2 +-
 .../other_templates/target_separation_train.py.jinja          | 2 +-
 .../other_templates/target_separation_validation.py.jinja     | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sapientml_core/templates/other_templates/target_separation_test.py.jinja b/sapientml_core/templates/other_templates/target_separation_test.py.jinja
index caae186..55e7101 100644
--- a/sapientml_core/templates/other_templates/target_separation_test.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_test.py.jinja
@@ -1,7 +1,7 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
 feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
-target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
+target_train = train_dataset[TARGET_COLUMNS].copy()
 if set(TARGET_COLUMNS).issubset(test_dataset.columns.tolist()):
     feature_test = test_dataset.drop(TARGET_COLUMNS, axis=1)
     target_test = test_dataset[TARGET_COLUMNS].copy()
diff --git a/sapientml_core/templates/other_templates/target_separation_train.py.jinja b/sapientml_core/templates/other_templates/target_separation_train.py.jinja
index 6a24da1..525f82f 100644
--- a/sapientml_core/templates/other_templates/target_separation_train.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_train.py.jinja
@@ -1,4 +1,4 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
 feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
-target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
\ No newline at end of file
+target_train = train_dataset[TARGET_COLUMNS].copy()
\ No newline at end of file
diff --git a/sapientml_core/templates/other_templates/target_separation_validation.py.jinja b/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
index 1e0750f..7b68025 100644
--- a/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
+++ b/sapientml_core/templates/other_templates/target_separation_validation.py.jinja
@@ -1,7 +1,7 @@
 # DETACH TARGET
 TARGET_COLUMNS = {{ pipeline.task.target_columns }}
-feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1).astype(str)
-target_train = train_dataset[TARGET_COLUMNS].copy().astype(str)
+feature_train = train_dataset.drop(TARGET_COLUMNS, axis=1)
+target_train = train_dataset[TARGET_COLUMNS].copy()
 feature_test = test_dataset.drop(TARGET_COLUMNS, axis=1)
 target_test = test_dataset[TARGET_COLUMNS].copy()
 

From 2f43a68e8b6617dbad61f1c95dfc2e3d9ef851f0 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 12 Dec 2023 15:17:48 +0900
Subject: [PATCH 06/16] fix: Reflect review results

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 .../templates/other_templates/return_column_name.py.jinja  | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sapientml_core/templates/other_templates/return_column_name.py.jinja b/sapientml_core/templates/other_templates/return_column_name.py.jinja
index 4212954..317f25c 100644
--- a/sapientml_core/templates/other_templates/return_column_name.py.jinja
+++ b/sapientml_core/templates/other_templates/return_column_name.py.jinja
@@ -1,4 +1,5 @@
-# Undo special symbols for column names 
-TARGET_COLUMNS = cols_has_symbols
-
 
+# Undo special symbols for column names 
+target_train[cols_has_symbols_target] = target_train[TARGET_COLUMNS].copy()
+target_train = target_train.drop(TARGET_COLUMNS, axis=1)
+TARGET_COLUMNS = cols_has_symbols_target

From e67846181a237f74d6bf40be3b37be6884d367e9 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 12 Dec 2023 15:50:49 +0900
Subject: [PATCH 07/16] style: Remove unnecessary blank lines.

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/templates/pipeline_test.py.jinja | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sapientml_core/templates/pipeline_test.py.jinja b/sapientml_core/templates/pipeline_test.py.jinja
index ee4574a..5850baa 100644
--- a/sapientml_core/templates/pipeline_test.py.jinja
+++ b/sapientml_core/templates/pipeline_test.py.jinja
@@ -73,7 +73,6 @@ if set(TARGET_COLUMNS).issubset(test_dataset.columns.tolist()):
 {{ pipeline_json['permutation_importance']['code'] }}
 {% endif %}
 
-
 {% if 'shap' in pipeline_json and not pipeline.task.is_multiclass %}
 
 {{ pipeline_json['shap']['code'] }}

From 5254cfe7cd44da7f1264b8f9134d000c4c9e5a6f Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Fri, 22 Dec 2023 10:11:45 +0900
Subject: [PATCH 08/16] fix: Return column names in CSV file

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py                   | 27 ++++++++++++++-----
 .../permutation_importance.py.jinja           |  4 +--
 .../prediction_result.py.jinja                |  8 +++---
 .../return_column_name.py.jinja               |  5 ----
 4 files changed, 27 insertions(+), 17 deletions(-)
 delete mode 100644 sapientml_core/templates/other_templates/return_column_name.py.jinja

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index ca5557f..8f8146e 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -26,7 +26,6 @@
 from shutil import copyfile
 from typing import Tuple, Union
 
-from jinja2 import Environment, FileSystemLoader
 from sapientml.executor import PipelineExecutor
 from sapientml.generator import CodeBlockGenerator, PipelineGenerator
 from sapientml.macros import metric_lower_is_better
@@ -43,7 +42,6 @@
 from .training import project_corpus
 
 logger = setup_logger()
-env = Environment(loader=FileSystemLoader(f"{os.path.dirname(__file__)}/templates"), trim_blocks=True)
 
 
 def add_prefix(filename, prefix):
@@ -218,16 +216,33 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
         dataset, preprocess_block = self.preprocess.generate_code(dataset, task)
         code_block = loaddata_block + preprocess_block
         dataset, sapientml_results = self.generate_code(dataset, task)
-        tpl_return_column_name = env.get_template("other_templates/return_column_name.py.jinja")
-        code_return_column_name = tpl_return_column_name.render()
 
         result_pipelines: list[Code] = []
         for pipeline in sapientml_results:
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
             if "cols_has_symbols" in pipeline.test:
-                addindex = pipeline.test.index("# OUTPUT PREDICTION")
-                pipeline.test = pipeline.test[: addindex - 1] + code_return_column_name + pipeline.test[addindex - 1 :]
+                addindex = pipeline.test.index("perm_df = pd.DataFrame")
+                pipeline.test = (
+                    pipeline.test[:addindex]
+                    + "feature_train_csv = feature_train.rename(columns=rename_symbol_cols)\n    "
+                    + pipeline.test[addindex:]
+                )
+                addindex = pipeline.test.index("prediction = pd.DataFrame")
+                pipeline.test = (
+                    pipeline.test[:addindex]
+                    + "TARGET_COLUMNS_csv = [rename_symbol_cols[TARGET_COLUMNS[0]]]\n"
+                    + pipeline.test[addindex:]
+                )
+            else:
+                addindex = pipeline.test.index("perm_df = pd.DataFrame")
+                pipeline.test = (
+                    pipeline.test[:addindex] + "feature_train_csv = feature_train\n    " + pipeline.test[addindex:]
+                )
+                addindex = pipeline.test.index("prediction = pd.DataFrame")
+                pipeline.test = (
+                    pipeline.test[:addindex] + "TARGET_COLUMNS_csv = [TARGET_COLUMNS[0]]\n" + pipeline.test[addindex:]
+                )
 
             pipeline.train = code_block.train + pipeline.train
             pipeline.predict = code_block.predict + pipeline.predict
diff --git a/sapientml_core/templates/other_templates/permutation_importance.py.jinja b/sapientml_core/templates/other_templates/permutation_importance.py.jinja
index 9d10b79..89da8dd 100644
--- a/sapientml_core/templates/other_templates/permutation_importance.py.jinja
+++ b/sapientml_core/templates/other_templates/permutation_importance.py.jinja
@@ -13,13 +13,13 @@ if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train.sparse.to_dense(), {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% else %}
 if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train, {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% endif %}
diff --git a/sapientml_core/templates/other_templates/prediction_result.py.jinja b/sapientml_core/templates/other_templates/prediction_result.py.jinja
index b7d2c1d..eadde69 100644
--- a/sapientml_core/templates/other_templates/prediction_result.py.jinja
+++ b/sapientml_core/templates/other_templates/prediction_result.py.jinja
@@ -5,17 +5,17 @@ prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index)
 {% elif pipeline.config.predict_option is none and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% elif pipeline.adaptation_metric.startswith("MAP_") %}
 {% set k = pipeline.adaptation_metric.split("_")[1] %}
-prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS_csv[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
 {% else %}
-prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% endif %}
 prediction.to_csv("./prediction_result.csv")
diff --git a/sapientml_core/templates/other_templates/return_column_name.py.jinja b/sapientml_core/templates/other_templates/return_column_name.py.jinja
deleted file mode 100644
index 317f25c..0000000
--- a/sapientml_core/templates/other_templates/return_column_name.py.jinja
+++ /dev/null
@@ -1,5 +0,0 @@
-
-# Undo special symbols for column names 
-target_train[cols_has_symbols_target] = target_train[TARGET_COLUMNS].copy()
-target_train = target_train.drop(TARGET_COLUMNS, axis=1)
-TARGET_COLUMNS = cols_has_symbols_target

From 83817045c9b9eb82f8951d78f30c8397c8bd77da Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Thu, 11 Jan 2024 15:40:18 +0900
Subject: [PATCH 09/16] fix: Reflect the content of the review

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py                   | 33 ++++++++-----------
 .../permutation_importance.py.jinja           |  4 +--
 .../prediction_result.py.jinja                |  8 ++---
 3 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index 8f8146e..d91bae0 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -222,28 +222,23 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
             if "cols_has_symbols" in pipeline.test:
-                addindex = pipeline.test.index("perm_df = pd.DataFrame")
-                pipeline.test = (
-                    pipeline.test[:addindex]
-                    + "feature_train_csv = feature_train.rename(columns=rename_symbol_cols)\n    "
-                    + pipeline.test[addindex:]
+                pipeline.test = pipeline.test.replace(
+                    '"feature": feature_train.columns',
+                    '"feature": feature_train.rename(columns=rename_symbol_cols).columns',
                 )
-                addindex = pipeline.test.index("prediction = pd.DataFrame")
-                pipeline.test = (
-                    pipeline.test[:addindex]
-                    + "TARGET_COLUMNS_csv = [rename_symbol_cols[TARGET_COLUMNS[0]]]\n"
-                    + pipeline.test[addindex:]
-                )
-            else:
-                addindex = pipeline.test.index("perm_df = pd.DataFrame")
-                pipeline.test = (
-                    pipeline.test[:addindex] + "feature_train_csv = feature_train\n    " + pipeline.test[addindex:]
-                )
-                addindex = pipeline.test.index("prediction = pd.DataFrame")
-                pipeline.test = (
-                    pipeline.test[:addindex] + "TARGET_COLUMNS_csv = [TARGET_COLUMNS[0]]\n" + pipeline.test[addindex:]
+                pipeline.test = pipeline.test.replace(
+                    "prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
                 )
 
+                def replace_targets(match_obj):
+                    return match_obj[0].replace(
+                        "TARGET_COLUMNS", "[rename_symbol_cols.get(v, v) for v in TARGET_COLUMNS]"
+                    )
+
+                pat = r"prediction = pd.DataFrame\(y_prob, columns=.?TARGET_COLUMNS.*, index=feature_test.index\)"
+                pipeline.test = re.sub(pat, replace_targets, pipeline.test)
+                pipeline.predict = re.sub(pat, replace_targets, pipeline.predict)
+
             pipeline.train = code_block.train + pipeline.train
             pipeline.predict = code_block.predict + pipeline.predict
             result_pipelines.append(pipeline)
diff --git a/sapientml_core/templates/other_templates/permutation_importance.py.jinja b/sapientml_core/templates/other_templates/permutation_importance.py.jinja
index 89da8dd..9d10b79 100644
--- a/sapientml_core/templates/other_templates/permutation_importance.py.jinja
+++ b/sapientml_core/templates/other_templates/permutation_importance.py.jinja
@@ -13,13 +13,13 @@ if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train.sparse.to_dense(), {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% else %}
 if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train, {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% endif %}
diff --git a/sapientml_core/templates/other_templates/prediction_result.py.jinja b/sapientml_core/templates/other_templates/prediction_result.py.jinja
index eadde69..b7d2c1d 100644
--- a/sapientml_core/templates/other_templates/prediction_result.py.jinja
+++ b/sapientml_core/templates/other_templates/prediction_result.py.jinja
@@ -5,17 +5,17 @@ prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index)
 {% elif pipeline.config.predict_option is none and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
 {% elif pipeline.adaptation_metric.startswith("MAP_") %}
 {% set k = pipeline.adaptation_metric.split("_")[1] %}
-prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS_csv[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
 {% else %}
-prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS_csv, index=feature_test.index)
+prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS, index=feature_test.index)
 {% endif %}
 prediction.to_csv("./prediction_result.csv")

From e1e4644b0cee36640cadb820f8139325637c35f7 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Tue, 27 Feb 2024 11:41:39 +0900
Subject: [PATCH 10/16] fix:Save Changes Temporarily

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/explain/AutoVisualization.py |  6 ++++++
 sapientml_core/params.py                    | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/sapientml_core/explain/AutoVisualization.py b/sapientml_core/explain/AutoVisualization.py
index ef72c06..4fa671c 100644
--- a/sapientml_core/explain/AutoVisualization.py
+++ b/sapientml_core/explain/AutoVisualization.py
@@ -18,6 +18,7 @@
 
 import numpy as np
 import pandas as pd
+import re
 from sapientml.util.logging import setup_logger
 
 warnings.filterwarnings("ignore")
@@ -56,6 +57,11 @@ def AutoVisualization(
         """
         problem_type = problem_type.lower()
 
+        # cols_has_symbols = df.columns.tolist()
+        # inhibited_symbol_pattern = re.compile(r"[\{\}\[\]\",:<'\\\+]+")
+        # rename_symbol_cols = {inhibited_symbol_pattern.sub("", col): col if col in cols_has_symbols else col in cols_has_symbols for col in cols_has_symbols }
+        # target_columns = {rename_symbol_cols[k] for k in target_columns}
+
         if not ignore_columns:
             ignore_columns = []
 
diff --git a/sapientml_core/params.py b/sapientml_core/params.py
index aacf73d..8cf3107 100644
--- a/sapientml_core/params.py
+++ b/sapientml_core/params.py
@@ -18,6 +18,7 @@
 
 import numpy as np
 import pandas as pd
+import collections
 from pandas.core.dtypes.common import is_numeric_dtype
 from pydantic import BaseModel, Field, field_validator
 from sapientml.params import Code, Config, Task
@@ -410,6 +411,26 @@ def summarize_dataset(df_train: pd.DataFrame, task: Task) -> DatasetSummary:
         DatasetSummary
     """
     is_multi_classes: list[bool] = []
+
+    #kakunin
+    # target_columns={}
+    # print(df_train.columns)
+    # print(task.target_columns)
+    # sametargetcolumn = collections.Counter(task.target_columns)
+    # for target in sametargetcolumn.keys():
+    #     if sametargetcolumn[target] > 1:
+    #         for i in range(sametargetcolumn[target]):
+    #             if target + str(i+1) not in df_train.columns :
+    #                 target_columns[target+str(i+1)] = target
+    #     else:
+    #         target_columns.append(target)
+    
+    # for target in target_columns.keys():
+    #     df_train = df_train.rename({target_columns[target]:target}, axis=1)
+
+    # print(target_columns)
+    # print(df_train.columns)
+
     for target in task.target_columns:
         is_multi_classes.append(len(df_train[target].unique()) > 1)
     has_multi_class_targets = all(is_multi_classes)

From 1619b10ad7d499e1072ef9c4f04eb2f929af36b9 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Mon, 11 Mar 2024 10:48:42 +0900
Subject: [PATCH 11/16] fix:Reflect Modifications

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py |  3 ++-
 sapientml_core/params.py    | 19 -------------------
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index d91bae0..cc9744c 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -221,6 +221,7 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
         for pipeline in sapientml_results:
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
+            pipeline.predict = code_block.predict + pipeline.predict
             if "cols_has_symbols" in pipeline.test:
                 pipeline.test = pipeline.test.replace(
                     '"feature": feature_train.columns',
@@ -240,7 +241,7 @@ def replace_targets(match_obj):
                 pipeline.predict = re.sub(pat, replace_targets, pipeline.predict)
 
             pipeline.train = code_block.train + pipeline.train
-            pipeline.predict = code_block.predict + pipeline.predict
+            # pipeline.predict = code_block.predict + pipeline.predict
             result_pipelines.append(pipeline)
 
         logger.info("Executing generated pipelines...")
diff --git a/sapientml_core/params.py b/sapientml_core/params.py
index 8cf3107..5150145 100644
--- a/sapientml_core/params.py
+++ b/sapientml_core/params.py
@@ -412,25 +412,6 @@ def summarize_dataset(df_train: pd.DataFrame, task: Task) -> DatasetSummary:
     """
     is_multi_classes: list[bool] = []
 
-    #kakunin
-    # target_columns={}
-    # print(df_train.columns)
-    # print(task.target_columns)
-    # sametargetcolumn = collections.Counter(task.target_columns)
-    # for target in sametargetcolumn.keys():
-    #     if sametargetcolumn[target] > 1:
-    #         for i in range(sametargetcolumn[target]):
-    #             if target + str(i+1) not in df_train.columns :
-    #                 target_columns[target+str(i+1)] = target
-    #     else:
-    #         target_columns.append(target)
-    
-    # for target in target_columns.keys():
-    #     df_train = df_train.rename({target_columns[target]:target}, axis=1)
-
-    # print(target_columns)
-    # print(df_train.columns)
-
     for target in task.target_columns:
         is_multi_classes.append(len(df_train[target].unique()) > 1)
     has_multi_class_targets = all(is_multi_classes)

From 1cee4514011ac84c7a349243d62466fba0018cd2 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Fri, 22 Mar 2024 16:11:33 +0900
Subject: [PATCH 12/16] fix:Reflected review results

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/explain/AutoVisualization.py |  6 ---
 sapientml_core/explain/main.py              | 52 ++++++++++++++++++---
 sapientml_core/generator.py                 | 18 ++++++-
 sapientml_core/params.py                    |  2 +-
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/sapientml_core/explain/AutoVisualization.py b/sapientml_core/explain/AutoVisualization.py
index 4fa671c..ef72c06 100644
--- a/sapientml_core/explain/AutoVisualization.py
+++ b/sapientml_core/explain/AutoVisualization.py
@@ -18,7 +18,6 @@
 
 import numpy as np
 import pandas as pd
-import re
 from sapientml.util.logging import setup_logger
 
 warnings.filterwarnings("ignore")
@@ -57,11 +56,6 @@ def AutoVisualization(
         """
         problem_type = problem_type.lower()
 
-        # cols_has_symbols = df.columns.tolist()
-        # inhibited_symbol_pattern = re.compile(r"[\{\}\[\]\",:<'\\\+]+")
-        # rename_symbol_cols = {inhibited_symbol_pattern.sub("", col): col if col in cols_has_symbols else col in cols_has_symbols for col in cols_has_symbols }
-        # target_columns = {rename_symbol_cols[k] for k in target_columns}
-
         if not ignore_columns:
             ignore_columns = []
 
diff --git a/sapientml_core/explain/main.py b/sapientml_core/explain/main.py
index 0b17761..14ffb34 100644
--- a/sapientml_core/explain/main.py
+++ b/sapientml_core/explain/main.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import collections
 from typing import Literal, Optional
 
 import pandas as pd
 from sapientml.params import CancellationToken
 from sapientml.util.logging import setup_logger
+from sapientml_preprocess.generator import check_cols_has_symbols, remove_symbols
 
 from .AutoEDA import EDA
 from .AutoVisualization import AutoVisualization_Class
@@ -81,12 +83,50 @@ def process(
     if visualization:
         # Call AutoVisualization to generate visualization codes
         AV = AutoVisualization_Class()
-        visualization_code = AV.AutoVisualization(
-            df=dataframe,
-            target_columns=target_columns,
-            problem_type=problem_type,
-            ignore_columns=ignore_columns,
-        )
+        cols_has_symbols = check_cols_has_symbols(dataframe.columns.to_list())
+        rename_col = []
+        rename_dict = {}
+        if cols_has_symbols:
+            df = list(
+                dataframe.rename(columns=lambda col: remove_symbols(col) if col in cols_has_symbols else col).columns
+            )
+            same_column = collections.Counter(df)
+            for target in same_column.keys():
+                rename_col = []
+                rename_dict = {}
+                i = 1
+                for col in df:
+                    if target in col and same_column[target] > 1:
+                        rename_col.append(str(col + str(i)))
+                        rename_dict[str(col + str(i))] = dataframe.columns[len(rename_dict)]
+                        i = i + 1
+                    else:
+                        rename_col.append(str(col))
+                        rename_dict[col] = dataframe.columns[len(rename_dict)]
+                df = rename_col
+
+            if len(rename_dict) != 0:
+                col_has_target = [rename_dict[col] for col in target_columns]
+                visualization_code = AV.AutoVisualization(
+                    df=dataframe,
+                    target_columns=col_has_target,
+                    problem_type=problem_type,
+                    ignore_columns=ignore_columns,
+                )
+            else:
+                visualization_code = AV.AutoVisualization(
+                    df=dataframe,
+                    target_columns=target_columns,
+                    problem_type=problem_type,
+                    ignore_columns=ignore_columns,
+                )
+        else:
+            visualization_code = AV.AutoVisualization(
+                df=dataframe,
+                target_columns=target_columns,
+                problem_type=problem_type,
+                ignore_columns=ignore_columns,
+            )
     else:
         visualization_code = None
 
diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index 50dbb21..a6a1a17 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -231,6 +231,22 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
                     "prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
                 )
 
+                pipeline.predict = pipeline.predict.replace(
+                    '"feature": feature_train.columns',
+                    '"feature": feature_train.rename(columns=rename_symbol_cols).columns',
+                )
+                pipeline.predict = pipeline.predict.replace(
+                    "prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
+                )
+
+                pipeline.validation = pipeline.validation.replace(
+                    '"feature": feature_train.columns',
+                    '"feature": feature_train.rename(columns=rename_symbol_cols).columns',
+                )
+                pipeline.validation = pipeline.validation.replace(
+                    "prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
+                )
+
                 def replace_targets(match_obj):
                     return match_obj[0].replace(
                         "TARGET_COLUMNS", "[rename_symbol_cols.get(v, v) for v in TARGET_COLUMNS]"
@@ -239,9 +255,9 @@ def replace_targets(match_obj):
                 pat = r"prediction = pd.DataFrame\(y_prob, columns=.?TARGET_COLUMNS.*, index=feature_test.index\)"
                 pipeline.test = re.sub(pat, replace_targets, pipeline.test)
                 pipeline.predict = re.sub(pat, replace_targets, pipeline.predict)
+                pipeline.validation =  re.sub(pat, replace_targets, pipeline.validation)
 
             pipeline.train = code_block.train + pipeline.train
-            # pipeline.predict = code_block.predict + pipeline.predict
             result_pipelines.append(pipeline)
 
         logger.info("Executing generated pipelines...")
diff --git a/sapientml_core/params.py b/sapientml_core/params.py
index e2c2bf5..58a8375 100644
--- a/sapientml_core/params.py
+++ b/sapientml_core/params.py
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import collections
 import re
 from collections import defaultdict
 from typing import Any, Literal, Optional, Union
 
 import numpy as np
 import pandas as pd
-import collections
 from pandas.core.dtypes.common import is_numeric_dtype
 from pydantic import BaseModel, Field, field_validator
 from sapientml.params import Code, Config, Task

From 78d1aadc1822b5f7932a5b84c470710565e9e1c5 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Fri, 22 Mar 2024 16:24:02 +0900
Subject: [PATCH 13/16] fix: Remove unnecessary collections import from params.py

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/params.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sapientml_core/params.py b/sapientml_core/params.py
index 58a8375..3a71eb0 100644
--- a/sapientml_core/params.py
+++ b/sapientml_core/params.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import collections
 import re
 from collections import defaultdict
 from typing import Any, Literal, Optional, Union
@@ -415,7 +414,6 @@ def summarize_dataset(df_train: pd.DataFrame, task: Task) -> DatasetSummary:
         DatasetSummary
     """
     is_multi_classes: list[bool] = []
-
     for target in task.target_columns:
         is_multi_classes.append(len(df_train[target].unique()) > 1)
     has_multi_class_targets = all(is_multi_classes)

From 7724cd190a7d7e096168b01e58d19f694f4bc223 Mon Sep 17 00:00:00 2001
From: tashiro akira <fj1755jk@fujitsu.com>
Date: Fri, 22 Mar 2024 16:56:00 +0900
Subject: [PATCH 14/16] fix: Remove stray double space in generator.py

Signed-off-by: tashiro akira <fj1755jk@fujitsu.com>
---
 sapientml_core/generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sapientml_core/generator.py b/sapientml_core/generator.py
index a6a1a17..2b089d9 100644
--- a/sapientml_core/generator.py
+++ b/sapientml_core/generator.py
@@ -255,7 +255,7 @@ def replace_targets(match_obj):
                 pat = r"prediction = pd.DataFrame\(y_prob, columns=.?TARGET_COLUMNS.*, index=feature_test.index\)"
                 pipeline.test = re.sub(pat, replace_targets, pipeline.test)
                 pipeline.predict = re.sub(pat, replace_targets, pipeline.predict)
-                pipeline.validation =  re.sub(pat, replace_targets, pipeline.validation)
+                pipeline.validation = re.sub(pat, replace_targets, pipeline.validation)
 
             pipeline.train = code_block.train + pipeline.train
             result_pipelines.append(pipeline)

From 969ded3d3be7d1ac7be106607f7e1d561276d5aa Mon Sep 17 00:00:00 2001
From: tashiro-akira <fj0822cr@fujitsu.com>
Date: Thu, 25 Apr 2024 09:41:24 +0900
Subject: [PATCH 15/16] fix:Reflect Review Results

Signed-off-by: tashiro-akira <fj0822cr@fujitsu.com>
---
 sapientml_core/explain/main.py | 43 ++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/sapientml_core/explain/main.py b/sapientml_core/explain/main.py
index 14ffb34..f893278 100644
--- a/sapientml_core/explain/main.py
+++ b/sapientml_core/explain/main.py
@@ -84,29 +84,42 @@ def process(
         # Call AutoVisualization to generate visualization codes
         AV = AutoVisualization_Class()
         cols_has_symbols = check_cols_has_symbols(dataframe.columns.to_list())
-        rename_col = []
+        no_symbol_columns = [ col for col in dataframe.columns.values if col not in cols_has_symbols ]
         rename_dict = {}
         if cols_has_symbols:
             df = list(
                 dataframe.rename(columns=lambda col: remove_symbols(col) if col in cols_has_symbols else col).columns
             )
-            same_column = collections.Counter(df)
-            for target in same_column.keys():
-                rename_col = []
-                rename_dict = {}
-                i = 1
-                for col in df:
-                    if target in col and same_column[target] > 1:
-                        rename_col.append(str(col + str(i)))
-                        rename_dict[str(col + str(i))] = dataframe.columns[len(rename_dict)]
-                        i = i + 1
+            rename_dict = {}
+            if len(df) != len(set(df)) :
+                same_column = {k: v for k, v in collections.Counter(df).items() if v > 1 and k in no_symbol_columns}
+                for target, org_column in zip(df, dataframe.columns.tolist()):
+                    if target in same_column.keys():
+                        # rename_dict[org_column] = target + str(same_column[target] - 1)
+                        rename_dict[target + str(same_column[target] - 1)] = org_column
+                        same_column[target] = same_column[target] - 1
                     else:
-                        rename_col.append(str(col))
-                        rename_dict[col] = dataframe.columns[len(rename_dict)]
-                df = rename_col
+                        rename_dict[target] = org_column
+                    
+                df = list(rename_dict.values())
+                
+            # same_column = collections.Counter(df)
+            # for target in same_column.keys():
+            #     rename_col = []
+            #     rename_dict = {}
+            #     i = 1
+            #     for col in df:
+            #         if target in col and same_column[target] > 1:
+            #             rename_col.append(str(col + str(i)))
+            #             rename_dict[str(col + str(i))] = dataframe.columns[len(rename_dict)]
+            #             i = i + 1
+            #         else:
+            #             rename_col.append(str(col))
+            #             rename_dict[col] = dataframe.columns[len(rename_dict)]
+            #     df = rename_col
 
             if len(rename_dict) != 0:
-                col_has_target = [rename_dict[col] for col in target_columns]
+                col_has_target = [target for target in rename_dict.keys() if rename_dict.values() == target_columns ]
                 visualization_code = AV.AutoVisualization(
                     df=dataframe,
                     target_columns=col_has_target,

From 426a32d85906e22c4036234ccb7c76f4ca99098d Mon Sep 17 00:00:00 2001
From: tashiro-akira <fj0822cr@fujitsu.com>
Date: Thu, 25 Apr 2024 11:01:51 +0900
Subject: [PATCH 16/16] fix: Fix lint errors in explain/main.py

Signed-off-by: tashiro-akira <fj0822cr@fujitsu.com>
---
 sapientml_core/explain/main.py | 39 ++++++++++------------------------
 1 file changed, 11 insertions(+), 28 deletions(-)

diff --git a/sapientml_core/explain/main.py b/sapientml_core/explain/main.py
index f893278..57783b5 100644
--- a/sapientml_core/explain/main.py
+++ b/sapientml_core/explain/main.py
@@ -84,42 +84,25 @@ def process(
         # Call AutoVisualization to generate visualization codes
         AV = AutoVisualization_Class()
         cols_has_symbols = check_cols_has_symbols(dataframe.columns.to_list())
-        no_symbol_columns = [ col for col in dataframe.columns.values if col not in cols_has_symbols ]
+        no_symbol_columns = [col for col in dataframe.columns.values if col not in cols_has_symbols]
         rename_dict = {}
         if cols_has_symbols:
             df = list(
                 dataframe.rename(columns=lambda col: remove_symbols(col) if col in cols_has_symbols else col).columns
             )
             rename_dict = {}
-            if len(df) != len(set(df)) :
-                same_column = {k: v for k, v in collections.Counter(df).items() if v > 1 and k in no_symbol_columns}
-                for target, org_column in zip(df, dataframe.columns.tolist()):
-                    if target in same_column.keys():
-                        # rename_dict[org_column] = target + str(same_column[target] - 1)
-                        rename_dict[target + str(same_column[target] - 1)] = org_column
-                        same_column[target] = same_column[target] - 1
-                    else:
-                        rename_dict[target] = org_column
-                    
-                df = list(rename_dict.values())
-                
-            # same_column = collections.Counter(df)
-            # for target in same_column.keys():
-            #     rename_col = []
-            #     rename_dict = {}
-            #     i = 1
-            #     for col in df:
-            #         if target in col and same_column[target] > 1:
-            #             rename_col.append(str(col + str(i)))
-            #             rename_dict[str(col + str(i))] = dataframe.columns[len(rename_dict)]
-            #             i = i + 1
-            #         else:
-            #             rename_col.append(str(col))
-            #             rename_dict[col] = dataframe.columns[len(rename_dict)]
-            #     df = rename_col
+            same_column = {k: v for k, v in collections.Counter(df).items() if v > 1 and k in no_symbol_columns}
+            for target, org_column in zip(df, dataframe.columns.tolist()):
+                if target in same_column.keys():
+                    rename_dict[target + str(same_column[target] - 1)] = org_column
+                    same_column[target] = same_column[target] - 1
+                else:
+                    rename_dict[target] = org_column
+
+            df = list(rename_dict.values())
 
             if len(rename_dict) != 0:
-                col_has_target = [target for target in rename_dict.keys() if rename_dict.values() == target_columns ]
+                col_has_target = [target for target in rename_dict.keys() if rename_dict.values() == target_columns]
                 visualization_code = AV.AutoVisualization(
                     df=dataframe,
                     target_columns=col_has_target,