@@ -62,7 +62,7 @@ def input_data() -> pd.DataFrame:
62
62
def data (input_data : pd .DataFrame ) -> DataContainer :
63
63
data = DataContainer ({"is_train" : True })
64
64
data .columns_to_ignore_for_training = []
65
- data .train = input_data
65
+ data .X_train = input_data
66
66
return data
67
67
68
68
@@ -72,7 +72,7 @@ def test_skipping_with_no_parameters(data: DataContainer):
72
72
result = calculate_features_step .execute (data )
73
73
74
74
assert isinstance (result , DataContainer )
75
- assert result .train .equals (data .train )
75
+ assert result .X_train .equals (data .X_train )
76
76
77
77
78
78
def test_feature_names (data : DataContainer ):
@@ -87,22 +87,22 @@ def test_feature_names(data: DataContainer):
87
87
result = calculate_features_step .execute (data )
88
88
89
89
assert isinstance (result , DataContainer )
90
- assert "creation_date_year" in result .train .columns
91
- assert "creation_date_month" in result .train .columns
92
- assert "creation_date_day" in result .train .columns
93
- assert "creation_date_hour" in result .train .columns
94
- assert "creation_date_minute" in result .train .columns
95
- assert "creation_date_second" in result .train .columns
96
- assert "creation_date_weekday" in result .train .columns
97
- assert "creation_date_dayofyear" in result .train .columns
98
- assert "deletion_date_year" in result .train .columns
99
- assert "deletion_date_month" in result .train .columns
100
- assert "deletion_date_day" in result .train .columns
101
- assert "deletion_date_hour" in result .train .columns
102
- assert "deletion_date_minute" in result .train .columns
103
- assert "deletion_date_second" in result .train .columns
104
- assert "deletion_date_weekday" in result .train .columns
105
- assert "deletion_date_dayofyear" in result .train .columns
90
+ assert "creation_date_year" in result .X_train .columns
91
+ assert "creation_date_month" in result .X_train .columns
92
+ assert "creation_date_day" in result .X_train .columns
93
+ assert "creation_date_hour" in result .X_train .columns
94
+ assert "creation_date_minute" in result .X_train .columns
95
+ assert "creation_date_second" in result .X_train .columns
96
+ assert "creation_date_weekday" in result .X_train .columns
97
+ assert "creation_date_dayofyear" in result .X_train .columns
98
+ assert "deletion_date_year" in result .X_train .columns
99
+ assert "deletion_date_month" in result .X_train .columns
100
+ assert "deletion_date_day" in result .X_train .columns
101
+ assert "deletion_date_hour" in result .X_train .columns
102
+ assert "deletion_date_minute" in result .X_train .columns
103
+ assert "deletion_date_second" in result .X_train .columns
104
+ assert "deletion_date_weekday" in result .X_train .columns
105
+ assert "deletion_date_dayofyear" in result .X_train .columns
106
106
107
107
108
108
def test_date_columns_are_ignored_for_training (data : DataContainer ):
@@ -117,8 +117,8 @@ def test_date_columns_are_ignored_for_training(data: DataContainer):
117
117
result = calculate_features_step .execute (data )
118
118
119
119
assert isinstance (result , DataContainer )
120
- assert "creation_date" in result .columns_to_ignore_for_training
121
- assert "deletion_date" in result .columns_to_ignore_for_training
120
+ assert "creation_date" not in result .X_train . columns
121
+ assert "deletion_date" not in result .X_train . columns
122
122
123
123
124
124
def test_output_dtypes (data : DataContainer ):
@@ -133,14 +133,14 @@ def test_output_dtypes(data: DataContainer):
133
133
result = calculate_features_step .execute (data )
134
134
135
135
assert isinstance (result , DataContainer )
136
- assert result .train ["creation_date_year" ].dtype == np .dtype ("uint16" )
137
- assert result .train ["creation_date_month" ].dtype == np .dtype ("uint8" )
138
- assert result .train ["creation_date_day" ].dtype == np .dtype ("uint8" )
139
- assert result .train ["creation_date_hour" ].dtype == np .dtype ("uint8" )
140
- assert result .train ["creation_date_minute" ].dtype == np .dtype ("uint8" )
141
- assert result .train ["creation_date_second" ].dtype == np .dtype ("uint8" )
142
- assert result .train ["creation_date_weekday" ].dtype == np .dtype ("uint8" )
143
- assert result .train ["creation_date_dayofyear" ].dtype == np .dtype ("uint16" )
136
+ assert result .X_train ["creation_date_year" ].dtype == np .dtype ("uint16" )
137
+ assert result .X_train ["creation_date_month" ].dtype == np .dtype ("uint8" )
138
+ assert result .X_train ["creation_date_day" ].dtype == np .dtype ("uint8" )
139
+ assert result .X_train ["creation_date_hour" ].dtype == np .dtype ("uint8" )
140
+ assert result .X_train ["creation_date_minute" ].dtype == np .dtype ("uint8" )
141
+ assert result .X_train ["creation_date_second" ].dtype == np .dtype ("uint8" )
142
+ assert result .X_train ["creation_date_weekday" ].dtype == np .dtype ("uint8" )
143
+ assert result .X_train ["creation_date_dayofyear" ].dtype == np .dtype ("uint16" )
144
144
145
145
146
146
def test_output_values (data : DataContainer ):
@@ -155,28 +155,28 @@ def test_output_values(data: DataContainer):
155
155
result = calculate_features_step .execute (data )
156
156
157
157
assert isinstance (result , DataContainer )
158
- assert result .train ["creation_date_year" ].equals (
158
+ assert result .X_train ["creation_date_year" ].equals (
159
159
pd .Series ([2023 , 2023 , 2023 , 2023 , 2023 , 2023 , 2024 , 2024 ], dtype = "uint16" )
160
160
)
161
- assert result .train ["creation_date_month" ].equals (
161
+ assert result .X_train ["creation_date_month" ].equals (
162
162
pd .Series ([1 , 1 , 1 , 1 , 1 , 11 , 2 , 3 ], dtype = "uint8" )
163
163
)
164
- assert result .train ["creation_date_day" ].equals (
164
+ assert result .X_train ["creation_date_day" ].equals (
165
165
pd .Series ([1 , 2 , 3 , 4 , 5 , 1 , 28 , 28 ], dtype = "uint8" )
166
166
)
167
- assert result .train ["creation_date_hour" ].equals (
167
+ assert result .X_train ["creation_date_hour" ].equals (
168
168
pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
169
169
)
170
- assert result .train ["creation_date_minute" ].equals (
170
+ assert result .X_train ["creation_date_minute" ].equals (
171
171
pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
172
172
)
173
- assert result .train ["creation_date_second" ].equals (
173
+ assert result .X_train ["creation_date_second" ].equals (
174
174
pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
175
175
)
176
- assert result .train ["creation_date_weekday" ].equals (
176
+ assert result .X_train ["creation_date_weekday" ].equals (
177
177
pd .Series ([6 , 0 , 1 , 2 , 3 , 2 , 2 , 3 ], dtype = "uint8" )
178
178
)
179
- assert result .train ["creation_date_dayofyear" ].equals (
179
+ assert result .X_train ["creation_date_dayofyear" ].equals (
180
180
pd .Series ([1 , 2 , 3 , 4 , 5 , 305 , 59 , 88 ], dtype = "uint16" )
181
181
)
182
182
@@ -214,7 +214,7 @@ def test_init_with_unsupported_features():
214
214
215
215
def test_execute_with_prediction (data : DataContainer ):
216
216
data .is_train = False
217
- data .flow = data .train .copy ()
217
+ data .X_prediction = data .X_train .copy ()
218
218
219
219
datetime_columns = ["creation_date" ]
220
220
features = ["year" , "month" , "day" ]
@@ -226,6 +226,6 @@ def test_execute_with_prediction(data: DataContainer):
226
226
result = calculate_features_step .execute (data )
227
227
228
228
assert isinstance (result , DataContainer )
229
- assert "creation_date_year" in result .flow .columns
230
- assert "creation_date_month" in result .flow .columns
231
- assert "creation_date_day" in result .flow .columns
229
+ assert "creation_date_year" in result .X_prediction .columns
230
+ assert "creation_date_month" in result .X_prediction .columns
231
+ assert "creation_date_day" in result .X_prediction .columns
0 commit comments