99from data_flow .lib import FileType
1010from data_flow .lib .data_columns import data_get_columns , data_delete_columns , data_rename_columns , data_select_columns
1111from data_flow .lib .data_from import (
12- df_from_tmp_filename ,
1312 from_csv_2_file ,
1413 from_feather_2_file ,
1514 from_parquet_2_file ,
2322 to_json_from_file ,
2423 to_hdf_from_file ,
2524)
25+ from data_flow .lib .fireducks import from_fireducks_2_file , to_fireducks_from_file
26+ from data_flow .lib .pandas import from_pandas_2_file
27+ from data_flow .lib .polars import from_polars_2_file , to_polars_from_file
2628from data_flow .lib .tools import generate_temporary_filename , delete_file
2729
2830
@@ -45,25 +47,44 @@ def __del__(self):
4547 if not self .__in_memory :
4648 delete_file (self .__filename )
4749
50+ def from_fireducks (self , df : fd .DataFrame ):
51+ if self .__in_memory :
52+ self .__data = df
53+ else :
54+ from_fireducks_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
55+ return self
56+
4857 def to_fireducks (self ) -> fd .DataFrame :
4958 if self .__in_memory :
5059 return self .__data
5160 else :
52- return df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type )
61+ return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
62+
63+ def from_pandas (self , df : pd .DataFrame ):
64+ if self .__in_memory :
65+ self .__data = fd .from_pandas (df )
66+ else :
67+ from_pandas_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
68+ return self
5369
5470 def to_pandas (self ) -> pd .DataFrame :
5571 if self .__in_memory :
5672 return self .__data .to_pandas ()
5773 else :
58- return df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
74+ return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
75+
76+ def from_polars (self , df : pl .DataFrame ):
77+ if self .__in_memory :
78+ self .__data = fd .from_pandas (df .to_pandas ())
79+ else :
80+ from_polars_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
81+ return self
5982
6083 def to_polars (self ) -> pl .DataFrame :
6184 if self .__in_memory :
6285 return pl .from_pandas (self .__data .to_pandas ())
6386 else :
64- return pl .from_pandas (
65- df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
66- )
87+ return to_polars_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
6788
6889 def from_csv (self , filename : str ):
6990 if self .__in_memory :
@@ -139,14 +160,14 @@ def head(self):
139160 if self .__in_memory :
140161 print (self .__data .head ())
141162 else :
142- print (df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).head ())
163+ print (to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).head ())
143164 return self
144165
145166 def stats (self ):
146167 if self .__in_memory :
147168 data = self .__data
148169 else :
149- data = df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type )
170+ data = to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
150171
151172 print ("***** Data stats *****" )
152173 print (f"Columns names : { data .columns .to_list ()} " )
0 commit comments