1
+ """
2
+ Pandas programming task
3
+
4
+ 1. Load the data into a Pandas dataframe
5
+ 2. Remove records where scheme_code is NULL
6
+ 3. Sort the data by nav_date
7
+ 4. Fill missing values in the nav column with the last known value after sorting by nav_date (forward fill)
8
+ 5. Save the output dataframe with the name pandas_challenge_output.csv
9
+ 6. Send output file as well as the code in a zip file
10
+
11
+ Code evaluation criteria:
12
+ 1. Correctness of the output file and code
13
+ 2. Code should be clean and readable
14
+ 3. Code should be optimal with respect to time and space complexity (e.g. avoid unnecessary extra variables and loops)
15
+
16
+ """
17
+
18
def process_df(input_csv):
    """Clean a NAV CSV file and save it as ``pandas_challenge_output.csv``.

    The pipeline loads ``input_csv``, drops rows whose ``scheme_code`` is
    missing, parses ``nav_date`` as datetimes, sorts by ``nav_date`` and
    forward-fills gaps in ``nav``. The result is written to
    ``pandas_challenge_output.csv`` in the current working directory.

    Args:
        input_csv: Path (or any source accepted by ``pandas.read_csv``) of a
            CSV containing ``scheme_code``, ``nav_date`` and ``nav`` columns.

    Returns:
        The processed ``pandas.DataFrame`` that was written to disk.

    Raises:
        KeyError: If one of the required columns is absent from the input.
    """
    # Imported at function scope, as in the original, so the function stays
    # self-contained.
    import pandas as pd

    df = (
        pd.read_csv(input_csv)
        # 2. Remove records where scheme_code is NULL.
        .dropna(subset=['scheme_code'])
        # 3. Sort by nav_date; parse first so the order is chronological
        #    rather than lexicographic.
        .assign(nav_date=lambda frame: pd.to_datetime(frame['nav_date']))
        # A stable sort keeps the file order of rows that share a nav_date,
        # which makes the forward fill below deterministic.
        .sort_values(by='nav_date', kind='mergesort')
        # 4. Forward-fill nav. Assigning the result back (instead of calling
        #    ffill(inplace=True) on the selected column) is required for the
        #    fill to reach the frame under pandas Copy-on-Write.
        .assign(nav=lambda frame: frame['nav'].ffill())
    )

    # 5. Save the output.
    # NOTE(review): the row index is written as the first column, exactly as
    # the original did; pass index=False if that column is not wanted.
    df.to_csv('pandas_challenge_output.csv')
    return df
47
+
48
+
49
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, so importing this
    # module does not trigger file I/O.
    process_df('challenge2_input.csv')
0 commit comments