33import os
44from os import listdir
55from os .path import join
6- from typing import List , Union , Callable
6+ from typing import List
77import pandas as pd
88import parallelbar
99import ray
1717
1818num_cpus = psutil .cpu_count (logical = False )
1919
20+ opt_load_args = [
21+ "opt" ,
22+ "-load" ,
23+ "RemoveFunctionBodyPass/build/libRemoveFunctionBody.so" ,
24+ "-load-pass-plugin=RemoveFunctionBodyPass/build/libRemoveFunctionBody.so" ,
25+ "-passes=remove-fn-body" ,
26+ ]
2027
2128"""
2229 inputs:
@@ -30,9 +37,9 @@ def remove_fn(i: int, src: str, dst: str):
3037 if not (os .path .isfile (src ) or os .path .exists (os .path .dirname (dst ))):
3138 print ("invalid file path in either src or dst argument" )
3239 return None
33- command = [
34- "/p/lustre1/khoidng/LLVM/build/bin/opt" , # TODO: replace this
35- f"-passes=remove-fn-body<i= { i } > " ,
40+ command = opt_load_args + [
41+ "-index" ,
42+ f"{ i } " ,
3643 src ,
3744 "-o" ,
3845 dst ,
@@ -55,15 +62,16 @@ def remove_fn(i: int, src: str, dst: str):
5562
5663
5764def remove_fn_bc (i : int , bc ):
58- if i == - 1 :
59- print ("Currently not supporting i == -1 " )
65+ if i < 0 :
66+ print ("No negative index! " )
6067 return None
6168 try :
6269 with subprocess .Popen (
63- [
64- "/p/lustre1/khoidng/LLVM/build/bin/opt" ,
65- f"-passes=remove-fn-body<i={ i } >" ,
66- ], # TODO: replace this
70+ opt_load_args
71+ + [
72+ "-index" ,
73+ f"{ i } " ,
74+ ],
6775 stdout = subprocess .PIPE ,
6876 stderr = subprocess .STDOUT ,
6977 stdin = subprocess .PIPE ,
@@ -85,9 +93,10 @@ def get_n_functions(file_path: str):
8593 bc = f .read ()
8694 try :
8795 with subprocess .Popen (
88- [
89- "/p/lustre1/khoidng/LLVM/build/bin/opt" , # TODO: replace this
90- "-passes=remove-fn-body<i=-1>" ,
96+ opt_load_args
97+ + [
98+ "-index" ,
99+ "-1" ,
91100 "--disable-output" ,
92101 ],
93102 stdout = subprocess .PIPE ,
@@ -108,9 +117,10 @@ def get_n_functions(file_path: str):
108117
109118def get_ir (bitcode_module ):
110119 with subprocess .Popen (
111- [
112- "/p/lustre1/khoidng/LLVM/build/bin/opt" , # TODO: replace this
113- "-passes=remove-fn-body<i=-1>" ,
120+ opt_load_args
121+ + [
122+ "-index" ,
123+ "-1" ,
114124 "-S" ,
115125 ],
116126 stdout = subprocess .PIPE ,
@@ -121,9 +131,12 @@ def get_ir(bitcode_module):
121131
122132
123133"""
124- - outlier_check_fn: must be a callable function. Requires at least time analysis data and ref_data as arguments.
125- must return True if outlier, and False otherwise.
126- - Return: number of functions removed, total number of functions, average fraction of passes being outliers if ith function is removed
134+ Non-optimized version of outlier extraction (get_outliers).
135+ Inputs:
136+ - file_path: path to the bitcode file
137+ - opt: optimization pipeline option, e.g. O1, O2, O3, Oz (case-sensitive)
138+ - outlier_threshold: ratio of the number of outlier passes to the total number of passes
139+ - quantile: quantile (e.g. 0.95) of the reference runtime data, taken per pass, used as that pass's outlier cutoff
127140"""
128141
129142
@@ -136,9 +149,11 @@ def get_outliers(file_path: str, opt: str, outlier_threshold=1, quantile=0.95):
136149 os .makedirs (os .path .dirname (tmp_path ), exist_ok = True )
137150
138151 ref_data = pd .read_csv (
139- f"{ opt .lower ()} _cpp.csv"
152+ f"pass_runtime/transformations/ { opt .lower ()} _cpp.csv"
140153 ) # TODO: filename needs to be abstracted away!
141-
154+ groups = ref_data .groupby ("pass" ).quantile (
155+ q = quantile
156+ ) # pd.DataFrame of quantiles grouped by pass; computed once here instead of inside the loop
142157 bc = None
143158 n_removed = 0
144159 src_path = file_path
@@ -155,11 +170,6 @@ def get_outliers(file_path: str, opt: str, outlier_threshold=1, quantile=0.95):
155170 # this ignores the src_path and only takes the bitcode module
156171 time_analysis_tmp = parse_pass_analysis_exec (src_path , True , True , opt , tmp )
157172
158- # pd.DataFrame consisting of quantiles grouped by passes
159- groups = ref_data .groupby ("pass" ).quantile (
160- q = quantile
161- ) # TODO: put this outside of the loop
162-
163173 # if removed function still results in module being in outlier range,
164174 # delete that function because it doesn't affect the outliers
165175 # (minimize functions in module such that outliers are preserved)
@@ -201,8 +211,6 @@ def check_outliers(time, pass_name, ref_data, time_col="fraction_total_time"):
201211def is_outlier (
202212 data ,
203213 ref_data : pd .DataFrame ,
204- quantile = 0.95 ,
205- pass_col = "pass" ,
206214 time_col = "fraction_total_time" ,
207215 threshold = 1 ,
208216):
@@ -252,7 +260,7 @@ def preserve_outliers_dir(dir_path: str, opt: str, outlier_threshold=1):
252260outlier functions extracted.
253261
254262Inputs:
255- - file_path: path to bitcode module file.
263+ - file_path: absolute path to bitcode module file.
256264- opt: optimization pipeline in {O1,O2,O3,Oz}.
257265- pass_name: pass name in which outlier threshold used to extract
258266 outlier functions.
@@ -287,7 +295,9 @@ def get_outliers_pass_specific(
287295 os .makedirs (os .path .dirname (tmp_path ), exist_ok = True )
288296
289297 if ref_data is None :
290- ref_data = pd .read_csv (f"{ opt .lower ()} _cpp.csv" ) # TODO: optimize this away
298+ ref_data = pd .read_csv (
299+ f"pass_runtime/transformations/{ opt .lower ()} _cpp.csv"
300+ ) # TODO: abstract this away; the path to the transformation-pass data is currently hard-coded
291301
292302 bc = None
293303 n_removed = 0
@@ -388,7 +398,9 @@ def preserve_outliers_dir_pass_specific(
388398 if ref_data is not None :
389399 df = ref_data
390400 else :
391- df = pd .read_csv (f"{ opt .lower ()} _cpp.csv" ) # TODO: change this
401+ df = pd .read_csv (
402+ f"pass_runtime/transformations/{ opt .lower ()} _cpp.csv"
403+ ) # TODO: change this
392404 data = []
393405 s = time .time ()
394406 results = [
@@ -461,27 +473,6 @@ def check_correctness(
461473 / n_files
462474 )
463475
464- # for i in range(len(passes)):
465- # # print(f"{p}", flush=True)
466- # # tmp_dir_name = f"tmp_{p.split('<')[0]}"
467- # # print(f"in directory {tmp_dir_name}", flush=True)
468- # # result = ray.get(
469- # # preserve_outliers_dir_pass_specific.remote(
470- # # dir_path, opt, p, quantile, fp_list=fp, dst=tmp_dir_name
471- # # )
472- # # )
473- # result_dict = Counter(results[i])
474- # data[passes[i]] = (
475- # 1
476- # - (
477- # result_dict[(0, 0)]
478- # + result_dict[(-1, -1)]
479- # + result_dict[(-2, -2)]
480- # + result_dict[(-3, -3)]
481- # )
482- # / n_files
483- # )
484-
485476 return data
486477
487478
0 commit comments