Skip to content

Commit 58a06e4

Browse files
committed
refactored more functions and fixed bugs
1 parent 6eb554c commit 58a06e4

File tree

2 files changed

+62
-86
lines changed

2 files changed

+62
-86
lines changed

llvm_ir_dataset_utils/tools/opt_analysis_tools.py

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,7 @@ def parse_pass_analysis_exec(
290290
else:
291291
data = read_data(output_file_path, bitcode_file, opt).split("\n")
292292

293-
if relative: # include both relative and absolute
294-
result = {
295-
"pass-exec": None,
296-
"analysis-exec": None,
297-
}
298-
else:
299-
result = {"pass-exec": None, "analysis-exec": None}
293+
result = {"pass-exec": None, "analysis-exec": None}
300294

301295
line_start = find_start_line(data)
302296
if line_start is None:
@@ -334,7 +328,7 @@ def parse_pass_analysis_exec(
334328

335329
def read_data(file_path: str, bitcode_file: bool, opt: str):
336330
command = [
337-
"/p/lustre1/khoidng/LLVM/build/bin/opt",
331+
"opt",
338332
"-" + opt,
339333
"--stats",
340334
"--disable-output",
@@ -359,13 +353,12 @@ def read_data(file_path: str, bitcode_file: bool, opt: str):
359353

360354
def read_data_bc(bitcode_module, opt: str):
361355
command = [
362-
"/p/lustre1/khoidng/LLVM/build/bin/opt",
356+
"opt",
363357
"-" + opt,
364358
"--stats",
365359
"--disable-output",
366360
"--time-passes",
367-
] # TODO: replace the hardcoded opt path with something more flexible
368-
361+
]
369362
with subprocess.Popen(
370363
command,
371364
stdout=subprocess.PIPE,
@@ -394,7 +387,6 @@ def sampling(
394387
bitcode_file: bool = False,
395388
opt: str = "O3",
396389
):
397-
# available_pass_list = import_pass_from_file('./opt_passes.txt')
398390
wall_time = {k: [] for k in OPT_O3_PASS_LIST}
399391
passes = set(OPT_O3_PASS_LIST)
400392
files = listdir(dir_path)
@@ -459,6 +451,19 @@ def sampling_csv(
459451
return result
460452

461453

454+
"""
455+
source_dir: source directory of bitcode files
456+
fp: name for output csv file
457+
nsamples: number of files to sample
458+
ncols: number of columns for output file table
459+
relative (deprecated, don't use): whether the output data table reports relative time or absolute time
460+
col_labels: list of labels for the output data table (recommended: [abs_time, rel_time, pass])
461+
bitcode_file: whether the files in source_dir are bitcode files
462+
opt: optimization pipeline
463+
data_type: output transformation pass ('pass-exec') or analysis pass ('analysis-exec') data
464+
"""
465+
466+
462467
def sample_then_export_csv(
463468
source_dir: str,
464469
fp: str,
@@ -475,27 +480,6 @@ def sample_then_export_csv(
475480
return export_to_csv(o, fp, ncols, col_labels)
476481

477482

478-
def export_pass_name(pass_collection: list[str], fp, append=True):
479-
# append_cond = lambda append: 'a' if append else 'w'
480-
def append_cond(append):
481-
if append:
482-
return "a"
483-
return "w"
484-
485-
with open(fp, append_cond) as f:
486-
for i in pass_collection:
487-
f.write(i)
488-
f.write("\n")
489-
return 0
490-
491-
492-
def import_pass_from_file(fp, delimeter="\n"):
493-
data = None
494-
with open(fp, "r") as f:
495-
data = f.read().split(delimeter)
496-
return data
497-
498-
499483
def export_to_json(data: Union[str, list[float]], fn: str = "", indent: int = 2):
500484
out = json.dumps(data, indent=indent)
501485
name = fn if fn != "" else "json_file.json"
@@ -659,7 +643,7 @@ def cat_encode(data: Union[str, list[float]]):
659643
return (result, encoding)
660644

661645

662-
def download_bitcode(target_dir: str, languages: list[str], n: int = -1):
646+
def download_bitcode(target_dir: str, languages: list[str], n: int = -1, random=False):
663647
if not isinstance(languages, list):
664648
print("languages arg has to be list type")
665649
return 1
@@ -685,6 +669,7 @@ def download_bitcode(target_dir: str, languages: list[str], n: int = -1):
685669
print("converted to python iterable")
686670
counter = 0
687671
spin_text = "Downloading bitcode files"
672+
688673
with yaspin(text=spin_text) as sp:
689674
while (n != -1 and counter < n) or (row is not None and n == -1):
690675
if row["language"] in lang_set:

llvm_ir_dataset_utils/tools/outlier_analysis.py

Lines changed: 43 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
from os import listdir
55
from os.path import join
6-
from typing import List, Union, Callable
6+
from typing import List
77
import pandas as pd
88
import parallelbar
99
import ray
@@ -17,6 +17,13 @@
1717

1818
num_cpus = psutil.cpu_count(logical=False)
1919

20+
opt_load_args = [
21+
"opt",
22+
"-load",
23+
"RemoveFunctionBodyPass/build/libRemoveFunctionBody.so",
24+
"-load-pass-plugin=RemoveFunctionBodyPass/build/libRemoveFunctionBody.so",
25+
"-passes=remove-fn-body",
26+
]
2027

2128
"""
2229
inputs:
@@ -30,9 +37,9 @@ def remove_fn(i: int, src: str, dst: str):
3037
if not (os.path.isfile(src) or os.path.exists(os.path.dirname(dst))):
3138
print("invalid file path in either src or dst argument")
3239
return None
33-
command = [
34-
"/p/lustre1/khoidng/LLVM/build/bin/opt", # TODO: replace this
35-
f"-passes=remove-fn-body<i={i}>",
40+
command = opt_load_args + [
41+
"-index",
42+
f"{i}",
3643
src,
3744
"-o",
3845
dst,
@@ -55,15 +62,16 @@ def remove_fn(i: int, src: str, dst: str):
5562

5663

5764
def remove_fn_bc(i: int, bc):
58-
if i == -1:
59-
print("Currently not supporting i == -1")
65+
if i < 0:
66+
print("No negative index!")
6067
return None
6168
try:
6269
with subprocess.Popen(
63-
[
64-
"/p/lustre1/khoidng/LLVM/build/bin/opt",
65-
f"-passes=remove-fn-body<i={i}>",
66-
], # TODO: replace this
70+
opt_load_args
71+
+ [
72+
"-index",
73+
f"{i}",
74+
],
6775
stdout=subprocess.PIPE,
6876
stderr=subprocess.STDOUT,
6977
stdin=subprocess.PIPE,
@@ -85,9 +93,10 @@ def get_n_functions(file_path: str):
8593
bc = f.read()
8694
try:
8795
with subprocess.Popen(
88-
[
89-
"/p/lustre1/khoidng/LLVM/build/bin/opt", # TODO: replace this
90-
"-passes=remove-fn-body<i=-1>",
96+
opt_load_args
97+
+ [
98+
"-index",
99+
"-1",
91100
"--disable-output",
92101
],
93102
stdout=subprocess.PIPE,
@@ -108,9 +117,10 @@ def get_n_functions(file_path: str):
108117

109118
def get_ir(bitcode_module):
110119
with subprocess.Popen(
111-
[
112-
"/p/lustre1/khoidng/LLVM/build/bin/opt", # TODO: replace this
113-
"-passes=remove-fn-body<i=-1>",
120+
opt_load_args
121+
+ [
122+
"-index",
123+
"-1",
114124
"-S",
115125
],
116126
stdout=subprocess.PIPE,
@@ -121,9 +131,12 @@ def get_ir(bitcode_module):
121131

122132

123133
"""
124-
- outlier_check_fn: must be a callable function. Requires at least time analysis data and ref_data as arguments.
125-
must return True if outlier, and False otherwise.
126-
- Return: number of functions removed, total number of functions, average fraction of passes being outliers if ith function is removed
134+
Non-optimized version of get outliers.
135+
Inputs:
136+
- file_path: path to bitcode file
137+
- opt: O1, O2, O3, Oz,... optimization options (case-sensitive)
138+
- outlier_threshold: ratio of number of outlier passes over total number of passes
139+
- quantile: percentile of passes given runtime data to be considered outlier for every pass
127140
"""
128141

129142

@@ -136,9 +149,11 @@ def get_outliers(file_path: str, opt: str, outlier_threshold=1, quantile=0.95):
136149
os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
137150

138151
ref_data = pd.read_csv(
139-
f"{opt.lower()}_cpp.csv"
152+
f"pass_runtime/transformations/{opt.lower()}_cpp.csv"
140153
) # TODO: filename needs to be abstracted away!
141-
154+
groups = ref_data.groupby("pass").quantile(
155+
q=quantile
156+
) # TODO: put this outside of the loop
142157
bc = None
143158
n_removed = 0
144159
src_path = file_path
@@ -155,11 +170,6 @@ def get_outliers(file_path: str, opt: str, outlier_threshold=1, quantile=0.95):
155170
# this ignores the src_path and only takes the bitcode module
156171
time_analysis_tmp = parse_pass_analysis_exec(src_path, True, True, opt, tmp)
157172

158-
# pd.DataFrame consisting of quantiles grouped by passes
159-
groups = ref_data.groupby("pass").quantile(
160-
q=quantile
161-
) # TODO: put this outside of the loop
162-
163173
# if removed function still results in module being in outlier range,
164174
# delete that function because it doesn't affect the outliers
165175
# (minimize functions in module such that outliers are preserved)
@@ -201,8 +211,6 @@ def check_outliers(time, pass_name, ref_data, time_col="fraction_total_time"):
201211
def is_outlier(
202212
data,
203213
ref_data: pd.DataFrame,
204-
quantile=0.95,
205-
pass_col="pass",
206214
time_col="fraction_total_time",
207215
threshold=1,
208216
):
@@ -252,7 +260,7 @@ def preserve_outliers_dir(dir_path: str, opt: str, outlier_threshold=1):
252260
outlier functions extracted.
253261
254262
Inputs:
255-
- file_path: path to bitcode module file.
263+
- file_path: absolute path to bitcode module file.
256264
- opt: optimization pipeline in {O1,O2,O3,Oz}.
257265
- pass_name: pass name in which outlier threshold used to extract
258266
outlier functions.
@@ -287,7 +295,9 @@ def get_outliers_pass_specific(
287295
os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
288296

289297
if ref_data is None:
290-
ref_data = pd.read_csv(f"{opt.lower()}_cpp.csv") # TODO: optimize this away
298+
ref_data = pd.read_csv(
299+
f"pass_runtime/transformations/{opt.lower()}_cpp.csv"
300+
) # TODO: abstract this away. currently hard coding transformations pass data
291301

292302
bc = None
293303
n_removed = 0
@@ -388,7 +398,9 @@ def preserve_outliers_dir_pass_specific(
388398
if ref_data is not None:
389399
df = ref_data
390400
else:
391-
df = pd.read_csv(f"{opt.lower()}_cpp.csv") # TODO: change this
401+
df = pd.read_csv(
402+
f"pass_runtime/transformations/{opt.lower()}_cpp.csv"
403+
) # TODO: change this
392404
data = []
393405
s = time.time()
394406
results = [
@@ -461,27 +473,6 @@ def check_correctness(
461473
/ n_files
462474
)
463475

464-
# for i in range(len(passes)):
465-
# # print(f"{p}", flush=True)
466-
# # tmp_dir_name = f"tmp_{p.split('<')[0]}"
467-
# # print(f"in directory {tmp_dir_name}", flush=True)
468-
# # result = ray.get(
469-
# # preserve_outliers_dir_pass_specific.remote(
470-
# # dir_path, opt, p, quantile, fp_list=fp, dst=tmp_dir_name
471-
# # )
472-
# # )
473-
# result_dict = Counter(results[i])
474-
# data[passes[i]] = (
475-
# 1
476-
# - (
477-
# result_dict[(0, 0)]
478-
# + result_dict[(-1, -1)]
479-
# + result_dict[(-2, -2)]
480-
# + result_dict[(-3, -3)]
481-
# )
482-
# / n_files
483-
# )
484-
485476
return data
486477

487478

0 commit comments

Comments
 (0)