Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions Gopkg.lock

This file was deleted.

22 changes: 0 additions & 22 deletions Gopkg.toml

This file was deleted.

21 changes: 21 additions & 0 deletions Thoughts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
A place to convey my thoughts.

Feb 4th 2019\
DiffParser is mostly functionally "complete", but some of the regexes are hard-coded and untested.\
Before moving to ASTParser I will do a small check. Then once both AST and Diff are complete, I will do another sweep to refactor the code.\
- I tried allowing users to input a flag they wanted to log information on, but letting users manipulate the regex seems to open up the program a bit too much; instead, that information should be kept within the program.\
- Is there any way I can group the list of "if" statements? \
- I think I have misunderstood what functionCalls is asking for.\
- Commas are optional for regions\
- Replaced [^n]* with .*
- For some reason it catches 196480 in * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80). as a function call.\

Feb 16th 2019\
DiffParser FunctionCall is still incorrect, but I have moved on to ASTParser.\
ASTParser seems relatively simple, because we are only looking for declared variables.
- Recursive traversal of AST should return a node instead of variable_declaration
- Should variable_declaration be in ast_result? Would a tuple suffice?

Feb 21st 2019\
DiffParser is complete, but functionCall captures "8" as a function call when parsing assembly offsets such as 8(%rdi).
ASTParser is complete, not sure if the tree traversal is too hardcoded.
34 changes: 0 additions & 34 deletions astResult.go

This file was deleted.

64 changes: 64 additions & 0 deletions ast_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import json
from ast_result import ASTResult

# *Goal*
# Parse an AST to return all the declared variables in the following format
# {int}{myInt}
# {string}{myString}
# {Foo}{myFooObject}


class ASTParser:
    """Collects the variable declarations found in a JSON-serialised AST."""

    def __init__(self):
        print("ASTParser created")

    def parse(self, file):
        """Read a JSON AST from *file* and return its declared variables.

        *file* is handed straight to ``json.load``. The decoded document must
        have a ``'Root'`` entry, which is used as the starting node of the
        search.

        Returns an ``ASTResult`` whose ``variableDeclarations`` attribute holds
        whatever ``node_to_var`` produces for the ``'VariableDeclaration'``
        nodes found under the root.
        """
        ast_root = json.load(file)['Root']

        # traverse_search reports its matches by appending to the list it is
        # given, so the list has to exist before the call.
        declaration_nodes = []
        traverse_search(ast_root, 'VariableDeclaration', declaration_nodes)

        result = ASTResult()
        result.variableDeclarations = node_to_var(declaration_nodes)
        return result


def node_to_var(var_nodes):
    """Turn each variable-declaration node into a ``(type, name)`` tuple.

    The type is the value found under the node's ``'PredefinedType'`` entry and
    the name is the value found under its ``'VariableDeclarator'`` entry, both
    looked up with ``find_val`` (which yields ``None`` when nothing matches).
    When the declaration contains an ``'ArrayCreationExpression'`` node, ``"[]"``
    is appended to the type.

    Returns a list with one tuple per entry of *var_nodes*, in the same order.
    """

    def describe(declaration):
        # Any array-creation expression under the declaration marks it as an
        # array variable.
        array_nodes = []
        traverse_search(declaration, 'ArrayCreationExpression', array_nodes)
        type_name = find_val(declaration, 'PredefinedType')
        identifier = find_val(declaration, 'VariableDeclarator')
        if array_nodes:
            type_name = type_name + "[]"
        return (type_name, identifier)

    return [describe(declaration) for declaration in var_nodes]


def traverse_search(root, look_for, result_list):
    """
    Recursive, depth-first traversal of the AST below *root* that collects
    every node whose 'Type' equals *look_for*.

    Matches are appended to *result_list* in document order. The caller's list
    is mutated in place (the function itself returns None), which is how the
    results are handed back.

    A matching node is not searched any further, so matches nested inside
    another match are not reported. *root* itself is never tested, only its
    descendants.

    A node without a 'Children' entry (or with a null one) is treated as
    having no children instead of raising KeyError/TypeError.
    """
    for child in root.get('Children') or ():
        if child['Type'] == look_for:
            result_list.append(child)
        else:
            traverse_search(child, look_for, result_list)


def find_val(var_node, look_for):
    """
    Return the 'ValueText' of the first child of the first node of type
    *look_for* found below *var_node* (located with traverse_search()).

    Used to read:
    - the variable name, found under a 'VariableDeclarator' node
    - the variable type, found under a 'PredefinedType' node

    Returns None when no node of the requested type exists.
    """
    matches = []
    traverse_search(var_node, look_for, matches)
    if not matches:
        return None
    first_match = matches[0]
    return first_match['Children'][0]['ValueText']

8 changes: 8 additions & 0 deletions ast_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class ASTResult:
    """Container for the variable declarations extracted from an AST."""

    def __init__(self):
        # Sequence of entries whose first two items are the variable's type
        # and name, both strings.
        self.variableDeclarations = []

    def to_text(self):
        """Write every declaration to ``astResult.txt`` as ``{type}{name}``.

        The file is created (or overwritten) in the current working directory
        and receives one line per declaration.
        """
        with open('astResult.txt', 'w') as output:
            output.writelines(
                "{" + entry[0] + "}{" + entry[1] + "}\n"
                for entry in self.variableDeclarations
            )
50 changes: 0 additions & 50 deletions diffResult.go

This file was deleted.

59 changes: 59 additions & 0 deletions diff_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import re
from diff_result import DiffResult
from difflib import SequenceMatcher

# *Goal*
# Parse a diff file in the most efficient way possible.
# Keep these in mind, speed, maintainability, evolvability, etc....
# Compute the following
# - List of files in the diffs
# - number of regions
# - number of lines added
# - number of lines deleted
# - list of function calls seen in the diffs and their number of calls


class DiffParser:
    """Computes summary statistics for a unified diff.

    The statistics are the files touched, the number of regions (hunks), the
    number of added and deleted lines, and how often each function call
    appears.
    """

    # Patterns are compiled once at class creation instead of being looked up
    # several times for every line of the diff.
    # "diff --git a/path b/path" style file header; group 1 is "a/path b/path".
    _FILE_HEADER_RE = re.compile(r'^diff --[^\s]* (.*)')
    # "@@ -start[,count] +start[,count] @@" region (hunk) header.
    _REGION_RE = re.compile(r'^@@ -\d+(,\d+)? \+\d+(,\d+)? @@.*')
    # An identifier directly followed by "(" and preceded by whitespace or a
    # dot. The first character must not be a digit, so assembly offsets such
    # as "8(%rdi)" are not reported as calls.
    _FUNCTION_CALL_RE = re.compile(r'(?<=[\s.])([^\W\d]\w*)(?=\()')

    def __init__(self):
        print("DiffParser created")

    def parse(self, file):
        """Parse the diff readable from *file* and return a ``DiffResult``.

        *file* must provide ``readlines()``.

        The returned object is filled as follows:
        - ``files``: one entry per ``diff --...`` header (see ``_shared_path``)
        - ``regions``: number of ``@@ ... @@`` headers
        - ``lineAdded`` / ``lineDeleted``: lines starting with ``+`` / ``-``
          inside a region
        - ``functionCalls``: occurrences of every call-like token on any line

        Raises ValueError when a ``diff --...`` header does not name two paths.
        """
        diff_res = DiffResult()

        # Added/deleted lines are only counted between a region header and the
        # next file header. This keeps the "--- <filename>" / "+++ <filename>"
        # header lines out of the counts however many extra header lines
        # (index, new file mode, ...) a file section carries.
        in_region = False
        for line in file.readlines():
            header = self._FILE_HEADER_RE.search(line)
            if header:
                in_region = False
                diff_res.files.append(self._shared_path(header.group(1), line))
            elif self._REGION_RE.search(line):
                diff_res.regions += 1
                in_region = True
            elif in_region:
                if line.startswith('+'):
                    diff_res.lineAdded += 1
                elif line.startswith('-'):
                    diff_res.lineDeleted += 1

            # Every call on the line is counted, not just the first one.
            for call in self._FUNCTION_CALL_RE.finditer(line):
                diff_res.functionCalls[call.group(1)] += 1
        return diff_res

    @staticmethod
    def _shared_path(paths, line):
        """Return the longest substring shared by the two paths in *paths*.

        *paths* is the text after ``diff --xxx ``; its first two
        space-separated tokens are compared. *line* is only used for the error
        message.
        """
        parts = paths.split(" ")
        if len(parts) < 2 or not parts[0] or not parts[1]:
            raise ValueError("malformed diff header: " + line.rstrip())
        path_a, path_b = parts[0], parts[1]
        # autojunk is disabled so that frequently repeated characters in very
        # long paths are not discarded from the comparison.
        matcher = SequenceMatcher(None, path_a, path_b, autojunk=False)
        match = matcher.find_longest_match(0, len(path_a), 0, len(path_b))
        return path_a[match.a: match.a + match.size]

31 changes: 31 additions & 0 deletions diff_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from collections import defaultdict

class DiffResult:
    """Container for the statistics gathered while parsing a diff."""

    def __init__(self):
        self.files = []                        # file names seen in the diff
        self.regions = 0                       # number of regions
        self.lineAdded = 0                     # number of added lines
        self.lineDeleted = 0                   # number of deleted lines
        self.functionCalls = defaultdict(int)  # call name -> occurrences

    def to_text(self):
        """Write a plain-text report of the statistics to ``diffResult.txt``.

        The file is created (or overwritten) in the current working directory.
        """
        with open('diffResult.txt', 'w') as output:
            report = ["Files: \n"]
            report.extend(" - " + name + "\n" for name in self.files)
            report.append("Regions: " + str(self.regions) + "\n")
            report.append("Lines Added: " + str(self.lineAdded) + "\n")
            report.append("Lines Deleted: " + str(self.lineDeleted) + "\n")
            report.append("Function Calls: \n")
            report.extend(
                " - " + name + ": " + str(count) + "\n"
                for name, count in self.functionCalls.items()
            )
            output.write("".join(report))






Loading