diff --git a/countFunctionCalls.go b/countFunctionCalls.go new file mode 100644 index 0000000..abc8d8e --- /dev/null +++ b/countFunctionCalls.go @@ -0,0 +1,169 @@ +package main + +import ( + "bytes" +) + +func beginsIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_' +} + +func insideIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_' +} + +type tokenType int + +const ( + endOfString tokenType = -1 + identifier tokenType = iota + somethingElse +) + +// A "tokenizer" that removes characters to be ignored and splits its input +// into things that look like identifiers and all other characters. +// +// It could be replaced by a more complete tokenizer. One that takes care of +// comments and strings for example. +type tokenizer struct { + text []byte + toBeIgnored []byte +} + +func byteInSlice(b byte, slice []byte) bool { + for _, c := range slice { + if b == c { + return true + } + } + return false +} + +// Get next token and text slice that goes with it +func (r *tokenizer) Next() (token tokenType, text []byte) { + + for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) { + r.text = r.text[1:] + } + + if len(r.text) == 0 { + return endOfString, nil + } + + if beginsIdentifier(r.text[0]) { + var i = 1 + for i < len(r.text) && insideIdentifier(r.text[i]) { + i++ + } + var result = r.text[0:i] + r.text = r.text[i:] + return identifier, result + } + + var result = r.text[:1] + r.text = r.text[1:] + return somethingElse, result + +} + +func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + var whitespace = []byte{ + ' ', + '\t', + '\n', + '\r', + '\f', + } + + var keywords = map[string]bool{ + "if": true, + "for": true, + "while": true, + "else": true, + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + whitespace, + } + + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' 
'}, {' '}} + + for { + tok, s := tokenizer.Next() + if tok == endOfString { + return + } + + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + + if !(tokens[0] == identifier && !keywords[string(strings[0])]) && + tokens[1] == identifier && !keywords[string(strings[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { + (*counts)[string(strings[1])]++ + } + } +} + +func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + // Since the open parenthesis for a function call must be on the same line as + // the name, I only ignore spaces and tabs. + var whitespace = []byte{ + ' ', + '\t', + } + + var keywords = map[string]bool{ + "if": true, + "in": true, + "or": true, + "and": true, + "for": true, + "while": true, + "else": true, + "elif": true, + "def": true, + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + whitespace, + } + + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' '}, {' '}} + + for { + tok, s := tokenizer.Next() + if tok == endOfString { + return + } + + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + + if !(tokens[0] == identifier && string(strings[0]) == "def") && + tokens[1] == identifier && !keywords[string(tokens[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { + (*counts)[string(strings[1])]++ + } + } +} + +//Given a bytes.Buffer containing a code segment, its extension, and a map to +//use for counting, counts the function calls +func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) { + switch ext { + case ".c", ".h": + countCFunctionCalls(buffer, counts) + case ".py": + countPythonFunctionCalls(buffer, counts) + + default: + + } +} diff --git a/main.go b/main.go index f62aa00..841ceeb 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,13 @@ package main import ( + 
"bufio" + "bytes" "fmt" + "log" + "os" + "path/filepath" + "strings" "time" ) @@ -32,6 +38,138 @@ func main() { // number of line deleted // list of function calls seen in the diffs and their number of calls func compute() *result { + var r result + var functionCallsBefore = make(map[string]int) + var functionCallsAfter = make(map[string]int) + r.functionCalls = make(map[string]struct{ before, after int }) - return nil + // I use sets instead of lists for files that we've seen + var seenFiles = make(map[string]struct{}) + + // When reading in a region, I will be reading it into these buffers + var currentRegionBefore, currentRegionAfter bytes.Buffer + + // Extensions for the file. Used to decide how to count functions + var currentExtensionBefore, currentExtensionAfter string + + // Here I create a small state machine using state functions to read the + // relevent info from the diff files. + type stateFn func(line string) stateFn + var processFileHeaderLine, + processRegionHeaderLine, + processCodeLine stateFn + + processFileHeaderLine = func(line string) stateFn { + if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { + + var fileName = line[len("--- "):] + if fileName != "/dev/null" { + fileName = fileName[len("a/"):] + } + + seenFiles[fileName] = struct{}{} + + var fileType = filepath.Ext(fileName) + if fileType == "" { + // If something doesn't have an extension, we assume the name itself + // is significant, like "Makefile" + fileType = filepath.Base(fileName) + } + if fileName == "/dev/null" { + fileType = "/dev/null" + } + if line[0] == '-' { + currentExtensionBefore = fileType + } else { + currentExtensionAfter = fileType + } + + } else if strings.HasPrefix(line, "@@") { + return processRegionHeaderLine(line) + } + return processFileHeaderLine + } + + processRegionHeaderLine = func(line string) stateFn { + r.regions++ + return processCodeLine + } + + processCodeLine = func(line string) stateFn { + if line[0] == ' ' { + 
currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") + } else if line[0] == '-' { + r.lineDeleted++ + currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") + } else if line[0] == '+' { + r.lineAdded++ + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") + } else { + // If we finished reading in the region, we process it + countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) + countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) + currentRegionBefore.Reset() + currentRegionAfter.Reset() + + if strings.HasPrefix(line, "@@") { + return processRegionHeaderLine(line) + } else { + return processFileHeaderLine(line) + } + } + return processCodeLine + } + + diffnames, err := filepath.Glob("./diffs/*.diff") + if err != nil { + log.Fatal(err) + } + + for _, diffname := range diffnames { + + diffFile, err := os.Open(diffname) + if err != nil { + log.Fatal(err) + } + + scanner := bufio.NewScanner(diffFile) + + var state = processFileHeaderLine + for scanner.Scan() { + line := scanner.Text() + + state = state(line) + } + // Process the last region + countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) + countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) + currentRegionBefore.Reset() + currentRegionAfter.Reset() + + diffFile.Close() + } + + // Turn set into list + for name, _ := range seenFiles { + r.files = append(r.files, name) + } + + // Combine the two functionCalls maps into one + for name, times := range functionCallsBefore { + var prev = r.functionCalls[name] + prev.before += times + r.functionCalls[name] = prev + } + for name, times := range functionCallsAfter { + var prev = r.functionCalls[name] + prev.after += times + r.functionCalls[name] = prev + } + + return &r } diff --git a/result.go 
b/result.go index 7e78236..ddfd8cb 100644 --- a/result.go +++ b/result.go @@ -9,14 +9,14 @@ import ( type result struct { //The name of the files seen files []string - //How many region we have (i.e. seperated by @@) + //How many regions we have (i.e. separated by @@) regions int //How many line were added total lineAdded int //How many line were deleted totla lineDeleted int - //How many times the function seen in the code are called. - functionCalls map[string]int + //How many times the functions seen in the code are called before and after + functionCalls map[string]struct{ before, after int } } //String returns the value of results as a formated string @@ -33,9 +33,15 @@ func (r *result) String() string { r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) - buffer.WriteString("Functions calls: \n") + buffer.WriteString("Function calls (before, after): \n") for key, value := range r.functionCalls { - r.appendIntValueToBuffer(value, key, &buffer) + buffer.WriteString("\t") + buffer.WriteString(key) + buffer.WriteString(" : ") + buffer.WriteString(strconv.Itoa(value.before)) + buffer.WriteString(", ") + buffer.WriteString(strconv.Itoa(value.after)) + buffer.WriteString("\n") } return buffer.String()