Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# cpp object files
*.o

# results files
results/*.csv

# preprocessed data labels
data/*/Label_*_*.csv

# main executable file
main

# idea
.idea
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,15 @@ AnoGraph and AnoGraph-K detect graph anomalies by first mapping the graph to a h

## Demo

1. To run on DARPA dataset `bash demo.sh DARPA`
2. To run on ISCX dataset `bash demo.sh ISCX`
1. To run on DARPA dataset
```
bash demo.sh DARPA
```

2. To run on ISCX dataset
```
bash demo.sh ISCX
```

## Datasets
1. [DARPA](http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html)
Expand Down
26 changes: 26 additions & 0 deletions code/README.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# توضیح بخش‌های مختلف کد

## process_data.py

در این فایل پردازش داده‌ها به صورت زیر انجام می‌شود.

داده‌های زمانی براساس پارامتر مشخصی گسسته‌سازی می‌شوند و به ازای هر
time_stamp
زمان یک برچسب مشخص می‌شود.

روش تصمیم‌گیری برای برچسب به این‌صورت است که اگر بیش از تعداد مشخصی از یال‌های آن دسته‌ی زمانی برچسب مثبت داشته باشند آن لحظه‌ی زمانی به صورت کامل مثبت اعلام می‌شود.

```python
def generate_final_labels(edge_threshold, record_labels, time_param):
    """Collapse per-record labels into one binary label per discretized time bucket.

    Args:
        edge_threshold: Minimum sum of column-3 values a bucket needs in order
            to be labelled 1.
        record_labels: Sequence of equally sized records. Column 2 is read as
            the timestamp and column 3 as the per-record label (presumably
            0/1 -- confirm against the caller).
        time_param: Divisor used to discretize timestamps; the bucket id is
            ``int(timestamp / time_param)``.

    Returns:
        A 1-D integer numpy array with one entry per distinct bucket, in order
        of first appearance: 1 where the bucket's label sum is at least
        ``edge_threshold``, otherwise 0. Empty input yields an empty array.
    """
    data = pd.DataFrame(np.array(record_labels))
    if data.empty:
        # No records means no buckets; column 2 would not exist below.
        return np.array([], dtype=int)

    data[2] = (data[2] / time_param).astype(int)

    # Single pass over the records; sort=False keeps first-appearance order,
    # which is the order pd.unique() would report the buckets in.
    positives_per_bucket = data.groupby(2, sort=False)[3].sum().to_numpy()

    # Multiplying the boolean mask by 1 converts it to integer 0/1 labels.
    return (positives_per_bucket >= edge_threshold) * 1
```
26 changes: 13 additions & 13 deletions code/anoedgeglobal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ vector<double> AnoedgeGlobal::getScores() {
vector<double> scores;
Hcms count(rows, buckets);

int num_records = src.size();
size_t num_records = src.size();
int last_time = 0;

for (int i = 0; i < num_records; i++) {
for (size_t i = 0; i < num_records; i++) {
if (times[i] - last_time > 0) {
count.decay(decay_factor);
}
Expand All @@ -51,20 +51,20 @@ void AnoedgeGlobal::run() {
}

double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int src, int dst) {
int num_rows = mat.size();
int num_cols = mat[0].size();
size_t num_rows = mat.size();
size_t num_cols = mat[0].size();

bool row_flag[num_rows];
bool col_flag[num_cols];

double row_slice_sum[num_rows];
double col_slice_sum[num_cols];

for (int i = 0; i < num_rows; i++) {
for (size_t i = 0; i < num_rows; i++) {
row_flag[i] = false;
row_slice_sum[i] = mat[i][dst];
}
for (int i = 0; i < num_cols; i++) {
for (size_t i = 0; i < num_cols; i++) {
col_flag[i] = false;
col_slice_sum[i] = mat[src][i];
}
Expand All @@ -75,14 +75,14 @@ double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int s
col_slice_sum[dst] = mat[src][dst];

pair<int, double> max_row = {-1, -1.0};
for (int i = 0; i < num_rows; i++) {
for (size_t i = 0; i < num_rows; i++) {
if (!row_flag[i] && (row_slice_sum[i] >= max_row.second)) {
max_row = {i, row_slice_sum[i]};
}
}

pair<int, double> max_col = {-1, -1.0};
for (int i = 0; i < num_cols; i++) {
for (size_t i = 0; i < num_cols; i++) {
if (!col_flag[i] && (col_slice_sum[i] >= max_col.second)) {
max_col = {i, col_slice_sum[i]};
}
Expand All @@ -94,14 +94,14 @@ double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int s
double cur_mat_sum = mat[src][dst];
double output = cur_mat_sum/sqrt(marked_rows*marked_cols);

int ctr = num_rows + num_cols - 2;
size_t ctr = num_rows + num_cols - 2;
while (ctr--) {
if (max_row.second >= max_col.second) {
row_flag[max_row.first] = true;
marked_rows++;

max_col = {-1, -1.0};
for (int i = 0; i < num_cols; i++) {
for (size_t i = 0; i < num_cols; i++) {
if (col_flag[i]) {
cur_mat_sum = cur_mat_sum + mat[max_row.first][i];
} else {
Expand All @@ -113,7 +113,7 @@ double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int s
}

max_row = {-1, -1.0};
for (int i = 0; i < num_rows; i++) {
for (size_t i = 0; i < num_rows; i++) {
if (!row_flag[i] && (row_slice_sum[i] >= max_row.second)) {
max_row = {i, row_slice_sum[i]};
}
Expand All @@ -123,7 +123,7 @@ double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int s
marked_cols++;

max_row = {-1, -1.0};
for (int i = 0; i < num_rows; i++) {
for (size_t i = 0; i < num_rows; i++) {
if (row_flag[i]) {
cur_mat_sum = cur_mat_sum + mat[i][max_col.first];
} else {
Expand All @@ -135,7 +135,7 @@ double AnoedgeGlobal::getAnoedgeglobalDensity(vector<vector<double>>& mat, int s
}

max_col = {-1, -1.0};
for (int i = 0; i < num_cols; i++) {
for (size_t i = 0; i < num_cols; i++) {
if (!col_flag[i] && (col_slice_sum[i] >= max_col.second)) {
max_col = {i, col_slice_sum[i]};
}
Expand Down
1 change: 1 addition & 0 deletions code/anograph.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <iostream>
#include <cmath>
#include <algorithm>

#include "anograph.hpp"
#include "hcms.hpp"
Expand Down
1 change: 1 addition & 0 deletions code/anographk.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <iostream>
#include <cmath>
#include <queue>
#include <algorithm>

#include "anographk.hpp"
#include "hcms.hpp"
Expand Down
14 changes: 14 additions & 0 deletions code/demo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,25 @@ if [ $1 == "DARPA" ]; then
./main anograph_k DARPA 30 50 2 32 5

echo "Running AnoEdge-G"
# Algorithm => anoedge_g
# Dataset => DARPA
# Rows => 2
# Buckets => 32
# Decay factor => 0.9
./main anoedge_g DARPA 2 32 0.9

echo "Running AnoEdge-L"
# Algorithm => anoedge_l
# Dataset => DARPA
# Rows => 2
# Buckets => 32
# Decay factor => 0.9
./main anoedge_l DARPA 2 32 0.9

echo "Installing python dependencies"
pip3 install -r requirements.txt

echo "Running python metrics"
python3 metrics.py --dataset DARPA --time_window 30 --edge_threshold 50

fi
Expand Down
1 change: 1 addition & 0 deletions code/hcms.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <iostream>
#include <algorithm>

#include "hcms.hpp"
#include "anoedgeglobal.hpp"
Expand Down
65 changes: 26 additions & 39 deletions code/metrics.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,48 @@
import pandas as pd
import argparse
from sklearn import metrics

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='DARPA')
parser.add_argument('--time_window', type=int, default=30)
parser.add_argument("--edge_threshold", type=int, default=50)
args = parser.parse_args()

results_path = "../results/"

def print_anoedge_auc_time(base_path, dataset_name, algorithm):
data = pd.read_csv(base_path + algorithm + "_" + dataset_name + "_score.csv", names=['score', 'label'], sep=" ")
time_values = pd.read_csv(base_path + algorithm + "_" + dataset_name + "_time.csv", names=['avg', 'total'], sep=" ")

fpr, tpr, _ = metrics.roc_curve(data.label, data.score)
auc = metrics.roc_auc_score(data.label, data.score)
def print_anoedge_auc_time(base_path, dataset_name, algorithm):
file_name = base_path + algorithm + "_" + dataset_name
print_auc_time(algorithm, dataset_name, file_name)

print ("%s,%s" % (algorithm, dataset_name))
print ("AUC: %.3f" % (auc))
print ("Time: %s\n" % (time_values["total"].iloc[1]))

def print_anograph_auc_time(base_path, dataset_name, time_window, edge_threshold, algorithm):
data = pd.read_csv(base_path + algorithm + "_" + dataset_name + "_" + str(time_window) + "_" + str(edge_threshold) + "_score.csv", names=['score', 'label'], sep=" ")
time_values = pd.read_csv(base_path + algorithm + "_" + dataset_name + "_" + str(time_window) + "_" + str(edge_threshold) + "_time.csv", names=['avg', 'total'], sep=" ")

fpr, tpr, _ = metrics.roc_curve(data.label, data.score)
auc = metrics.roc_auc_score(data.label, data.score)
file_name = base_path + algorithm + "_" + dataset_name + "_" + str(time_window) + "_" + str(edge_threshold)
print_auc_time(algorithm, dataset_name, file_name)

print ("%s,%s" % (algorithm, dataset_name))
print ("AUC: %.3f" % (auc))
print ("Time: %s\n" % (time_values["total"].iloc[1]))

def print_auc_time(algorithm, dataset_name, file_name):
data = pd.read_csv(file_name + "_score.csv", names=['score', 'label'], sep=" ")
time_values = pd.read_csv(file_name + "_time.csv", names=['avg', 'total'], sep=" ")

if __name__ == "__main__":
if args.dataset == 'DARPA':
print_anograph_auc_time("../results/", "DARPA", args.time_window, args.edge_threshold, "anograph")
print_anograph_auc_time("../results/", "DARPA", args.time_window, args.edge_threshold, "anograph_k")

print_anoedge_auc_time("../results/", "DARPA", "anoedge_g")
print_anoedge_auc_time("../results/", "DARPA", "anoedge_l")
fpr, tpr, _ = metrics.roc_curve(data.label, data.score)
auc = metrics.roc_auc_score(data.label, data.score)

if args.dataset == 'ISCX':
print_anograph_auc_time("../results/", "ISCX", args.time_window, args.edge_threshold, "anograph")
print_anograph_auc_time("../results/", "ISCX", args.time_window, args.edge_threshold, "anograph_k")
print("%s,%s" % (algorithm, dataset_name))
print("AUC: %.3f" % auc)
print("Time: %s\n" % (time_values["total"].iloc[1]))

print_anoedge_auc_time("../results/", "ISCX", "anoedge_g")
print_anoedge_auc_time("../results/", "ISCX", "anoedge_l")

if args.dataset == 'IDS2018':
print_anograph_auc_time("../results/", "IDS2018", args.time_window, args.edge_threshold, "anograph")
print_anograph_auc_time("../results/", "IDS2018", args.time_window, args.edge_threshold, "anograph_k")
def run_with_dataset(dataset_name):
print_anograph_auc_time(results_path, dataset_name, args.time_window, args.edge_threshold, "anograph")
print_anograph_auc_time(results_path, dataset_name, args.time_window, args.edge_threshold, "anograph_k")
print_anoedge_auc_time(results_path, dataset_name, "anoedge_g")
print_anoedge_auc_time(results_path, dataset_name, "anoedge_l")

print_anoedge_auc_time("../results/", "IDS2018", "anoedge_g")
print_anoedge_auc_time("../results/", "IDS2018", "anoedge_l")

if args.dataset == 'DDOS2019':
print_anograph_auc_time("../results/", "DDOS2019", args.time_window, args.edge_threshold, "anograph")
print_anograph_auc_time("../results/", "DDOS2019", args.time_window, args.edge_threshold, "anograph_k")

print_anoedge_auc_time("../results/", "DDOS2019", "anoedge_g")
print_anoedge_auc_time("../results/", "DDOS2019", "anoedge_l")
if __name__ == "__main__":
datasets = ["DARPA", "ISCX", "IDS2018", "DDOS2019"]
if args.dataset in datasets:
run_with_dataset(args.dataset)
else:
print(f"Could not detect dataset {args.dataset}")
Loading