Spaces:
Runtime error
Runtime error
Commit
·
5b22695
1
Parent(s):
19e5079
App
Browse files
app.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""GradioInterface_v2.ipynb
|
3 |
+
Automatically generated by Colaboratory.
|
4 |
+
"""
|
5 |
+
|
6 |
+
# Commented out IPython magic to ensure Python compatibility.
|
7 |
+
# # Capture to supress the download ouput
|
8 |
+
# %%capture
|
9 |
+
# !pip install gradio
|
10 |
+
# !pip install pandas
|
11 |
+
# !pip install transformers
|
12 |
+
# !pip install parsezeeklogs
|
13 |
+
# !pip install elasticsearch
|
14 |
+
|
15 |
+
# Define imports for model use
|
16 |
+
import torch
|
17 |
+
from transformers import pipeline
|
18 |
+
from parsezeeklogs import ParseZeekLogs
|
19 |
+
from transformers import BertTokenizer
|
20 |
+
import gradio as gr
|
21 |
+
import pandas as pd
|
22 |
+
|
23 |
+
# Define model
# Loads the fine-tuned IoT-23 network-log classifier from the Hugging Face Hub,
# paired with the stock bert-base-cased tokenizer it was trained against.
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased"))

# Define string constants
# These strings double as UI labels and as keys in the dicts passed between
# the predict* helpers, so the same constant must be used in both places.
LOG = "conn.log Output"
HEADER_TABLE = "Headers Table"
SENTENCES = "Sentences"
OUT = "out"
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]  # choices for the input-type radio
STEPS = [HEADER_TABLE, SENTENCES]  # intermediary steps the user may display
# Zeek conn.log columns the model consumes, in the order used downstream.
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes']
|
34 |
+
|
35 |
+
# Define sentence-ization functions

# Dictionary of feature names to use in the make sentence function
# Maps raw Zeek column names to the human-readable phrases the model was
# trained on; make_sentence looks each column up here.
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
|
43 |
+
|
44 |
+
# Function to make sentences out of the data
def make_sentence(row):
    """Convert one row of header values into a dict of descriptive sentences.

    Pass-through columns ('label' and '#') are copied unchanged; every other
    column is rendered as "<friendly name> is <value>." via feature_names.
    """
    def _render(col):
        # '#' (index) and 'label' carry no feature text — keep them as-is.
        if col in ('label', '#'):
            return row[col]
        return f"{feature_names[col]} is {row[col]}."

    return {col: _render(col) for col in row.keys()}
|
53 |
+
|
54 |
+
# Take all sentence observations and make them into paragraph inputs
def make_paragraphs(ser):
    """Join each observation's per-feature sentences into one paragraph.

    ser: Series of dicts as produced by make_sentence.
    Returns a single-column DataFrame named "Sentences" sharing ser's index.
    """
    # Fixed feature order expected by the model input.
    ordered_keys = ('id.resp_p', 'proto', 'conn_state',
                    'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[key] for key in ordered_keys) for obs in ser]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
|
61 |
+
|
62 |
+
# Define prediction Functions For Different Settings
def predictFromSentences(sentenceTable):
    """Run the BERT pipeline over the sentence column and label each row.

    Returns {OUT: DataFrame} with one "Output" column holding
    "Malicious"/"Benign" strings.
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())  # model inference happens here
    labels = []
    for prediction in predictions:
        # LABEL_0 is the malicious class in this fine-tuned model.
        labels.append("Malicious" if prediction['label'] == "LABEL_0" else "Benign")
    return {OUT: pd.DataFrame({"Output": labels})}
|
66 |
+
|
67 |
+
def predictFromHeaderTable(headerTable):
    """Sentence-ize a table of raw header values, then classify each row.

    Returns a dict exposing both the intermediary SENTENCES frame and the
    final OUT predictions frame.
    """
    # Row-wise conversion: header values -> per-feature sentences -> paragraphs.
    sentence_series = headerTable.apply(make_sentence, axis=1)
    paragraph_frame = make_paragraphs(sentence_series)
    prediction = predictFromSentences(paragraph_frame)
    return {
        SENTENCES: paragraph_frame,
        OUT: prediction[OUT],
    }
|
74 |
+
|
75 |
+
def predictFromFileUpload(fileUpload):
    """Parse an uploaded .csv / .log / .log.labeled file and classify each record.

    fileUpload: filesystem path supplied by the gr.File component.
    Returns a dict with HEADER_TABLE (the parsed input), SENTENCES
    (the intermediary paragraphs) and OUT (the predictions).
    Raises gr.Error when nothing was uploaded or the extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    # Compare extensions case-insensitively so e.g. ".CSV" is accepted too.
    fileType = fileUpload.split('.')[-1].lower()
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Convert the Zeek log to CSV first, keeping only the model's columns.
        with open('out.csv', "w") as outfile:
            for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                if log_record is not None:
                    outfile.write(log_record + "\n")
        dataFrame = pd.read_csv('out.csv', names=HEADERS)
    else:
        # BUG FIX: previously fell through with dataFrame unbound, crashing
        # with NameError; surface a clear user-facing error instead.
        raise gr.Error("Unsupported file type: ." + fileType)
    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
|
94 |
+
|
95 |
+
def makeIndexColumn(allInputs):
    """Prepend a '#' column (the row index) to every DataFrame in allInputs.

    Mutates and returns allInputs; only the keys actually present
    (SENTENCES, HEADER_TABLE, OUT) are touched.
    """
    def _add_index_column(key):
        frame = allInputs[key]
        original_headers = list(frame.columns)
        frame['#'] = frame.index
        # Reorder so '#' is the left-most column.
        allInputs[key] = frame[['#', *original_headers]]

    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _add_index_column(key)
    return allInputs
|
109 |
+
|
110 |
+
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Dispatch prediction based on the selected input type.

    Returns [header table, sentence table, predictions] to refresh the three
    output widgets; inputs not involved in the chosen path are echoed back.
    """
    if inputType == LOG:
        # Full pipeline: parse the uploaded file, sentence-ize, classify.
        results = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [results[HEADER_TABLE], results[SENTENCES], results[OUT]]
    if inputType == HEADER_TABLE:
        # Start from the user-edited header table.
        results = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, results[SENTENCES], results[OUT]]
    if inputType == SENTENCES:
        # Sentences supplied directly; classify them as-is.
        results = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, results[OUT]]
|
124 |
+
|
125 |
+
# Update UI
def updateInputOutputBlocks(inputType, steps):
    """Re-render the input/output widgets to match the chosen settings.

    Only the widget matching inputType is visible and interactive; the
    intermediary tables are additionally shown when selected in steps.
    Returns updated (fileUpload, headerTable, sentenceTable) components.
    """
    # Cleanup: the original used redundant `True if X else False` ternaries
    # and mixed int flags (1/0) with booleans for `interactive`; plain
    # boolean expressions are equivalent and idiomatic.
    fileUpload = gr.File(
        visible=(inputType == LOG),
        interactive=(inputType == LOG),
    )
    headerTable = gr.Dataframe(
        visible=(inputType == HEADER_TABLE or HEADER_TABLE in steps),
        interactive=(inputType == HEADER_TABLE),
    )
    sentenceTable = gr.Dataframe(
        visible=(inputType == SENTENCES or SENTENCES in steps),
        interactive=(inputType == SENTENCES),
    )
    return fileUpload, headerTable, sentenceTable
|
141 |
+
|
142 |
+
# Create Gradio UI
# Top-level layout: three stacked dataframes (headers -> sentences -> output),
# a file-upload slot (hidden by default), a Run button, and a Settings
# accordion that switches which widgets are visible/interactive.
with gr.Blocks() as app:
    gr.Markdown("""
    # Network Log Predictions
    Input log information below and click 'Run' to get predictions from our model!
    Access the settings at the bottom for different types of input and to see inbetween steps.
    """)
    # Inputs / Outputs
    # fileUpload is only shown when the input type is the raw conn.log file.
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    # headerTable is the default (interactive) input.
    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    # sentenceTable is an intermediary view, hidden unless requested in Settings.
    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    # out always shows the final Malicious/Benign predictions.
    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")

    # Settings
    with gr.Accordion("Settings", open=False):
        inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
        # Both settings re-render the widgets via updateInputOutputBlocks.
        inputType.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
        steps.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
    # Assign Callback
    # Run button routes all inputs through predict(), which dispatches on inputType.
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )

app.launch()
|