# NOTE: removed Hugging Face Spaces page chrome (runtime status, file size,
# commit hash, line-number gutter) that was scraped along with this file —
# it was not part of the source and made the file unparseable.
# -*- coding: utf-8 -*-
"""GradioInterface_v2.ipynb
Automatically generated by Colaboratory.
"""
# Commented out IPython magic to ensure Python compatibility.
# # Capture to suppress the download output
# %%capture
# !pip install gradio
# !pip install pandas
# !pip install transformers
# !pip install parsezeeklogs
# !pip install elasticsearch
# Define imports for model use
import torch
from transformers import pipeline
from parsezeeklogs import ParseZeekLogs
from transformers import BertTokenizer
import gradio as gr
import pandas as pd
# Define model
# Text-classification pipeline: a BERT model fine-tuned for IoT-23 network-log
# classification, paired with the stock bert-base-cased tokenizer.
# NOTE(review): the model is loaded at import time, so importing this module
# triggers a weights download — confirm that is intended for deployment.
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased"))
# Define string constants (dict keys shared between the predict functions,
# and labels shown in the Settings radio/checkboxes)
LOG = "conn.log Output"         # input mode: raw Zeek conn.log file upload
HEADER_TABLE = "Headers Table"  # input mode: tabular header values
SENTENCES = "Sentences"         # input mode: pre-sentence-ized text
OUT = "out"                     # key under which predictions are returned
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]  # choices for the input-type radio
STEPS = [HEADER_TABLE, SENTENCES]             # intermediary steps that can be displayed
# Zeek conn.log columns the model consumes (also the paragraph word order).
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes']
# Define sentence-ization functions
# Dictionary of feature names to use in the make sentence function
# (maps raw Zeek column names to human-readable phrases).
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
# Function to make sentences out of the data
def make_sentence(row):
    """Turn one observation (dict-like of header values) into a dict of
    natural-language sentences; 'label' and '#' entries pass through as-is."""
    passthrough = ('label', '#')
    return {
        feature: value if feature in passthrough
        else feature_names[feature] + " is " + str(value) + "."
        for feature, value in row.items()
    }
# Take all sentence observations and make them into paragraph inputs
def make_paragraphs(ser):
    """Join each observation's per-feature sentences into one paragraph.

    ser: Series of sentence dicts (one per row, as built by make_sentence).
    Returns a single-column DataFrame named "Sentences" aligned to ser's index.
    """
    # Fixed field order — must match the order the model was trained with.
    fields = ('id.resp_p', 'proto', 'conn_state',
              'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[field] for field in fields) for obs in ser]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
# Define prediction Functions For Different Settings
def predictFromSentences(sentenceTable):
    """Classify each sentence row and return {OUT: DataFrame of labels}.

    LABEL_0 is mapped to "Malicious", anything else to "Benign".
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())  # model inference happens here
    labels = []
    for prediction in predictions:
        labels.append("Malicious" if prediction['label'] == "LABEL_0" else "Benign")
    return {OUT: pd.DataFrame({"Output": labels})}
def predictFromHeaderTable(headerTable):
    """Predict from a DataFrame of raw header values.

    Sentence-izes each row, joins the sentences into paragraphs, and runs the
    classifier. Returns a dict with the intermediate paragraphs (SENTENCES)
    and the predictions DataFrame (OUT).
    """
    # Fix: dropped a stray trailing semicolon (non-idiomatic in Python).
    sentences = headerTable.apply(make_sentence, axis=1)
    paragraphs = make_paragraphs(sentences)
    return {
        SENTENCES: paragraphs,
        OUT: predictFromSentences(paragraphs)[OUT],
    }
def predictFromFileUpload(fileUpload):
    """Parse an uploaded .csv / .log / .log.labeled file and predict from it.

    fileUpload: filepath string from the gr.File component (or None).
    Returns a dict with HEADER_TABLE (parsed input), SENTENCES (intermediate
    paragraphs) and OUT (predictions).
    Raises gr.Error when no file was uploaded or the extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    fileType = fileUpload.split('.')[-1]
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType == 'log' or fileType == 'labeled':
        # Convert the Zeek log to CSV on disk, keeping only the model's columns.
        with open('out.csv', "w") as outfile:
            for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                if log_record is not None:
                    outfile.write(log_record + "\n")
        dataFrame = pd.read_csv('out.csv', names=HEADERS)
    else:
        # Fix: previously fell through here and crashed with a NameError on
        # `dataFrame`; surface a clear, user-facing error instead.
        raise gr.Error("Unsupported file type: ." + fileType)
    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
def makeIndexColumn(allInputs):
    """Prepend a '#' column (copy of the DataFrame index) to each table in
    allInputs so row numbers are visible in the Gradio Dataframe widgets."""
    def _addIndexTo(key):
        frame = allInputs[key]
        originalHeaders = list(frame.columns)
        frame['#'] = frame.index
        # Reorder so '#' leads, followed by the original columns in order.
        allInputs[key] = frame[['#', *originalHeaders]]
    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _addIndexTo(key)
    return allInputs
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Dispatch prediction based on the selected input type.

    Returns a 3-list of values for the [headerTable, sentenceTable, out]
    output components, computing only the stages the input type requires;
    earlier stages are echoed back unchanged when they were the input.
    """
    # Fix: removed a stray semicolon and the dead `output = {}` initialisation.
    if inputType == LOG:
        # Full pipeline: parse file -> headers -> sentences -> predictions.
        output = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [output[HEADER_TABLE], output[SENTENCES], output[OUT]]
    elif inputType == HEADER_TABLE:
        # Start from the user-edited header table.
        output = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, output[SENTENCES], output[OUT]]
    elif inputType == SENTENCES:
        # Sentences supplied directly; only run the classifier.
        output = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, output[OUT]]
    # Unknown input type: preserve the original implicit-None behavior,
    # but make it explicit for readability.
    return None
# Update UI
def updateInputOutputBlocks(inputType, steps):
    """Recompute visibility/interactivity of the three input widgets whenever
    the input-type radio or the intermediary-steps checkboxes change.

    Only the widget matching inputType is interactive; upstream widgets are
    additionally shown (read-only) when listed in steps.
    """
    # Fix: replaced `True if cond else False` and `1 if cond else 0` with the
    # boolean expressions themselves — gradio accepts bools for both props.
    fileUpload = gr.File(
        visible=(inputType == LOG),
        interactive=(inputType == LOG)
    )
    headerTable = gr.Dataframe(
        visible=(inputType == HEADER_TABLE or HEADER_TABLE in steps),
        interactive=(inputType == HEADER_TABLE)
    )
    sentenceTable = gr.Dataframe(
        interactive=(inputType == SENTENCES),
        visible=(inputType == SENTENCES or SENTENCES in steps)
    )
    return fileUpload, headerTable, sentenceTable
# Create Gradio UI: three stacked tables (headers -> sentences -> predictions),
# a Run button, and a Settings accordion that switches the input mode.
with gr.Blocks() as app:
    gr.Markdown("""
# Network Log Predictions
Input log information below and click 'Run' to get predictions from our model!
Access the settings at the bottom for different types of input and to see inbetween steps.
""")
    # Inputs / Outputs
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    headerTable = gr.Dataframe(row_count=(2, "dynamic"), col_count=(7, "fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    sentenceTable = gr.Dataframe(row_count=(2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    out = gr.Dataframe(row_count=(2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")
    # Settings
    with gr.Accordion("Settings", open=False):
        # Fix: use the HEADER_TABLE constant instead of the duplicated literal
        # "Headers Table" so the default stays in sync with INPUT_TYPES.
        inputType = gr.Radio(INPUT_TYPES, value=HEADER_TABLE, label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
    # Re-render the input widgets whenever either setting changes.
    inputType.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable]
    )
    steps.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable]
    )
    # Assign Callback
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )
# Fix: removed the trailing "|" scrape artifact that made this line a syntax error.
app.launch()