# (Removed non-code page residue: "Spaces" / "Runtime error" banner text.)
# -*- coding: utf-8 -*-
"""GradioInterface_v2.ipynb

Automatically generated by Colaboratory.
"""
# Commented out IPython magic to ensure Python compatibility.
# # Capture to suppress the download output
# %%capture
# !pip install gradio
# !pip install pandas
# !pip install transformers
# !pip install parsezeeklogs
# !pip install elasticsearch
# Define imports for model use
# Standard library
import os
import tempfile

# Third-party
import gradio as gr
import pandas as pd
import torch
from parsezeeklogs import ParseZeekLogs
from transformers import BertTokenizer, pipeline
# Define model: a text-classification pipeline over a fine-tuned BERT
# (IoT-23 network-log classifier), paired with the stock cased BERT tokenizer.
# NOTE: this downloads model weights on first run.
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased"))
# Define string constants — used both as UI labels and as dict keys passed
# between the predict* helpers, so they must stay in sync with the UI below.
LOG = "conn.log Output"
HEADER_TABLE = "Headers Table"
SENTENCES = "Sentences"
OUT = "out"
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]  # choices for the input-type radio
STEPS = [HEADER_TABLE, SENTENCES]  # intermediary steps the user may display
# Zeek conn.log fields the model consumes, in the order they are sentence-ized.
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes']
# Define sentence-ization functions
# Dictionary of feature names to use in the make sentence function:
# maps each raw header to the long, human-readable phrase used in sentences.
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
# Function to make sentences out of the data | |
def make_sentence(row): | |
sentences = {} | |
for feature in row.keys(): | |
if feature == 'label' or feature == "#": | |
sentences[feature] = row[feature] | |
else: | |
sentences[feature] = feature_names[feature] + " is " + str(row[feature]) + "." | |
return sentences | |
# Take all sentence observations and make them into paragraph inputs
def make_paragraphs(ser):
    """Join each observation's per-feature sentences into one paragraph.

    ser: Series of dicts (as produced by make_sentence). Returns a one-column
    DataFrame named "Sentences" sharing ser's index.
    """
    fields = ('id.resp_p', 'proto', 'conn_state',
              'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[f] for f in fields) for _, obs in ser.items()]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
# Define prediction Functions For Different Settings
def predictFromSentences(sentenceTable):
    """Run the model on the Sentences column; map labels to verdict strings.

    Returns {OUT: DataFrame} with one "Output" row per input sentence,
    "Malicious" for model label LABEL_0 and "Benign" otherwise.
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())  # model inference happens here
    verdicts = ["Malicious" if pred['label'] == "LABEL_0" else "Benign"
                for pred in predictions]
    return {OUT: pd.DataFrame({"Output": verdicts})}
def predictFromHeaderTable(headerTable):
    """Sentence-ize the header rows, then classify the resulting paragraphs.

    Returns {SENTENCES: paragraph DataFrame, OUT: prediction DataFrame}.
    """
    per_row_sentences = headerTable.apply(make_sentence, axis=1)
    paragraphs = make_paragraphs(per_row_sentences)
    prediction = predictFromSentences(paragraphs)
    return {SENTENCES: paragraphs, OUT: prediction[OUT]}
def predictFromFileUpload(fileUpload):
    """Load an uploaded .csv/.log/.labeled file and run predictions on it.

    fileUpload: path string of the uploaded file (None when nothing uploaded).
    Returns {HEADER_TABLE, SENTENCES, OUT} mapping to DataFrames.
    Raises gr.Error when no file was uploaded or the extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    fileType = fileUpload.split('.')[-1]
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Convert the Zeek log to CSV through a private temp file instead of a
        # hard-coded 'out.csv' in the CWD (avoids collisions between concurrent
        # requests and write-permission failures).
        tmpPath = None
        try:
            with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as outfile:
                tmpPath = outfile.name
                for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                    if log_record is not None:
                        outfile.write(log_record + "\n")
            dataFrame = pd.read_csv(tmpPath, names=HEADERS)
        finally:
            if tmpPath is not None:
                os.unlink(tmpPath)
    else:
        # Previously this fell through with dataFrame unbound, crashing with a
        # NameError; surface a user-facing error instead.
        raise gr.Error(f"Unsupported file type: .{fileType}")
    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
def makeIndexColumn(allInputs):
    """Prepend a '#' column (copied from the index) to each table present.

    Mutates and returns allInputs; only the SENTENCES, HEADER_TABLE and OUT
    entries that actually exist are touched.
    """
    def _withIndexFirst(key):
        priorHeaders = list(allInputs[key].columns)
        allInputs[key]['#'] = allInputs[key].index
        allInputs[key] = allInputs[key][['#', *priorHeaders]]
    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _withIndexFirst(key)
    return allInputs
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Dispatch on the selected input type and return [headers, sentences, out].

    Components earlier in the pipeline than the chosen input are passed back
    unchanged; everything downstream is recomputed by the model.
    """
    if inputType == LOG:
        # Process File Upload
        results = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [results[HEADER_TABLE], results[SENTENCES], results[OUT]]
    if inputType == HEADER_TABLE:
        # Process Header Table
        results = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, results[SENTENCES], results[OUT]]
    if inputType == SENTENCES:
        # Process From Sentences
        results = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, results[OUT]]
# Update UI
def updateInputOutputBlocks(inputType, steps):
    """Recompute visibility/interactivity of the three input blocks.

    A block is visible when it is the selected input type or checked as an
    intermediary step; it is interactive only when it is the input type.
    """
    isLog = inputType == LOG
    isHeaders = inputType == HEADER_TABLE
    isSentences = inputType == SENTENCES
    fileUpload = gr.File(
        visible=isLog,
        interactive=int(isLog)
    )
    headerTable = gr.Dataframe(
        visible=isHeaders or HEADER_TABLE in steps,
        interactive=int(isHeaders)
    )
    sentenceTable = gr.Dataframe(
        interactive=int(isSentences),
        visible=isSentences or SENTENCES in steps
    )
    return fileUpload, headerTable, sentenceTable
# Create Gradio UI: file/table/sentence inputs, a prediction table output,
# and a settings accordion that toggles which input blocks are shown.
with gr.Blocks() as app:
    # Intro text shown above the inputs.
    gr.Markdown("""
# Network Log Predictions
Input log information below and click 'Run' to get predictions from our model!
Access the settings at the bottom for different types of input and to see inbetween steps.
""")
    # Inputs / Outputs — only one input block is visible at a time;
    # updateInputOutputBlocks switches visibility when settings change.
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")
    # Settings
    with gr.Accordion("Settings", open=False):
        inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
    # Re-render the input/output blocks whenever either setting changes.
    inputType.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable]
    )
    steps.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable]
    )
    # Assign Callback
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )
# NOTE(review): original indentation was mangled in extraction; the event
# wiring above is assumed to sit at Blocks level — confirm against the
# original notebook if available (placement does not affect behavior).
app.launch()