arosyihuddin committed on
Commit
b9f1938
·
1 Parent(s): 1bf5976

add IndoNLU model

Browse files
app.py CHANGED
@@ -1,9 +1,7 @@
1
  from gradio_pdf import PDF
2
  from src.helper import *
3
  import gradio as gr
4
- from pathlib import Path
5
-
6
- dir_ = Path(__file__).parent
7
 
8
  with gr.Blocks() as ner:
9
  gr.Markdown("# Sistem Ekstraksi Informasi Dokumen Putusan Hukum")
@@ -24,7 +22,7 @@ with gr.Blocks() as ner:
24
  ]
25
  gr.Markdown("## Penjelasan Label")
26
  gr.DataFrame(keterangan_label, headers=["Label", "Keterangan"], height=200)
27
- gr.Markdown("## Uji Coba Model dengan Potongan Kalimat")
28
  # Input Text
29
  with gr.Row():
30
  with gr.Column(scale=2):
@@ -48,7 +46,7 @@ with gr.Blocks() as ner:
48
  fn=text_extraction,
49
  )
50
 
51
- gr.Markdown("## Ekstrak Entitas pada Dokumen Putusan Hukum")
52
  # Input PDF
53
  with gr.Row():
54
  with gr.Column(scale=2):
@@ -63,12 +61,12 @@ with gr.Blocks() as ner:
63
  button_pdf.click(fn=pdf_extraction, inputs=[doc, model_pdf], outputs=output_pdf, api_name="pdf")
64
 
65
  gr.Examples(
66
- ["428_pid.b_2021_pn_jkt.brt_20240529091234.pdf",
67
- "1558_pid.b_2020_pn_jkt.brt_20240529091451.pdf",
68
- "329_pid.b_2023_pn_jkt.brt_20240529090837.pdf",
69
- "168_Pid.Sus_2023_PN_Bkl.pdf",
70
- "169_Pid.Sus_2023_PN_Bkl.pdf",
71
- "167_Pid.Sus_2023_PN_Bkl.pdf"],
72
  inputs=[doc],
73
  outputs=output_pdf,
74
  fn=pdf_extraction,
 
1
  from gradio_pdf import PDF
2
  from src.helper import *
3
  import gradio as gr
4
+ from gradio_pdf import PDF
 
 
5
 
6
  with gr.Blocks() as ner:
7
  gr.Markdown("# Sistem Ekstraksi Informasi Dokumen Putusan Hukum")
 
22
  ]
23
  gr.Markdown("## Penjelasan Label")
24
  gr.DataFrame(keterangan_label, headers=["Label", "Keterangan"], height=200)
25
+ gr.Markdown("## Ekstraksi Entitas pada Potongan Kalimat")
26
  # Input Text
27
  with gr.Row():
28
  with gr.Column(scale=2):
 
46
  fn=text_extraction,
47
  )
48
 
49
+ gr.Markdown("## Ekstraksi Entitas pada Dokumen Putusan Hukum")
50
  # Input PDF
51
  with gr.Row():
52
  with gr.Column(scale=2):
 
61
  button_pdf.click(fn=pdf_extraction, inputs=[doc, model_pdf], outputs=output_pdf, api_name="pdf")
62
 
63
  gr.Examples(
64
+ ["data/428_pid.b_2021_pn_jkt.brt_20240529091234.pdf",
65
+ "data/1558_pid.b_2020_pn_jkt.brt_20240529091451.pdf",
66
+ "data/329_pid.b_2023_pn_jkt.brt_20240529090837.pdf",
67
+ "data/168_Pid.Sus_2023_PN_Bkl.pdf",
68
+ "data/169_Pid.Sus_2023_PN_Bkl.pdf",
69
+ "data/167_Pid.Sus_2023_PN_Bkl.pdf"],
70
  inputs=[doc],
71
  outputs=output_pdf,
72
  fn=pdf_extraction,
model/indoBERT-indoNLU-Fold-5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:486b48b22c41570e5768fc574ff11eb61ca6b5b9751c06e7884108e52ca90c07
3
+ size 495564052
src/__pycache__/helper.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/helper.cpython-310.pyc and b/src/__pycache__/helper.cpython-310.pyc differ
 
src/__pycache__/legalNER.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/legalNER.cpython-310.pyc and b/src/__pycache__/legalNER.cpython-310.pyc differ
 
src/helper.py CHANGED
@@ -2,6 +2,7 @@ from transformers import BertTokenizerFast, BertForTokenClassification
2
  import gradio as gr
3
  from src.legalNER import *
4
 
 
5
  ids_to_labels = {0: 'B_ADVO', 1: 'B_ARTV', 2: 'B_CRIA', 3: 'B_DEFN', 4: 'B_JUDG', 5: 'B_JUDP', 6: 'B_PENA', 7: 'B_PROS', 8: 'B_PUNI', 9: 'B_REGI', 10: 'B_TIMV', 11: 'B_VERN', 12: 'I_ADVO', 13: 'I_ARTV', 14: 'I_CRIA', 15: 'I_DEFN', 16: 'I_JUDG', 17: 'I_JUDP', 18: 'I_PENA', 19: 'I_PROS', 20: 'I_PUNI', 21: 'I_REGI', 22: 'I_TIMV', 23: 'I_VERN', 24: 'O'}
6
  indolem = 'indolem/indobert-base-uncased'
7
  indonlu = 'indobenchmark/indobert-base-p2'
 
2
  import gradio as gr
3
  from src.legalNER import *
4
 
5
+
6
  ids_to_labels = {0: 'B_ADVO', 1: 'B_ARTV', 2: 'B_CRIA', 3: 'B_DEFN', 4: 'B_JUDG', 5: 'B_JUDP', 6: 'B_PENA', 7: 'B_PROS', 8: 'B_PUNI', 9: 'B_REGI', 10: 'B_TIMV', 11: 'B_VERN', 12: 'I_ADVO', 13: 'I_ARTV', 14: 'I_CRIA', 15: 'I_DEFN', 16: 'I_JUDG', 17: 'I_JUDP', 18: 'I_PENA', 19: 'I_PROS', 20: 'I_PUNI', 21: 'I_REGI', 22: 'I_TIMV', 23: 'I_VERN', 24: 'O'}
7
  indolem = 'indolem/indobert-base-uncased'
8
  indonlu = 'indobenchmark/indobert-base-p2'
src/legalNER.py CHANGED
@@ -1,5 +1,10 @@
1
  import gradio as gr
2
  import torch
 
 
 
 
 
3
 
4
  class LegalNER():
5
  def __init__(self, model, tokenizer, ids_to_labels, check_point='IndoBERT (IndoLEM)'):
@@ -132,23 +137,23 @@ class LegalNER():
132
  result = ''
133
  for i, (label, data) in enumerate(sorted_entitu_result.items()):
134
  if label in ['PENA', 'ARTV']:
135
- result += f'{i+1}. {self.label_convert[label]}\t = {data.capitalize()}\n'
136
  elif label in ['PROS']:
137
  if (i+1) >= 10:
138
- result += f'{i+1}. {self.label_convert[label]}\t = {data.capitalize()}\n'
139
  else:
140
- result += f'{i+1}. {self.label_convert[label]}\t\t = {data.capitalize()}\n'
141
  elif label in ['JUDP', 'CRIA']:
142
- result += f'{i+1}. {self.label_convert[label]}\t\t\t = {data.capitalize()}\n'
143
  elif label in ['ADVO']:
144
- result += f'{i+1}. {self.label_convert[label]}\t\t\t\t = {data.capitalize()}\n'
145
  elif label in ['REGI']:
146
  if (i+1) >= 10:
147
- result += f'{i+1}. {self.label_convert[label]}\t\t\t\t\t = {data.capitalize()}\n'
148
  else:
149
- result += f'{i+1}. {self.label_convert[label]}\t\t\t\t\t\t = {data.capitalize()}\n'
150
  else:
151
- result += f'{i+1}. {self.label_convert[label]}\t\t = {data.capitalize()}\n'
152
 
153
  return result
154
 
 
1
  import gradio as gr
2
  import torch
3
+ import requests
4
+ import PyPDF2
5
+ import re
6
+ # import nltk
7
+ # nltk.download('punkt')
8
 
9
  class LegalNER():
10
  def __init__(self, model, tokenizer, ids_to_labels, check_point='IndoBERT (IndoLEM)'):
 
137
  result = ''
138
  for i, (label, data) in enumerate(sorted_entitu_result.items()):
139
  if label in ['PENA', 'ARTV']:
140
+ result += f'{i+1}. {self.label_convert[label]}\t = {data.capitalize()}\n'
141
  elif label in ['PROS']:
142
  if (i+1) >= 10:
143
+ result += f'{i+1}. {self.label_convert[label]}\t = {data.capitalize()}\n'
144
  else:
145
+ result += f'{i+1}. {self.label_convert[label]}\t\t = {data.capitalize()}\n'
146
  elif label in ['JUDP', 'CRIA']:
147
+ result += f'{i+1}. {self.label_convert[label]}\t\t\t = {data.capitalize()}\n'
148
  elif label in ['ADVO']:
149
+ result += f'{i+1}. {self.label_convert[label]}\t\t\t\t = {data.capitalize()}\n'
150
  elif label in ['REGI']:
151
  if (i+1) >= 10:
152
+ result += f'{i+1}. {self.label_convert[label]}\t\t\t\t\t = {data.capitalize()}\n'
153
  else:
154
+ result += f'{i+1}. {self.label_convert[label]}\t\t\t\t\t\t = {data.capitalize()}\n'
155
  else:
156
+ result += f'{i+1}. {self.label_convert[label]}\t\t = {data.capitalize()}\n'
157
 
158
  return result
159