emilylearning committed
Commit 12781b6 · 1 Parent(s): c494a1d
fix add-own-model bug, less nice but it's late; updated markdown

app.py CHANGED
@@ -1,4 +1,3 @@
-# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
 # %%
 import gradio as gr
 import matplotlib.pyplot as plt
@@ -8,18 +7,9 @@ import random
 from matplotlib.ticker import MaxNLocator
 from transformers import pipeline
 
+MODEL_NAMES = ["bert-base-uncased", "roberta-base", "bert-large-uncased", "roberta-large"]
 OWN_MODEL_NAME = 'add-a-model'
 
-MODEL_NAME_DICT = {
-    "roberta-large": "RoBERTa-large",
-    "bert-large-uncased": "BERT-large",
-    "roberta-base": "RoBERTa-base",
-    "bert-base-uncased": "BERT-base",
-    "olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
-    OWN_MODEL_NAME: "Your model's"
-}
-MODEL_NAMES = list(MODEL_NAME_DICT.keys())
-
 DECIMAL_PLACES = 1
 EPS = 1e-5 # to avoid /0 errors
 
@@ -145,12 +135,15 @@ GENDERED_LIST = [
 
 # %%
 # Fire up the models
-models =
+models = dict()
+
+for bert_like in MODEL_NAMES:
+    models[bert_like] = pipeline("fill-mask", model=bert_like)
 
 # %%
 
 
-def
+def get_gendered_token_ids():
     male_gendered_tokens = [list[0] for list in GENDERED_LIST]
     female_gendered_tokens = [list[1] for list in GENDERED_LIST]
 
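For readers unfamiliar with what these preloaded fill-mask pipelines return, here is a minimal sketch of a query; the example sentence and the `targets` restriction are illustrative, not necessarily the app's exact call:

```python
from transformers import pipeline

# Load one of the demo's checkpoints; any fill-mask model behaves the same way.
unmasker = pipeline("fill-mask", model="bert-base-uncased")

# `targets` restricts scoring to the given tokens, which is roughly what a
# gendered-pronoun probe needs; each result dict carries a softmax `score`.
preds = unmasker("In 1953, [MASK] was a teenager.", targets=["he", "she"])
for p in preds:
    print(p["token_str"], round(p["score"], 4))
```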
@@ -177,7 +170,7 @@ def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_pre
 # %%
 
 
-def get_figure(df, gender, n_fit=1):
+def get_figure(df, gender, n_fit=1, model_name=None):
     df = df.set_index('x-axis')
     cols = df.columns
     xs = list(range(len(df)))
@@ -205,7 +198,7 @@ def get_figure(df, gender, n_fit=1):
     ax.axis('tight')
     ax.set_xlabel("Value injected into input text")
     ax.set_title(
-        f"Probability of predicting {gender} pronouns.")
+        f"Probability of predicting {gender} pronouns on {model_name}.")
     ax.set_ylabel(f"Softmax prob for pronouns")
     ax.xaxis.set_major_locator(MaxNLocator(6))
     ax.tick_params(axis='x', labelrotation=5)
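The `n_fit` argument (exposed in the UI as "F) Degree of polynomial fit") is not defined in this hunk; assuming it drives a least-squares polynomial overlay on the per-value probabilities, a generic NumPy sketch of that idea looks like this (toy data, hypothetical variable names):

```python
import numpy as np
import matplotlib.pyplot as plt

xs = np.arange(10)             # positions of the injected values (e.g. dates)
ys = 0.40 + 0.02 * xs          # toy softmax probabilities per injected value

n_fit = 1                      # degree of polynomial fit, as in the dropdown
trend = np.poly1d(np.polyfit(xs, ys, n_fit))(xs)   # fit and evaluate the polynomial

fig, ax = plt.subplots()
ax.plot(xs, ys, "o", label="predicted prob")
ax.plot(xs, trend, "-", label=f"degree-{n_fit} fit")
ax.legend()
```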
@@ -227,6 +220,7 @@ def predict_gender_pronouns(
     """
     if model_name not in MODEL_NAMES:
         model = pipeline("fill-mask", model=own_model_name)
+        model_name = OWN_MODEL_NAME
     else:
         model = models[model_name]
 
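The added `model_name = OWN_MODEL_NAME` line is the core of the add-own-model fix: once a user-supplied checkpoint is loaded, downstream code (such as the new plot titles) should refer to the generic `add-a-model` entry rather than a name that is absent from `models`. A standalone sketch of that selection logic, using a hypothetical helper name:

```python
from transformers import pipeline

def resolve_model(model_name, own_model_name, models, known_names, own_label="add-a-model"):
    """Hypothetical helper: pick a preloaded pipeline when possible, else load the user's checkpoint."""
    if model_name not in known_names:
        model = pipeline("fill-mask", model=own_model_name)  # load the user-typed checkpoint
        model_name = own_label                               # normalize the display name
    else:
        model = models[model_name]                           # reuse a preloaded pipeline
    return model, model_name
```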
@@ -234,7 +228,7 @@ def predict_gender_pronouns(
 
     indie_vars_list = indie_vars.split(',')
 
-    male_gendered_tokens, female_gendered_tokens =
+    male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
 
     text_segments, num_preds = prepare_text_for_masking(
         input_text, mask_token, male_gendered_tokens + female_gendered_tokens, split_key)
@@ -276,9 +270,9 @@ def predict_gender_pronouns(
     results_df['female_pronouns'] = female_pronoun_preds
     results_df['male_pronouns'] = male_pronoun_preds
     female_fig = get_figure(results_df.drop(
-        'male_pronouns', axis=1), 'female', n_fit,)
+        'male_pronouns', axis=1), 'female', n_fit, model_name)
     male_fig = get_figure(results_df.drop(
-        'female_pronouns', axis=1), 'male', n_fit,)
+        'female_pronouns', axis=1), 'male', n_fit, model_name)
     display_text = f"{random.choice(indie_vars_list)}".join(text_segments)
 
     return (
@@ -293,18 +287,17 @@ def predict_gender_pronouns(
 title = "Causing Gender Pronouns"
 description = """
 ## Intro
-
 """
 
 
 date_example = [
-    MODEL_NAMES[
+    MODEL_NAMES[1],
     '',
     ', '.join(DATES),
     'DATE',
     "False",
     1,
-    '
+    'She was a teenager in DATE.'
 ]
 
 
@@ -315,7 +308,7 @@ place_example = [
     'PLACE',
     "False",
     1,
-    'She became
+    'She became an adult in PLACE.'
 ]
 
 
@@ -362,7 +355,9 @@ with demo:
     gr.Markdown("# Spurious Correlation Evaluation for Pre-trained LLMs")
     gr.Markdown("Find spurious correlations between seemingly independent variables (for example between `gender` and `time`) in almost any BERT-like LLM on Hugging Face, below.")
 
-    gr.Markdown("
+    # gr.Markdown("Note: If there is an issue with the rendering of the results taking longer than expected (more than 10s of seconds), there may be an unexpected issue effecting the hosting. If so, please see this [backup colab notebook](https://colab.research.google.com/drive/1A3a9cy9fERaxkuoX8YNTFhLlhRt_cxMm?usp=sharing).")
+
+
     gr.Markdown("## Instructions for this Demo")
     gr.Markdown("1) Click on one of the examples below (where we sweep through a spectrum of `places`, `dates` and `subreddits`) to pre-populate the input fields.")
     gr.Markdown("2) Check out the pre-populated fields as you scroll down to the ['Hit Submit...'] button!")
@@ -401,7 +396,8 @@ with demo:
 
     with gr.Row():
         model_name = gr.Radio(
-            MODEL_NAMES,
+            MODEL_NAMES + [OWN_MODEL_NAME],
+            type="value",
             label="B) BERT-like model.",
         )
         own_model_name = gr.Textbox(
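Adding `OWN_MODEL_NAME` to the radio choices is what lets the free-text model field take effect. A stripped-down Blocks sketch of that interaction (component names and the tiny callback are illustrative, not the demo's real wiring):

```python
import gradio as gr

MODEL_NAMES = ["bert-base-uncased", "roberta-base"]
OWN_MODEL_NAME = "add-a-model"

def which_model(choice, typed_name):
    # Fall back to the typed checkpoint only when "add-a-model" is selected.
    return typed_name if choice == OWN_MODEL_NAME else choice

with gr.Blocks() as demo:
    choice = gr.Radio(MODEL_NAMES + [OWN_MODEL_NAME], type="value", label="B) BERT-like model.")
    typed = gr.Textbox(label="Your own Hugging Face model name")
    out = gr.Textbox(label="Model that will be used")
    gr.Button("Check").click(which_model, inputs=[choice, typed], outputs=out)

# demo.launch()
```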
@@ -417,6 +413,7 @@ with demo:
         to_normalize = gr.Dropdown(
             ["False", "True"],
             label="D) Normalize model's predictions to only the gendered ones?",
+            type="index",
         )
         place_holder = gr.Textbox(
             label="E) Special token place-holder",
@@ -424,6 +421,7 @@ with demo:
         n_fit = gr.Dropdown(
             list(range(1, 5)),
             label="F) Degree of polynomial fit",
+            type="value",
         )
 
     gr.Markdown(
@@ -436,15 +434,16 @@ with demo:
     )
 
     gr.Markdown("## Outputs!")
+    #gr.Markdown("Scroll down and 'Hit Submit'!")
     with gr.Row():
         btn = gr.Button("Hit submit to generate predictions!")
 
     with gr.Row():
         sample_text = gr.Textbox(
-            label="Output text: Sample of text fed to model")
+            type="auto", label="Output text: Sample of text fed to model")
     with gr.Row():
-        female_fig = gr.Plot()
-        male_fig = gr.Plot()
+        female_fig = gr.Plot(type="auto")
+        male_fig = gr.Plot(type="auto")
     with gr.Row():
         df = gr.Dataframe(
             show_label=True,
@@ -471,6 +470,3 @@ with demo:
 
 
 demo.launch(debug=True)
-
-
-# %%