Joschka Strueber committed on
Commit 0d09d9a · 1 Parent(s): 5623280

[Ref] switch to KaTeX CSS in HTML

Files changed (1): app.py (+20 -10)
app.py CHANGED
@@ -78,17 +78,27 @@ with gr.Blocks(title="LLM Similarity Analyzer", css=app_util.custom_css) as demo
     )
 
     gr.Markdown("## Information")
-    gr.HTML("""
-    <script type="text/javascript" async
-      src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
-    </script>
-
-    <p>We propose Chance Adjusted Probabilistic Agreement (<span>\(\operatorname{CAPA}\)</span>, or <span>\(\kappa_p\)</span>), a novel metric
-    for model similarity which adjusts for chance agreement due to accuracy. Using CAPA, we find: (1) LLM-as-a-judge scores are \
-    biased towards more similar models controlling for the model's capability. (2) Gain from training strong models on annotations \
-    of weak supervisors (weak-to-strong generalization) is higher when the two models are more different. (3) Concerningly, model \
-    errors are getting more correlated as capabilities increase.</p>
-    """)
+    metric_info_html = r"""
+    <!-- Include KaTeX CSS for styling -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/katex.min.css" integrity="sha384-vZTGXXFDvM1R7zDKx2g5N5S4FcoFdTJuFTz1Xj2A2/J1j4fGmS7a6hLQ6ZPfF1sk" crossorigin="anonymous">
+    <!-- Include KaTeX and its auto-render extension -->
+    <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/katex.min.js" integrity="sha384-6R6ckgSpF6yXUHg9+KJGXN9I+ik5U9dviDuzhSxrtk4AUaGr8/8Qovm6N9fl/hkz" crossorigin="anonymous"></script>
+    <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/contrib/auto-render.min.js" integrity="sha384-mll67QQ8ErU7t8/QqU3m0Cq56E7i2xUeFYSv6O9V3CRjNdqPzqxK9z6gS9GQFj8D" crossorigin="anonymous"
+      onload="renderMathInElement(document.body);"></script>
+
+    <div>
+      <p>
+        We propose Chance Adjusted Probabilistic Agreement ($\operatorname{CAPA}$, or $\kappa_p$), a novel metric
+        for model similarity which adjusts for chance agreement due to accuracy. Using CAPA, we find:
+      </p>
+      <ol>
+        <li>LLM-as-a-judge scores are biased towards more similar models controlling for the model's capability.</li>
+        <li>Gain from training strong models on annotations of weak supervisors (weak-to-strong generalization) is higher when the two models are more different.</li>
+        <li>Concerningly, model errors are getting more correlated as capabilities increase.</li>
+      </ol>
+    </div>
+    """
+    gr.HTML(value=metric_info_html)
     with gr.Row():
         gr.Image(value="data/table_capa.png", label="Comparison of different similarity metrics for multiple-choice questions", elem_classes="image_container", interactive=False)
         gr.Markdown("""
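One caveat for anyone reusing the new snippet: KaTeX's auto-render extension does not treat single-dollar `$...$` as a math delimiter by default (its built-in list covers `$$...$$`, `\(...\)`, and `\[...\]`), so the `$\operatorname{CAPA}$` and `$\kappa_p$` spans above only render if single-dollar delimiters are passed to `renderMathInElement` explicitly. Below is a minimal, hypothetical sketch of that configuration in the same Gradio style; the `katex_demo_html` name and the versionless jsDelivr URLs are illustrative, not from this commit.

```python
# Sketch only, not the commit's code: enable single-dollar inline math
# explicitly, since KaTeX auto-render ignores $...$ unless it appears in
# the `delimiters` option.
import gradio as gr

# Hypothetical payload; versionless jsDelivr URLs stand in for the pinned,
# integrity-checked ones used in the actual commit.
katex_demo_html = r"""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex/dist/contrib/auto-render.min.js"
  onload='renderMathInElement(document.body, {delimiters: [
    {left: "$$", right: "$$", display: true},
    {left: "$", right: "$", display: false},
    {left: "\\(", right: "\\)", display: false}
  ]});'></script>
<p>Inline math such as $\kappa_p$ should now render.</p>
"""

with gr.Blocks(title="KaTeX delimiter demo") as demo:
    # Assumption: the embedded <script> tags are executed when this HTML is
    # injected; depending on the Gradio version, scripts inside gr.HTML may
    # be stripped, in which case the loader belongs in the page head instead.
    gr.HTML(value=katex_demo_html)

if __name__ == "__main__":
    demo.launch()
```

Keeping pinned versions with `integrity` hashes, as the commit does, remains the safer production choice; the sketch only illustrates the `delimiters` option.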