muhammedAdnan3 commited on
Commit
a2d6347
·
verified ·
1 Parent(s): cc6916c

Uploading everything from Microsoft data

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. .gitignore +398 -0
  3. CODE_OF_CONDUCT.md +9 -0
  4. LICENSE +21 -0
  5. Makefile +16 -0
  6. README.md +265 -3
  7. RESPONSIBLE_AI.md +41 -0
  8. SECURITY.md +41 -0
  9. demos/aquarat/.env +8 -0
  10. demos/aquarat/configs/prompt_library.yaml +36 -0
  11. demos/aquarat/configs/promptopt_config.yaml +52 -0
  12. demos/aquarat/configs/setup_config.yaml +14 -0
  13. demos/aquarat/demo.ipynb +296 -0
  14. demos/bbh/.env +8 -0
  15. demos/bbh/configs/prompt_library.yaml +36 -0
  16. demos/bbh/configs/promptopt_config.yaml +52 -0
  17. demos/bbh/configs/setup_config.yaml +14 -0
  18. demos/bbh/demo.ipynb +428 -0
  19. demos/bbh/description.py +97 -0
  20. demos/gsm8k/.env +8 -0
  21. demos/gsm8k/configs/prompt_library.yaml +36 -0
  22. demos/gsm8k/configs/promptopt_config.yaml +52 -0
  23. demos/gsm8k/configs/setup_config.yaml +14 -0
  24. demos/gsm8k/demo.ipynb +298 -0
  25. demos/scenarios/.env +8 -0
  26. demos/scenarios/configs/prompt_library.yaml +36 -0
  27. demos/scenarios/configs/promptopt_config.yaml +53 -0
  28. demos/scenarios/configs/setup_config.yaml +14 -0
  29. demos/scenarios/dataset_scenarios_demo.ipynb +1146 -0
  30. demos/svamp/.env +8 -0
  31. demos/svamp/configs/prompt_library.yaml +36 -0
  32. demos/svamp/configs/promptopt_config.yaml +52 -0
  33. demos/svamp/configs/setup_config.yaml +14 -0
  34. demos/svamp/demo.ipynb +295 -0
  35. docs/images/arithmetic_task.png +0 -0
  36. docs/images/bigbench.png +0 -0
  37. docs/images/comaprision.png +3 -0
  38. docs/images/cost_analysis.png +0 -0
  39. docs/images/curve.png +0 -0
  40. docs/images/github.png +0 -0
  41. docs/images/icl_results.png +0 -0
  42. docs/images/iterative_flowchart-1.png +3 -0
  43. docs/images/msr_blog.png +0 -0
  44. docs/images/overview.png +3 -0
  45. docs/images/ppc.png +3 -0
  46. docs/images/ppc_1.png +3 -0
  47. docs/images/prompting.png +0 -0
  48. docs/images/sequential_flowchart-1.png +0 -0
  49. docs/images/slm_prompt.png +0 -0
  50. docs/index.html +784 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/images/comaprision.png filter=lfs diff=lfs merge=lfs -text
37
+ docs/images/iterative_flowchart-1.png filter=lfs diff=lfs merge=lfs -text
38
+ docs/images/overview.png filter=lfs diff=lfs merge=lfs -text
39
+ docs/images/ppc_1.png filter=lfs diff=lfs merge=lfs -text
40
+ docs/images/ppc.png filter=lfs diff=lfs merge=lfs -text
41
+ images/iterative_flowchart-1.png filter=lfs diff=lfs merge=lfs -text
42
+ images/overview.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Ignore Visual Studio temporary files, build results, and
2
+ ## files generated by popular Visual Studio add-ons.
3
+ ##
4
+ ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5
+
6
+ # User-specific files
7
+ *.rsuser
8
+ *.suo
9
+ *.user
10
+ *.userosscache
11
+ *.sln.docstates
12
+
13
+ # User-specific files (MonoDevelop/Xamarin Studio)
14
+ *.userprefs
15
+
16
+ # Mono auto generated files
17
+ mono_crash.*
18
+
19
+ # Build results
20
+ [Dd]ebug/
21
+ [Dd]ebugPublic/
22
+ [Rr]elease/
23
+ [Rr]eleases/
24
+ x64/
25
+ x86/
26
+ [Ww][Ii][Nn]32/
27
+ [Aa][Rr][Mm]/
28
+ [Aa][Rr][Mm]64/
29
+ bld/
30
+ [Bb]in/
31
+ [Oo]bj/
32
+ [Ll]og/
33
+ [Ll]ogs/
34
+
35
+ # Visual Studio 2015/2017 cache/options directory
36
+ .vs/
37
+ # Uncomment if you have tasks that create the project's static files in wwwroot
38
+ #wwwroot/
39
+
40
+ # Visual Studio 2017 auto generated files
41
+ Generated\ Files/
42
+
43
+ # MSTest test Results
44
+ [Tt]est[Rr]esult*/
45
+ [Bb]uild[Ll]og.*
46
+
47
+ # NUnit
48
+ *.VisualState.xml
49
+ TestResult.xml
50
+ nunit-*.xml
51
+
52
+ # Build Results of an ATL Project
53
+ [Dd]ebugPS/
54
+ [Rr]eleasePS/
55
+ dlldata.c
56
+
57
+ # Benchmark Results
58
+ BenchmarkDotNet.Artifacts/
59
+
60
+ # .NET Core
61
+ project.lock.json
62
+ project.fragment.lock.json
63
+ artifacts/
64
+
65
+ # ASP.NET Scaffolding
66
+ ScaffoldingReadMe.txt
67
+
68
+ # StyleCop
69
+ StyleCopReport.xml
70
+
71
+ # Files built by Visual Studio
72
+ *_i.c
73
+ *_p.c
74
+ *_h.h
75
+ *.ilk
76
+ *.meta
77
+ *.obj
78
+ *.iobj
79
+ *.pch
80
+ *.pdb
81
+ *.ipdb
82
+ *.pgc
83
+ *.pgd
84
+ *.rsp
85
+ *.sbr
86
+ *.tlb
87
+ *.tli
88
+ *.tlh
89
+ *.tmp
90
+ *.tmp_proj
91
+ *_wpftmp.csproj
92
+ *.log
93
+ *.tlog
94
+ *.vspscc
95
+ *.vssscc
96
+ .builds
97
+ *.pidb
98
+ *.svclog
99
+ *.scc
100
+
101
+ # Chutzpah Test files
102
+ _Chutzpah*
103
+
104
+ # Visual C++ cache files
105
+ ipch/
106
+ *.aps
107
+ *.ncb
108
+ *.opendb
109
+ *.opensdf
110
+ *.sdf
111
+ *.cachefile
112
+ *.VC.db
113
+ *.VC.VC.opendb
114
+
115
+ # Visual Studio profiler
116
+ *.psess
117
+ *.vsp
118
+ *.vspx
119
+ *.sap
120
+
121
+ # Visual Studio Trace Files
122
+ *.e2e
123
+
124
+ # TFS 2012 Local Workspace
125
+ $tf/
126
+
127
+ # Guidance Automation Toolkit
128
+ *.gpState
129
+
130
+ # ReSharper is a .NET coding add-in
131
+ _ReSharper*/
132
+ *.[Rr]e[Ss]harper
133
+ *.DotSettings.user
134
+
135
+ # TeamCity is a build add-in
136
+ _TeamCity*
137
+
138
+ # DotCover is a Code Coverage Tool
139
+ *.dotCover
140
+
141
+ # AxoCover is a Code Coverage Tool
142
+ .axoCover/*
143
+ !.axoCover/settings.json
144
+
145
+ # Coverlet is a free, cross platform Code Coverage Tool
146
+ coverage*.json
147
+ coverage*.xml
148
+ coverage*.info
149
+
150
+ # Visual Studio code coverage results
151
+ *.coverage
152
+ *.coveragexml
153
+
154
+ # NCrunch
155
+ _NCrunch_*
156
+ .*crunch*.local.xml
157
+ nCrunchTemp_*
158
+
159
+ # MightyMoose
160
+ *.mm.*
161
+ AutoTest.Net/
162
+
163
+ # Web workbench (sass)
164
+ .sass-cache/
165
+
166
+ # Installshield output folder
167
+ [Ee]xpress/
168
+
169
+ # DocProject is a documentation generator add-in
170
+ DocProject/buildhelp/
171
+ DocProject/Help/*.HxT
172
+ DocProject/Help/*.HxC
173
+ DocProject/Help/*.hhc
174
+ DocProject/Help/*.hhk
175
+ DocProject/Help/*.hhp
176
+ DocProject/Help/Html2
177
+ DocProject/Help/html
178
+
179
+ # Click-Once directory
180
+ publish/
181
+
182
+ # Publish Web Output
183
+ *.[Pp]ublish.xml
184
+ *.azurePubxml
185
+ # Note: Comment the next line if you want to checkin your web deploy settings,
186
+ # but database connection strings (with potential passwords) will be unencrypted
187
+ *.pubxml
188
+ *.publishproj
189
+
190
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
191
+ # checkin your Azure Web App publish settings, but sensitive information contained
192
+ # in these scripts will be unencrypted
193
+ PublishScripts/
194
+
195
+ # NuGet Packages
196
+ *.nupkg
197
+ # NuGet Symbol Packages
198
+ *.snupkg
199
+ # The packages folder can be ignored because of Package Restore
200
+ **/[Pp]ackages/*
201
+ # except build/, which is used as an MSBuild target.
202
+ !**/[Pp]ackages/build/
203
+ # Uncomment if necessary however generally it will be regenerated when needed
204
+ #!**/[Pp]ackages/repositories.config
205
+ # NuGet v3's project.json files produces more ignorable files
206
+ *.nuget.props
207
+ *.nuget.targets
208
+
209
+ # Microsoft Azure Build Output
210
+ csx/
211
+ *.build.csdef
212
+
213
+ # Microsoft Azure Emulator
214
+ ecf/
215
+ rcf/
216
+
217
+ # Windows Store app package directories and files
218
+ AppPackages/
219
+ BundleArtifacts/
220
+ Package.StoreAssociation.xml
221
+ _pkginfo.txt
222
+ *.appx
223
+ *.appxbundle
224
+ *.appxupload
225
+
226
+ # Visual Studio cache files
227
+ # files ending in .cache can be ignored
228
+ *.[Cc]ache
229
+ # but keep track of directories ending in .cache
230
+ !?*.[Cc]ache/
231
+
232
+ # Others
233
+ ClientBin/
234
+ ~$*
235
+ *~
236
+ *.dbmdl
237
+ *.dbproj.schemaview
238
+ *.jfm
239
+ *.pfx
240
+ *.publishsettings
241
+ orleans.codegen.cs
242
+
243
+ # Including strong name files can present a security risk
244
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245
+ #*.snk
246
+
247
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
248
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249
+ #bower_components/
250
+
251
+ # RIA/Silverlight projects
252
+ Generated_Code/
253
+
254
+ # Backup & report files from converting an old project file
255
+ # to a newer Visual Studio version. Backup files are not needed,
256
+ # because we have git ;-)
257
+ _UpgradeReport_Files/
258
+ Backup*/
259
+ UpgradeLog*.XML
260
+ UpgradeLog*.htm
261
+ ServiceFabricBackup/
262
+ *.rptproj.bak
263
+
264
+ # SQL Server files
265
+ *.mdf
266
+ *.ldf
267
+ *.ndf
268
+
269
+ # Business Intelligence projects
270
+ *.rdl.data
271
+ *.bim.layout
272
+ *.bim_*.settings
273
+ *.rptproj.rsuser
274
+ *- [Bb]ackup.rdl
275
+ *- [Bb]ackup ([0-9]).rdl
276
+ *- [Bb]ackup ([0-9][0-9]).rdl
277
+
278
+ # Microsoft Fakes
279
+ FakesAssemblies/
280
+
281
+ # GhostDoc plugin setting file
282
+ *.GhostDoc.xml
283
+
284
+ # Node.js Tools for Visual Studio
285
+ .ntvs_analysis.dat
286
+ node_modules/
287
+
288
+ # Visual Studio 6 build log
289
+ *.plg
290
+
291
+ # Visual Studio 6 workspace options file
292
+ *.opt
293
+
294
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295
+ *.vbw
296
+
297
+ # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298
+ *.vbp
299
+
300
+ # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301
+ *.dsw
302
+ *.dsp
303
+
304
+ # Visual Studio 6 technical files
305
+ *.ncb
306
+ *.aps
307
+
308
+ # Visual Studio LightSwitch build output
309
+ **/*.HTMLClient/GeneratedArtifacts
310
+ **/*.DesktopClient/GeneratedArtifacts
311
+ **/*.DesktopClient/ModelManifest.xml
312
+ **/*.Server/GeneratedArtifacts
313
+ **/*.Server/ModelManifest.xml
314
+ _Pvt_Extensions
315
+
316
+ # Paket dependency manager
317
+ .paket/paket.exe
318
+ paket-files/
319
+
320
+ # FAKE - F# Make
321
+ .fake/
322
+
323
+ # CodeRush personal settings
324
+ .cr/personal
325
+
326
+ # Python Tools for Visual Studio (PTVS)
327
+ __pycache__/
328
+ *.pyc
329
+
330
+ # Cake - Uncomment if you are using it
331
+ # tools/**
332
+ # !tools/packages.config
333
+
334
+ # Tabs Studio
335
+ *.tss
336
+
337
+ # Telerik's JustMock configuration file
338
+ *.jmconfig
339
+
340
+ # BizTalk build output
341
+ *.btp.cs
342
+ *.btm.cs
343
+ *.odx.cs
344
+ *.xsd.cs
345
+
346
+ # OpenCover UI analysis results
347
+ OpenCover/
348
+
349
+ # Azure Stream Analytics local run output
350
+ ASALocalRun/
351
+
352
+ # MSBuild Binary and Structured Log
353
+ *.binlog
354
+
355
+ # NVidia Nsight GPU debugger configuration file
356
+ *.nvuser
357
+
358
+ # MFractors (Xamarin productivity tool) working folder
359
+ .mfractor/
360
+
361
+ # Local History for Visual Studio
362
+ .localhistory/
363
+
364
+ # Visual Studio History (VSHistory) files
365
+ .vshistory/
366
+
367
+ # BeatPulse healthcheck temp database
368
+ healthchecksdb
369
+
370
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
371
+ MigrationBackup/
372
+
373
+ # Ionide (cross platform F# VS Code tools) working folder
374
+ .ionide/
375
+
376
+ # Fody - auto-generated XML schema
377
+ FodyWeavers.xsd
378
+
379
+ # VS Code files for those working on multiple tools
380
+ .vscode/*
381
+ !.vscode/settings.json
382
+ !.vscode/tasks.json
383
+ !.vscode/launch.json
384
+ !.vscode/extensions.json
385
+ *.code-workspace
386
+
387
+ # Local History for Visual Studio Code
388
+ .history/
389
+
390
+ # Windows Installer files from build outputs
391
+ *.cab
392
+ *.msi
393
+ *.msix
394
+ *.msm
395
+ *.msp
396
+
397
+ # JetBrains Rider
398
+ *.sln.iml
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Microsoft Open Source Code of Conduct
2
+
3
+ This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4
+
5
+ Resources:
6
+
7
+ - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8
+ - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9
+ - Contact [[email protected]](mailto:[email protected]) with questions or concerns
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) Microsoft Corporation.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE
Makefile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: install style test
2
+
3
+ PYTHON := python
4
+ CHECK_DIRS := promptwizard tests
5
+
6
+ install:
7
+ @${PYTHON} setup.py bdist_wheel
8
+ @${PYTHON} -m pip install dist/sdtools*
9
+
10
+ style:
11
+ black $(CHECK_DIRS)
12
+ isort -rc $(CHECK_DIRS)
13
+ flake8 $(CHECK_DIRS)
14
+
15
+ test:
16
+ @${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/
README.md CHANGED
@@ -1,3 +1,265 @@
1
- ---
2
- license: bsd-2-clause
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # PromptWizard 🧙
3
+
4
+ <p align="left">
5
+ <a href='https://arxiv.org/abs/2405.18369'>
6
+ <img src=https://img.shields.io/badge/arXiv-2409.10566-b31b1b.svg>
7
+ </a>
8
+ <a href='https://www.microsoft.com/en-us/research/blog/promptwizard-the-future-of-prompt-optimization-through-feedback-driven-self-evolving-prompts/'>
9
+ <img src=images/msr_blog.png width="16">
10
+ Blog Post
11
+ </a>
12
+ <a href='https://microsoft.github.io/PromptWizard/'>
13
+ <img src=images/github.png width="16">
14
+ Project Website
15
+ </a>
16
+ </p>
17
+
18
+
19
+ > **PromptWizard: Task-Aware Prompt Optimization Framework**<br>
20
+ > Eshaan Agarwal, Joykirat Singh, Vivek Dani, Raghav Magazine, Tanuja Ganu, Akshay Nambi <br>
21
+
22
+ ## Overview 🌟
23
+ <p align="center">Overview of the PromptWizard framework</p>
24
+ <img src="./images/overview.png" >
25
+
26
+ PromptWizard is a discrete prompt optimization framework that employs a self-evolving mechanism where the LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis. This self-adaptive approach ensures holistic optimization by evolving both the instructions and in-context learning examples for better task performance.
27
+
28
+ Three key components of PromptWizard are the following:
29
+
30
+ - Feedback-driven Refinement: LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis​
31
+ - Critique and Synthesize diverse examples: Generates synthetic examples that are robust, diverse and task-aware. Also it optimizes both prompt and examples in tandem​
32
+ - Self generated Chain of Thought (CoT) steps with combination of positive, negative and synthetic examples
33
+
34
+ <p align="center">Stage 1: Iterative optimization of instructions</p>
35
+ <p align="center">
36
+ <img src="./images/iterative_flowchart-1.png" width="49.5%" />
37
+ </p>
38
+
39
+ <p align="center">Stage 2: Sequential optimization of instruction and examples</p>
40
+ <p align="center">
41
+ <img src="./images/sequential_flowchart-1.png" width="49.5%" />
42
+ </p>
43
+
44
+ ## Installation ⬇️
45
+
46
+ Follow these steps to set up the development environment and install the package:
47
+
48
+ 1) Clone the repository
49
+ ```
50
+ git clone https://github.com/microsoft/PromptWizard
51
+ cd PromptWizard
52
+ ```
53
+ 2) Create and activate a virtual environment
54
+
55
+ On Windows
56
+ ```
57
+ python -m venv venv
58
+ venv\Scripts\activate
59
+ ```
60
+ On macOS/Linux:
61
+ ```
62
+ python -m venv venv
63
+ source venv/bin/activate
64
+ ```
65
+ 3) Install the package in development mode:
66
+ ```
67
+ pip install -e .
68
+ ```
69
+
70
+
71
+ ## Quickstart 🏃
72
+
73
+ There are three main ways to use PromptWizard:
74
+ - Scenario 1 : Optimizing prompts without examples
75
+ - Scenario 2 : Generating synthetic examples and using them to optimize prompts
76
+ - Scenario 3 : Optimizing prompts with training data
77
+
78
+ **NOTE** : Refer this [notebook](demos/scenarios/dataset_scenarios_demo.ipynb) to get a detailed understanding of the usage for each of the scenarios. **This serves as a starting point to understand the usage of PromptWizard**
79
+
80
+ #### High level overview of using PromptWizard
81
+ - Decide your scenario
82
+ - Fix the configuration and environment variables for API calling
83
+ - Use ```promptopt_config.yaml``` to set configurations. For example for GSM8k this [file](demos/gsm8k/configs/promptopt_config.yaml) can be used
84
+ - Use ```.env``` to set environment variables. For GSM8k this [file](demos/gsm8k/.env) can be used
85
+ ```
86
+ USE_OPENAI_API_KEY="XXXX"
87
+ # Replace with True/False based on whether or not to use OPENAI API key
88
+
89
+ # If the first variable is set to True then fill the following two
90
+ OPENAI_API_KEY="XXXX"
91
+ OPENAI_MODEL_NAME ="XXXX"
92
+
93
+ # If the first variable is set to False then fill the following three
94
+ AZURE_OPENAI_ENDPOINT="XXXXX"
95
+ # Replace with your Azure OpenAI Endpoint
96
+
97
+ OPENAI_API_VERSION="XXXX"
98
+ # Replace with the version of your API
99
+
100
+ AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="XXXXX"
101
+ # Create a deployment for the model and place the deployment name here.
102
+ ```
103
+ - Run the code
104
+ - To run PromptWizard on your custom dataset please jump [here](#run-on-custom-dataset)
105
+
106
+ #### Running PromptWizard with training data (Scenario 3)
107
+ - We support [GSM8k](https://huggingface.co/datasets/openai/gsm8k), [SVAMP](https://huggingface.co/datasets/ChilleD/SVAMP), [AQUARAT](https://huggingface.co/datasets/deepmind/aqua_rat) and [Instruction_Induction(BBII)](https://github.com/xqlin98/INSTINCT/tree/main/Induction/experiments/data/instruction_induction/raw) datasets
108
+ - Please note that the time taken for prompt optimization is dependent on the dataset. In our experiments for the above mentioned datasets, it took around 20 - 30 minutes on average.
109
+
110
+ #### Running on GSM8k (AQUARAT/SVAMP)
111
+
112
+ - Please note that this code requires access to LLMs via API calling for which we support AZURE endpoints or OPENAI keys
113
+ - Set the AZURE endpoint configurations in [.env](demos/gsm8k/.env)
114
+ - Follow the steps in [demo.ipynb](demos/gsm8k/demo.ipynb) to download the data, run the prompt optimization and carry out inference.
115
+
116
+ #### Running on BBII
117
+
118
+ - BBII has many datasets in it, based on the dataset set the configs [here](demos/bbh/configs/promptopt_config.yaml)
119
+ - In configs ```task_description```,```base_instruction``` and ```answer_format``` need to be changed for different datasets in BBII, the rest of the configs remain the same
120
+ - A demo is presented in [demo.ipynb](demos/bbh/demo.ipynb)
121
+
122
+
123
+
124
+ ## Run on Custom Datasets 🗃️
125
+
126
+ ### Create Custom Dataset
127
+ - Our code expects the dataset to be in ```.jsonl``` file format
128
+ - Both the train and test set follow the same format
129
+ - Every sample in the ```.jsonl``` should have 2 fields :
130
+ 1) ```question``` : It should contain the complete question that is to be asked to the LLM
131
+ 2) ```answer``` : It should contain the ground truth answer which can be verbose or concise
132
+
133
+
134
+ ### Run on Custom Dataset
135
+
136
+ NOTE : Refer to the [demos](demos) folder for examples of folders for four datasets. The ```.ipynb``` in each of the folders shows how to run PromptWizard on that particular dataset. A similar procedure can be followed for a new dataset. Below is the explanation of each of the components of the ```.ipynb``` and the dataset specific folder structure in detail
137
+
138
+ #### Steps to be followed for custom datasets
139
+
140
+ 1) Every new dataset needs to have the following
141
+ - ```configs``` folder to store files for defining optimization hyperparameters and setup configs
142
+ - ```data``` folder to store ```train.jsonl``` and ```test.jsonl``` as curated [here](#create-custom-dataset) (this is done in the notebooks)
143
+ - ```.env``` file for environment variables to be used for API calling
144
+ - ```.py/.ipynb``` script to run the code
145
+
146
+ 2) Set the hyperparameters like number of mutations, refine steps, in-context examples etc.
147
+ - Set the following in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml) :
148
+ - ```task_description``` : Description of the task at hand which will be fed into the prompt
149
+ - For GSM8k a description like the following can be used
150
+ ```
151
+ You are a mathematics expert. You will be given a mathematics problem which you need to solve
152
+ ```
153
+ - ```base_instruction``` : Base instruction in line with the dataset
154
+ - A commonly used base instruction could be
155
+ ```
156
+ Lets think step by step.
157
+ ```
158
+ - ```answer_format``` : Instruction for specifying the answer format
159
+ - It is crucial to set the ```answer_format``` properly to ensure correct extraction by ```def extract_final_answer()```
160
+ - Answer format could be :
161
+ ```
162
+ At the end, wrap only your final option between <ANS_START> and <ANS_END> tags
163
+ ```
164
+ Then in ```def extract_final_answer()``` we can simply write code to extract string between the tags
165
+
166
+ - ```seen_set_size``` : The number of train samples to be used for prompt optimization
167
+ - In our experiments we set this to be 25. In general any number between 20-50 would work
168
+ - ```few_shot_count``` : The number of in-context examples needed in the prompt
169
+ - The value can be set to any positive integer based on the requirement
170
+ - For generating zero-shot prompts, set the values to a small number (i.e between 2-5) and after the final prompt is generated the in-context examples can be removed. We suggest using some in-context examples as during the optimization process the instructions in the prompt are refined using in-context examples hence setting it to a small number will give better zero-shot instructions in the prompt
171
+ - ```generate_reasoning``` : Whether or not to generate reasoning for the in-context examples
172
+ - In our experiments we found it to improve the prompt overall as it provides a step-by-step approach to reach the final answer. However if there is a constraint on the prompt length or number of prompt tokens, it can be turned off to get smaller sized prompts
173
+ - ```generate_expert_identity``` and ```generate_intent_keywords``` : Having these helped improve the prompt as they help making the prompt relevant to the task
174
+ - Refer ```promptopt_config.yaml``` files in folders present [here](demos) for the descriptions used for AQUARAT, SVAMP and GSM8k. For BBII refer [description.py](demos/bbh/description.py) which has the meta instructions for each of the datasets
175
+ - Following are the global parameters which can be set based on the availability of the training data
176
+ - ```run_without_train_examples``` is a global hyperparameter which can be used when there are no training samples and in-context examples are not required in the final prompt
177
+ - ```generate_synthetic_examples``` is a global hyperparameter which can be used when there are no training samples and we want to generate synthetic data for training
178
+ - ```use_examples``` is a global hyperparameter which can be used to optimize prompts using training data
179
+ 3) Create a dataset specific class which inherits ```class DatasetSpecificProcessing``` similar to ```GSM8k(DatasetSpecificProcessing)``` in [demo.ipynb](demos/gsm8k/demo.ipynb) and define the following functions in it
180
+ 1) In ```def extract_answer_from_output()``` : This is a dataset specific function, given the ```answer``` from the dataset it should extract and return a concise form of the answer. Note that based on the dataset it can also simply return the ```answer``` as it is like in case of SVAMP and AQUARAT datasets
181
+ 2) ```def extract_final_answer()``` : This is a LLM output specific function, given the verbose answer from the LLM it should extract and return the concise final answer
182
+ 3) Define ```def access_answer()``` : This function takes an input the LLM output, then does the following:
183
+ - Extracts the concise answer using ```def extract_final_answer()``` from the LLM output as defined above
184
+ - Evaluates the extracted answer against the ground truth and returns
185
+ - Extracted answer from LLM output
186
+ - Boolean value indicating if answer is correct or not
187
+ - The evaluation done here is dataset specific, for datasets like GSM8k, SVAMP and AQUARAT which have final answer as an number, we can do a direct match between the numbers generated and the ground truth, while for datasets where the answer is a sentence or paragraph it would be better to do evaluation with llm-as-a-judge, to compare the generated and ground truth paragraph/sentence. An example is available in ```def access_answer()``` in [this](demos/bbh/demo.ipynb) notebook
188
+
189
+
190
+ ## How PromptWizard Works 🔍
191
+ - Using the problem description and initial prompt instruction, PW generates variations of the instruction by prompting LLMs to mutate it. Based on performance, the best prompt is selected. PW incorporates a critique component that provides feedback, thus guiding and refining the prompt over multiple iterations.
192
+ - PW also optimizes in-context examples. PW selects a diverse set of examples
193
+ from the training data, identifying positive and negative examples based on their performance with
194
+ the modified prompt. Negative examples help inform further prompt refinements.
195
+ - Examples and instructions are sequentially optimized, using the critique to generate synthetic examples that address the current prompt’s weaknesses. These examples are integrated to further refine the prompt.
196
+ - PW generates detailed reasoning chains via Chain-of-Thought (CoT), enriching the prompt’s capacity for problem-solving.
197
+ - PW aligns prompts with human reasoning by integrating task intent and expert
198
+ personas, enhancing both model performance and interpretability.
199
+
200
+ ## Configurations ⚙️
201
+
202
+ Here we define the various hyperparameters used in prompt optimization process found in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml)
203
+
204
+ - ```mutate_refine_iterations```: Number of iterations for conducting mutation of task description
205
+ followed by refinement of instructions
206
+ - ```mutation_rounds```: Number of rounds of mutation to be performed when generating different styles
207
+ - ```refine_task_eg_iterations```: Number of iterations for refining task description and in context examples
208
+ - ```style_variation```: Number of thinking style variations to be used in prompt mutation
209
+ - ```questions_batch_size```: Number of questions to be asked to LLM in a single batch, during training step
210
+ - ```min_correct_count```: Minimum number of batches of questions to correctly answered, for a prompt to be considered as performing good
211
+ - ```max_eval_batches```: Maximum number of mini-batches on which we should evaluate the prompt
212
+ - ```top_n```: Number of top best prompts to be considered from scoring stage for the next stage
213
+ - ```seen_set_size```: Number of samples from trainset to be used for training
214
+ - ```few_shot_count```: Number of in-context examples required in final prompt
215
+
216
+ ## Best Practices 💡
217
+
218
+ Following are some of the best practices we followed during our experiments
219
+ - Regarding the parameters in [promptopt_config.yaml](demos/gsm8k/configs/promptopt_config.yaml)
220
+ - We found the best performing values for ```mutate_refine_iterations```,```mutation_rounds```,```refine_task_eg_iterations``` to be 3 or 5
221
+ - Other parameters have been set to their ideal values. ```seen_set_size``` can be increased to 50 and ```few_shot_count``` can be set based on the use case
222
+ - The prompts generated at the end of the training process are usually very detailed, however user supervision can help tune it further for the task at hand
223
+ - Trying both configurations of having synthetic in-context examples or in-context examples from the train set can be tried to find the best prompt based on use case.
224
+
225
+ ## Results 📈
226
+
227
+ <p align="center">
228
+ <img src= "./images/curve.png" width="45%" />
229
+ <p align="center">PromptWizard consistently outperforms other methods across various
230
+ thresholds, maintaining the highest p(τ) values, indicating that it consistently performs near the best
231
+ possible accuracy across all tasks</p>
232
+ </p>
233
+
234
+
235
+ - The figure shows the performance profile curve for the instruction induction
236
+ tasks. The performance profile curve visualizes how frequently
237
+ different approaches’ performance is within a given distance of the best performance. In this curve,
238
+ the x-axis (τ) represents the performance ratio relative to the best-performing method, and the y-axis
239
+ (p(τ )) reflects the fraction of tasks where a method’s performance is within this ratio. So for a given
240
+ method, the curve tells what percentage of the tasks are within τ distance to the best performance.
241
+
242
+
243
+ ## How to contribute: ✋
244
+ This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
245
+ When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
246
+ This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [email protected] with any additional questions or comments.
247
+
248
+ ## Citation 📝
249
+
250
+ If you make use of our work, please cite our paper:
251
+
252
+ ```
253
+ @misc{agarwal2024promptwizardtaskawarepromptoptimization,
254
+ title={PromptWizard: Task-Aware Prompt Optimization Framework},
255
+ author={Eshaan Agarwal and Joykirat Singh and Vivek Dani and Raghav Magazine and Tanuja Ganu and Akshay Nambi},
256
+ year={2024},
257
+ eprint={2405.18369},
258
+ archivePrefix={arXiv},
259
+ primaryClass={cs.CL},
260
+ url={https://arxiv.org/abs/2405.18369},
261
+ }
262
+ ```
263
+ ## Responsible AI Considerations
264
+ For guidelines and best practices related to Responsible AI, please refer to our [Responsible AI Guidelines](RESPONSIBLE_AI.md).
265
+
RESPONSIBLE_AI.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### PromptWizard: Responsible AI FAQ
2
+
3
+ - What is PromptWizard?
4
+
5
+ PromptWizard is a novel framework for prompt optimization that helps tune a good prompt for a given task and dataset, so that LLMs’ output/accuracy can be optimized. PromptWizard is solely designed for research settings, and its testing has only been carried out in such environments. It should not be used in downstream applications without additional analysis and mitigation to address potential harm or bias in the proposed application. Please refer to the paper - [PromptWizard: Task-Aware Agent-driven Prompt Optimization Framework (arxiv.org)](https://arxiv.org/abs/2405.18369) - for more details.
6
+
7
+ - What can PromptWizard do?
8
+
9
+ PromptWizard framework is an AI-based framework that internally uses LLM to find the optimal prompt for a given task. It takes as input task description, dataset format & few training examples, hyperparameter configurations and outputs an optimized prompt for the given LLM and task intent.
10
+ Unlike existing approaches, PromptWizard optimizes both prompt instructions and in-context examples, maximizing the LLM performance. It iteratively refines prompts by mutating instructions and incorporating negative examples. It further enhances both instructions and examples with the aid of a critique provided by the LLM on a candidate prompt.
11
+ New synthetic instructions and examples are generated with detailed reasoning steps using LLM.
12
+
13
+ - What is/are PromptWizard’s intended use(s)?
14
+
15
+ Please note that PromptWizard is an open-source framework under active development and intended for use for research purposes. It should not be used in any downstream applications without additional detailed evaluation of robustness, safety issues and assessment of any potential harm or bias in the proposed application. For all GenAI applications, prompt design and tuning are tedious, skilful and laborious tasks. PromptWizard’s intended use is to design and optimize the prompt along with the few-shot examples for a given task/domain and dataset. This well-crafted prompt would enable the LLM to provide more accurate and high-quality answers. We have also integrated Azure AI Content Safety service, to avoid/slow-down malicious uses.
16
+
17
+ - How was PromptWizard evaluated? What metrics are used to measure performance?
18
+
19
+ PromptWizard framework is generic enough to work on any domain/dataset/task. However, we have evaluated the performance of PromptWizard across 35 tasks on 8 datasets. More details can be found [PromptWizard: Task-Aware Agent-driven Prompt Optimization Framework (arxiv.org)](https://arxiv.org/abs/2405.18369)
20
+
21
+ The opensource datasets used for evaluation include
22
+ - Medical challenges ([MedQA](https://github.com/jind11/MedQA), [PubMedQA](https://pubmedqa.github.io/))
23
+ - Commonsense reasoning ([CSQA](https://amritasaha1812.github.io/CSQA/), [SQA](https://www.microsoft.com/en-in/download/details.aspx?id=54253))
24
+ - Math reasoning problems ([GSM8k](https://huggingface.co/datasets/openai/gsm8k))
25
+ - Hate speech classification ([Ethos](https://link.springer.com/article/10.1007/s40747-021-00608-2)),
26
+ - Complex domain-specific tasks ([MMLU](https://huggingface.co/datasets/cais/mmlu) 6 medical tasks, [Big-Bench-Hard-23](https://huggingface.co/datasets/maveriq/bigbenchhard))
27
+
28
+ Additionally, the team has also conducted “red team” analysis to evaluate if PromptWizard optimizes harmful intent. With appropriate Azure content moderation deployed in the pipeline on the input to PromptWizard and output from PromptWizard, it didn’t optimize prompts for harmful intent. Please refer to the details for Azure content moderation [here](https://learn.microsoft.com/en-us/azure/ai-services/content-moderator/overview).
29
+
30
+ - What are the limitations of PromptWizard? How can users minimize the impact of PromptWizard’s limitations when using the system?
31
+
32
+ - The framework is evaluated primarily on English language tasks as described in the earlier section. The framework is not yet evaluated for multilingual settings.
33
+ - The framework generates synthetic examples for few-shot learning based on task description. User is required to validate the correctness and diversity of generated synthetic examples.
34
+ - PromptWizard utilizes existing LLMs and does not train a new model. Hence, it inherits the capabilities and limitations of its base model, as well as common limitations among other large language models or limitations caused by its training process. Hence, we suggest using the appropriate base LLM suitable for your use-cases to work with PromptWizard.
35
+
36
+ - What operational factors and settings allow for effective and responsible use of PromptWizard?
37
+
38
+ - Input considerations: Better performance with PromptWizard can be achieved by specifying the input components like task and intent as clearly and concisely as possible.
39
+ - Human involvement: PromptWizard optimizes the prompt with prompt instruction and a few shot examples for the given intent and task. We suggest human oversight to review the optimized prompts before those are executed with LLMs.
40
+ - LLMs: Users can choose the LLM that is optimized for responsible use. The default LLM is GPT-4 which inherits the existing RAI mechanisms and filters from the LLM provider. Caching is enabled by default to increase reliability and control cost. We encourage developers to review [OpenAI’s Usage policies](https://openai.com/policies/usage-policies/) and [Azure OpenAI’s Code of Conduct](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/code-of-conduct) when using GPT-4.
41
+ - Content Safety: We have integrated [Azure AI Content Safety](https://learn.microsoft.com/en-us/azure/ai-services/content-safety/overview) service for content moderation. We suggest to deploy PromptWizard with such content safety system in the pipeline.
SECURITY.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
2
+
3
+ ## Security
4
+
5
+ Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
6
+
7
+ If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
8
+
9
+ ## Reporting Security Issues
10
+
11
+ **Please do not report security vulnerabilities through public GitHub issues.**
12
+
13
+ Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14
+
15
+ If you prefer to submit without logging in, send email to [[email protected]](mailto:[email protected]). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16
+
17
+ You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18
+
19
+ Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20
+
21
+ * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22
+ * Full paths of source file(s) related to the manifestation of the issue
23
+ * The location of the affected source code (tag/branch/commit or direct URL)
24
+ * Any special configuration required to reproduce the issue
25
+ * Step-by-step instructions to reproduce the issue
26
+ * Proof-of-concept or exploit code (if possible)
27
+ * Impact of the issue, including how an attacker might exploit the issue
28
+
29
+ This information will help us triage your report more quickly.
30
+
31
+ If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32
+
33
+ ## Preferred Languages
34
+
35
+ We prefer all communications to be in English.
36
+
37
+ ## Policy
38
+
39
+ Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40
+
41
+ <!-- END MICROSOFT SECURITY.MD BLOCK -->
demos/aquarat/.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ USE_OPENAI_API_KEY="False"
2
+
3
+ OPENAI_API_KEY=""
4
+ OPENAI_MODEL_NAME=""
5
+
6
+ OPENAI_API_VERSION=""
7
+ AZURE_OPENAI_ENDPOINT=""
8
+ AZURE_OPENAI_DEPLOYMENT_NAME=""
demos/aquarat/configs/prompt_library.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompts: |
2
+ You are a helpful assistant that assists research students in understanding research papers.
3
+ system_guidelines: |
4
+ Guidelines
5
+ - Your role must always be a helpful assistant that assists students in understanding research papers.
6
+ - Only answer questions that are directly or indirectly related to the referenced paper(s).
7
+
8
+ mode:
9
+ chat:
10
+ - name: CHAT-FIRST-MESSAGE
11
+ llm_request_type: rag-query
12
+ prompt_template: |
13
+ {user_msg}
14
+ emb_model_id: text embedding ada 002 [vellm-openai2]
15
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
16
+ prepend_system_prompts: False
17
+ prepend_system_guidelines: False
18
+
19
+ - name: CHAT-NEXT-MESSAGES
20
+ llm_request_type: rag-query
21
+ prompt_template: |
22
+ {user_msg}
23
+ emb_model_id: text embedding ada 002 [vellm-openai2]
24
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
25
+ prepend_system_prompts: False
26
+ prepend_system_guidelines: False
27
+
28
+ generation:
29
+ - name: FLASH_PROFILE
30
+ prompt_template: |
31
+ {user_msg}
32
+ prepend_system_prompts: False
33
+ prepend_system_guidelines: False
34
+ llm_request_type: rag-query
35
+ emb_model_id: text embedding ada 002 [vellm-openai2]
36
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
demos/aquarat/configs/promptopt_config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specify one or more prompt refinement techniques to be used. If you specify more than one prompt refinement technique,
2
+ # all these techniques would run on the same seed data. Result, iterations needed & cost incurred for each of these
3
+ # techniques would be logged. And the winning technique for each data instance and overall would be logged.
4
+
5
+ # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt
6
+ # Uncomment techniques that you want to use
7
+ ############################ Critique Task Description Start ############################
8
+ prompt_technique_name: "critique_n_refine"
9
+ # unique_model_id of model defined in llm_config.yaml
10
+ unique_model_id: gpt-4o
11
+ # Number of iterations for conducting <mutation_rounds> rounds of mutation of task description
12
+ # followed by refinement of instructions
13
+ mutate_refine_iterations: 3
14
+ # Number of rounds of mutation to be performed when generating different styles
15
+ mutation_rounds: 3
16
+ # Refine instruction post mutation
17
+ refine_instruction: true
18
+ # Number of iterations for refining task description and in context examples for few-shot
19
+ refine_task_eg_iterations: 3
20
+ # Number of variations of prompts to generate in given iteration
21
+ style_variation: 5
22
+ # Number of questions to be asked to LLM in a single batch, during training step
23
+ questions_batch_size: 1
24
+ # Number of batches of questions to be correctly answered, for a prompt to be considered as performing well
25
+ min_correct_count: 3
26
+ # Max number of mini-batches on which we should evaluate our prompt
27
+ max_eval_batches: 6
28
+ # Number of top best performing prompts to be considered for next iterations
29
+ top_n: 1
30
+ # Description of task. This will be fed to prompt
31
+ task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve"
32
+ # Base instruction, in line with your dataset. This will be fed to prompt
33
+ base_instruction: "Lets think step by step."
34
+ # Instruction for specifying answer format
35
+ answer_format: "At the end, wrap only your final option between <ANS_START> and <ANS_END> tags"
36
+ # Number of samples from dataset, set aside as training data. In every iteration we would be drawing
37
+ # `questions_batch_size` examples from training data with replacement.
38
+ seen_set_size: 25
39
+ # Number of examples to be given for few shots
40
+ few_shot_count: 5
41
+ # Number of synthetic training examples to be generated
42
+ num_train_examples: 20
43
+ # Generate synthetic reasoning
44
+ generate_reasoning: true
45
+ # Generate description of an expert which can solve the task at hand
46
+ generate_expert_identity: true
47
+ # Generate keywords that describe the intent of the task
48
+ generate_intent_keywords: false
49
+ ############################ Critique Task Description End ############################
50
+
51
+
52
+
demos/aquarat/configs/setup_config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assistant_llm:
2
+ # put the unique_model_id that you specified in llm_config.yaml
3
+ prompt_opt: gpt-4o
4
+ dir_info:
5
+ # Base directory for everything
6
+ base_dir: logs
7
+ log_dir_name: glue_logs
8
+ experiment_name: aquarat
9
+ # Many features are different for mode: online/offline. For eg
10
+ # 1) Print of logs happens on console for offline mode
11
+ # 2) LLM Queue gets instantiated only in online mode
12
+ mode: offline
13
+ # Full length description of the experiment. This would be logged.
14
+ description:
demos/aquarat/demo.ipynb ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "695a1a9b",
6
+ "metadata": {},
7
+ "source": [
8
+ "#### Set environment variables in [.env](.env) for LLM API calling"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "8042a9cc",
14
+ "metadata": {},
15
+ "source": [
16
+ "### Import Dependencies"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "f1fb3d81-16b6-4b8c-a028-880fdce5e14a",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import sys\n",
27
+ "sys.path.insert(0, \"../../\")\n",
28
+ "import promptwizard\n",
29
+ "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n",
30
+ "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n",
31
+ "from promptwizard.glue.common.utils.file import save_jsonlist\n",
32
+ "from typing import Any\n",
33
+ "from tqdm import tqdm\n",
34
+ "import json\n",
35
+ "import os\n",
36
+ "from datasets import load_dataset\n",
37
+ "\n",
38
+ "from dotenv import load_dotenv\n",
39
+ "load_dotenv(override = True)\n"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "markdown",
44
+ "id": "5bbe055e",
45
+ "metadata": {},
46
+ "source": [
47
+ "### Create a dataset specific class and define the required functions "
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 2,
53
+ "id": "5f325d33",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "\n",
58
+ "def extract_between(start, end, text):\n",
59
+ " \"\"\"\n",
60
+ " Extracts the substring from 'text' that is between 'start' and 'end' strings.\n",
61
+ " \n",
62
+ " Parameters:\n",
63
+ " - start (str): The starting delimiter string.\n",
64
+ " - end (str): The ending delimiter string.\n",
65
+ " - text (str): The text to search within.\n",
66
+ " \n",
67
+ " Returns:\n",
68
+ " - str: The extracted substring between the start and end delimiters.\n",
69
+ " \"\"\"\n",
70
+ " start_index = text.find(start)\n",
71
+ " if start_index == -1:\n",
72
+ " return '' \n",
73
+ " \n",
74
+ " start_index += len(start)\n",
75
+ " \n",
76
+ " end_index = text.find(end, start_index)\n",
77
+ " if end_index == -1:\n",
78
+ " return '' \n",
79
+ " return text[start_index:end_index]\n",
80
+ "\n",
81
+ "class AQUARAT(DatasetSpecificProcessing):\n",
82
+ "\n",
83
+ " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n",
84
+ " def extract_answer_from_output(completion):\n",
85
+ "\n",
86
+ " return completion\n",
87
+ "\n",
88
+ " examples_set = []\n",
89
+ "\n",
90
+ " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n",
91
+ " example = {\n",
92
+ " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n",
93
+ " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n",
94
+ " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n",
95
+ " }\n",
96
+ " examples_set.append(example)\n",
97
+ "\n",
98
+ " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n",
99
+ "\n",
100
+ " def extract_final_answer(self, answer: str):\n",
101
+ " \n",
102
+ " final_answer = extract_between(text=answer,start=\"<ANS_START>\",end=\"<ANS_END>\")\n",
103
+ " return final_answer\n",
104
+ " \n",
105
+ " def access_answer(self, llm_output: str, gt_answer: str):\n",
106
+ "\n",
107
+ " predicted_answer = self.extract_final_answer(llm_output)\n",
108
+ " is_correct = False\n",
109
+ " if predicted_answer and (predicted_answer.lower() == gt_answer.lower()):\n",
110
+ " is_correct = True\n",
111
+ "\n",
112
+ " return is_correct, predicted_answer"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 3,
118
+ "id": "f384eb57",
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "aquarat_processor = AQUARAT()"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": null,
128
+ "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0",
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "\n",
133
+ "if not os.path.exists(\"data\"):\n",
134
+ " os.mkdir(\"data\")\n",
135
+ "dataset = load_dataset(\"deepmind/aqua_rat\", \"raw\")\n",
136
+ "num_samples = 1\n",
137
+ "for dataset_type in ['train','test']:\n",
138
+ " data_list = []\n",
139
+ " for data in dataset[dataset_type]:\n",
140
+ " options = data['options'][0]\n",
141
+ " for i in range(1,len(data['options'])):\n",
142
+ " options = options + \" \"+ data['options'][i]\n",
143
+ " data_list.append({\"question\": data['question']+\"\\n\"+options, \"answer\": data['correct']})\n",
144
+ " if num_samples == 100 and dataset_type == 'train': # We sample only 100 train examples and use 25 out them for training randomly\n",
145
+ " break\n",
146
+ " num_samples += 1\n",
147
+ " aquarat_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "markdown",
152
+ "id": "db891c34",
153
+ "metadata": {},
154
+ "source": [
155
+ "### Set paths"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 5,
161
+ "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6",
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n",
166
+ "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n",
167
+ "path_to_config = \"configs\"\n",
168
+ "llm_config_path = os.path.join(path_to_config, \"llm_config.yaml\")\n",
169
+ "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n",
170
+ "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "markdown",
175
+ "id": "26ba1a62",
176
+ "metadata": {},
177
+ "source": [
178
+ "### Create an object for calling prompt optimization and inference functionalities"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": null,
184
+ "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09",
185
+ "metadata": {},
186
+ "outputs": [],
187
+ "source": [
188
+ "gp = GluePromptOpt(promptopt_config_path,\n",
189
+ " setup_config_path,\n",
190
+ " train_file_name,\n",
191
+ " aquarat_processor)"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "markdown",
196
+ "id": "6b25843b",
197
+ "metadata": {},
198
+ "source": [
199
+ "### Call prompt optimization function\n",
200
+ "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n",
201
+ "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n",
202
+ "3. ```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt "
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": null,
208
+ "id": "573c6151-2c03-45d9-9904-1724a1e20f1b",
209
+ "metadata": {
210
+ "scrolled": true
211
+ },
212
+ "outputs": [],
213
+ "source": [
214
+ "# Function call to generate optimal prompt and expert profile \n",
215
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "markdown",
220
+ "id": "97549dd2",
221
+ "metadata": {},
222
+ "source": [
223
+ "### Save the optimized prompt and expert profile"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce",
230
+ "metadata": {
231
+ "scrolled": true
232
+ },
233
+ "outputs": [],
234
+ "source": [
235
+ "import pickle \n",
236
+ "\n",
237
+ "if not os.path.exists(\"results\"):\n",
238
+ " os.system(\"mkdir results\")\n",
239
+ "\n",
240
+ "with open(\"results/best_prompt.pkl\", 'wb') as f:\n",
241
+ " pickle.dump(best_prompt, f)\n",
242
+ "with open(\"results/expert_profile.pkl\", 'wb') as f:\n",
243
+ " pickle.dump(expert_profile, f)\n",
244
+ "\n",
245
+ "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "markdown",
250
+ "id": "bdbb7e07",
251
+ "metadata": {},
252
+ "source": [
253
+ "### Evaluate the optimized prompt"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c",
260
+ "metadata": {
261
+ "scrolled": true
262
+ },
263
+ "outputs": [],
264
+ "source": [
265
+ "gp.EXPERT_PROFILE = expert_profile\n",
266
+ "gp.BEST_PROMPT = best_prompt\n",
267
+ "\n",
268
+ "# Function call to evaluate the prompt\n",
269
+ "accuracy = gp.evaluate(test_file_name)\n",
270
+ "\n",
271
+ "print(f\"Final Accuracy: {accuracy}\")"
272
+ ]
273
+ }
274
+ ],
275
+ "metadata": {
276
+ "kernelspec": {
277
+ "display_name": "PromptWizard",
278
+ "language": "python",
279
+ "name": "python3"
280
+ },
281
+ "language_info": {
282
+ "codemirror_mode": {
283
+ "name": "ipython",
284
+ "version": 3
285
+ },
286
+ "file_extension": ".py",
287
+ "mimetype": "text/x-python",
288
+ "name": "python",
289
+ "nbconvert_exporter": "python",
290
+ "pygments_lexer": "ipython3",
291
+ "version": "3.12.4"
292
+ }
293
+ },
294
+ "nbformat": 4,
295
+ "nbformat_minor": 5
296
+ }
demos/bbh/.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ USE_OPENAI_API_KEY="False"
2
+
3
+ OPENAI_API_KEY=""
4
+ OPENAI_MODEL_NAME=""
5
+
6
+ OPENAI_API_VERSION=""
7
+ AZURE_OPENAI_ENDPOINT=""
8
+ AZURE_OPENAI_DEPLOYMENT_NAME=""
demos/bbh/configs/prompt_library.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompts: |
2
+ You are a helpful assistant that assists research students in understanding research papers.
3
+ system_guidelines: |
4
+ Guidelines
5
+ - Your role must always be a helpful assistant that assists students in understanding research papers.
6
+ - Only answer questions that are directly or indirectly related to the referenced paper(s).
7
+
8
+ mode:
9
+ chat:
10
+ - name: CHAT-FIRST-MESSAGE
11
+ llm_request_type: rag-query
12
+ prompt_template: |
13
+ {user_msg}
14
+ emb_model_id: text embedding ada 002 [vellm-openai2]
15
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
16
+ prepend_system_prompts: False
17
+ prepend_system_guidelines: False
18
+
19
+ - name: CHAT-NEXT-MESSAGES
20
+ llm_request_type: rag-query
21
+ prompt_template: |
22
+ {user_msg}
23
+ emb_model_id: text embedding ada 002 [vellm-openai2]
24
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
25
+ prepend_system_prompts: False
26
+ prepend_system_guidelines: False
27
+
28
+ generation:
29
+ - name: FLASH_PROFILE
30
+ prompt_template: |
31
+ {user_msg}
32
+ prepend_system_prompts: False
33
+ prepend_system_guidelines: False
34
+ llm_request_type: rag-query
35
+ emb_model_id: text embedding ada 002 [vellm-openai2]
36
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
demos/bbh/configs/promptopt_config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specify one or more prompt refinement techniques to be used. If you specify more than one prompt refinement technique,
2
+ # all these techniques would run on the same seed data. Result, iterations needed & cost incurred for each of these
3
+ # techniques would be logged. And the winning technique for each data instance and overall would be logged.
4
+
5
+ # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt
6
+ # Uncomment techniques that you want to use
7
+ ############################ Critique Task Description Start ############################
8
+ prompt_technique_name: "critique_n_refine"
9
+ # unique_model_id of model defined in llm_config.yaml
10
+ unique_model_id: gpt-4o
11
+ # Number of iterations for conducting <mutation_rounds> rounds of mutation of task description
12
+ # followed by refinement of instructions
13
+ mutate_refine_iterations: 3
14
+ # Number of rounds of mutation to be performed when generating different styles
15
+ mutation_rounds: 3
16
+ # Refine instruction post mutation
17
+ refine_instruction: true
18
+ # Number of iterations for refining task description and in context examples for few-shot
19
+ refine_task_eg_iterations: 3
20
+ # Number of variations of prompts to generate in given iteration
21
+ style_variation: 5
22
+ # Number of questions to be asked to LLM in a single batch, during training step
23
+ questions_batch_size: 1
24
+ # Number of batches of questions to correctly answered, for a prompt to be considered as performing good
25
+ min_correct_count: 3
26
+ # Max number of mini-batches on which we should evaluate our prompt
27
+ max_eval_batches: 6
28
+ # Number of top best performing prompts to be considered for next iterations
29
+ top_n: 1
30
+ # Description of task. This will be fed to prompt
31
+ task_description : 'Extract the second letter from the input word.'
32
+ # Base instruction, in line with your dataset. This will be fed to prompt
33
+ base_instruction : 'Output the second letter. Think step by step to arrive at the solution.'
34
+ # Instruction for specifying answer format
35
+ answer_format : 'For each input word, present the reasoning followed by the extracted letter (only single letter) between <ANS_START> and <ANS_END> tags'
36
+ # Number of samples from dataset, set aside as training data. In every iteration we would be drawing
37
+ # `questions_batch_size` examples from training data with replacement.
38
+ seen_set_size: 25
39
+ # Number of examples to be given for few shots
40
+ few_shot_count: 5
41
+ # Number of synthetic training examples to be generated
42
+ num_train_examples: 20
43
+ # Generate synthetic reasoning
44
+ generate_reasoning: true
45
+ # Generate description of an expert which can solve the task at hand
46
+ generate_expert_identity: true
47
+ # Generate keywords that describe the intent of the task
48
+ generate_intent_keywords: false
49
+ ############################ Critique Task Description End ############################
50
+
51
+
52
+
demos/bbh/configs/setup_config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assistant_llm:
2
+ # put the unique_model_id that you specified in llm_config.yaml
3
+ prompt_opt: gpt-4o
4
+ dir_info:
5
+ # Base directory for everything
6
+ base_dir: logs
7
+ log_dir_name: glue_logs
8
+ experiment_name: bbh
9
+ # Many features are different for mode: online/offline. For eg
10
+ # 1) Print of logs happens on console for offline mode
11
+ # 2) LLM Queue gets instantiated only in online mode
12
+ mode: offline
13
+ # Full length description of the experiment. This would be logged.
14
+ description:
demos/bbh/demo.ipynb ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ece8514e",
6
+ "metadata": {},
7
+ "source": [
8
+ "#### Set environment variables in [.env](.env) for LLM API calling"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "678ed8db",
14
+ "metadata": {},
15
+ "source": [
16
+ "### Import Dependencies"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "f1fb3d81-16b6-4b8c-a028-880fdce5e14a",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import sys\n",
27
+ "sys.path.insert(0, \"../../\")\n",
28
+ "import promptwizard\n",
29
+ "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n",
30
+ "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n",
31
+ "from promptwizard.glue.common.utils.file import save_jsonlist\n",
32
+ "from typing import Any\n",
33
+ "from tqdm import tqdm\n",
34
+ "import json\n",
35
+ "import os\n",
36
+ "from azure.identity import get_bearer_token_provider, AzureCliCredential\n",
37
+ "from openai import AzureOpenAI\n",
38
+ "\n",
39
+ "from dotenv import load_dotenv\n",
40
+ "load_dotenv(override = True)\n"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "markdown",
45
+ "id": "dc9b746c",
46
+ "metadata": {},
47
+ "source": [
48
+ "### Below code can be used for LLM-as-a-judge eval"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 2,
54
+ "id": "26719362",
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "def extract_between(start, end, text):\n",
59
+ " \"\"\"\n",
60
+ " Extracts the substring from 'text' that is between 'start' and 'end' strings.\n",
61
+ " \n",
62
+ " Parameters:\n",
63
+ " - start (str): The starting delimiter string.\n",
64
+ " - end (str): The ending delimiter string.\n",
65
+ " - text (str): The text to search within.\n",
66
+ " \n",
67
+ " Returns:\n",
68
+ " - str: The extracted substring between the start and end delimiters.\n",
69
+ " \"\"\"\n",
70
+ " start_index = text.find(start)\n",
71
+ " if start_index == -1:\n",
72
+ " return '' \n",
73
+ " \n",
74
+ " start_index += len(start)\n",
75
+ " \n",
76
+ " end_index = text.find(end, start_index)\n",
77
+ " if end_index == -1:\n",
78
+ " return '' \n",
79
+ " return text[start_index:end_index]\n",
80
+ "\n",
81
+ "def call_api(messages):\n",
82
+ " \n",
83
+ " token_provider = get_bearer_token_provider(\n",
84
+ " AzureCliCredential(), \"https://cognitiveservices.azure.com/.default\"\n",
85
+ " )\n",
86
+ " client = AzureOpenAI(\n",
87
+ " api_version=\"<OPENAI_API_VERSION>\",\n",
88
+ " azure_endpoint=\"<AZURE_ENDPOINT>\",\n",
89
+ " azure_ad_token_provider=token_provider\n",
90
+ " )\n",
91
+ " response = client.chat.completions.create(\n",
92
+ " model=\"<MODEL_DEPLOYMENT_NAME>\",\n",
93
+ " messages=messages,\n",
94
+ " temperature=0.0,\n",
95
+ " )\n",
96
+ " prediction = response.choices[0].message.content\n",
97
+ " return prediction\n",
98
+ "\n",
99
+ "def llm_eval(predicted_answer,gt_answer):\n",
100
+ " \n",
101
+ " EVAL_PROMPT = f\"\"\"Given the Predicted_Answer and Reference_Answer, compare them and check they mean the same.\n",
102
+ " If they mean the same then return True between <ANS_START> and <ANS_END> tags , \n",
103
+ " If they differ in the meaning then return False between <ANS_START> and <ANS_END> tags \n",
104
+ " Following are the given :\n",
105
+ " Predicted_Answer: {predicted_answer}\n",
106
+ " Reference_Answer: {gt_answer}\"\"\"\n",
107
+ " messages = [\n",
108
+ " {\"role\": \"system\", \"content\": \"\"},\n",
109
+ " {\"role\": \"user\", \"content\": EVAL_PROMPT}\n",
110
+ " ]\n",
111
+ "\n",
112
+ " response = call_api(messages)\n",
113
+ " final_judgement = extract_between(start=\"<ANS_START>\", end=\"<ANS_END>\", text=response)\n",
114
+ " return final_judgement == \"True\""
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "markdown",
119
+ "id": "4a5084d7",
120
+ "metadata": {},
121
+ "source": [
122
+ "### Create a dataset specific class and define the required functions "
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 3,
128
+ "id": "5f325d33",
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "\n",
133
+ "llm_as_judge_eval = True\n",
134
+ "\n",
135
+ "class BBH(DatasetSpecificProcessing):\n",
136
+ "\n",
137
+ " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n",
138
+ " def extract_answer_from_output(completion):\n",
139
+ "\n",
140
+ " return completion\n",
141
+ "\n",
142
+ " examples_set = []\n",
143
+ "\n",
144
+ " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n",
145
+ " example = {\n",
146
+ " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n",
147
+ " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n",
148
+ " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n",
149
+ " }\n",
150
+ " examples_set.append(example)\n",
151
+ "\n",
152
+ " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n",
153
+ "\n",
154
+ " def extract_final_answer(self, answer: str):\n",
155
+ " \n",
156
+ " final_answer = extract_between(text=answer,start=\"<ANS_START>\",end=\"<ANS_END>\")\n",
157
+ " return final_answer\n",
158
+ " \n",
159
+ " def access_answer(self, llm_output: str, gt_answer: str):\n",
160
+ "\n",
161
+ " if llm_as_judge_eval:\n",
162
+ " predicted_answer = self.extract_final_answer(llm_output)\n",
163
+ " is_correct = False\n",
164
+ " if llm_eval(predicted_answer,gt_answer):\n",
165
+ " is_correct = True\n",
166
+ " else:\n",
167
+ " predicted_answer = self.extract_final_answer(llm_output)\n",
168
+ " is_correct = False\n",
169
+ " if predicted_answer and (predicted_answer.lower() == gt_answer.lower()):\n",
170
+ " is_correct = True\n",
171
+ "\n",
172
+ " return is_correct, predicted_answer"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 4,
178
+ "id": "f384eb57",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "bbh_processor = BBH()"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "markdown",
187
+ "id": "ec7d1396",
188
+ "metadata": {},
189
+ "source": [
190
+ "### Load and save the dataset . \n",
191
+ "Set the ```dataset_to_run``` variable to choose 1 among the 19 datasets of BBII to run the optimization on"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0",
198
+ "metadata": {},
199
+ "outputs": [],
200
+ "source": [
201
+ "\n",
202
+ "if not os.path.exists(\"data\"):\n",
203
+ " os.mkdir(\"data\")\n",
204
+ "dataset_list = ['informal_to_formal','letters_list','negation','orthography_starts_with','rhymes','second_word_letter','sum','diff','sentence_similarity','taxonomy_animal','auto_categorization','object_counting','odd_one_out','antonyms','word_unscrambling','cause_and_effect','common_concept','word_sorting','synonyms']\n",
205
+ "\n",
206
+ "# Set the dataset on which to run optimization out of the 19 \n",
207
+ "dataset_to_run = 'second_word_letter'\n",
208
+ "\n",
209
+ "if not os.path.exists(\"data/\"+dataset_to_run):\n",
210
+ " os.mkdir(\"data/\"+dataset_to_run)\n",
211
+ " \n",
212
+ "os.system(\"git clone https://github.com/xqlin98/INSTINCT\")\n",
213
+ "\n",
214
+ "\n",
215
+ "for mode in ['execute','induce']:\n",
216
+ " for dataset in dataset_list:\n",
217
+ "\n",
218
+ " if dataset_to_run == dataset:\n",
219
+ " data_list = []\n",
220
+ "\n",
221
+ " file_path = 'INSTINCT/Induction/experiments/data/instruction_induction/raw/'+mode+'/'+dataset+'.json' \n",
222
+ " with open(file_path, 'r') as file:\n",
223
+ " data = json.load(file)\n",
224
+ " \n",
225
+ " save_file_path = 'test.jsonl'\n",
226
+ " if mode == 'execute':\n",
227
+ " save_file_path = 'train.jsonl'\n",
228
+ "\n",
229
+ " for key,sample in data['examples'].items():\n",
230
+ " task = dataset\n",
231
+ " if(task == 'cause_and_effect'):\n",
232
+ " cause = sample[\"cause\"]\n",
233
+ " effect = sample[\"effect\"]\n",
234
+ " import random\n",
235
+ " pair = [cause, effect]\n",
236
+ " random.shuffle(pair)\n",
237
+ " question = f\"Sentence 1: {pair[0]} Sentence 2: {pair[1]}\",\n",
238
+ " answer = cause,\n",
239
+ " elif(task == 'antonyms'):\n",
240
+ " \n",
241
+ " question = sample[\"input\"],\n",
242
+ " answer = sample[\"output\"],\n",
243
+ "\n",
244
+ " elif(task == 'common_concept'):\n",
245
+ " concept = sample[\"concept\"]\n",
246
+ " items = sample[\"items\"]\n",
247
+ " input = \", \".join(items)\n",
248
+ " question = f\"Objects: {input}\"\n",
249
+ " answer = f\"{concept}\"\n",
250
+ "\n",
251
+ " elif(task == 'diff'):\n",
252
+ " input = sample[\"input\"]\n",
253
+ " output = sample[\"output\"]\n",
254
+ " question = f\"{input}\"\n",
255
+ " answer = f\"{output}\"\n",
256
+ "\n",
257
+ " elif(task == 'informal_to_formal'):\n",
258
+ " informal = sample[\"input\"]\n",
259
+ " formal = sample[\"output\"]\n",
260
+ " question = f\"{informal}\"\n",
261
+ " answer = f\"{formal}\"\n",
262
+ "\n",
263
+ " elif(task == 'synonyms' or task == 'word_unscrambling' or task == 'word_sorting' or task == 'letters_list' or task == 'negation' or task == 'orthography_starts_with' or task == 'second_word_letter' or task == 'sentence_similarity' or task == 'sum' or task == 'taxonomy_animal' or task == 'auto_categorization' or task == 'object_counting' or task == 'odd_one_out'):\n",
264
+ " informal = sample[\"input\"]\n",
265
+ " formal = sample[\"output\"] \n",
266
+ " question = f\"{informal}\"\n",
267
+ " answer = f\"{formal}\"\n",
268
+ "\n",
269
+ " elif(task == 'rhymes'):\n",
270
+ " input = sample[\"input\"]\n",
271
+ " output = sample[\"other_rhymes\"]\n",
272
+ " output = \", \".join(output)\n",
273
+ " question = f\"{input}\"\n",
274
+ " answer = f\"{output}\"\n",
275
+ " \n",
276
+ " data_list.append({\"question\":question,\"answer\":answer})\n",
277
+ " bbh_processor.dataset_to_jsonl(\"data/\"+dataset +\"/\"+save_file_path, dataset=data_list)\n",
278
+ "\n",
279
+ "os.system(\"rm -r INSTINCT\")\n",
280
+ " "
281
+ ]
282
+ },
283
+ {
284
+ "cell_type": "markdown",
285
+ "id": "fe28a967",
286
+ "metadata": {},
287
+ "source": [
288
+ "### Set paths"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 6,
294
+ "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "train_file_name = os.path.join(\"data/\"+dataset_to_run, \"train.jsonl\")\n",
299
+ "test_file_name = os.path.join(\"data/\"+dataset_to_run, \"test.jsonl\")\n",
300
+ "path_to_config = \"configs\"\n",
301
+ "llm_config_path = os.path.join(path_to_config, \"llm_config.yaml\")\n",
302
+ "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n",
303
+ "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "markdown",
308
+ "id": "75ac5780",
309
+ "metadata": {},
310
+ "source": [
311
+ "### Create an object for calling prompt optimization and inference functionalities"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09",
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": [
321
+ "gp = GluePromptOpt(promptopt_config_path,\n",
322
+ " setup_config_path,\n",
323
+ " train_file_name,\n",
324
+ " bbh_processor)"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "markdown",
329
+ "id": "9a26af0d",
330
+ "metadata": {},
331
+ "source": [
332
+ "### Call prompt optmization function\n",
333
+ "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n",
334
+ "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n",
335
+ "3. ```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt "
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "id": "573c6151-2c03-45d9-9904-1724a1e20f1b",
342
+ "metadata": {
343
+ "scrolled": true
344
+ },
345
+ "outputs": [],
346
+ "source": [
347
+ "# Function call to generate optimal prompt and expert profile \n",
348
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "markdown",
353
+ "id": "ef923b11",
354
+ "metadata": {},
355
+ "source": [
356
+ "### Save the optimized prompt and expert profile"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce",
363
+ "metadata": {
364
+ "scrolled": true
365
+ },
366
+ "outputs": [],
367
+ "source": [
368
+ "import pickle \n",
369
+ "if not os.path.exists(\"results\"):\n",
370
+ " os.system(\"mkdir results\")\n",
371
+ "\n",
372
+ "with open(\"results/best_prompt.pkl\", 'wb') as f:\n",
373
+ " pickle.dump(best_prompt, f)\n",
374
+ "with open(\"results/expert_profile.pkl\", 'wb') as f:\n",
375
+ " pickle.dump(expert_profile, f)\n",
376
+ "\n",
377
+ "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "markdown",
382
+ "id": "1942c67e",
383
+ "metadata": {},
384
+ "source": [
385
+ "### Evaluate the optimized prompt"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": null,
391
+ "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c",
392
+ "metadata": {
393
+ "scrolled": true
394
+ },
395
+ "outputs": [],
396
+ "source": [
397
+ "gp.EXPERT_PROFILE = expert_profile\n",
398
+ "gp.BEST_PROMPT = best_prompt\n",
399
+ "\n",
400
+ "# Function call to evaluate the prompt\n",
401
+ "accuracy = gp.evaluate(test_file_name)\n",
402
+ "\n",
403
+ "print(f\"Final Accuracy: {accuracy}\")"
404
+ ]
405
+ }
406
+ ],
407
+ "metadata": {
408
+ "kernelspec": {
409
+ "display_name": "PromptWizard",
410
+ "language": "python",
411
+ "name": "python3"
412
+ },
413
+ "language_info": {
414
+ "codemirror_mode": {
415
+ "name": "ipython",
416
+ "version": 3
417
+ },
418
+ "file_extension": ".py",
419
+ "mimetype": "text/x-python",
420
+ "name": "python",
421
+ "nbconvert_exporter": "python",
422
+ "pygments_lexer": "ipython3",
423
+ "version": "3.12.4"
424
+ }
425
+ },
426
+ "nbformat": 4,
427
+ "nbformat_minor": 5
428
+ }
demos/bbh/description.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BBH Datasets
2
+ # informal_to_formal
3
+ task_description = 'In this task, you will be given a sentence in an informal style. Your job is to rewrite the sentence in a formal style.'
4
+ base_instruction = 'For each given sentence, provide a formal paraphrase.'
5
+ answer_format = 'For each input sentence, present the reasoning followed by the format paraphrased sentence.'
6
+
7
+ #letters_list
8
+ task_description = 'In this task, you will be given a single word as input. Your job is to produce the output by adding a space between each character pair in the word.'
9
+ base_instruction = 'For each given word, insert a space between each character pair in the word.'
10
+ answer_format = 'For each input word, ouput only the space seperated characters.'
11
+
12
+ #negation
13
+ task_description = 'For each input, write a sentence that expresses the exact opposite meaning of the input.'
14
+ base_instruction = 'For each given sentence, provide a new sentence that conveys the exact opposite meaning by using "not" in the input sentence, keeping the rest of the sentence unchanged.'
15
+ answer_format = "For each input sentence, negate the meaning by adding 'not' to the input sentence."
16
+
17
+ #orthography_starts_with
18
+ task_description = 'For each input, output all the words in the sentence that begin with the character in brackets at the end of the sentence.'
19
+ base_instruction = 'Output words with space separated that begin with the character in brackets at the end of the following sentence='
20
+ answer_format = 'For each input sentence, present the reasoning followed by space seperated words.'
21
+
22
+ #rhymes
23
+ task_description = 'In this task, you will be given a single word as input. Your job is to produce list of comma sperated words that rhymes with the input word.'
24
+ base_instruction = 'For each given word, provide a list of words that rhyme with the input word='
25
+ answer_format = 'For each input word, present the reasoning followed by the list of rhyming word.'
26
+
27
+ #second_word_letter
28
+ task_description = 'Extract the second letter from the input word.'
29
+ base_instruction = 'Output the second letter. Think step by step to arrive at the solution.'
30
+ answer_format = 'For each input word, present the reasoning followed by the extracted letter (only single letter).'
31
+
32
+ #sentence_similarity
33
+ task_description = "Each input consists of two sentences (Sentence 1 and Sentence 2). Rate on a scale of 0 to 5 whether those sentences are paraphrases of each other, and also give a brief textual description of the rating (0 - definitely not, 2 - possibly, 3 - probably, 4 - almost perfectly and 5 - perfectly). Use \" - \" to separate them"
34
+ base_instruction = """Rate the similarity of each pair of sentences according to the following scale:
35
+
36
+ 0 - Definitely not : The sentences are completely unrelated in meaning.
37
+ 1 - Probably not : The sentences have minor or superficial similarities but differ significantly in meaning.
38
+ 2 - Possibly : The sentences share some elements of meaning but are not strong paraphrases.
39
+ 3 - Probably : The sentences convey similar meanings but have some differences.
40
+ 4 - Almost perfectly : The sentences are very similar with only minor differences.
41
+ 5 - Perfectly :The sentences are nearly identical in meaning."""
42
+ answer_format = 'Provide your rating and brief textual description for each pair of sentences from the 6 options. (0 - Definitely not, 1 - Probably not, 2 - Possibly, 3 - Probably, 4 - Almost perfectly, 5 - Perfectly)'
43
+
44
+ #sum
45
+ task_description = 'For each input, write the sum of the two numbers that appears there.'
46
+ base_instruction = 'Output the sum of the following two numbers='
47
+ answer_format = 'For each pair of numbers, present the reasoning followed by the sum.'
48
+
49
+ #synonyms
50
+ task_description = 'You will be given a word as input and need to output a word that is semantically similar.'
51
+ base_instruction = 'Output a word that is semantically similar to the input word='
52
+ answer_format = 'For each input word, present the reasoning followed by the synonym.'
53
+
54
+ #taxonomy_animal
55
+ task_description = 'In this task, you will be given a list of words. Your job is to identify and list all the animals from the given set of words.'
56
+ base_instruction = 'For each given list of words, provide a new list containing only the animals.'
57
+ answer_format = 'For each list of words, output the list of animals.'
58
+
59
+ #auto_categorization
60
+ task_description = 'Find the best categorization for the given set of words as input.'
61
+ base_instruction = 'Output the best categorization for the following set of words='
62
+ answer_format = 'For each set of words, present the reasoning followed by the best categorization.'
63
+
64
+ #object_counting
65
+ task_description = 'Find the number of objects in the given input.'
66
+ base_instruction = 'Output the number of objects in the following input='
67
+ answer_format = 'For each input, present the reasoning followed by the number of objects.'
68
+
69
+ #odd_one_out
70
+ task_description = 'Given the below list of words, find the odd one out'
71
+ base_instruction = 'Output the word that does not belong to the group of words='
72
+ answer_format = 'For each group of words, present the reasoning followed by the odd one out.'
73
+
74
+ #word_sorting
75
+ task_description = 'In this task, you will be given a set of words. Your job is to sort the words based on the first character of each word in alphabetical order.'
76
+ base_instruction = 'For each given set of words, provide a sorted list of the words based on the first character of each word.'
77
+ answer_format = 'For each input, list of sorted words based on the first character of each word.'
78
+
79
+ #word_unscrambling
80
+ task_description = 'In this task output all possible meaningful words that can be formed by rearranging all the letters of the given word. Each character must be used exactly once and the words must be valid.'
81
+ base_instruction = 'Output comma seperated words of same length as input word.'
82
+ answer_format = 'Output the all possible meaningful words comma seperated that can formed by rearranging the letters of the given word.'
83
+
84
+ #antonyms
85
+ task_description = 'In this task, you will be given a single word as input. Your job is to produce a word that has the exact opposite meaning (an antonym) to the input word.'
86
+ base_instruction = 'For each given word, provide a word that is an antonym (has the exact opposite meaning).'
87
+ answer_format = 'For each input word, output only a single word.'
88
+
89
+ #cause_and_effect
90
+ task_description = 'Find the cause in the following cause and effect pair. Each input consists of two sentences, where one is the cause and the other is the outcome.'
91
+ base_instruction = 'Output the cause in the following cause and effect pair='
92
+ answer_format = 'For each pair of sentences, present the reasoning followed by the cause.'
93
+
94
+ #common_concept
95
+ task_description = 'In this task, you will be given a list of objects. Your job is to identify and describe a common characteristic that links all the objects in the list.'
96
+ base_instruction = 'The instruction is to ”involve” the objects mentioned in the input.'
97
+ answer_format = 'For each list of objects, output the common concept by "involving" the objects mentioned.'
demos/gsm8k/.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ USE_OPENAI_API_KEY="False"
2
+
3
+ OPENAI_API_KEY=""
4
+ OPENAI_MODEL_NAME =""
5
+
6
+ OPENAI_API_VERSION=""
7
+ AZURE_OPENAI_ENDPOINT=""
8
+ AZURE_OPENAI_DEPLOYMENT_NAME=""
demos/gsm8k/configs/prompt_library.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompts: |
2
+ You are a helpful assistant that assists research students in understanding research papers.
3
+ system_guidelines: |
4
+ Guidelines
5
+ - Your role must always be a helpful assistant that assists students in understanding research papers.
6
+ - Only answer questions that are directly or indirectly related to the referenced paper(s).
7
+
8
+ mode:
9
+ chat:
10
+ - name: CHAT-FIRST-MESSAGE
11
+ llm_request_type: rag-query
12
+ prompt_template: |
13
+ {user_msg}
14
+ emb_model_id: text embedding ada 002 [vellm-openai2]
15
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
16
+ prepend_system_prompts: False
17
+ prepend_system_guidelines: False
18
+
19
+ - name: CHAT-NEXT-MESSAGES
20
+ llm_request_type: rag-query
21
+ prompt_template: |
22
+ {user_msg}
23
+ emb_model_id: text embedding ada 002 [vellm-openai2]
24
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
25
+ prepend_system_prompts: False
26
+ prepend_system_guidelines: False
27
+
28
+ generation:
29
+ - name: FLASH_PROFILE
30
+ prompt_template: |
31
+ {user_msg}
32
+ prepend_system_prompts: False
33
+ prepend_system_guidelines: False
34
+ llm_request_type: rag-query
35
+ emb_model_id: text embedding ada 002 [vellm-openai2]
36
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
demos/gsm8k/configs/promptopt_config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specify one or more prompt refinement techniques to be used. If you specify more than one prompt refinement technique,
2
+ # all these techniques would run on the same seed data. Results, iterations needed & cost incurred for each of these
3
+ # techniques would be logged. And the winning technique for each data instance and overall would be logged.
4
+
5
+ # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt
6
+ # Uncomment techniques that you want to use
7
+ ############################ Critique Task Description Start ############################
8
+ prompt_technique_name: "critique_n_refine"
9
+ # unique_model_id of model defined in llm_config.yaml
10
+ unique_model_id: gpt-4o
11
+ # Number of iterations for conducting <mutation_rounds> rounds of mutation of task description
12
+ # followed by refinement of instructions
13
+ mutate_refine_iterations: 3
14
+ # Number of rounds of mutation to be performed when generating different styles
15
+ mutation_rounds: 3
16
+ # Refine instruction post mutation
17
+ refine_instruction: true
18
+ # Number of iterations for refining task description and in context examples for few-shot
19
+ refine_task_eg_iterations: 3
20
+ # Number of variations of prompts to generate in given iteration
21
+ style_variation: 5
22
+ # Number of questions to be asked to LLM in a single batch, during training step
23
+ questions_batch_size: 1
24
+ # Number of batches of questions to correctly answered, for a prompt to be considered as performing good
25
+ min_correct_count: 3
26
+ # Max number of mini-batches on which we should evaluate our prompt
27
+ max_eval_batches: 6
28
+ # Number of top best performing prompts to be considered for next iterations
29
+ top_n: 1
30
+ # Description of task. This will be fed to prompt
31
+ task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve"
32
+ # Base instruction, in line with your dataset. This will be fed to prompt
33
+ base_instruction: "Lets think step by step."
34
+ # Instruction for specifying answer format
35
+ answer_format: "For each question present the reasoning followed by the correct answer."
36
+ # Number of samples from dataset, set aside as training data. In every iteration we would be drawing
37
+ # `questions_batch_size` examples from training data with replacement.
38
+ seen_set_size: 25
39
+ # Number of examples to be given for few shots
40
+ few_shot_count: 5
41
+ # Number of synthetic training examples to be generated
42
+ num_train_examples: 20
43
+ # Generate synthetic reasoning
44
+ generate_reasoning: true
45
+ # Generate description of an expert which can solve the task at hand
46
+ generate_expert_identity: true
47
+ # Generate keywords that describe the intent of the task
48
+ generate_intent_keywords: false
49
+ ############################ Critique Task Description End ############################
50
+
51
+
52
+
demos/gsm8k/configs/setup_config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assistant_llm:
2
+ # put the unique_model_id that you specified in llm_config.yaml
3
+ prompt_opt: gpt-4o
4
+ dir_info:
5
+ # Base directory for everything
6
+ base_dir: logs
7
+ log_dir_name: glue_logs
8
+ experiment_name: gsm8k
9
+ # Many features are different for mode: online/offline. For eg
10
+ # 1) Print of logs happens on console for offline mode
11
+ # 2) LLM Queue gets instantiated only in online mode
12
+ mode: offline
13
+ # Full length description of the experiment. This would be logged.
14
+ description:
demos/gsm8k/demo.ipynb ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6eb94b72",
6
+ "metadata": {},
7
+ "source": [
8
+ "#### Set environment variables in [.env](.env) for LLM API calling"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "388020c6",
14
+ "metadata": {},
15
+ "source": [
16
+ "### Import Dependencies"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "11efa138",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import sys\n",
27
+ "sys.path.insert(0, \"../../\")\n",
28
+ "import promptwizard\n",
29
+ "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n",
30
+ "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n",
31
+ "from promptwizard.glue.common.utils.file import save_jsonlist\n",
32
+ "from typing import Any\n",
33
+ "from tqdm import tqdm\n",
34
+ "from re import compile, findall\n",
35
+ "import os\n",
36
+ "from datasets import load_dataset\n",
37
+ "\n",
38
+ "from dotenv import load_dotenv\n",
39
+ "load_dotenv(override = True)"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "markdown",
44
+ "id": "beb14821",
45
+ "metadata": {},
46
+ "source": [
47
+ "### Create a dataset specific class and define the required functions "
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 2,
53
+ "id": "5f325d33",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "class GSM8k(DatasetSpecificProcessing):\n",
58
+ "\n",
59
+ " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n",
60
+ " def extract_answer_from_output(completion):\n",
61
+ " # Your functions for metrics and prompt building\n",
62
+ " ans_re = compile(r\"#### (\\-?[0-9\\.\\,]+)\")\n",
63
+ " self.INVALID_ANS = \"[invalid]\"\n",
64
+ "\n",
65
+ " match = ans_re.search(completion)\n",
66
+ " if match:\n",
67
+ " match_str = match.group(1).strip()\n",
68
+ " match_str = match_str.replace(\",\", \"\")\n",
69
+ " return match_str\n",
70
+ " else:\n",
71
+ " return self.INVALID_ANS\n",
72
+ "\n",
73
+ " examples_set = []\n",
74
+ "\n",
75
+ " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n",
76
+ " example = {\n",
77
+ " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n",
78
+ " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n",
79
+ " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n",
80
+ " }\n",
81
+ " examples_set.append(example)\n",
82
+ "\n",
83
+ " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n",
84
+ "\n",
85
+ " def extract_final_answer(self, answer: str):\n",
86
+ " \n",
87
+ " if not answer:\n",
88
+ " return self.INVALID_ANS\n",
89
+ "\n",
90
+ " model_pred = answer.lower()\n",
91
+ " preds = model_pred.split(self.ANSWER_START.lower())\n",
92
+ " answer_flag = True if len(preds) > 1 else False\n",
93
+ "\n",
94
+ " pred = preds[-1].replace(\",\", \"\")\n",
95
+ " pred = [s for s in findall(r'-?\\d+\\.?\\d*', pred)]\n",
96
+ "\n",
97
+ " if len(pred) == 0:\n",
98
+ " return self.INVALID_ANS\n",
99
+ "\n",
100
+ " if answer_flag:\n",
101
+ " # choose the first element in list\n",
102
+ " pred = pred[0]\n",
103
+ " else:\n",
104
+ " # choose the last element in list\n",
105
+ " pred = pred[-1]\n",
106
+ "\n",
107
+ " # (For arithmetic tasks) if a word ends with period, it will be omitted ...\n",
108
+ " if pred[-1] == \".\":\n",
109
+ " pred = pred[:-1]\n",
110
+ " return pred"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 3,
116
+ "id": "f384eb57",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "gsm8k_processor = GSM8k()"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "markdown",
125
+ "id": "11d2de75",
126
+ "metadata": {},
127
+ "source": [
128
+ "### Load and save the dataset "
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": null,
134
+ "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0",
135
+ "metadata": {},
136
+ "outputs": [],
137
+ "source": [
138
+ "if not os.path.exists(\"data\"):\n",
139
+ " os.mkdir(\"data\")\n",
140
+ " \n",
141
+ "dataset = load_dataset(\"openai/gsm8k\", \"main\")\n",
142
+ "num_samples = 0\n",
143
+ "for dataset_type in ['train','test']:\n",
144
+ " data_list = []\n",
145
+ " for data in dataset[dataset_type]:\n",
146
+ " data_list.append({\"question\": data['question'], \"answer\": data['answer']})\n",
147
+ " if num_samples == 100 and dataset_type == 'train': # We sample only 100 train examples and use 25 out them for training randomly\n",
148
+ " break\n",
149
+ " num_samples += 1\n",
150
+ " gsm8k_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "markdown",
155
+ "id": "ac30e74f",
156
+ "metadata": {},
157
+ "source": [
158
+ "### Set paths"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 5,
164
+ "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6",
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n",
169
+ "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n",
170
+ "path_to_config = \"configs\"\n",
171
+ "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n",
172
+ "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "markdown",
177
+ "id": "3392594d",
178
+ "metadata": {},
179
+ "source": [
180
+ "### Create an object for calling prompt optimization and inference functionalities"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": null,
186
+ "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09",
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": [
190
+ "gp = GluePromptOpt(promptopt_config_path,\n",
191
+ " setup_config_path,\n",
192
+ " train_file_name,\n",
193
+ " gsm8k_processor)"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "markdown",
198
+ "id": "1784648c",
199
+ "metadata": {},
200
+ "source": [
201
+ "### Call prompt optmization function\n",
202
+ "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n",
203
+ "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n",
204
+ "3. ```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt "
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "id": "573c6151-2c03-45d9-9904-1724a1e20f1b",
211
+ "metadata": {
212
+ "scrolled": true
213
+ },
214
+ "outputs": [],
215
+ "source": [
216
+ "# Function call to generate optimal prompt and expert profile \n",
217
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "markdown",
222
+ "id": "1ee1aa99",
223
+ "metadata": {},
224
+ "source": [
225
+ "### Save the optimized prompt and expert profile"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": null,
231
+ "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce",
232
+ "metadata": {
233
+ "scrolled": true
234
+ },
235
+ "outputs": [],
236
+ "source": [
237
+ "import pickle \n",
238
+ "\n",
239
+ "if not os.path.exists(\"results\"):\n",
240
+ " os.system(\"mkdir results\")\n",
241
+ " \n",
242
+ "with open(\"results/best_prompt.pkl\", 'wb') as f:\n",
243
+ " pickle.dump(best_prompt, f)\n",
244
+ "with open(\"results/expert_profile.pkl\", 'wb') as f:\n",
245
+ " pickle.dump(expert_profile, f)\n",
246
+ "\n",
247
+ "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "markdown",
252
+ "id": "aac42eed",
253
+ "metadata": {},
254
+ "source": [
255
+ "### Evaluate the optimized prompt"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": null,
261
+ "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c",
262
+ "metadata": {
263
+ "scrolled": true
264
+ },
265
+ "outputs": [],
266
+ "source": [
267
+ "gp.EXPERT_PROFILE = expert_profile\n",
268
+ "gp.BEST_PROMPT = best_prompt\n",
269
+ "\n",
270
+ "# Function call to evaluate the prompt\n",
271
+ "accuracy = gp.evaluate(test_file_name)\n",
272
+ "\n",
273
+ "print(f\"Final Accuracy: {accuracy}\")"
274
+ ]
275
+ }
276
+ ],
277
+ "metadata": {
278
+ "kernelspec": {
279
+ "display_name": "general",
280
+ "language": "python",
281
+ "name": "python3"
282
+ },
283
+ "language_info": {
284
+ "codemirror_mode": {
285
+ "name": "ipython",
286
+ "version": 3
287
+ },
288
+ "file_extension": ".py",
289
+ "mimetype": "text/x-python",
290
+ "name": "python",
291
+ "nbconvert_exporter": "python",
292
+ "pygments_lexer": "ipython3",
293
+ "version": "3.12.4"
294
+ }
295
+ },
296
+ "nbformat": 4,
297
+ "nbformat_minor": 5
298
+ }
demos/scenarios/.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ USE_OPENAI_API_KEY="False"
2
+
3
+ OPENAI_API_KEY=""
4
+ OPENAI_MODEL_NAME =""
5
+
6
+ OPENAI_API_VERSION=""
7
+ AZURE_OPENAI_ENDPOINT=""
8
+ AZURE_OPENAI_DEPLOYMENT_NAME=""
demos/scenarios/configs/prompt_library.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompts: |
2
+ You are a helpful assistant that assists research students in understanding research papers.
3
+ system_guidelines: |
4
+ Guidelines
5
+ - Your role must always be a helpful assistant that assists students in understanding research papers.
6
+ - Only answer questions that are directly or indirectly related to the referenced paper(s).
7
+
8
+ mode:
9
+ chat:
10
+ - name: CHAT-FIRST-MESSAGE
11
+ llm_request_type: rag-query
12
+ prompt_template: |
13
+ {user_msg}
14
+ emb_model_id: text embedding ada 002 [vellm-openai2]
15
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
16
+ prepend_system_prompts: False
17
+ prepend_system_guidelines: False
18
+
19
+ - name: CHAT-NEXT-MESSAGES
20
+ llm_request_type: rag-query
21
+ prompt_template: |
22
+ {user_msg}
23
+ emb_model_id: text embedding ada 002 [vellm-openai2]
24
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
25
+ prepend_system_prompts: False
26
+ prepend_system_guidelines: False
27
+
28
+ generation:
29
+ - name: FLASH_PROFILE
30
+ prompt_template: |
31
+ {user_msg}
32
+ prepend_system_prompts: False
33
+ prepend_system_guidelines: False
34
+ llm_request_type: rag-query
35
+ emb_model_id: text embedding ada 002 [vellm-openai2]
36
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
demos/scenarios/configs/promptopt_config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specify one or more prompt refinement technique to be used. If you specify more than one prompt refinement techniques,
2
+ # all these technique would run on same seed data. Result, iterations needed & cost incurred for each of these
3
+ # technique would be logged. And winning technique for each data instance and overall would be logged.
4
+
5
+ # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt
6
+ # Uncomment techniques that you want to use
7
+ ############################ Critique Task Description Start ############################
8
+ prompt_technique_name: "critique_n_refine"
9
+ # unique_model_id of model defined in llm_config.yaml
10
+ unique_model_id: gpt-4o
11
+ # Number of iterations for conducting <mutation_rounds> rounds of mutation of task description
12
+ # followed by refinement of instructions
13
+ mutate_refine_iterations: 3
14
+ # Number of rounds of mutation to be performed when generating different styles
15
+ mutation_rounds: 3
16
+ # Refine instruction post mutation
17
+ refine_instruction: true
18
+ # Number of iterations for refining task description and in context examples for few-shot
19
+ refine_task_eg_iterations: 3
20
+ # Number of variations of prompts to generate in given iteration
21
+ style_variation: 5
22
+ # Number of questions to be asked to LLM in a single batch, during training step
23
+ questions_batch_size: 1
24
+ # Number of batches of questions to correctly answered, for a prompt to be considered as performing good
25
+ min_correct_count: 3
26
+ # Max number of mini-batches on which we should evaluate our prompt
27
+ max_eval_batches: 6
28
+ # Number of top best performing prompts to be considered for next iterations
29
+ top_n: 1
30
+ # Description of task. This will be fed to prompt
31
+ task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve"
32
+ # Base instruction, in line with your dataset. This will be fed to prompt
33
+ base_instruction: "Lets think step by step."
34
+ # Instruction for specifying answer format
35
+ answer_format: "For each question present the reasoning followed by the correct answer."
36
+ # Number of samples from dataset, set aside as training data. In every iteration we would be drawing
37
+ # `questions_batch_size` examples from training data with replacement.
38
+ seen_set_size: 25
39
+ # Number of examples to be given for few shots
40
+ few_shot_count: 5
41
+ # Number of synthetic training examples to be generated
42
+ num_train_examples: 20
43
+ # Generate synthetic reasoning
44
+ generate_reasoning: true
45
+ # Generate description of an expert which can solve the task at hand
46
+ generate_expert_identity: true
47
+ # Generate keywords that describe the intent of the task
48
+ generate_intent_keywords: false
49
+ ############################ Critique Task Description End ############################
50
+
51
+
52
+
53
+
demos/scenarios/configs/setup_config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assistant_llm:
2
+ # put the unique_model_id that you specified in llm_config.yaml
3
+ prompt_opt: gpt-4o
4
+ dir_info:
5
+ # Base directory for everything
6
+ base_dir: logs
7
+ log_dir_name: glue_logs
8
+ experiment_name: gsm8k
9
+ # Many features are different for mode: online/offline. For eg
10
+ # 1) Print of logs happens on console for offline mode
11
+ # 2) LLM Queue gets instantiated only in online mode
12
+ mode: offline
13
+ # Full length description of the experiment. This would be logged.
14
+ description:
demos/scenarios/dataset_scenarios_demo.ipynb ADDED
@@ -0,0 +1,1146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6eb94b72",
6
+ "metadata": {},
7
+ "source": [
8
+ "## Following is a demo on running PromptWizard under different scenarios "
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "52c7ee0a",
14
+ "metadata": {},
15
+ "source": [
16
+ "#### Set environment variables in [.env](.env) for LLM API calling"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "markdown",
21
+ "id": "3cffa5ef",
22
+ "metadata": {},
23
+ "source": [
24
+ "#### Import Dependencies"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "11efa138",
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "import sys\n",
35
+ "sys.path.insert(0, \"../../\")\n",
36
+ "import promptwizard\n",
37
+ "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n",
38
+ "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n",
39
+ "from promptwizard.glue.common.utils.file import save_jsonlist\n",
40
+ "from typing import Any\n",
41
+ "from tqdm import tqdm\n",
42
+ "from re import compile, findall\n",
43
+ "import os\n",
44
+ "from datasets import load_dataset\n",
45
+ "import yaml\n",
46
+ "from dotenv import load_dotenv\n",
47
+ "load_dotenv(override = True)"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 2,
53
+ "id": "9be22d5d",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "def update_yaml_file(file_path,config_dict):\n",
58
+ "\n",
59
+ " with open(file_path, 'r') as file:\n",
60
+ " data = yaml.safe_load(file)\n",
61
+ "\n",
62
+ "\n",
63
+ " for field,value in config_dict.items():\n",
64
+ " data[field] = value\n",
65
+ "\n",
66
+ " with open(file_path, 'w') as file:\n",
67
+ " yaml.dump(data, file, default_flow_style=False)\n",
68
+ "\n",
69
+ " print(\"YAML file updated successfully!\")"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "markdown",
74
+ "id": "78abb34a",
75
+ "metadata": {},
76
+ "source": [
77
+ "Set the paths"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 3,
83
+ "id": "14399d47",
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "path_to_config = \"configs\"\n",
88
+ "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n",
89
+ "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "markdown",
94
+ "id": "0f274af9",
95
+ "metadata": {},
96
+ "source": [
97
+ "### Now let us consider the three scenarios with respect to availability of training data"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "markdown",
102
+ "id": "5aaed236",
103
+ "metadata": {},
104
+ "source": [
105
+ "#### Scenario 1 : We have no training data , but we also don't want in-context examples in final prompt"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "markdown",
110
+ "id": "4c34423d",
111
+ "metadata": {},
112
+ "source": [
113
+ "Set the configurations to generate mutations"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "id": "ec4e7607",
120
+ "metadata": {},
121
+ "outputs": [],
122
+ "source": [
123
+ "file_path = 'configs/promptopt_config.yaml' \n",
124
+ "# Set the following based on the use case\n",
125
+ "config_dict = {\n",
126
+ " \"task_description\": \"You are a mathematics expert. You will be given a mathematics problem which you need to solve\",\n",
127
+ " \"base_instruction\": \"Lets think step by step.\",\n",
128
+ " \"mutation_rounds\": 5\n",
129
+ " }\n",
130
+ "update_yaml_file(file_path,config_dict)"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "markdown",
135
+ "id": "d984e84e",
136
+ "metadata": {},
137
+ "source": [
138
+ "Create an object for calling prompt optimization and inference functionalities"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": null,
144
+ "id": "c7aa4ccb",
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": [
148
+ "gp = GluePromptOpt(promptopt_config_path,\n",
149
+ " setup_config_path,\n",
150
+ " dataset_jsonl=None,\n",
151
+ " data_processor=None)"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "markdown",
156
+ "id": "8d587065",
157
+ "metadata": {},
158
+ "source": [
159
+ "Call the optimization function"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": null,
165
+ "id": "afe8de4f",
166
+ "metadata": {},
167
+ "outputs": [],
168
+ "source": [
169
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=False,run_without_train_examples=True,generate_synthetic_examples=False)"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "markdown",
174
+ "id": "a30db274",
175
+ "metadata": {},
176
+ "source": [
177
+ "Output : Five mutated prompts are printed on the termial as shown below :"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": 7,
183
+ "id": "e5cb1a65",
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "OUTPUT = \"\"\"\n",
188
+ "Variations 1:\n",
189
+ "Expert Profile:\n",
190
+ "You are a mathematician with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to approach mathematical problems methodically, breaking them down into manageable steps and applying appropriate techniques to find solutions. You are familiar with both theoretical and applied mathematics, and you can explain your reasoning and solutions in a clear and concise manner. Your ability to solve mathematical problems efficiently and accurately makes you an invaluable resource for anyone seeking help with mathematics.:\n",
191
+ "Prompt:\n",
192
+ "You are a mathematics expert. You will be given a mathematics problem which you need to solve\n",
193
+ "Lets think step by step.\n",
194
+ "\n",
195
+ "\n",
196
+ "For each question present the reasoning followed by the correct answer.\n",
197
+ "Keywords: mathematics, problem-solving, step-by-step, logical reasoning, expert\n",
198
+ "_______________________________________________________________________\n",
199
+ "\n",
200
+ "Variations 2:\n",
201
+ "Expert Profile:\n",
202
+ "You are a mathematician with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to approach mathematical problems methodically, breaking them down into manageable steps and applying appropriate techniques to find solutions. You are familiar with both theoretical and applied mathematics, and you can explain your reasoning and solutions in a clear and concise manner. Your ability to solve mathematical problems efficiently and accurately makes you an invaluable resource for anyone seeking help with mathematics.:\n",
203
+ "Prompt:\n",
204
+ "Let's break this problem down step by step and devise an experiment to help solve it.\n",
205
+ "\n",
206
+ "\n",
207
+ "For each question present the reasoning followed by the correct answer.\n",
208
+ "Keywords: mathematics, problem-solving, step-by-step, logical reasoning, expert\n",
209
+ "_______________________________________________________________________\n",
210
+ "\n",
211
+ "Variations 3:\n",
212
+ "Expert Profile:\n",
213
+ "You are a mathematics expert with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to break down intricate problems into manageable steps, making it easier to find solutions. You are familiar with a wide range of mathematical techniques and tools, and you can apply them effectively to solve problems. Whether the problem involves solving equations, proving theorems, or analyzing data, you can provide a clear and accurate solution. Your ability to explain your reasoning and methodology ensures that others can follow and understand your approach, making you an invaluable resource for tackling challenging mathematical problems.:\n",
214
+ "Prompt:\n",
215
+ "Let's think through this problem step by step and make a list of ideas to solve it.\n",
216
+ "\n",
217
+ "\n",
218
+ "For each question present the reasoning followed by the correct answer.\n",
219
+ "Keywords: mathematics, problem-solving, step-by-step, logical reasoning, expert\n",
220
+ "_______________________________________________________________________\n",
221
+ "\n",
222
+ "Variations 4:\n",
223
+ "Expert Profile:\n",
224
+ "You are a mathematics expert with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to break down intricate problems into manageable steps, making it easier for others to follow your reasoning. You are familiar with a wide range of mathematical techniques and tools, and you can apply them effectively to find solutions. Whether the problem involves solving equations, proving theorems, or analyzing data, you can provide a clear, accurate, and well-explained solution. Your ability to communicate complex mathematical concepts in an understandable way makes you an invaluable resource for anyone seeking to solve mathematical problems.:\n",
225
+ "Prompt:\n",
226
+ "Let's approach this problem step by step and measure our progress as we go.\n",
227
+ "\n",
228
+ "\n",
229
+ "For each question present the reasoning followed by the correct answer.\n",
230
+ "Keywords: mathematics, problem-solving, step-by-step, logical reasoning, expert\n",
231
+ "Iterations completed: 0%| | 0/3 [00:24<?, ?it/s]\n",
232
+ "Time taken to find best prompt: 24.79972267150879 sec\n",
233
+ "_______________________________________________________________________\n",
234
+ "\n",
235
+ "Variations 5:\n",
236
+ "Expert Profile:\n",
237
+ "You are a mathematics expert with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to approach problems methodically, breaking them down into manageable steps and applying appropriate mathematical techniques to find solutions. You are also adept at explaining your reasoning and methods in a clear and concise manner, making it easy for others to follow your thought process. Whether the problem involves solving equations, proving theorems, or analyzing data, you have the knowledge and skills to tackle it effectively. Your proficiency in mathematics is highly valuable in both academic and practical applications, and you are well-equipped to provide accurate and insightful solutions to a wide range of mathematical problems.:\n",
238
+ "Prompt:\n",
239
+ "Let's simplify this problem step by step to make it easier to solve.\n",
240
+ "\n",
241
+ "\n",
242
+ "For each question present the reasoning followed by the correct answer.\n",
243
+ "Keywords: mathematics, problem-solving, step-by-step, logical reasoning, expert\"\"\""
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "markdown",
248
+ "id": "dfd54818",
249
+ "metadata": {},
250
+ "source": [
251
+ "#### Scenario 2 : We have no training data , but we also want in-context examples in final prompt"
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "markdown",
256
+ "id": "b07d1862",
257
+ "metadata": {},
258
+ "source": [
259
+ "This scenario has two steps \n",
260
+ "- Genrate synthetic data\n",
261
+ "- Optimize prompts using synthetic data"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "markdown",
266
+ "id": "bf44d6d7",
267
+ "metadata": {},
268
+ "source": [
269
+ "STEP 1 : Generate synthetic data"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "markdown",
274
+ "id": "96d07ae3",
275
+ "metadata": {},
276
+ "source": [
277
+ "Set the configurations to first generate synthetic training data. \\\n",
278
+ "Any number of synthetic examples can be generated and then used for optimizing prompts as mentioned in STEP 2"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": null,
284
+ "id": "3c7c1f19",
285
+ "metadata": {},
286
+ "outputs": [],
287
+ "source": [
288
+ "file_path = 'configs/promptopt_config.yaml' \n",
289
+ "# Set the number of synthetic training examples to be generated\n",
290
+ "config_dict = {\n",
291
+ " \"num_train_examples\":20\n",
292
+ " }\n",
293
+ "update_yaml_file(file_path,config_dict)"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "id": "2311b4ad",
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "gp = GluePromptOpt(promptopt_config_path,\n",
304
+ " setup_config_path,\n",
305
+ " dataset_jsonl=None,\n",
306
+ " data_processor=None)"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "markdown",
311
+ "id": "65ec6cd2",
312
+ "metadata": {},
313
+ "source": [
314
+ "Call the function to generate synthetic examples, which are saved in train.jsonl"
315
+ ]
316
+ },
317
+ {
318
+ "cell_type": "code",
319
+ "execution_count": null,
320
+ "id": "ff84f04e",
321
+ "metadata": {},
322
+ "outputs": [],
323
+ "source": [
324
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=False,run_without_train_examples=False,generate_synthetic_examples=True)"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "markdown",
329
+ "id": "a286dcdf",
330
+ "metadata": {},
331
+ "source": [
332
+ "STEP 2 : Optimize prompts using synthetic data"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "markdown",
337
+ "id": "bb0a4060",
338
+ "metadata": {},
339
+ "source": [
340
+ "Create a dataset specific class and define the required functions "
341
+ ]
342
+ },
343
+ {
344
+ "cell_type": "code",
345
+ "execution_count": 11,
346
+ "id": "7aaa5126",
347
+ "metadata": {},
348
+ "outputs": [],
349
+ "source": [
350
+ "class GSM8k(DatasetSpecificProcessing):\n",
351
+ "\n",
352
+ " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n",
353
+ " def extract_answer_from_output(completion):\n",
354
+ " # Your functions for metrics and prompt building\n",
355
+ " ans_re = compile(r\"#### (\\-?[0-9\\.\\,]+)\")\n",
356
+ " self.INVALID_ANS = \"[invalid]\"\n",
357
+ "\n",
358
+ " match = ans_re.search(completion)\n",
359
+ " if match:\n",
360
+ " match_str = match.group(1).strip()\n",
361
+ " match_str = match_str.replace(\",\", \"\")\n",
362
+ " return match_str\n",
363
+ " else:\n",
364
+ " return self.INVALID_ANS\n",
365
+ "\n",
366
+ " examples_set = []\n",
367
+ "\n",
368
+ " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n",
369
+ " example = {\n",
370
+ " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n",
371
+ " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n",
372
+ " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n",
373
+ " }\n",
374
+ " examples_set.append(example)\n",
375
+ "\n",
376
+ " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n",
377
+ "\n",
378
+ " def extract_final_answer(self, answer: str):\n",
379
+ " \n",
380
+ " if not answer:\n",
381
+ " return self.INVALID_ANS\n",
382
+ "\n",
383
+ " model_pred = answer.lower()\n",
384
+ " preds = model_pred.split(self.ANSWER_START.lower())\n",
385
+ " answer_flag = True if len(preds) > 1 else False\n",
386
+ "\n",
387
+ " pred = preds[-1].replace(\",\", \"\")\n",
388
+ " pred = [s for s in findall(r'-?\\d+\\.?\\d*', pred)]\n",
389
+ "\n",
390
+ " if len(pred) == 0:\n",
391
+ " return self.INVALID_ANS\n",
392
+ "\n",
393
+ " if answer_flag:\n",
394
+ " # choose the first element in list\n",
395
+ " pred = pred[0]\n",
396
+ " else:\n",
397
+ " # choose the last element in list\n",
398
+ " pred = pred[-1]\n",
399
+ "\n",
400
+ " # (For arithmetic tasks) if a word ends with period, it will be omitted ...\n",
401
+ " if pred[-1] == \".\":\n",
402
+ " pred = pred[:-1]\n",
403
+ " return pred"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 12,
409
+ "id": "212bea42",
410
+ "metadata": {},
411
+ "outputs": [],
412
+ "source": [
413
+ "gsm8k_processor = GSM8k()"
414
+ ]
415
+ },
416
+ {
417
+ "cell_type": "markdown",
418
+ "id": "36ae1f65",
419
+ "metadata": {},
420
+ "source": [
421
+ "Set the configurations to optimize the prompt on the synthetic data"
422
+ ]
423
+ },
424
+ {
425
+ "cell_type": "code",
426
+ "execution_count": null,
427
+ "id": "67db60b0",
428
+ "metadata": {},
429
+ "outputs": [],
430
+ "source": [
431
+ "file_path = 'configs/promptopt_config.yaml' \n",
432
+ "config_dict = {\n",
433
+ " \"task_description\": \"You are a mathematics expert. You will be given a mathematics problem which you need to solve\",\n",
434
+ " \"base_instruction\": \"Lets think step by step.\",\n",
435
+ " \"mutation_rounds\": 2,\n",
436
+ " \"few_shot_count\": 5,\n",
437
+ " \"generate_reasoning\": True,\n",
438
+ " \"mutate_refine_iterations\" : 3,\n",
439
+ " \"seen_set_size\":20\n",
440
+ " }\n",
441
+ "update_yaml_file(file_path,config_dict)"
442
+ ]
443
+ },
444
+ {
445
+ "cell_type": "markdown",
446
+ "id": "fc8eb2c5",
447
+ "metadata": {},
448
+ "source": [
449
+ "Call the optimization function "
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": null,
455
+ "id": "e53934e6",
456
+ "metadata": {},
457
+ "outputs": [],
458
+ "source": [
459
+ "gp = GluePromptOpt(promptopt_config_path,\n",
460
+ " setup_config_path,\n",
461
+ " dataset_jsonl = \"train_synthetic.jsonl\",\n",
462
+ " data_processor=gsm8k_processor)\n",
463
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
464
+ ]
465
+ },
466
+ {
467
+ "cell_type": "markdown",
468
+ "id": "b4bcd46b",
469
+ "metadata": {},
470
+ "source": [
471
+ "Output : Following Prompt and Expert Profile are generated "
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": null,
477
+ "id": "ee6006f0",
478
+ "metadata": {},
479
+ "outputs": [],
480
+ "source": [
481
+ "OUTPUT = \"\"\"\n",
482
+ "Generating Expert Identity....\n",
483
+ "Expert Identity: You are a mathematician with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your analytical skills and logical reasoning enable you to break down problems into manageable steps and find accurate solutions efficiently. You are familiar with a wide range of mathematical techniques and tools, and you can apply them to solve problems in both theoretical and applied contexts. Your expertise allows you to explain your solutions clearly and concisely, making complex concepts accessible to others. Whether the problem involves solving equations, proving theorems, or analyzing data, you are well-equipped to provide a thorough and correct solution.\n",
484
+ "Final best prompt: Provide a clear and detailed solution, breaking down all necessary steps. Ensure that the final answer is clearly marked and separated from the solution steps. Use proper mathematical notation and formatting throughout. Verify the final answer by checking the solution steps for accuracy. Simplify all expressions and fractions where possible. Handle special cases or edge cases appropriately, and clearly state any assumptions or conditions applied during the solution process. Finally, review the entire solution to ensure logical consistency and correct formatting.\n",
485
+ "\n",
486
+ "[Question] Solve for \\( x \\) in the equation \\( 2x + 3 = 11 \\).\n",
487
+ "[Answer] To solve for \\( x \\) in the equation \\( 2x + 3 = 11 \\), we will follow these steps:\n",
488
+ "\n",
489
+ "1. **Isolate the term containing \\( x \\)**:\n",
490
+ " We start by isolating the term with \\( x \\) on one side of the equation. To do this, we need to eliminate the constant term on the left side of the equation.\n",
491
+ "\n",
492
+ " \\[\n",
493
+ " 2x + 3 = 11\n",
494
+ " \\]\n",
495
+ "\n",
496
+ " Subtract 3 from both sides of the equation:\n",
497
+ "\n",
498
+ " \\[\n",
499
+ " 2x + 3 - 3 = 11 - 3\n",
500
+ " \\]\n",
501
+ "\n",
502
+ " Simplifying this, we get:\n",
503
+ "\n",
504
+ " \\[\n",
505
+ " 2x = 8\n",
506
+ " \\]\n",
507
+ "\n",
508
+ "2. **Solve for \\( x \\)**:\n",
509
+ " Now, we need to solve for \\( x \\) by isolating \\( x \\) itself. Since \\( x \\) is multiplied by 2, we will divide both sides of the equation by 2 to solve for \\( x \\).\n",
510
+ "\n",
511
+ " \\[\n",
512
+ " \\frac{2x}{2} = \\frac{8}{2}\n",
513
+ " \\]\n",
514
+ "\n",
515
+ " Simplifying this, we get:\n",
516
+ "\n",
517
+ " \\[\n",
518
+ " x = 4\n",
519
+ " \\]\n",
520
+ "\n",
521
+ "3. **Verify the solution**:\n",
522
+ " To ensure our solution is correct, we substitute \\( x = 4 \\) back into the original equation and check if both sides are equal.\n",
523
+ "\n",
524
+ " Original equation:\n",
525
+ "\n",
526
+ " \\[\n",
527
+ " 2x + 3 = 11\n",
528
+ " \\]\n",
529
+ "\n",
530
+ " Substitute \\( x = 4 \\):\n",
531
+ "\n",
532
+ " \\[\n",
533
+ " 2(4) + 3 = 11\n",
534
+ " \\]\n",
535
+ "\n",
536
+ " Simplifying this, we get:\n",
537
+ "\n",
538
+ " \\[\n",
539
+ " 8 + 3 = 11\n",
540
+ " \\]\n",
541
+ "\n",
542
+ " \\[\n",
543
+ " 11 = 11\n",
544
+ " \\]\n",
545
+ "\n",
546
+ " Since both sides of the equation are equal, our solution is verified to be correct.\n",
547
+ "\n",
548
+ "**Final Answer**: \\( x = 4 \\) <ANS_START> \\( x = 4 \\) <ANS_END>\n",
549
+ "\n",
550
+ "[Question] Solve for \\( x \\) in the equation \\( x^2 - 4x + 4 = 0 \\).\n",
551
+ "[Answer] To solve the quadratic equation \\( x^2 - 4x + 4 = 0 \\), we will follow these steps:\n",
552
+ "\n",
553
+ "1. **Identify the quadratic equation**: The given equation is \\( x^2 - 4x + 4 = 0 \\).\n",
554
+ "\n",
555
+ "2. **Recognize the standard form**: The standard form of a quadratic equation is \\( ax^2 + bx + c = 0 \\). Here, \\( a = 1 \\), \\( b = -4 \\), and \\( c = 4 \\).\n",
556
+ "\n",
557
+ "3. **Factor the quadratic expression**: We need to factor the quadratic expression on the left-hand side of the equation. We look for two numbers that multiply to \\( c \\) (which is 4) and add up to \\( b \\) (which is -4). These numbers are -2 and -2.\n",
558
+ "\n",
559
+ "4. **Write the factored form**: The quadratic expression \\( x^2 - 4x + 4 \\) can be factored as \\( (x - 2)(x - 2) \\) or \\( (x - 2)^2 \\).\n",
560
+ "\n",
561
+ "5. **Set the factored form equal to zero**: We now have \\( (x - 2)^2 = 0 \\).\n",
562
+ "\n",
563
+ "6. **Solve for \\( x \\)**: To find the value of \\( x \\), we take the square root of both sides of the equation:\n",
564
+ " \\[\n",
565
+ " \\sqrt{(x - 2)^2} = \\sqrt{0}\n",
566
+ " \\]\n",
567
+ " This simplifies to:\n",
568
+ " \\[\n",
569
+ " x - 2 = 0\n",
570
+ " \\]\n",
571
+ "\n",
572
+ "7. **Isolate \\( x \\)**: Add 2 to both sides of the equation to solve for \\( x \\):\n",
573
+ " \\[\n",
574
+ " x = 2\n",
575
+ " \\]\n",
576
+ "\n",
577
+ "8. **Verify the solution**: Substitute \\( x = 2 \\) back into the original equation to ensure it satisfies the equation:\n",
578
+ " \\[\n",
579
+ " (2)^2 - 4(2) + 4 = 4 - 8 + 4 = 0\n",
580
+ " \\]\n",
581
+ " Since the left-hand side equals the right-hand side (0), the solution \\( x = 2 \\) is verified.\n",
582
+ "\n",
583
+ "**Final Answer**: \\( x = 2 \\) <ANS_START> \\( x = 2 \\) <ANS_END>\n",
584
+ "\n",
585
+ "[Question] Find the derivative of \\( f(x) = 3x^2 \\cdot \\sin(x) \\).\n",
586
+ "[Answer] To find the derivative of the function \\( f(x) = 3x^2 \\cdot \\sin(x) \\), we will use the product rule of differentiation. The product rule states that if we have a function \\( f(x) = u(x) \\cdot v(x) \\), then its derivative \\( f'(x) \\) is given by:\n",
587
+ "\n",
588
+ "\\[ f'(x) = u'(x) \\cdot v(x) + u(x) \\cdot v'(x) \\]\n",
589
+ "\n",
590
+ "Here, we identify \\( u(x) = 3x^2 \\) and \\( v(x) = \\sin(x) \\).\n",
591
+ "\n",
592
+ "Step 1: Differentiate \\( u(x) = 3x^2 \\)\n",
593
+ "\\[ u'(x) = \\frac{d}{dx}(3x^2) = 3 \\cdot 2x = 6x \\]\n",
594
+ "\n",
595
+ "Step 2: Differentiate \\( v(x) = \\sin(x) \\)\n",
596
+ "\\[ v'(x) = \\frac{d}{dx}(\\sin(x)) = \\cos(x) \\]\n",
597
+ "\n",
598
+ "Step 3: Apply the product rule\n",
599
+ "\\[ f'(x) = u'(x) \\cdot v(x) + u(x) \\cdot v'(x) \\]\n",
600
+ "\\[ f'(x) = (6x) \\cdot \\sin(x) + (3x^2) \\cdot \\cos(x) \\]\n",
601
+ "\n",
602
+ "Step 4: Simplify the expression\n",
603
+ "\\[ f'(x) = 6x \\sin(x) + 3x^2 \\cos(x) \\]\n",
604
+ "\n",
605
+ "Thus, the derivative of the function \\( f(x) = 3x^2 \\cdot \\sin(x) \\) is:\n",
606
+ "\n",
607
+ "\\[ \\boxed{f'(x) = 6x \\sin(x) + 3x^2 \\cos(x)} \\]\n",
608
+ "\n",
609
+ "To verify the final answer, we can recheck each step to ensure accuracy:\n",
610
+ "- The derivative of \\( 3x^2 \\) is correctly calculated as \\( 6x \\).\n",
611
+ "- The derivative of \\( \\sin(x) \\) is correctly calculated as \\( \\cos(x) \\).\n",
612
+ "- The product rule is correctly applied, and the terms are correctly combined and simplified.\n",
613
+ "\n",
614
+ "Therefore, the final answer is confirmed to be correct. <ANS_START> \\( f'(x) = 3x^2 \\cos(x) + 6x \\sin(x) \\) <ANS_END>\n",
615
+ "\n",
616
+ "[Question] Evaluate the definite integral \\( \\int_{0}^{1} (4x^3 - 2x + 1) \\, dx \\).\n",
617
+ "[Answer] To evaluate the definite integral \\( \\int_{0}^{1} (4x^3 - 2x + 1) \\, dx \\), we will follow these steps:\n",
618
+ "\n",
619
+ "1. **Find the antiderivative** of the integrand \\( 4x^3 - 2x + 1 \\).\n",
620
+ "2. **Evaluate the antiderivative** at the upper limit of integration (1).\n",
621
+ "3. **Evaluate the antiderivative** at the lower limit of integration (0).\n",
622
+ "4. **Subtract the value** of the antiderivative at the lower limit from the value at the upper limit to find the definite integral.\n",
623
+ "\n",
624
+ "### Step-by-Step Solution:\n",
625
+ "\n",
626
+ "1. **Find the antiderivative**:\n",
627
+ " - The antiderivative of \\( 4x^3 \\) is \\( \\frac{4x^4}{4} = x^4 \\).\n",
628
+ " - The antiderivative of \\( -2x \\) is \\( -\\frac{2x^2}{2} = -x^2 \\).\n",
629
+ " - The antiderivative of \\( 1 \\) is \\( x \\).\n",
630
+ "\n",
631
+ " Therefore, the antiderivative of \\( 4x^3 - 2x + 1 \\) is:\n",
632
+ " \\[\n",
633
+ " F(x) = x^4 - x^2 + x\n",
634
+ " \\]\n",
635
+ "\n",
636
+ "2. **Evaluate the antiderivative at the upper limit (1)**:\n",
637
+ " \\[\n",
638
+ " F(1) = 1^4 - 1^2 + 1 = 1 - 1 + 1 = 1\n",
639
+ " \\]\n",
640
+ "\n",
641
+ "3. **Evaluate the antiderivative at the lower limit (0)**:\n",
642
+ " \\[\n",
643
+ " F(0) = 0^4 - 0^2 + 0 = 0\n",
644
+ " \\]\n",
645
+ "\n",
646
+ "4. **Subtract the value at the lower limit from the value at the upper limit**:\n",
647
+ " \\[\n",
648
+ " \\int_{0}^{1} (4x^3 - 2x + 1) \\, dx = F(1) - F(0) = 1 - 0 = 1\n",
649
+ " \\]\n",
650
+ "\n",
651
+ "### Final Answer:\n",
652
+ "\\[\n",
653
+ "\\boxed{1}\n",
654
+ "\\] <ANS_START> \\( 1 \\) <ANS_END>\n",
655
+ "\n",
656
+ "[Question] Solve the system of equations:\n",
657
+ "\\[ \\begin{cases} \n",
658
+ "x + 2y + z = 6 \\\\\n",
659
+ "2x - y + 3z = 14 \\\\\n",
660
+ "3x + y - z = 2 \n",
661
+ "\\end{cases} \\]\n",
662
+ "[Answer] To solve the system of equations:\n",
663
+ "\\[ \\begin{cases} \n",
664
+ "x + 2y + z = 6 \\\\\n",
665
+ "2x - y + 3z = 14 \\\\\n",
666
+ "3x + y - z = 2 \n",
667
+ "\\end{cases} \\]\n",
668
+ "\n",
669
+ "we will use the method of elimination and substitution to find the values of \\(x\\), \\(y\\), and \\(z\\).\n",
670
+ "\n",
671
+ "**Step 1: Eliminate \\(z\\) from the first two equations.**\n",
672
+ "\n",
673
+ "First, we multiply the first equation by 3 to align the coefficients of \\(z\\):\n",
674
+ "\\[ 3(x + 2y + z) = 3 \\cdot 6 \\]\n",
675
+ "\\[ 3x + 6y + 3z = 18 \\]\n",
676
+ "\n",
677
+ "Now, we subtract the second equation from this result:\n",
678
+ "\\[ (3x + 6y + 3z) - (2x - y + 3z) = 18 - 14 \\]\n",
679
+ "\\[ 3x + 6y + 3z - 2x + y - 3z = 4 \\]\n",
680
+ "\\[ x + 7y = 4 \\]\n",
681
+ "\\[ \\text{(Equation 4)} \\]\n",
682
+ "\n",
683
+ "**Step 2: Eliminate \\(z\\) from the first and third equations.**\n",
684
+ "\n",
685
+ "Next, we multiply the first equation by 1 and the third equation by 1 to align the coefficients of \\(z\\):\n",
686
+ "\\[ 1(x + 2y + z) = 1 \\cdot 6 \\]\n",
687
+ "\\[ x + 2y + z = 6 \\]\n",
688
+ "\n",
689
+ "\\[ 1(3x + y - z) = 1 \\cdot 2 \\]\n",
690
+ "\\[ 3x + y - z = 2 \\]\n",
691
+ "\n",
692
+ "Now, we add these two equations:\n",
693
+ "\\[ (x + 2y + z) + (3x + y - z) = 6 + 2 \\]\n",
694
+ "\\[ x + 2y + z + 3x + y - z = 8 \\]\n",
695
+ "\\[ 4x + 3y = 8 \\]\n",
696
+ "\\[ \\text{(Equation 5)} \\]\n",
697
+ "\n",
698
+ "**Step 3: Solve the system of equations formed by Equation 4 and Equation 5.**\n",
699
+ "\n",
700
+ "We now have:\n",
701
+ "\\[ \\begin{cases} \n",
702
+ "x + 7y = 4 \\\\\n",
703
+ "4x + 3y = 8 \n",
704
+ "\\end{cases} \\]\n",
705
+ "\n",
706
+ "First, we solve Equation 4 for \\(x\\):\n",
707
+ "\\[ x = 4 - 7y \\]\n",
708
+ "\n",
709
+ "Substitute \\(x = 4 - 7y\\) into Equation 5:\n",
710
+ "\\[ 4(4 - 7y) + 3y = 8 \\]\n",
711
+ "\\[ 16 - 28y + 3y = 8 \\]\n",
712
+ "\\[ 16 - 25y = 8 \\]\n",
713
+ "\\[ -25y = 8 - 16 \\]\n",
714
+ "\\[ -25y = -8 \\]\n",
715
+ "\\[ y = \\frac{8}{25} \\]\n",
716
+ "\n",
717
+ "**Step 4: Substitute \\(y\\) back into Equation 4 to find \\(x\\).**\n",
718
+ "\n",
719
+ "\\[ x + 7\\left(\\frac{8}{25}\\right) = 4 \\]\n",
720
+ "\\[ x + \\frac{56}{25} = 4 \\]\n",
721
+ "\\[ x = 4 - \\frac{56}{25} \\]\n",
722
+ "\\[ x = \\frac{100}{25} - \\frac{56}{25} \\]\n",
723
+ "\\[ x = \\frac{44}{25} \\]\n",
724
+ "\n",
725
+ "**Step 5: Substitute \\(x\\) and \\(y\\) back into the first original equation to find \\(z\\).**\n",
726
+ "\n",
727
+ "\\[ x + 2y + z = 6 \\]\n",
728
+ "\\[ \\frac{44}{25} + 2\\left(\\frac{8}{25}\\right) + z = 6 \\]\n",
729
+ "\\[ \\frac{44}{25} + \\frac{16}{25} + z = 6 \\]\n",
730
+ "\\[ \\frac{60}{25} + z = 6 \\]\n",
731
+ "\\[ \\frac{60}{25} = 2.4 \\]\n",
732
+ "\\[ 2.4 + z = 6 \\]\n",
733
+ "\\[ z = 6 - 2.4 \\]\n",
734
+ "\\[ z = 3.6 \\]\n",
735
+ "\n",
736
+ "**Final Answer:**\n",
737
+ "\\[ x = \\frac{44}{25}, y = \\frac{8}{25}, z = 3.6 \\]\n",
738
+ "\n",
739
+ "We have verified each step and simplified all expressions. The solution is logically consistent and correctly formatted. <ANS_START> \\( x = \\frac{44}{25}, y = \\frac{8}{25}, z = 3.6 \\) <ANS_END>\n",
740
+ "\n",
741
+ "\n",
742
+ "For each question present the reasoning followed by the correct answer.\n",
743
+ "\"\"\""
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "markdown",
748
+ "id": "c61c2f84",
749
+ "metadata": {},
750
+ "source": [
751
+ "#### Scenario 3 : We have training data and also want in-context examples in final prompt"
752
+ ]
753
+ },
754
+ {
755
+ "cell_type": "markdown",
756
+ "id": "11d2de75",
757
+ "metadata": {},
758
+ "source": [
759
+ "Load and save the dataset "
760
+ ]
761
+ },
762
+ {
763
+ "cell_type": "code",
764
+ "execution_count": null,
765
+ "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0",
766
+ "metadata": {},
767
+ "outputs": [],
768
+ "source": [
769
+ "if not os.path.exists(\"data\"):\n",
770
+ " os.mkdir(\"data\")\n",
771
+ " \n",
772
+ "dataset = load_dataset(\"openai/gsm8k\", \"main\")\n",
773
+ "num_samples = 0\n",
774
+ "for dataset_type in ['train','test']:\n",
775
+ " data_list = []\n",
776
+ " for data in dataset[dataset_type]:\n",
777
+ " data_list.append({\"question\": data['question'], \"answer\": data['answer']})\n",
778
+ " if num_samples == 100 and dataset_type == 'train': # We sample only 100 train examples and use 25 out them for training randomly\n",
779
+ " break\n",
780
+ " num_samples += 1\n",
781
+ " gsm8k_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)"
782
+ ]
783
+ },
784
+ {
785
+ "cell_type": "markdown",
786
+ "id": "abf1671a",
787
+ "metadata": {},
788
+ "source": [
789
+ "Set the configurations"
790
+ ]
791
+ },
792
+ {
793
+ "cell_type": "code",
794
+ "execution_count": null,
795
+ "id": "cc841576",
796
+ "metadata": {},
797
+ "outputs": [],
798
+ "source": [
799
+ "file_path = 'configs/promptopt_config.yaml' \n",
800
+ "config_dict = {\n",
801
+ " \"task_description\": \"You are a mathematics expert. You will be given a mathematics problem which you need to solve\",\n",
802
+ " \"base_instruction\": \"Lets think step by step.\",\n",
803
+ " \"mutation_rounds\": 2,\n",
804
+ " \"few_shot_count\": 5,\n",
805
+ " \"generate_reasoning\": True,\n",
806
+ " \"mutate_refine_iterations\" : 3,\n",
807
+ " \"seen_set_size\":20\n",
808
+ " }\n",
809
+ "update_yaml_file(file_path,config_dict)"
810
+ ]
811
+ },
812
+ {
813
+ "cell_type": "markdown",
814
+ "id": "3392594d",
815
+ "metadata": {},
816
+ "source": [
817
+ "Create an object for calling prompt optimization and inference functionalities"
818
+ ]
819
+ },
820
+ {
821
+ "cell_type": "code",
822
+ "execution_count": null,
823
+ "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09",
824
+ "metadata": {},
825
+ "outputs": [],
826
+ "source": [
827
+ "gp = GluePromptOpt(promptopt_config_path,\n",
828
+ " setup_config_path,\n",
829
+ " dataset_jsonl = os.path.join(\"data\", \"train.jsonl\"),\n",
830
+ " data_processor = gsm8k_processor)"
831
+ ]
832
+ },
833
+ {
834
+ "cell_type": "markdown",
835
+ "id": "6f421ce9",
836
+ "metadata": {},
837
+ "source": [
838
+ "Call the optimization function "
839
+ ]
840
+ },
841
+ {
842
+ "cell_type": "code",
843
+ "execution_count": null,
844
+ "id": "09e3e6e1",
845
+ "metadata": {},
846
+ "outputs": [],
847
+ "source": [
848
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
849
+ ]
850
+ },
851
+ {
852
+ "cell_type": "markdown",
853
+ "id": "15bb0e80",
854
+ "metadata": {},
855
+ "source": [
856
+ "Output : Following Prompt and Expert Profile are generated "
857
+ ]
858
+ },
859
+ {
860
+ "cell_type": "code",
861
+ "execution_count": null,
862
+ "id": "696e6612",
863
+ "metadata": {},
864
+ "outputs": [],
865
+ "source": [
866
+ "OUTPUT = \"\"\"Expert Identity: You are a mathematics expert with a strong background in various fields of mathematics, including algebra, calculus, geometry, and statistics. You have a deep understanding of mathematical theories and principles, and you are skilled at solving complex problems with precision and clarity. Your expertise allows you to break down intricate problems into manageable steps, making it easier for others to follow your reasoning. You are familiar with a wide range of mathematical techniques and tools, and you can apply them effectively to find solutions. Whether the problem involves solving equations, proving theorems, or analyzing data, you can provide a clear, accurate, and well-explained solution. Your ability to communicate complex mathematical concepts in an understandable way makes you an invaluable resource for anyone seeking help with mathematics.\n",
867
+ "\n",
868
+ "Final best prompt: \n",
869
+ "\n",
870
+ "You are a mathematics expert. Your task is to solve a given mathematics problem accurately and provide a clear, detailed explanation of your solution process. Follow these steps to ensure a comprehensive and well-structured solution:\n",
871
+ "\n",
872
+ "1. **Understand the Problem**: Carefully read and comprehend the problem statement. Identify the key components and what is being asked.\n",
873
+ "\n",
874
+ "2. **Identify Components**: Break down the problem into its fundamental components, such as variables, constants, and relevant quantities (e.g., base pay, overtime pay, distances, speeds, etc.).\n",
875
+ "\n",
876
+ "3. **Apply Relevant Principles**: Use appropriate mathematical principles, formulas, and methods to solve the problem step by step.\n",
877
+ "\n",
878
+ "4. **Logical Reasoning**: Employ logical reasoning to explain each step of your solution process. Ensure that each step follows logically from the previous one.\n",
879
+ "\n",
880
+ "5. **Detailed Explanations**: Provide detailed explanations for each step to ensure clarity and understanding. Include intermediate results and how they contribute to the final solution.\n",
881
+ "\n",
882
+ "6. **Explicit Calculation Steps**: Show each calculation step in detail, including intermediate results. Use proper mathematical notation and symbols.\n",
883
+ "\n",
884
+ "7. **Verify Each Step**: Recheck each intermediate step of your calculation to verify the correctness of the final answer. Ensure that all arithmetic and algebraic operations are accurate.\n",
885
+ "\n",
886
+ "8. **Combine Results**: Clearly combine different components of the problem (e.g., base pay and overtime pay) before arriving at the final answer.\n",
887
+ "\n",
888
+ "9. **Simplify and Notate**: Simplify the final answer where possible, and use proper mathematical notation and symbols.\n",
889
+ "\n",
890
+ "10. **Mark the Final Answer**: Clearly mark the final answer within <ANS_START> and <ANS_END> tags.\n",
891
+ "\n",
892
+ "Ensure that your approach is tailored to the specific type of mathematical problem being solved, whether it involves arithmetic, algebra, geometry, calculus, or any other area of mathematics. Present the solutions in a clear and organized manner.\n",
893
+ "\n",
894
+ "**Additional Guidelines:**\n",
895
+ "- **Contextual Understanding**: Pay close attention to the context of the problem to ensure that all relationships and quantities are correctly interpreted.\n",
896
+ "- **Correct Application of Arithmetic Operations**: Double-check that all arithmetic operations are applied correctly and align with the problem's requirements.\n",
897
+ "- **Verification of Final Answer**: Dedicate a step to verify the final answer by rechecking all intermediate steps and ensuring they logically lead to the correct final result.\n",
898
+ "- **Clarity in Marking Final Answer**: Use the <ANS_START> and <ANS_END> tags to clearly mark the final answer.\n",
899
+ "\n",
900
+ "By following these steps and additional guidelines, you will ensure that the solution is accurate, well-explained, and clearly presented.\n",
901
+ "\n",
902
+ "\n",
903
+ "[Question] Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had a truck design, and some had a rose design. Bella bought 11 snowflake stamps. She bought 9 more truck stamps than snowflake stamps, and 13 fewer rose stamps than truck stamps. How many stamps did Bella buy in all?\n",
904
+ "[Answer] 1. **Understand the Problem**: Bella bought three types of stamps: snowflake, truck, and rose. We need to determine the total number of stamps she bought, given the relationships between the quantities of each type.\n",
905
+ "\n",
906
+ "2. **Identify Components**:\n",
907
+ " - Number of snowflake stamps: 11.\n",
908
+ " - Number of truck stamps: 9 more than the number of snowflake stamps.\n",
909
+ " - Number of rose stamps: 13 fewer than the number of truck stamps.\n",
910
+ "\n",
911
+ "3. **Apply Relevant Principles**: Use basic arithmetic operations to find the quantities of truck and rose stamps, and then sum all the quantities to find the total number of stamps.\n",
912
+ "\n",
913
+ "4. **Logical Reasoning**:\n",
914
+ " - Number of snowflake stamps: 11.\n",
915
+ " - Number of truck stamps: 11 (snowflake stamps) + 9 = 20.\n",
916
+ " - Number of rose stamps: 20 (truck stamps) - 13 = 7.\n",
917
+ "\n",
918
+ "5. **Detailed Explanations**:\n",
919
+ " - Calculate the number of truck stamps: 11 (snowflake stamps) + 9 = 20.\n",
920
+ " - Calculate the number of rose stamps: 20 (truck stamps) - 13 = 7.\n",
921
+ " - Calculate the total number of stamps: 11 (snowflake) + 20 (truck) + 7 (rose) = 38.\n",
922
+ "\n",
923
+ "6. **Explicit Calculation Steps**:\n",
924
+ " - Truck stamps: 11 + 9 = $<11+9=20>20.\n",
925
+ " - Rose stamps: 20 - 13 = $<20-13=7>7.\n",
926
+ " - Total stamps: 11 + 20 + 7 = $<11+20+7=38>38.\n",
927
+ "\n",
928
+ "7. **Verify Each Step**: Recheck each calculation step to ensure correctness:\n",
929
+ " - Truck stamps: 11 + 9 = 20.\n",
930
+ " - Rose stamps: 20 - 13 = 7.\n",
931
+ " - Total stamps: 11 + 20 + 7 = 38.\n",
932
+ "\n",
933
+ "8. **Combine Results**: Combine the number of each type of stamp correctly to find the total number of stamps.\n",
934
+ "\n",
935
+ "9. **Simplify and Notate**: The final answer is already simplified.\n",
936
+ "\n",
937
+ "10. **Mark the Final Answer**: <ANS_START>38<ANS_END>\n",
938
+ "\n",
939
+ "By following these steps, we ensure that the solution is accurate, well-explained, and clearly presented. <ANS_START>38<ANS_END>\n",
940
+ "\n",
941
+ "[Question] It takes Roque two hours to walk to work and one hour to ride his bike to work. Roque walks to and from work three times a week and rides his bike to and from work twice a week. How many hours in total does he take to get to and from work a week with walking and biking?\n",
942
+ "[Answer] 1. **Understand the Problem**: Roque has two modes of transportation to work: walking and biking. We need to calculate the total time he spends traveling to and from work in a week, considering the different times and frequencies for each mode.\n",
943
+ "\n",
944
+ "2. **Identify Components**:\n",
945
+ " - Time to walk to work: 2 hours (one way).\n",
946
+ " - Time to bike to work: 1 hour (one way).\n",
947
+ " - Frequency of walking: 3 times a week (to and from work).\n",
948
+ " - Frequency of biking: 2 times a week (to and from work).\n",
949
+ "\n",
950
+ "3. **Apply Relevant Principles**: Use basic arithmetic to calculate the total time spent walking and biking separately, then sum these times to get the total weekly travel time.\n",
951
+ "\n",
952
+ "4. **Logical Reasoning**:\n",
953
+ " - Calculate the total walking time for a week:\n",
954
+ " - One round trip (to and from work) by walking takes 2 hours (to work) + 2 hours (from work) = 4 hours.\n",
955
+ " - Roque walks to and from work 3 times a week, so the total walking time is 4 hours per round trip * 3 round trips = 12 hours.\n",
956
+ " - Calculate the total biking time for a week:\n",
957
+ " - One round trip (to and from work) by biking takes 1 hour (to work) + 1 hour (from work) = 2 hours.\n",
958
+ " - Roque bikes to and from work 2 times a week, so the total biking time is 2 hours per round trip * 2 round trips = 4 hours.\n",
959
+ "\n",
960
+ "5. **Detailed Explanations**:\n",
961
+ " - Walking time calculation:\n",
962
+ " - One round trip walking: 2 hours (to work) + 2 hours (from work) = 4 hours.\n",
963
+ " - Total walking time for the week: 4 hours per round trip * 3 round trips = 12 hours.\n",
964
+ " - Biking time calculation:\n",
965
+ " - One round trip biking: 1 hour (to work) + 1 hour (from work) = 2 hours.\n",
966
+ " - Total biking time for the week: 2 hours per round trip * 2 round trips = 4 hours.\n",
967
+ " - Combine the total walking and biking times to get the total weekly travel time:\n",
968
+ " - Total weekly travel time: 12 hours (walking) + 4 hours (biking) = 16 hours.\n",
969
+ "\n",
970
+ "6. **Explicit Calculation Steps**:\n",
971
+ " - Walking time: 2 hours (one way) * 2 (round trip) * 3 (times a week) = $<2*2*3=12>12 hours.\n",
972
+ " - Biking time: 1 hour (one way) * 2 (round trip) * 2 (times a week) = $<1*2*2=4>4 hours.\n",
973
+ " - Total time: 12 hours (walking) + 4 hours (biking) = $<12+4=16>16 hours.\n",
974
+ "\n",
975
+ "7. **Verify Each Step**: Recheck each calculation step to ensure correctness. Confirm that the arithmetic operations and logic used are accurate.\n",
976
+ "\n",
977
+ "8. **Combine Results**: Combine the total walking and biking times correctly to ensure the final answer is accurate.\n",
978
+ "\n",
979
+ "9. **Simplify and Notate**: The final answer is already simplified and clearly presented.\n",
980
+ "\n",
981
+ "10. **Mark the Final Answer**: <ANS_START>16<ANS_END>\n",
982
+ "\n",
983
+ "By following these steps, we ensure that the solution is accurate, well-explained, and clearly presented. <ANS_START>16<ANS_END>\n",
984
+ "\n",
985
+ "[Question] Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?\n",
986
+ "[Answer] 1. **Understand the Problem**: Betty is saving money for a wallet that costs $100. She currently has half of the money she needs. Her parents and grandparents are contributing additional amounts to help her reach her goal. We need to determine how much more money Betty needs to buy the wallet.\n",
987
+ "\n",
988
+ "2. **Identify Components**:\n",
989
+ " - Total cost of the wallet: $100.\n",
990
+ " - Amount Betty currently has: half of $100.\n",
991
+ " - Contribution from parents: $15.\n",
992
+ " - Contribution from grandparents: twice the amount given by parents.\n",
993
+ "\n",
994
+ "3. **Apply Relevant Principles**: Use basic arithmetic to calculate the total amount of money Betty will have after receiving contributions from her parents and grandparents, and then determine how much more she needs to reach $100.\n",
995
+ "\n",
996
+ "4. **Logical Reasoning**:\n",
997
+ " - Calculate the amount Betty currently has: $100 / 2 = $50.\n",
998
+ " - Calculate the contribution from grandparents: 2 * $15 = $30.\n",
999
+ " - Calculate the total amount of money Betty will have: $50 (current amount) + $15 (parents' contribution) + $30 (grandparents' contribution).\n",
1000
+ "\n",
1001
+ "5. **Detailed Explanations**:\n",
1002
+ " - Betty currently has $50 because half of $100 is $50.\n",
1003
+ " - Her parents give her $15.\n",
1004
+ " - Her grandparents give her twice the amount her parents give, which is 2 * $15 = $30.\n",
1005
+ " - The total amount of money Betty will have is $50 (current amount) + $15 (parents' contribution) + $30 (grandparents' contribution) = $95.\n",
1006
+ "\n",
1007
+ "6. **Explicit Calculation Steps**:\n",
1008
+ " - Current amount: $100 / 2 = $<100/2=50>50.\n",
1009
+ " - Grandparents' contribution: 2 * $15 = $<2*15=30>30.\n",
1010
+ " - Total amount: $50 + $15 + $30 = $<50+15+30=95>95.\n",
1011
+ "\n",
1012
+ "7. **Verify Each Step**: Recheck each calculation step to ensure correctness.\n",
1013
+ " - Current amount: $100 / 2 = $50.\n",
1014
+ " - Grandparents' contribution: 2 * $15 = $30.\n",
1015
+ " - Total amount: $50 + $15 + $30 = $95.\n",
1016
+ "\n",
1017
+ "8. **Combine Results**: Combine the total amount of money Betty will have correctly.\n",
1018
+ " - Total amount: $50 (current amount) + $15 (parents' contribution) + $30 (grandparents' contribution) = $95.\n",
1019
+ "\n",
1020
+ "9. **Simplify and Notate**: The final answer is already simplified.\n",
1021
+ "\n",
1022
+ "10. **Mark the Final Answer**: \n",
1023
+ " - Amount Betty needs to buy the wallet: $100 - $95 = $<100-95=5>5.\n",
1024
+ "\n",
1025
+ "<ANS_START>5<ANS_END> <ANS_START>5<ANS_END>\n",
1026
+ "\n",
1027
+ "[Question] A rectangle has a length of 10 cm and a width of 5 cm. What is the area and perimeter of the rectangle?\n",
1028
+ "[Answer] 1. **Understand the Problem**: We need to find both the area and the perimeter of a rectangle given its length and width.\n",
1029
+ "\n",
1030
+ "2. **Identify Components**: \n",
1031
+ " - Length of the rectangle (L) = 10 cm\n",
1032
+ " - Width of the rectangle (W) = 5 cm\n",
1033
+ "\n",
1034
+ "3. **Apply Relevant Principles**: \n",
1035
+ " - The formula for the area of a rectangle is \\( \\text{Area} = \\text{Length} \\times \\text{Width} \\).\n",
1036
+ " - The formula for the perimeter of a rectangle is \\( \\text{Perimeter} = 2 \\times (\\text{Length} + \\text{Width}) \\).\n",
1037
+ "\n",
1038
+ "4. **Logical Reasoning**:\n",
1039
+ " - To find the area, multiply the length by the width.\n",
1040
+ " - To find the perimeter, add the length and the width, then multiply the result by 2.\n",
1041
+ "\n",
1042
+ "5. **Detailed Explanations**:\n",
1043
+ " - Calculate the area: \\( \\text{Area} = 10 \\, \\text{cm} \\times 5 \\, \\text{cm} \\).\n",
1044
+ " - Calculate the perimeter: \\( \\text{Perimeter} = 2 \\times (10 \\, \\text{cm} + 5 \\, \\text{cm}) \\).\n",
1045
+ "\n",
1046
+ "6. **Explicit Calculation Steps**:\n",
1047
+ " - Area: \\( 10 \\times 5 = 50 \\, \\text{cm}^2 \\).\n",
1048
+ " - Perimeter: \\( 2 \\times (10 + 5) = 2 \\times 15 = 30 \\, \\text{cm} \\).\n",
1049
+ "\n",
1050
+ "7. **Verify Each Step**: \n",
1051
+ " - Recheck the area calculation: \\( 10 \\times 5 = 50 \\, \\text{cm}^2 \\).\n",
1052
+ " - Recheck the perimeter calculation: \\( 2 \\times 15 = 30 \\, \\text{cm} \\).\n",
1053
+ "\n",
1054
+ "8. **Combine Results**: \n",
1055
+ " - The area of the rectangle is \\( 50 \\, \\text{cm}^2 \\).\n",
1056
+ " - The perimeter of the rectangle is \\( 30 \\, \\text{cm} \\).\n",
1057
+ "\n",
1058
+ "9. **Simplify and Notate**: \n",
1059
+ " - The final answers are already simplified.\n",
1060
+ "\n",
1061
+ "10. **Mark the Final Answer**: \n",
1062
+ " - Area: <ANS_START>50 \\, \\text{cm}^2<ANS_END>\n",
1063
+ " - Perimeter: <ANS_START>30 \\, \\text{cm}<ANS_END>\n",
1064
+ "\n",
1065
+ "By following these steps, we ensure that the solution is accurate, well-explained, and clearly presented. <ANS_START>50<ANS_END>\n",
1066
+ "\n",
1067
+ "[Question] Solve for x in the equation 2x + 3 = 11.\n",
1068
+ "[Answer] **Understand the Problem**: We need to solve for the variable \\( x \\) in the given linear equation \\( 2x + 3 = 11 \\).\n",
1069
+ "\n",
1070
+ "**Identify Components**: \n",
1071
+ "- The equation is \\( 2x + 3 = 11 \\).\n",
1072
+ "- We need to isolate \\( x \\) on one side of the equation.\n",
1073
+ "\n",
1074
+ "**Apply Relevant Principles**: \n",
1075
+ "- Use basic algebraic principles to isolate \\( x \\).\n",
1076
+ "\n",
1077
+ "**Logical Reasoning**:\n",
1078
+ "1. Start with the given equation: \\( 2x + 3 = 11 \\).\n",
1079
+ "2. Subtract 3 from both sides of the equation to isolate the term with \\( x \\):\n",
1080
+ " \\[\n",
1081
+ " 2x + 3 - 3 = 11 - 3\n",
1082
+ " \\]\n",
1083
+ "3. Simplify both sides:\n",
1084
+ " \\[\n",
1085
+ " 2x = 8\n",
1086
+ " \\]\n",
1087
+ "4. Divide both sides by 2 to solve for \\( x \\):\n",
1088
+ " \\[\n",
1089
+ " \\frac{2x}{2} = \\frac{8}{2}\n",
1090
+ " \\]\n",
1091
+ "5. Simplify the division:\n",
1092
+ " \\[\n",
1093
+ " x = 4\n",
1094
+ " \\]\n",
1095
+ "\n",
1096
+ "**Detailed Explanations**:\n",
1097
+ "- Subtracting 3 from both sides removes the constant term on the left side, leaving \\( 2x \\) isolated.\n",
1098
+ "- Dividing both sides by 2 isolates \\( x \\) by removing the coefficient of 2.\n",
1099
+ "\n",
1100
+ "**Explicit Calculation Steps**:\n",
1101
+ "1. \\( 2x + 3 = 11 \\)\n",
1102
+ "2. \\( 2x + 3 - 3 = 11 - 3 \\)\n",
1103
+ "3. \\( 2x = 8 \\)\n",
1104
+ "4. \\( \\frac{2x}{2} = \\frac{8}{2} \\)\n",
1105
+ "5. \\( x = 4 \\)\n",
1106
+ "\n",
1107
+ "**Verify Each Step**:\n",
1108
+ "- Recheck each step to ensure no arithmetic errors:\n",
1109
+ " - Subtracting 3 from 11 gives 8.\n",
1110
+ " - Dividing 8 by 2 gives 4.\n",
1111
+ "\n",
1112
+ "**Combine Results**: The final value of \\( x \\) is correctly isolated and calculated.\n",
1113
+ "\n",
1114
+ "**Simplify and Notate**: The final answer is already simplified.\n",
1115
+ "\n",
1116
+ "**Mark the Final Answer**: <ANS_START>4<ANS_END>\n",
1117
+ "\n",
1118
+ "By following these steps, we ensure that the solution is accurate, well-explained, and clearly presented. <ANS_START>4<ANS_END>\n",
1119
+ "\n",
1120
+ "\n",
1121
+ "For each question present the reasoning followed by the correct answer.\"\"\""
1122
+ ]
1123
+ }
1124
+ ],
1125
+ "metadata": {
1126
+ "kernelspec": {
1127
+ "display_name": "general",
1128
+ "language": "python",
1129
+ "name": "python3"
1130
+ },
1131
+ "language_info": {
1132
+ "codemirror_mode": {
1133
+ "name": "ipython",
1134
+ "version": 3
1135
+ },
1136
+ "file_extension": ".py",
1137
+ "mimetype": "text/x-python",
1138
+ "name": "python",
1139
+ "nbconvert_exporter": "python",
1140
+ "pygments_lexer": "ipython3",
1141
+ "version": "3.12.4"
1142
+ }
1143
+ },
1144
+ "nbformat": 4,
1145
+ "nbformat_minor": 5
1146
+ }
demos/svamp/.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ USE_OPENAI_API_KEY="False"
2
+
3
+ OPENAI_API_KEY=""
4
+ OPENAI_MODEL_NAME=""
5
+
6
+ OPENAI_API_VERSION=""
7
+ AZURE_OPENAI_ENDPOINT=""
8
+ AZURE_OPENAI_DEPLOYMENT_NAME=""
demos/svamp/configs/prompt_library.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompts: |
2
+ You are a helpful assistant that assists research students in understanding research papers.
3
+ system_guidelines: |
4
+ Guidelines
5
+ - Your role must always be a helpful assistant that assists students in understanding research papers.
6
+ - Only answer questions that are directly or indirectly related to the referenced paper(s).
7
+
8
+ mode:
9
+ chat:
10
+ - name: CHAT-FIRST-MESSAGE
11
+ llm_request_type: rag-query
12
+ prompt_template: |
13
+ {user_msg}
14
+ emb_model_id: text embedding ada 002 [vellm-openai2]
15
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
16
+ prepend_system_prompts: False
17
+ prepend_system_guidelines: False
18
+
19
+ - name: CHAT-NEXT-MESSAGES
20
+ llm_request_type: rag-query
21
+ prompt_template: |
22
+ {user_msg}
23
+ emb_model_id: text embedding ada 002 [vellm-openai2]
24
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
25
+ prepend_system_prompts: False
26
+ prepend_system_guidelines: False
27
+
28
+ generation:
29
+ - name: FLASH_PROFILE
30
+ prompt_template: |
31
+ {user_msg}
32
+ prepend_system_prompts: False
33
+ prepend_system_guidelines: False
34
+ llm_request_type: rag-query
35
+ emb_model_id: text embedding ada 002 [vellm-openai2]
36
+ llm_model_id: gpt 35 Turbo [vellm-openai2]
demos/svamp/configs/promptopt_config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Specify one or more prompt refinement technique to be used. If you specify more than one prompt refinement techniques,
2
+ # all these technique would run on same seed data. Result, iterations needed & cost incurred for each of these
3
+ # technique would be logged. And winning technique for each data instance and overall would be logged.
4
+
5
+ # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt
6
+ # Uncomment techniques that you want to use
7
+ ############################ Critique Task Description Start ############################
8
+ prompt_technique_name: "critique_n_refine"
9
+ # unique_model_id of model defined in llm_config.yaml
10
+ unique_model_id: gpt-4o
11
+ # Number of iterations for conducting <mutation_rounds> rounds of mutation of task description
12
+ # followed by refinement of instructions
13
+ mutate_refine_iterations: 3
14
+ # Number of rounds of mutation to be performed when generating different styles
15
+ mutation_rounds: 3
16
+ # Refine instruction post mutation
17
+ refine_instruction: true
18
+ # Number of iterations for refining task description and in context examples for few-shot
19
+ refine_task_eg_iterations: 3
20
+ # Number of variations of prompts to generate in given iteration
21
+ style_variation: 5
22
+ # Number of questions to be asked to LLM in a single batch, during training step
23
+ questions_batch_size: 1
24
+ # Number of batches of questions to correctly answered, for a prompt to be considered as performing good
25
+ min_correct_count: 3
26
+ # Max number of mini-batches on which we should evaluate our prompt
27
+ max_eval_batches: 6
28
+ # Number of top best performing prompts to be considered for next iterations
29
+ top_n: 1
30
+ # Description of task. This will be fed to prompt
31
+ task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve"
32
+ # Base instruction, in line with your dataset. This will be fed to prompt
33
+ base_instruction: "Lets think step by step."
34
+ # Instruction for specifying answer format
35
+ answer_format: "At the end, wrap your final answer and option if applicable between <ANS_START> and <ANS_END> tags"
36
+ # Number of samples from dataset, set aside as training data. In every iteration we would be drawing
37
+ # `questions_batch_size` examples from training data with replacement.
38
+ seen_set_size: 25
39
+ # Number of examples to be given for few shots
40
+ few_shot_count: 5
41
+ # Number of synthetic training examples to be generated
42
+ num_train_examples: 20
43
+ # Generate synthetic reasoning
44
+ generate_reasoning: true
45
+ # Generate description of an expert which can solve the task at hand
46
+ generate_expert_identity: true
47
+ # Generate keywords that describe the intent of the task
48
+ generate_intent_keywords: false
49
+ ############################ Critique Task Description End ############################
50
+
51
+
52
+
demos/svamp/configs/setup_config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ assistant_llm:
2
+ # put the unique_model_id that you specified in llm_config.yaml
3
+ prompt_opt: gpt-4o
4
+ dir_info:
5
+ # Base directory for everything
6
+ base_dir: logs
7
+ log_dir_name: glue_logs
8
+ experiment_name: svamp
9
+ # Many features are different for mode: online/offline. For eg
10
+ # 1) Print of logs happens on console for offline mode
11
+ # 2) LLM Queue gets instantiated only in online mode
12
+ mode: offline
13
+ # Full length description of the experiment. This would be logged.
14
+ description:
demos/svamp/demo.ipynb ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "14360485",
6
+ "metadata": {},
7
+ "source": [
8
+ "#### Set environment variables in [.env](.env) for LLM API calling"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "6bd95c11",
14
+ "metadata": {},
15
+ "source": [
16
+ "### Import Dependencies"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "f1fb3d81-16b6-4b8c-a028-880fdce5e14a",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import sys\n",
27
+ "sys.path.insert(0, \"../../\")\n",
28
+ "import os\n",
29
+ "import promptwizard\n",
30
+ "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n",
31
+ "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n",
32
+ "from promptwizard.glue.common.utils.file import save_jsonlist\n",
33
+ "from typing import Any\n",
34
+ "from tqdm import tqdm\n",
35
+ "import json\n",
36
+ "from datasets import load_dataset\n",
37
+ "\n",
38
+ "from dotenv import load_dotenv\n",
39
+ "load_dotenv(override = True)\n"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "markdown",
44
+ "id": "f061d2fd",
45
+ "metadata": {},
46
+ "source": [
47
+ "### Create a dataset specific class and define the required functions "
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "id": "5f325d33",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "\n",
58
+ "def extract_between(start, end, text):\n",
59
+ " \"\"\"\n",
60
+ " Extracts the substring from 'text' that is between 'start' and 'end' strings.\n",
61
+ " \n",
62
+ " Parameters:\n",
63
+ " - start (str): The starting delimiter string.\n",
64
+ " - end (str): The ending delimiter string.\n",
65
+ " - text (str): The text to search within.\n",
66
+ " \n",
67
+ " Returns:\n",
68
+ " - str: The extracted substring between the start and end delimiters.\n",
69
+ " \"\"\"\n",
70
+ " start_index = text.find(start)\n",
71
+ " if start_index == -1:\n",
72
+ " return '' \n",
73
+ " \n",
74
+ " start_index += len(start)\n",
75
+ " \n",
76
+ " end_index = text.find(end, start_index)\n",
77
+ " if end_index == -1:\n",
78
+ " return '' \n",
79
+ " return text[start_index:end_index]\n",
80
+ "\n",
81
+ "class SVAMP(DatasetSpecificProcessing):\n",
82
+ "\n",
83
+ " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n",
84
+ " def extract_answer_from_output(completion):\n",
85
+ "\n",
86
+ " return completion\n",
87
+ "\n",
88
+ " examples_set = []\n",
89
+ "\n",
90
+ " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n",
91
+ " example = {\n",
92
+ " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n",
93
+ " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n",
94
+ " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n",
95
+ " }\n",
96
+ " examples_set.append(example)\n",
97
+ "\n",
98
+ " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n",
99
+ "\n",
100
+ " def extract_final_answer(self, answer: str):\n",
101
+ " \n",
102
+ " final_answer = extract_between(text=answer,start=\"<ANS_START>\",end=\"<ANS_END>\")\n",
103
+ " return final_answer\n",
104
+ " \n",
105
+ " def access_answer(self, llm_output: str, gt_answer: str):\n",
106
+ "\n",
107
+ " predicted_answer = self.extract_final_answer(llm_output)\n",
108
+ " is_correct = False\n",
109
+ " if predicted_answer and (predicted_answer.lower() == gt_answer.lower()):\n",
110
+ " is_correct = True\n",
111
+ "\n",
112
+ " return is_correct, predicted_answer"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": null,
118
+ "id": "f384eb57",
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "svamp_processor = SVAMP()"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": null,
128
+ "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0",
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "\n",
133
+ "if not os.path.exists(\"data\"):\n",
134
+ " os.mkdir(\"data\")\n",
135
+ "\n",
136
+ "dataset = load_dataset(\"ChilleD/SVAMP\")\n",
137
+ "\n",
138
+ "for dataset_type in ['train','test']:\n",
139
+ " data_list = []\n",
140
+ " num_samples = 0\n",
141
+ " for data in dataset[dataset_type]:\n",
142
+ " data_list.append({\"question\": data['question_concat'], \"answer\": data['Answer']})\n",
143
+ " if dataset_type == 'train' and num_samples == 100: # We sample only 100 train examples and use 25 out them for training randomly\n",
144
+ " break\n",
145
+ " num_samples += 1\n",
146
+ " svamp_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "markdown",
151
+ "id": "4852b94b",
152
+ "metadata": {},
153
+ "source": [
154
+ "### Set paths"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 47,
160
+ "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6",
161
+ "metadata": {},
162
+ "outputs": [],
163
+ "source": [
164
+ "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n",
165
+ "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n",
166
+ "path_to_config = \"configs\"\n",
167
+ "llm_config_path = os.path.join(path_to_config, \"llm_config.yaml\")\n",
168
+ "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n",
169
+ "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "markdown",
174
+ "id": "f7ba6394",
175
+ "metadata": {},
176
+ "source": [
177
+ "### Create an object for calling prompt optimization and inference functionalities"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09",
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "gp = GluePromptOpt(promptopt_config_path,\n",
188
+ " setup_config_path,\n",
189
+ " train_file_name,\n",
190
+ " svamp_processor)"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "markdown",
195
+ "id": "6e38ea08",
196
+ "metadata": {},
197
+ "source": [
198
+ "### Call prompt optimization function\n",
199
+ "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n",
200
+ "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n",
201
+ "3. ```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt "
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "id": "573c6151-2c03-45d9-9904-1724a1e20f1b",
208
+ "metadata": {
209
+ "scrolled": true
210
+ },
211
+ "outputs": [],
212
+ "source": [
213
+ "# Function call to generate optimal prompt and expert profile \n",
214
+ "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "markdown",
219
+ "id": "bae1a791",
220
+ "metadata": {},
221
+ "source": [
222
+ "### Save the optimized prompt and expert profile"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce",
229
+ "metadata": {
230
+ "scrolled": true
231
+ },
232
+ "outputs": [],
233
+ "source": [
234
+ "import pickle \n",
235
+ "\n",
236
+ "if not os.path.exists(\"results\"):\n",
237
+ " os.system(\"mkdir results\")\n",
238
+ "\n",
239
+ "with open(\"results/best_prompt.pkl\", 'wb') as f:\n",
240
+ " pickle.dump(best_prompt, f)\n",
241
+ "with open(\"results/expert_profile.pkl\", 'wb') as f:\n",
242
+ " pickle.dump(expert_profile, f)\n",
243
+ "\n",
244
+ "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "markdown",
249
+ "id": "b7691a87",
250
+ "metadata": {},
251
+ "source": [
252
+ "### Evaluate the optimized prompt"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": null,
258
+ "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c",
259
+ "metadata": {
260
+ "scrolled": true
261
+ },
262
+ "outputs": [],
263
+ "source": [
264
+ "gp.EXPERT_PROFILE = expert_profile\n",
265
+ "gp.BEST_PROMPT = best_prompt\n",
266
+ "\n",
267
+ "# Function call to evaluate the prompt\n",
268
+ "accuracy = gp.evaluate(test_file_name)\n",
269
+ "\n",
270
+ "print(f\"Final Accuracy: {accuracy}\")"
271
+ ]
272
+ }
273
+ ],
274
+ "metadata": {
275
+ "kernelspec": {
276
+ "display_name": "Python 3 (ipykernel)",
277
+ "language": "python",
278
+ "name": "python3"
279
+ },
280
+ "language_info": {
281
+ "codemirror_mode": {
282
+ "name": "ipython",
283
+ "version": 3
284
+ },
285
+ "file_extension": ".py",
286
+ "mimetype": "text/x-python",
287
+ "name": "python",
288
+ "nbconvert_exporter": "python",
289
+ "pygments_lexer": "ipython3",
290
+ "version": "3.10.12"
291
+ }
292
+ },
293
+ "nbformat": 4,
294
+ "nbformat_minor": 5
295
+ }
docs/images/arithmetic_task.png ADDED
docs/images/bigbench.png ADDED
docs/images/comaprision.png ADDED

Git LFS Details

  • SHA256: ace953e64449bdfaac42a9e587e3c1f37447755aad8c522c6d02be6d7e925c65
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
docs/images/cost_analysis.png ADDED
docs/images/curve.png ADDED
docs/images/github.png ADDED
docs/images/icl_results.png ADDED
docs/images/iterative_flowchart-1.png ADDED

Git LFS Details

  • SHA256: 692c18bece5f26e48a8549c19b6f9969a284a0ce2e00d510da461ad579770f3e
  • Pointer size: 131 Bytes
  • Size of remote file: 168 kB
docs/images/msr_blog.png ADDED
docs/images/overview.png ADDED

Git LFS Details

  • SHA256: 6dbee997ee3b194173cd2cc5fee24dfef3cc28311bf3e28778e66fb1ce8ca9ae
  • Pointer size: 131 Bytes
  • Size of remote file: 252 kB
docs/images/ppc.png ADDED

Git LFS Details

  • SHA256: 85152be471400c6927f4d3c5d201755564d719dbe2602f5499c1c7179c66b607
  • Pointer size: 131 Bytes
  • Size of remote file: 113 kB
docs/images/ppc_1.png ADDED

Git LFS Details

  • SHA256: 04a31930771409f59afb7ac8ac5207b77ed526b12592a87c44d773ad10d95e9f
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
docs/images/prompting.png ADDED
docs/images/sequential_flowchart-1.png ADDED
docs/images/slm_prompt.png ADDED
docs/index.html ADDED
@@ -0,0 +1,784 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="description"
6
+ content="PromptWizard: Task-Aware Prompt Optimization Framework">
7
+ <meta name="keywords" content="PromptWizard">
8
+ <meta name="viewport" content="width=device-width, initial-scale=1">
9
+ <title>PromptWizard</title>
10
+
11
+ <!-- Global site tag (gtag.js) - Google Analytics -->
12
+ <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
13
+ <script>
14
+ window.dataLayer = window.dataLayer || [];
15
+
16
+ function gtag() {
17
+ dataLayer.push(arguments);
18
+ }
19
+
20
+ gtag('js', new Date());
21
+
22
+ gtag('config', 'G-PYVRSFMDRL');
23
+ </script>
24
+
25
+ <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
26
+ rel="stylesheet">
27
+
28
+ <link rel="stylesheet" href="./static/css/bulma.min.css">
29
+ <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
30
+ <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
31
+ <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
32
+ <link rel="stylesheet"
33
+ href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
34
+ <link rel="stylesheet" href="./static/css/index.css">
35
+ <link rel="icon" href="./static/images/favicon.svg">
36
+
37
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
38
+ <script defer src="./static/js/fontawesome.all.min.js"></script>
39
+ <script src="./static/js/bulma-carousel.min.js"></script>
40
+ <script src="./static/js/bulma-slider.min.js"></script>
41
+ <script src="./static/js/index.js"></script>
42
+
43
+ <style>
44
+ .red-text {
45
+ color: red;
46
+ }
47
+
48
+ /* Collapsible content - initially hidden */
49
+ .col_content_1 {
50
+ padding: 15px;
51
+ background-color: #f1f1f1;
52
+ display: none;
53
+ }
54
+ .col_content_2 {
55
+ padding: 15px;
56
+ background-color: #f1f1f1;
57
+ display: none;
58
+ }
59
+ .col_content_3 {
60
+ padding: 15px;
61
+ background-color: #f1f1f1;
62
+ display: none;
63
+ }
64
+ .col_content_4 {
65
+ padding: 15px;
66
+ background-color: #f1f1f1;
67
+ display: none;
68
+ }
69
+ .col_content_5 {
70
+ padding: 15px;
71
+ background-color: #f1f1f1;
72
+ display: none;
73
+ }
74
+ .col_content_6 {
75
+ padding: 15px;
76
+ background-color: #f1f1f1;
77
+ display: none;
78
+ }
79
+ .col_content_7 {
80
+ padding: 15px;
81
+ background-color: #f1f1f1;
82
+ display: none;
83
+ }
84
+ .col_content_8 {
85
+ padding: 15px;
86
+ background-color: #f1f1f1;
87
+ display: none;
88
+ }
89
+ .col_content_9 {
90
+ padding: 15px;
91
+ background-color: #f1f1f1;
92
+ display: none;
93
+ }
94
+ .col_content_10 {
95
+ padding: 15px;
96
+ background-color: #f1f1f1;
97
+ display: none;
98
+ }
99
+ .col_content_11 {
100
+ padding: 15px;
101
+ background-color: #f1f1f1;
102
+ display: none;
103
+ }
104
+ table {
105
+ width: 100%;
106
+ border-collapse: collapse;
107
+ }
108
+ table, th, td {
109
+ border: 1px solid black;
110
+ }
111
+ th, td {
112
+ padding: 8px;
113
+ text-align: left;
114
+ }
115
+
116
+ .btn {
117
+ display: flex; /* Use flexbox for layout */
118
+ justify-content: space-between; /* Space out content on left and right */
119
+ align-items: center; /* Center content vertically */
120
+ padding: 10px 20px; /* Add padding to the button */
121
+ font-size: 18px; /* Text size */
122
+ background-color: black;
123
+ color: white;
124
+ border: none;
125
+ border-radius: 5px;
126
+ cursor: pointer;
127
+ width: 100%; /* Button width (you can adjust this) */
128
+ }
129
+
130
+ /* Style for the + sign */
131
+ .btn .icon {
132
+ font-size: 24px; /* Size of the + sign */
133
+ }
134
+
135
+ .btn:hover {
136
+ background-color: gray; /* Hover effect */
137
+ }
138
+
139
+ /* Container for the slider */
140
+ .slider-container {
141
+ width: 80%; /* Set the width of the slider */
142
+ margin: 0 auto;
143
+ overflow: hidden;
144
+ position: relative;
145
+ }
146
+
147
+ /* Slide wrapper that holds all the images */
148
+ .slider-wrapper {
149
+ display: flex;
150
+ transition: transform 0.5s ease-in-out;
151
+ }
152
+
153
+ /* Each image box (b5 box) */
154
+ .box {
155
+
156
+ flex: 0 0 100%; /* Each image takes full width of the container */
157
+ display: flex;
158
+ justify-content: center;
159
+ align-items: center;
160
+ }
161
+
162
+ .box img {
163
+ width: 90%; /* Make images responsive to fit the container */
164
+ max-height: 400px; /* Control max height */
165
+ object-fit: cover; /* Ensure images maintain aspect ratio */
166
+ }
167
+
168
+ /* Navigation buttons (next and previous) */
169
+ .prev, .next {
170
+ position: absolute;
171
+ top: 50%;
172
+ transform: translateY(-50%);
173
+ background-color: rgba(0, 0, 0, 0.5);
174
+ color: white;
175
+ border: none;
176
+ padding: 10px;
177
+ cursor: pointer;
178
+ }
179
+
180
+ .prev {
181
+ left: 10px;
182
+ }
183
+
184
+ .next {
185
+ right: 10px;
186
+ }
187
+
188
+ * {box-sizing: border-box;}
189
+ body {font-family: Verdana, sans-serif;}
190
+ .mySlides {display: none;}
191
+ img {vertical-align: middle;}
192
+
193
+ /* Slideshow container */
194
+ .slideshow-container {
195
+ max-width: 1000px;
196
+ position: relative;
197
+ margin: auto;
198
+ }
199
+
200
+ /* Caption text */
201
+ .text {
202
+ color: #f2f2f2;
203
+ font-size: 15px;
204
+ padding: 8px 12px;
205
+ position: absolute;
206
+ bottom: 8px;
207
+ width: 100%;
208
+ text-align: center;
209
+ }
210
+
211
+ /* Number text (1/3 etc) */
212
+ .numbertext {
213
+ color: #f2f2f2;
214
+ font-size: 12px;
215
+ padding: 8px 12px;
216
+ position: absolute;
217
+ top: 0;
218
+ }
219
+
220
+ /* The dots/bullets/indicators */
221
+ .dot {
222
+ height: 15px;
223
+ width: 15px;
224
+ margin: 0 2px;
225
+ background-color: #bbb;
226
+ border-radius: 50%;
227
+ display: inline-block;
228
+ transition: background-color 0.6s ease;
229
+ }
230
+
231
+ .active {
232
+ background-color: #717171;
233
+ }
234
+
235
+ /* Fading animation */
236
+ .fade {
237
+ animation-name: fade;
238
+ animation-duration: 1.5s;
239
+ }
240
+
241
+ @keyframes fade {
242
+ from {opacity: .4}
243
+ to {opacity: 1}
244
+ }
245
+
246
+ /* On smaller screens, decrease text size */
247
+ @media only screen and (max-width: 300px) {
248
+ .text {font-size: 11px}
249
+ }
250
+
251
+
252
+ </style>
253
+
254
+ </head>
255
+ <body>
256
+
257
+ <section class="hero">
258
+ <div class="hero-body">
259
+ <div class="container is-max-desktop">
260
+ <div class="columns is-centered">
261
+ <div class="column has-text-centered">
262
+ <h1 class="title is-1 publication-title">🧙 PromptWizard<br><p style="white-space: nowrap;">Task-Aware Prompt Optimization Framework</p></h1>
263
+ <div class="is-size-5 publication-authors">
264
+ <span class="author-block">
265
+ <a>Eshaan Agarwal</a>,</span>
266
+ <span class="author-block">
267
+ <a>Joykirat Singh</a>,</span>
268
+ <span class="author-block">
269
+ <a>Vivek Dani</a>,
270
+ </span>
271
+ <span class="author-block">
272
+ <a>Raghav Magazine</a>,
273
+ </span>
274
+ <span class="author-block">
275
+ <a>Tanuja Ganu</a>,
276
+ </span>
277
+ <span class="author-block">
278
+ <a>Akshay Nambi</a>
279
+ </span>
280
+ </div>
281
+
282
+ <div class="is-size-5 publication-authors">
283
+ <span class="author-block">Microsoft Research</span>
284
+ </div>
285
+
286
+ <div class="column has-text-centered">
287
+ <div class="publication-links">
288
+ <!-- PDF Link. -->
289
+ <span class="link-block">
290
+ <a href="https://arxiv.org/pdf/2405.18369"
291
+ class="external-link button is-normal is-rounded is-dark">
292
+ <span class="icon">
293
+ <i class="fas fa-file-pdf"></i>
294
+ </span>
295
+ <span>Paper</span>
296
+ </a>
297
+ </span>
298
+ <span class="link-block">
299
+ <a href="https://arxiv.org/abs/2405.18369"
300
+ class="external-link button is-normal is-rounded is-dark">
301
+ <span class="icon">
302
+ <i class="ai ai-arxiv"></i>
303
+ </span>
304
+ <span>arXiv</span>
305
+ </a>
306
+ </span>
307
+ <!-- Code Link. -->
308
+ <span class="link-block">
309
+ <a href="https://github.com/microsoft/PromptWizard"
310
+ class="external-link button is-normal is-rounded is-dark">
311
+ <span class="icon">
312
+ <i class="fab fa-github"></i>
313
+ </span>
314
+ <span>Code</span>
315
+ </a>
316
+ </span>
317
+
318
+ </div>
319
+
320
+ </div>
321
+ </div>
322
+ </div>
323
+ </div>
324
+ </div>
325
+ </section>
326
+
327
+ <div class="slideshow-container">
328
+
329
+ <div class="mySlides fade">
330
+ <div class="numbertext">1 / 3</div>
331
+ <p align="center">
332
+ <img src="images/overview.png">
333
+ </p>
334
+ </div>
335
+
336
+ <div class="mySlides fade">
337
+ <div class="numbertext">2 / 3</div>
338
+ <p align="center">
339
+ <img width="700" height="700" src="images/iterative_flowchart-1.png">
340
+ </p>
341
+ </div>
342
+
343
+ <div class="mySlides fade">
344
+ <div class="numbertext">3 / 3</div>
345
+ <p align="center">
346
+ <img width="700" height="700" src="images/sequential_flowchart-1.png">
347
+ </p>
348
+ </p>
349
+ </div>
350
+
351
+ </div>
352
+ <br>
353
+
354
+ <div style="text-align:center">
355
+ <span class="dot"></span>
356
+ <span class="dot"></span>
357
+ <span class="dot"></span>
358
+ </div>
359
+
360
+ <script>
361
+ let slideIndex = 0;
362
+ showSlides();
363
+
364
+ function showSlides() {
365
+ let i;
366
+ let slides = document.getElementsByClassName("mySlides");
367
+ let dots = document.getElementsByClassName("dot");
368
+ for (i = 0; i < slides.length; i++) {
369
+ slides[i].style.display = "none";
370
+ }
371
+ slideIndex++;
372
+ if (slideIndex > slides.length) {slideIndex = 1}
373
+ for (i = 0; i < dots.length; i++) {
374
+ dots[i].className = dots[i].className.replace(" active", "");
375
+ }
376
+ slides[slideIndex-1].style.display = "block";
377
+ dots[slideIndex-1].className += " active";
378
+ setTimeout(showSlides, 2000); // Change image every 2 seconds
379
+ }
380
+ </script>
381
+
382
+
383
+
384
+ <section class="section">
385
+ <div class="container is-max-desktop">
386
+ <div class="columns is-centered has-text-centered">
387
+ <div class="column is-four-fifths">
388
+ <div class="content has-text-justified">
389
+ <b>PromptWizard</b> is an open source framework for automated prompt and example optimization, leveraging a feedback-driven critique and synthesis process to balance exploration and exploitation. It consistently outperforms state-of-the-art methods while significantly reducing computational costs, enabling efficient and scalable prompt engineering across diverse tasks and LLMs.
390
+ </div>
391
+ </div>
392
+ </div>
393
+
394
+ </section>
395
+
396
+ <section class="section">
397
+ <div class="container is-max-desktop">
398
+ <div class="columns is-centered has-text-centered">
399
+ <div class="column is-four-fifths">
400
+ <h2 class="title is-3">Overview</h2>
401
+ <div class="content has-text-justified">
402
+ Large language models (LLMs) like GPT-4 have achieved remarkable performance across diverse tasks. At the core of this success is prompting—the process of providing input instructions to guide models toward desired outputs. Studies have shown that prompting significantly influences LLM performance, making prompt engineering—the design and refinement of prompts—critical for maximizing accuracy. However, crafting effective prompts remains a labor-intensive and domain-specific task, requiring human expertise and subjective judgment. As models evolve and tasks vary, the need to repeatedly design prompts raises an important question: <br> <b>Can prompt engineering be automated to streamline this process and enhance scalability? </b>
403
+ </div>
404
+ </div>
405
+ </div>
406
+
407
+ </section>
408
+
409
+
410
+ <section class="section">
411
+ <div class="container is-max-desktop">
412
+ <!-- Motivation. -->
413
+ <div class="columns is-centered has-text-centered">
414
+ <div class="column is-four-fifths">
415
+ <h2 class="title is-3">Motivation</h2>
416
+ <div class="content has-text-justified">
417
+
418
+ <h3>Prompting is central to LLMs!</h3>
419
+ <ul>
420
+ <li><b>Prompting</b>: The process of providing input instructions to guide models towards desired output</li>
421
+ <li><b>Prompt Engineering</b>: The process of designing and refining of prompts​</li>
422
+ <li>Crafting effective prompts is a challenge as:</li>
423
+ <ol>
424
+ <li>The task is labor-intensive</li>
425
+ <li>Prompts need to be domain-specific to work effectively</li>
426
+ <li>Often it requires human expertise and is subjective</li>
427
+ <li>Also as models and tasks evolve, there is a need for repeated design</li>
428
+ </ol>
429
+ </ul>
430
+
431
+ </div>
432
+ </div>
433
+ </div>
434
+ <!--/ Motivation. -->
435
+
436
+ </section>
437
+
438
+
439
+
440
+
441
+
442
+
443
+
444
+ <section class="section">
445
+ <div class="container is-max-desktop">
446
+ <!-- Abstract. -->
447
+ <div class="columns is-centered has-text-centered">
448
+ <div class="column is-four-fifths">
449
+ <h2 class="title is-3">PromptWizard Working </h2>
450
+ <div class="content has-text-justified">
451
+ <p>
452
+ PromptWizard (PW) is a discrete prompt optimization framework that employs a self-evolving mechanism where the LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis. This self-adaptive approach ensures holistic optimization by evolving both the instructions and in-context learning examples for better task performance.
453
+ </p>
454
+ <h3>Three Key Insights :</h3>
455
+ <p>
456
+ <ol>
457
+ <li><b>Feedback-driven Refinement</b>: LLM generates, critiques, and refines its own prompts and examples, continuously improving through iterative feedback and synthesis​
458
+ </li>
459
+ <li><b>Critique and Synthesize diverse examples</b>: Generates synthetic examples that are robust, diverse and task-aware. Also it optimizes both prompt and examples in tandem​
460
+ </li>
461
+ <li><b>Self generated Chain of Thought (CoT)</b> steps with combination of positive, negative and synthetic examples</li>
462
+ </ol>
463
+
464
+ <p>
465
+ Following are the details of each step :
466
+ </p>
467
+
468
+
469
+ <button class="btn" onclick="toggleContent(this,'1')">1. Feedback driven Refinement <span class="icon">+</span></button>
470
+ <div class="col_content_1">
471
+ <ul>
472
+ <li>PromptWizard uses a systematic, feedback-driven process where it incorporates a critique component that provides feedback, thus guiding and refining the prompt over multiple iterations​</li>
473
+ <li>The following steps help in carrying out this systematically</li>
474
+ <ul>
475
+ <li><b>Mutate</b>: Takes an initial problem description + thinking Styles to generate prompts​</li>
476
+ <li><b>Scoring</b>: Evaluate the performance of the generated prompts to determine best prompt​</li>
477
+ <li><b>Critique</b>: Reviews where the prompt succeeded and failed by analyzing cases where the LLM struggled​</li>
478
+ <li><b>Synthesize</b>: Uses critique’s feedback to refine the best prompt</li>
479
+ </ul>
480
+ </li>
481
+ </ul>
482
+ </div>
483
+
484
+ <script>
485
+ // Toggles the collapsible panel ".col_content_<index>" paired with the
+ // clicked button, and flips the button's "+"/"-" icon to match.
+ // button: the clicked <button> element (must contain a ".icon" span).
+ // index: string/number suffix identifying which panel to toggle.
+ // NOTE(review): querySelector returns only the first matching element —
+ // assumes each ".col_content_<index>" class appears once on the page.
486
+ function toggleContent(button,index) {
487
+ var content = document.querySelector(".col_content_"+index);
488
+ const icon = button.querySelector('.icon');
489
+ // Checks the element's *inline* style only; it starts out empty (""), so
+ // the first click takes the else branch and shows the panel.
+ if (content.style.display === "block") {
490
+ content.style.display = "none"; // Hide content if it's visible
491
+ icon.textContent = icon.textContent.replace('-', '+'); // show "+" (expand hint)
492
+ } else {
493
+ content.style.display = "block"; // Show content if it's hidden
494
+ icon.textContent = icon.textContent.replace('+', '-'); // show "-" (collapse hint)
495
+ }
496
+ }
497
+ </script>
498
+ <br>
499
+ <button class="btn" onclick="toggleContent(this,'2')">2. Critique and Synthesize diverse examples <span class="icon">+</span></button>
500
+ <div class="col_content_2">
501
+ <ul>
502
+ <li>PromptWizard improves both prompt instructions and few-shot examples in tandem​</li>
503
+ <li>It uses self-reflection to synthesize examples that are diverse and task-relevant​ </li>
504
+ <li>An iterative feedback loop is used that continuously refines both the prompt and few-shot examples​</li>
505
+ <li>Few shot example optimization:​</li>
506
+ <ul>
507
+ <li><b>Critique</b>: Analyzes previously selected examples and use the feedback to determine how examples should evolve​</li>
508
+ <li><b>Synthesize</b>: Incorporates feedback to generate new synthetic examples that are more diverse, robust, and task-relevant​</li>
509
+ </ul>
510
+ <li>Prompt instruction optimization:​</li>
511
+ <ul>
512
+ <li><b>Critique</b>: Identifies weaknesses and gaps that require addressing to further refine the prompt instruction​</li>
513
+ <li><b>Synthesize</b>: Leverages feedback from the critique to synthesize and refine the prompt instruction</li>
514
+ </ul>
515
+ </ul>
516
+
517
+ </div>
518
+ <br>
519
+ <button class="btn" onclick="toggleContent(this,'3')">3. Chain of Thought Reasoning <span class="icon">+</span></button>
520
+ <div class="col_content_3">
521
+ <p>
522
+ <ul>
523
+ <li>Incorporating chain-of-thought (CoT) reasoning improves problem-solving abilities of the model​</li>
524
+ <li>CoT Reasoning takes the selected few-shot examples and generates a detailed reasoning chain for each example to facilitate problem-solving​</li>
525
+ <li>An LLM is used to check the coherence and relevance of examples​</li>
526
+ </ul>
527
+ ​</p>
528
+ </div>
529
+ </p>
530
+ </div>
531
+ </div>
532
+ </div>
533
+ <!--/ Abstract. -->
534
+
535
+ </section>
536
+
537
+ <section class="section">
538
+ <div class="container is-max-desktop">
539
+ <!-- Results. -->
540
+ <div class="columns is-centered has-text-centered">
541
+ <div class="column is-four-fifths">
542
+ <h2 class="title is-3">Results</h2>
543
+ <div class="content has-text-justified">
544
+
545
+ <button class="btn" onclick="toggleContent(this,'4')">Instruction Induction Dataset<span class="icon">+</span></button>
546
+ <div class="col_content_4">
547
+ <p align="center">
548
+ <img src="./images/comaprision.png" >
549
+ </p>
550
+ <p align="center"><b>PromptWizard outperforms the baselines, achieving the highest accuracy on <b class="red-text">13/19 tasks (68%)</b> with 0-shot and <b class="red-text">16/19 (84%)</b> with 1-shot</b></p>
551
+ <p align="center">
552
+ <img src="./images/ppc.png" >
553
+ </p>
554
+ <p align="center"><b>PromptWizard consistently performs near the best possible accuracy across all tasks</b></p>
555
+ <p align="center">
556
+ <img src="./images/cost_analysis.png" >
557
+ </p>
558
+ <p align="center"><b>PromptWizard costs just $0.05 per task, <b class="red-text">5-60x reduction</b> in overall tokens/cost</b>​</p>
559
+ </div>
560
+ </div>
561
+
562
+ <button class="btn" onclick="toggleContent(this,'5')">Arithmetic Tasks<span class="icon">+</span></button>
563
+ <div class="col_content_5">
564
+ <p align="center">
565
+ <img src="./images/arithmetic_task.png" >
566
+ </p>
567
+ </div>
568
+ <br>
569
+ <button class="btn" onclick="toggleContent(this,'7')">Big Bench Hard<span class="icon">+</span></button>
570
+ <div class="col_content_7">
571
+ <p align="center">
572
+ <img src="./images/bigbench.png" >
573
+ </p>
574
+ </div>
575
+ <br>
576
+ <button class="btn" onclick="toggleContent(this,'8')">Prompts Using SLMs<span class="icon">+</span></button>
577
+ <div class="col_content_8">
578
+ <p align="center">
579
+ <img src="./images/slm_prompt.png" >
580
+ </p>
581
+ <p align="center"><b>PromptWizard using Llama-70B shows a negligible <b class="red-text">&lt; 1% drop</b> in accuracy</b> ​</p>
582
+ </div>
583
+ <br>
584
+ <button class="btn" onclick="toggleContent(this,'9')">Varying the In-Context Examples<span class="icon">+</span></button>
585
+ <div class="col_content_9">
586
+ <p align="center">
587
+ <img src="./images/icl_results.png" >
588
+ </p>
589
+ <p align="center"><b>PromptWizard shows strong resilience even with fewer training samples mainly due to synthetic example generation and reasoning chains​</b>​</p>
590
+ </div>
591
+ <br>
592
+ <button class="btn" onclick="toggleContent(this,'10')">Comparison with naive prompting<span class="icon">+</span></button>
593
+ <div class="col_content_10">
594
+ <p align="center">
595
+ <img src="./images/prompting.png" >
596
+ </p>
597
+ <p align="center"><b>Substantial performance improvements across all models when optimized prompts are generated by PromptWizard on GSM8k dataset</b>​</p>
598
+ </div>
599
+ <br>
600
+ <button class="btn" onclick="toggleContent(this,'11')">Comparison with Feedback based and other Prompt Optimization Techniques<span class="icon">+</span></button>
601
+ <div class="col_content_11">
602
+ <p align="center">
603
+ <table>
604
+ <tr>
605
+ <td>Dataset</td>
606
+ <td colspan="4">Accuracy (high)</td>
607
+ </tr>
608
+ <tr>
609
+ <td></td>
610
+ <td>DSPy</td>
611
+ <td>PromptAgent </td>
612
+ <td>APO</td>
613
+ <td>PW</td>
614
+ </tr>
615
+ <tr>
616
+ <td>GSM8k</td>
617
+ <td>78.2</td>
618
+ <td>68.84</td>
619
+ <td>25.67</td>
620
+ <td><b>90</b></td>
621
+ </tr>
622
+ <tr>
623
+ <td>AQUARAT</td>
624
+ <td>55.1</td>
625
+ <td>56.67</td>
626
+ <td>20.12</td>
627
+ <td><b>58.2</b></td>
628
+ </tr>
629
+ <tr>
630
+ <td>SVAMP</td>
631
+ <td>77</td>
632
+ <td>78.67</td>
633
+ <td>75.25</td>
634
+ <td><b>82.3</b></td>
635
+ </tr>
636
+ <tr>
637
+ <td>ETHOS</td>
638
+ <td>84.1</td>
639
+ <td>84.25</td>
640
+ <td>80.62</td>
641
+ <td><b>89.4</b></td>
642
+ </tr>
643
+ </table>
644
+ <br>
645
+ <table>
646
+ <tr>
647
+ <td>Dataset</td>
648
+ <td colspan="4">Calls (low)</td>
649
+ </tr>
650
+ <tr>
651
+ <td></td>
652
+ <td>DSPy</td>
653
+ <td>PromptAgent </td>
654
+ <td>APO</td>
655
+ <td>PW</td>
656
+ </tr>
657
+ <tr>
658
+ <td>GSM8k</td>
659
+ <td>915</td>
660
+ <td>2115</td>
661
+ <td>8490</td>
662
+ <td><b>147</b></td>
663
+ </tr>
664
+ <tr>
665
+ <td>AQUARAT</td>
666
+ <td>920</td>
667
+ <td>2200</td>
668
+ <td>8500</td>
669
+ <td><b>112</b></td>
670
+ </tr>
671
+ <tr>
672
+ <td>SVAMP</td>
673
+ <td>2300</td>
674
+ <td>2111</td>
675
+ <td>8000</td>
676
+ <td><b>178</b></td>
677
+ </tr>
678
+ <tr>
679
+ <td>ETHOS</td>
680
+ <td>660</td>
681
+ <td>2217</td>
682
+ <td>8200</td>
683
+ <td><b>80</b></td>
684
+ </tr>
685
+ </table>
686
+ <br>
687
+ <table>
688
+ <tr>
689
+ <td>Dataset</td>
690
+ <td colspan="4">Tokens (low)</td>
691
+ </tr>
692
+ <tr>
693
+ <td></td>
694
+ <td>DSPy</td>
695
+ <td>PromptAgent </td>
696
+ <td>APO</td>
697
+ <td>PW</td>
698
+ </tr>
699
+ <tr>
700
+ <td>GSM8k</td>
701
+ <td>262</td>
702
+ <td>500</td>
703
+ <td><b>109</b></td>
704
+ <td>237</td>
705
+ </tr>
706
+ <tr>
707
+ <td>AQUARAT</td>
708
+ <td>326</td>
709
+ <td>875</td>
710
+ <td><b>125</b></td>
711
+ <td>200</td>
712
+ </tr>
713
+ <tr>
714
+ <td>SVAMP</td>
715
+ <td>189</td>
716
+ <td>680</td>
717
+ <td><b>85</b></td>
718
+ <td>127</td>
719
+ </tr>
720
+ <tr>
721
+ <td>ETHOS</td>
722
+ <td>175</td>
723
+ <td>417</td>
724
+ <td><b>55</b></td>
725
+ <td>190</td>
726
+ </tr>
727
+ </table>
728
+ </p>
729
+ <br>
730
+ <p align="center"> <b>PromptWizard outperforms feedback-based methods like APO, PromptAgent and other prompt optimization techniques like DSPy in terms of accuracy and number of API calls for optimization on various datasets.</b></p>
731
+
732
+
733
+ </div>
734
+ </div>
735
+ </div>
736
+ </div>
737
+ <!--/ Results. -->
738
+
739
+ </section>
740
+
741
+
742
+ <section class="section" id="BibTeX">
743
+ <div class="container is-max-desktop content">
744
+ <h2 class="title">BibTeX</h2>
745
+ <pre><code>@misc{agarwal2024promptwizardtaskawarepromptoptimization,
746
+ title={PromptWizard: Task-Aware Prompt Optimization Framework},
747
+ author={Eshaan Agarwal and Joykirat Singh and Vivek Dani and Raghav Magazine and Tanuja Ganu and Akshay Nambi},
748
+ year={2024},
749
+ eprint={2405.18369},
750
+ archivePrefix={arXiv},
751
+ primaryClass={cs.CL},
752
+ url={https://arxiv.org/abs/2405.18369},
753
+ }</code></pre>
754
+ </div>
755
+ </section>
756
+
757
+
758
+ <footer class="footer">
759
+ <div class="container">
760
+ <div class="content has-text-centered">
761
+ </div>
762
+ <div class="columns is-centered">
763
+ <div class="column is-8">
764
+ <div class="content">
765
+ <p>
766
+ This website is licensed under a <a rel="license"
767
+ href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
768
+ Commons Attribution-ShareAlike 4.0 International License</a>.
769
+ </p>
770
+ <p>
771
+ This means you are free to borrow the <a
772
+ href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website,
773
+ we just ask that you link back to this page in the footer.
774
+ Please remember to remove the analytics code included in the header of the website which
775
+ you do not want on your website.
776
+ </p>
777
+ </div>
778
+ </div>
779
+ </div>
780
+ </div>
781
+ </footer>
782
+
783
+ </body>
784
+ </html>