WizardCoder-15B-1.0-ggmlv3-q4-0

Build error

NERDDISCO committed on Jul 14, 2023

Commit

e0b5928

1 Parent(s): 9eae9de

feat: changed model to "WizardCoder-15B-1.0.ggmlv3.q4_0" and increased max_tokens to 1024

Files changed (1) hide show

main.py CHANGED Viewed

@@ -15,7 +15,7 @@ from ctransformers import AutoModelForCausalLM
 from pydantic import BaseModel
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
-                                           model_file="WizardCoder-15B-1.0.ggmlv3.q5_0.bin",
                                            model_type="starcoder",
                                            threads=8)
 app = fastapi.FastAPI(title="🪄WizardCoder💫")
@@ -52,7 +52,7 @@ class Message(BaseModel):
 class ChatCompletionRequest(BaseModel):
     messages: List[Message]
-    max_tokens: int = 250
 @app.post("/v1/completions")
 async def completion(request: ChatCompletionRequestV0, response_mode=None):

 from pydantic import BaseModel
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
+                                           model_file="WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
                                            model_type="starcoder",
                                            threads=8)
 app = fastapi.FastAPI(title="🪄WizardCoder💫")
 class ChatCompletionRequest(BaseModel):
     messages: List[Message]
+    max_tokens: int = 1024
 @app.post("/v1/completions")
 async def completion(request: ChatCompletionRequestV0, response_mode=None):