Spaces:
Build error
Build error
feat: change model file to "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" and increase default max_tokens to 1024
Browse files
main.py
CHANGED
@@ -15,7 +15,7 @@ from ctransformers import AutoModelForCausalLM
|
|
15 |
from pydantic import BaseModel
|
16 |
|
17 |
llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
|
18 |
-
model_file="WizardCoder-15B-1.0.ggmlv3.
|
19 |
model_type="starcoder",
|
20 |
threads=8)
|
21 |
app = fastapi.FastAPI(title="🪄WizardCoder💫")
|
@@ -52,7 +52,7 @@ class Message(BaseModel):
|
|
52 |
|
53 |
class ChatCompletionRequest(BaseModel):
|
54 |
messages: List[Message]
|
55 |
-
max_tokens: int =
|
56 |
|
57 |
@app.post("/v1/completions")
|
58 |
async def completion(request: ChatCompletionRequestV0, response_mode=None):
|
|
|
15 |
from pydantic import BaseModel
|
16 |
|
17 |
llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
|
18 |
+
model_file="WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
|
19 |
model_type="starcoder",
|
20 |
threads=8)
|
21 |
app = fastapi.FastAPI(title="🪄WizardCoder💫")
|
|
|
52 |
|
53 |
class ChatCompletionRequest(BaseModel):
|
54 |
messages: List[Message]
|
55 |
+
max_tokens: int = 1024
|
56 |
|
57 |
@app.post("/v1/completions")
|
58 |
async def completion(request: ChatCompletionRequestV0, response_mode=None):
|