oai

Sleeping

App Files Files Community

yangtb24 committed on 21 days ago

Commit

af14b51

verified ·

1 Parent(s): 0cfc696

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -660

app.py CHANGED Viewed

@@ -95,84 +95,6 @@ def get_credit_summary(api_key):
             logging.error(f"获取额度信息失败，API Key：{api_key}，错误信息：{e}")
             return None
-FREE_IMAGE_LIST = [
-    "stabilityai/stable-diffusion-3-5-large",
-    "black-forest-labs/FLUX.1-schnell",
-    "stabilityai/stable-diffusion-3-medium",
-    "stabilityai/stable-diffusion-xl-base-1.0",
-    "stabilityai/stable-diffusion-2-1"
-]
-def test_model_availability(api_key, model_name, model_type="chat"):
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-    if model_type == "image":
-        return model_name in FREE_IMAGE_LIST
-    try:
-        endpoint = EMBEDDINGS_ENDPOINT if model_type == "embedding" else TEST_MODEL_ENDPOINT
-        payload = (
-            {"model": model_name, "input": ["hi"]}
-            if model_type == "embedding"
-            else {"model": model_name, "messages": [{"role": "user", "content": "hi"}], "max_tokens": 5, "stream": False}
-        )
-        timeout = 10 if model_type == "embedding" else 5
-        response = session.post(
-            endpoint,
-            headers=headers,
-            json=payload,
-            timeout=timeout
-        )
-        return response.status_code in [200, 429]
-    except requests.exceptions.RequestException as e:
-        logging.error(
-            f"测试{model_type}模型 {model_name} 可用性失败，"
-            f"API Key：{api_key}，错误信息：{e}"
-        )
-        return False
-def process_image_url(image_url, response_format=None):
-    if not image_url:
-        return {"url": ""}
-    if response_format == "b64_json":
-        try:
-            response = session.get(image_url, stream=True)
-            response.raise_for_status()
-            image = Image.open(response.raw)
-            buffered = io.BytesIO()
-            image.save(buffered, format="PNG")
-            img_str = base64.b64encode(buffered.getvalue()).decode()
-            return {"b64_json": img_str}
-        except Exception as e:
-            logging.error(f"图片转base64失败: {e}")
-            return {"url": image_url}
-    return {"url": image_url}
-def create_base64_markdown_image(image_url):
-    try:
-        response = session.get(image_url, stream=True)
-        response.raise_for_status()
-        image = Image.open(BytesIO(response.content))
-        new_size = tuple(dim // 4 for dim in image.size)
-        resized_image = image.resize(new_size, Image.LANCZOS)
-        buffered = BytesIO()
-        resized_image.save(buffered, format="PNG")
-        base64_encoded = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        markdown_image_link = f"![](data:image/png;base64,{base64_encoded})"
-        logging.info("Created base64 markdown image link.")
-        return markdown_image_link
-    except Exception as e:
-        logging.error(f"Error creating markdown image: {e}")
-        return None
 def extract_user_content(messages):
     user_content = ""
     for message in messages:
@@ -247,6 +169,9 @@ def load_keys():
         key_status[status] = []
     keys_str = os.environ.get("KEYS")
     if not keys_str:
         logging.warning("环境变量 KEYS 未设置。")
         return
@@ -440,244 +365,6 @@ def list_models():
         "data": detailed_models
     })
-@app.route('/handsome/v1/dashboard/billing/usage', methods=['GET'])
-def billing_usage():
-    if not check_authorization(request):
-        return jsonify({"error": "Unauthorized"}), 401
-    daily_usage = []
-    return jsonify({
-        "object": "list",
-        "data": daily_usage,
-        "total_usage": 0
-    })
-@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
-def billing_subscription():
-    if not check_authorization(request):
-        return jsonify({"error": "Unauthorized"}), 401
-    keys = valid_keys_global + unverified_keys_global
-    total_balance = 0
-    with concurrent.futures.ThreadPoolExecutor(
-        max_workers=10000
-    ) as executor:
-        futures = [
-            executor.submit(get_credit_summary, key) for key in keys
-        ]
-        for future in concurrent.futures.as_completed(futures):
-            try:
-                credit_summary = future.result()
-                if credit_summary:
-                    total_balance += credit_summary.get("total_balance", 0)
-            except Exception as exc:
-                logging.error(f"获取额度信息生成异常: {exc}")
-    return jsonify({
-        "object": "billing_subscription",
-        "access_until": int(datetime(9999, 12, 31).timestamp()),
-        "soft_limit": 0,
-        "hard_limit": total_balance,
-        "system_hard_limit": total_balance,
-        "soft_limit_usd": 0,
-        "hard_limit_usd": total_balance,
-        "system_hard_limit_usd": total_balance
-    })
-@app.route('/handsome/v1/embeddings', methods=['POST'])
-def handsome_embeddings():
-    if not check_authorization(request):
-        return jsonify({"error": "Unauthorized"}), 401
-    data = request.get_json()
-    if not data or 'model' not in data:
-        return jsonify({"error": "Invalid request data"}), 400
-    if data['model'] not in models["embedding"]:
-        return jsonify({"error": "Invalid model"}), 400
-    model_name = data['model']
-    request_type = determine_request_type(
-        model_name,
-        models["embedding"],
-        models["free_embedding"]
-    )
-    api_key = select_key(request_type, model_name)
-    if not api_key:
-        return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-    try:
-        start_time = time.time()
-        response = requests.post(
-            EMBEDDINGS_ENDPOINT,
-            headers=headers,
-            json=data,
-            timeout=120
-        )
-        if response.status_code == 429:
-            return jsonify(response.json()), 429
-        response.raise_for_status()
-        end_time = time.time()
-        response_json = response.json()
-        total_time = end_time - start_time
-        try:
-            prompt_tokens = response_json["usage"]["prompt_tokens"]
-            embedding_data = response_json["data"]
-        except (KeyError, ValueError, IndexError) as e:
-            logging.error(
-                f"解析响应 JSON 失败: {e}, "
-                f"完整内容: {response_json}"
-            )
-            prompt_tokens = 0
-            embedding_data = []
-        logging.info(
-            f"使用的key: {api_key}, "
-            f"提示token: {prompt_tokens}, "
-            f"总共用时: {total_time:.4f}秒, "
-            f"使用的模型: {model_name}"
-        )
-        with data_lock:
-            request_timestamps.append(time.time())
-            token_counts.append(prompt_tokens)
-            request_timestamps_day.append(time.time())
-            token_counts_day.append(prompt_tokens)
-        return jsonify({
-            "object": "list",
-            "data": embedding_data,
-            "model": model_name,
-            "usage": {
-                "prompt_tokens": prompt_tokens,
-                "total_tokens": prompt_tokens
-            }
-        })
-    except requests.exceptions.RequestException as e:
-        return jsonify({"error": str(e)}), 500
-@app.route('/handsome/v1/images/generations', methods=['POST'])
-def handsome_images_generations():
-    if not check_authorization(request):
-        return jsonify({"error": "Unauthorized"}), 401
-    data = request.get_json()
-    if not data or 'model' not in data:
-        return jsonify({"error": "Invalid request data"}), 400
-    if data['model'] not in models["image"]:
-        return jsonify({"error": "Invalid model"}), 400
-    model_name = data.get('model')
-    request_type = determine_request_type(
-        model_name,
-        models["image"],
-        models["free_image"]
-    )
-    api_key = select_key(request_type, model_name)
-    if not api_key:
-        return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-    response_data = {}
-    if "stable-diffusion" in model_name or model_name in ["black-forest-labs/FLUX.1-schnell", "Pro/black-forest-labs/FLUX.1-schnell","black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-pro"]:
-        siliconflow_data = get_siliconflow_data(model_name, data)
-        try:
-            start_time = time.time()
-            response = requests.post(
-                IMAGE_ENDPOINT,
-                headers=headers,
-                json=siliconflow_data,
-                timeout=120
-            )
-            if response.status_code == 429:
-                return jsonify(response.json()), 429
-            response.raise_for_status()
-            end_time = time.time()
-            response_json = response.json()
-            total_time = end_time - start_time
-            try:
-                images = response_json.get("images", [])
-                openai_images = []
-                for item in images:
-                    if isinstance(item, dict) and "url" in item:
-                        image_url = item["url"]
-                        print(f"image_url: {image_url}")
-                        if data.get("response_format") == "b64_json":
-                           try:
-                                image_data = session.get(image_url, stream=True).raw
-                                image = Image.open(image_data)
-                                buffered = io.BytesIO()
-                                image.save(buffered, format="PNG")
-                                img_str = base64.b64encode(buffered.getvalue()).decode()
-                                openai_images.append({"b64_json": img_str})
-                           except Exception as e:
-                                logging.error(f"图片转base64失败: {e}")
-                                openai_images.append({"url": image_url})
-                        else:
-                            openai_images.append({"url": image_url})
-                    else:
-                        logging.error(f"无效的图片数据: {item}")
-                        openai_images.append({"url": item})
-                response_data = {
-                    "created": int(time.time()),
-                    "data": openai_images
-                }
-            except (KeyError, ValueError, IndexError) as e:
-                logging.error(
-                    f"解析响应 JSON 失败: {e}, "
-                    f"完整内容: {response_json}"
-                )
-                response_data = {
-                    "created": int(time.time()),
-                    "data": []
-                }
-            logging.info(
-                f"使用的key: {api_key}, "
-                f"总共用时: {total_time:.4f}秒, "
-                f"使用的模型: {model_name}"
-            )
-            with data_lock:
-                request_timestamps.append(time.time())
-                token_counts.append(0)
-                request_timestamps_day.append(time.time())
-                token_counts_day.append(0)
-            return jsonify(response_data)
-        except requests.exceptions.RequestException as e:
-            logging.error(f"请求转发异常: {e}")
-            return jsonify({"error": str(e)}), 500
-    else:
-        return jsonify({"error": "Unsupported model"}), 400
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
@@ -715,343 +402,84 @@ def handsome_chat_completions():
         "Content-Type": "application/json"
     }
-    if model_name in models["image"]:
-        if isinstance(data.get("messages"), list):
-            data = data.copy()
-            data["prompt"] = extract_user_content(data["messages"])
-        siliconflow_data = get_siliconflow_data(model_name, data)
-        try:
-            start_time = time.time()
-            response = requests.post(
-                IMAGE_ENDPOINT,
-                headers=headers,
-                json=siliconflow_data,
-                stream=data.get("stream", False)
-            )
-            if response.status_code == 429:
-                return jsonify(response.json()), 429
-            if data.get("stream", False):
-                def generate():
-                    try:
-                        response.raise_for_status()
-                        response_json = response.json()
-                        images = response_json.get("images", [])
-                        image_url = ""
-                        if images and isinstance(images[0], dict) and "url" in images[0]:
-                            image_url = images[0]["url"]
-                            logging.info(f"Extracted image URL: {image_url}")
-                        elif images and isinstance(images[0], str):
-                            image_url = images[0]
-                            logging.info(f"Extracted image URL: {image_url}")
-                        markdown_image_link = create_base64_markdown_image(image_url)
-                        if image_url:
-                            chunk_size = 8192
-                            for i in range(0, len(markdown_image_link), chunk_size):
-                                chunk = markdown_image_link[i:i + chunk_size]
-                                chunk_data = {
-                                    "id": f"chatcmpl-{uuid.uuid4()}",
-                                    "object": "chat.completion.chunk",
-                                    "created": int(time.time()),
-                                    "model": model_name,
-                                    "choices": [
-                                        {
-                                            "index": 0,
-                                            "delta": {
-                                                "role": "assistant",
-                                                "content": chunk
-                                            },
-                                            "finish_reason": None
-                                        }
-                                    ]
-                                }
-                                yield f"data: {json.dumps(chunk_data)}\n\n".encode('utf-8')
-                        else:
-                            chunk_data = {
-                                "id": f"chatcmpl-{uuid.uuid4()}",
-                                "object": "chat.completion.chunk",
-                                "created": int(time.time()),
-                                "model": model_name,
-                                "choices": [
-                                    {
-                                        "index": 0,
-                                        "delta": {
-                                            "role": "assistant",
-                                            "content": "Failed to generate image"
-                                        },
-                                        "finish_reason": None
-                                    }
-                                ]
-                            }
-                            yield f"data: {json.dumps(chunk_data)}\n\n".encode('utf-8')
-                        end_chunk_data = {
-                            "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion.chunk",
-                            "created": int(time.time()),
-                            "model": model_name,
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "delta": {},
-                                    "finish_reason": "stop"
-                                }
-                            ]
-                        }
-                        yield f"data: {json.dumps(end_chunk_data)}\n\n".encode('utf-8')
-                        with data_lock:
-                            request_timestamps.append(time.time())
-                            token_counts.append(0)
-                            request_timestamps_day.append(time.time())
-                            token_counts_day.append(0)
-                    except requests.exceptions.RequestException as e:
-                        logging.error(f"请求转发异常: {e}")
-                        error_chunk_data = {
-                            "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion.chunk",
-                            "created": int(time.time()),
-                            "model": model_name,
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "delta": {
-                                        "role": "assistant",
-                                        "content": f"Error: {str(e)}"
-                                    },
-                                    "finish_reason": None
-                                }
-                            ]
-                        }
-                        yield f"data: {json.dumps(error_chunk_data)}\n\n".encode('utf-8')
-                        end_chunk_data = {
-                                "id": f"chatcmpl-{uuid.uuid4()}",
-                                "object": "chat.completion.chunk",
-                                "created": int(time.time()),
-                                "model": model_name,
-                                "choices": [
-                                    {
-                                        "index": 0,
-                                        "delta": {},
-                                        "finish_reason": "stop"
-                                    }
-                                ]
-                            }
-                        yield f"data: {json.dumps(end_chunk_data)}\n\n".encode('utf-8')
-                    logging.info(
-                        f"使用的key: {api_key}, "
-                        f"使用的模型: {model_name}"
-                    )
-                    yield "data: [DONE]\n\n".encode('utf-8')
-                return Response(stream_with_context(generate()), content_type='text/event-stream')
-            else:
-                response.raise_for_status()
-                end_time = time.time()
-                response_json = response.json()
-                total_time = end_time - start_time
-                try:
-                    images = response_json.get("images", [])
-                    image_url = ""
-                    if images and isinstance(images[0], dict) and "url" in images[0]:
-                        image_url = images[0]["url"]
-                        logging.info(f"Extracted image URL: {image_url}")
-                    elif images and isinstance(images[0], str):
-                        image_url = images[0]
-                        logging.info(f"Extracted image URL: {image_url}")
-                    markdown_image_link = f"![image]({image_url})"
-                    response_data = {
-                        "id": f"chatcmpl-{uuid.uuid4()}",
-                        "object": "chat.completion",
-                        "created": int(time.time()),
-                        "model": model_name,
-                        "choices": [
-                            {
-                            "index": 0,
-                            "message": {
-                                "role": "assistant",
-                                "content": markdown_image_link if image_url else "Failed to generate image",
-                            },
-                            "finish_reason": "stop",
-                            }
-                        ],
-                    }
-                except (KeyError, ValueError, IndexError) as e:
-                    logging.error(
-                        f"解析响应 JSON 失败: {e}, "
-                        f"完整内容: {response_json}"
-                    )
-                    response_data = {
-                        "id": f"chatcmpl-{uuid.uuid4()}",
-                        "object": "chat.completion",
-                        "created": int(time.time()),
-                        "model": model_name,
-                        "choices": [
-                            {
-                            "index": 0,
-                            "message": {
-                                "role": "assistant",
-                                "content": "Failed to process image data",
-                            },
-                            "finish_reason": "stop",
-                            }
-                        ],
-                    }
-                logging.info(
-                    f"使用的key: {api_key}, "
-                    f"总共用时: {total_time:.4f}秒, "
-                    f"使用的模型: {model_name}"
-                )
-                with data_lock:
-                    request_timestamps.append(time.time())
-                    token_counts.append(0)
-                    request_timestamps_day.append(time.time())
-                    token_counts_day.append(0)
-                return jsonify(response_data)
-        except requests.exceptions.RequestException as e:
-            logging.error(f"请求转发异常: {e}")
-            return jsonify({"error": str(e)}), 500
-    else:
-        try:
-            start_time = time.time()
-            response = requests.post(
-                TEST_MODEL_ENDPOINT,
-                headers=headers,
-                json=data,
-                stream=data.get("stream", False)
-            )
-            if response.status_code == 429:
-                return jsonify(response.json()), 429
-            if data.get("stream", False):
-                def generate():
-                    first_chunk_time = None
-                    full_response_content = ""
-                    for chunk in response.iter_content(chunk_size=2048):
-                        if chunk:
-                            if first_chunk_time is None:
-                                first_chunk_time = time.time()
-                            full_response_content += chunk.decode("utf-8")
-                            yield chunk
-                    end_time = time.time()
-                    first_token_time = (
-                        first_chunk_time - start_time
-                        if first_chunk_time else 0
-                    )
-                    total_time = end_time - start_time
-                    prompt_tokens = 0
-                    completion_tokens = 0
-                    response_content = ""
-                    for line in full_response_content.splitlines():
-                        if line.startswith("data:"):
-                            line = line[5:].strip()
-                            if line == "[DONE]":
-                                continue
-                            try:
-                                response_json = json.loads(line)
-                                if (
-                                    "usage" in response_json and
-                                    "completion_tokens" in response_json["usage"]
-                                ):
-                                    completion_tokens = response_json[
-                                        "usage"
-                                    ]["completion_tokens"]
-                                if (
-                                    "choices" in response_json and
-                                    len(response_json["choices"]) > 0 and
-                                    "delta" in response_json["choices"][0] and
-                                    "content" in response_json[
-                                        "choices"
-                                    ][0]["delta"]
-                                ):
-                                    response_content += response_json[
-                                        "choices"
-                                    ][0]["delta"]["content"]
-                                if (
-                                    "usage" in response_json and
-                                    "prompt_tokens" in response_json["usage"]
-                                ):
-                                    prompt_tokens = response_json[
-                                        "usage"
-                                    ]["prompt_tokens"]
-                            except (
-                                KeyError,
-                                ValueError,
-                                IndexError
-                            ) as e:
-                                logging.error(
-                                    f"解析流式响应单行 JSON 失败: {e}, "
-                                    f"行内容: {line}"
-                                )
-                    user_content = extract_user_content(data.get("messages", []))
-                    user_content_replaced = user_content.replace(
-                        '\n', '\\n'
-                    ).replace('\r', '\\n')
-                    response_content_replaced = response_content.replace(
-                        '\n', '\\n'
-                    ).replace('\r', '\\n')
-                    logging.info(
-                        f"使用的key: {api_key}, "
-                        f"提示token: {prompt_tokens}, "
-                        f"输出token: {completion_tokens}, "
-                        f"首字用时: {first_token_time:.4f}秒, "
-                        f"总共用时: {total_time:.4f}秒, "
-                        f"使用的模型: {model_name}, "
-                        f"用户的内容: {user_content_replaced}, "
-                        f"输出的内容: {response_content_replaced}"
-                    )
-                    with data_lock:
-                        request_timestamps.append(time.time())
-                        token_counts.append(prompt_tokens+completion_tokens)
-                        request_timestamps_day.append(time.time())
-                        token_counts_day.append(prompt_tokens+completion_tokens)
-                return Response(
-                    stream_with_context(generate()),
-                    content_type=response.headers['Content-Type']
-                )
-            else:
-                response.raise_for_status()
                 end_time = time.time()
-                response_json = response.json()
                 total_time = end_time - start_time
-                try:
-                    prompt_tokens = response_json["usage"]["prompt_tokens"]
-                    completion_tokens = response_json[
-                        "usage"
-                    ]["completion_tokens"]
-                    response_content = response_json[
-                        "choices"
-                    ][0]["message"]["content"]
-                except (KeyError, ValueError, IndexError) as e:
-                    logging.error(
-                        f"解析非流式响应 JSON 失败: {e}, "
-                        f"完整内容: {response_json}"
-                    )
-                    prompt_tokens = 0
-                    completion_tokens = 0
-                    response_content = ""
                 user_content = extract_user_content(data.get("messages", []))
@@ -1066,29 +494,82 @@ def handsome_chat_completions():
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
                     f"输出token: {completion_tokens}, "
-                    f"首字用时: 0, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
                     f"用户的内容: {user_content_replaced}, "
                     f"输出的内容: {response_content_replaced}"
                 )
                 with data_lock:
                     request_timestamps.append(time.time())
-                    if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
-                        token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
-                    else:
-                        token_counts.append(0)
                     request_timestamps_day.append(time.time())
-                    if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
-                        token_counts_day.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
-                    else:
-                        token_counts_day.append(0)
-                return jsonify(response_json)
-        except requests.exceptions.RequestException as e:
-            logging.error(f"请求转发异常: {e}")
-            return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")

             logging.error(f"获取额度信息失败，API Key：{api_key}，错误信息：{e}")
             return None
 def extract_user_content(messages):
     user_content = ""
     for message in messages:
         key_status[status] = []
     keys_str = os.environ.get("KEYS")
+    logging.info(f"The value of KEYS environment variable is: {keys_str}")
     if not keys_str:
         logging.warning("环境变量 KEYS 未设置。")
         return
         "data": detailed_models
     })
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
         "Content-Type": "application/json"
     }
+    try:
+        start_time = time.time()
+        response = requests.post(
+            TEST_MODEL_ENDPOINT,
+            headers=headers,
+            json=data,
+            stream=data.get("stream", False)
+        )
+        if response.status_code == 429:
+            return jsonify(response.json()), 429
+        if data.get("stream", False):
+            def generate():
+                first_chunk_time = None
+                full_response_content = ""
+                for chunk in response.iter_content(chunk_size=2048):
+                    if chunk:
+                        if first_chunk_time is None:
+                            first_chunk_time = time.time()
+                        full_response_content += chunk.decode("utf-8")
+                        yield chunk
                 end_time = time.time()
+                first_token_time = (
+                    first_chunk_time - start_time
+                    if first_chunk_time else 0
+                )
                 total_time = end_time - start_time
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+                for line in full_response_content.splitlines():
+                    if line.startswith("data:"):
+                        line = line[5:].strip()
+                        if line == "[DONE]":
+                            continue
+                        try:
+                            response_json = json.loads(line)
+                            if (
+                                "usage" in response_json and
+                                "completion_tokens" in response_json["usage"]
+                            ):
+                                completion_tokens = response_json[
+                                    "usage"
+                                ]["completion_tokens"]
+                            if (
+                                "choices" in response_json and
+                                len(response_json["choices"]) > 0 and
+                                "delta" in response_json["choices"][0] and
+                                "content" in response_json[
+                                    "choices"
+                                ][0]["delta"]
+                            ):
+                                response_content += response_json[
+                                    "choices"
+                                ][0]["delta"]["content"]
+                            if (
+                                "usage" in response_json and
+                                "prompt_tokens" in response_json["usage"]
+                            ):
+                                prompt_tokens = response_json[
+                                    "usage"
+                                ]["prompt_tokens"]
+                        except (
+                            KeyError,
+                            ValueError,
+                            IndexError
+                        ) as e:
+                            logging.error(
+                                f"解析流式响应单行 JSON 失败: {e}, "
+                                f"行内容: {line}"
+                            )
                 user_content = extract_user_content(data.get("messages", []))
                     f"使用的key: {api_key}, "
                     f"提示token: {prompt_tokens}, "
                     f"输出token: {completion_tokens}, "
+                    f"首字用时: {first_token_time:.4f}秒, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
                     f"用户的内容: {user_content_replaced}, "
                     f"输出的内容: {response_content_replaced}"
                 )
                 with data_lock:
                     request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens+completion_tokens)
                     request_timestamps_day.append(time.time())
+                    token_counts_day.append(prompt_tokens+completion_tokens)
+            return Response(
+                stream_with_context(generate()),
+                content_type=response.headers['Content-Type']
+            )
+        else:
+            response.raise_for_status()
+            end_time = time.time()
+            response_json = response.json()
+            total_time = end_time - start_time
+            try:
+                prompt_tokens = response_json["usage"]["prompt_tokens"]
+                completion_tokens = response_json[
+                    "usage"
+                ]["completion_tokens"]
+                response_content = response_json[
+                    "choices"
+                ][0]["message"]["content"]
+            except (KeyError, ValueError, IndexError) as e:
+                logging.error(
+                    f"解析非流式响应 JSON 失败: {e}, "
+                    f"完整内容: {response_json}"
+                )
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+            user_content = extract_user_content(data.get("messages", []))
+            user_content_replaced = user_content.replace(
+                '\n', '\\n'
+            ).replace('\r', '\\n')
+            response_content_replaced = response_content.replace(
+                '\n', '\\n'
+            ).replace('\r', '\\n')
+            logging.info(
+                f"使用的key: {api_key}, "
+                f"提示token: {prompt_tokens}, "
+                f"输出token: {completion_tokens}, "
+                f"首字用时: 0, "
+                f"总共用时: {total_time:.4f}秒, "
+                f"使用的模型: {model_name}, "
+                f"用户的内容: {user_content_replaced}, "
+                f"输出的内容: {response_content_replaced}"
+            )
+            with data_lock:
+                request_timestamps.append(time.time())
+                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
+                    token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
+                else:
+                    token_counts.append(0)
+                request_timestamps_day.append(time.time())
+                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
+                    token_counts_day.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
+                else:
+                    token_counts_day.append(0)
+            return jsonify(response_json)
+    except requests.exceptions.RequestException as e:
+        logging.error(f"请求转发异常: {e}")
+        return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")