Spaces:
Running
Running
import os,time,logging,requests,json,uuid,concurrent.futures,threading,base64,io | |
from io import BytesIO | |
from itertools import chain | |
from PIL import Image | |
from datetime import datetime | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from flask import Flask, request, jsonify, Response, stream_with_context | |
from werkzeug.middleware.proxy_fix import ProxyFix | |
from requests.adapters import HTTPAdapter | |
from requests.packages.urllib3.util.retry import Retry | |
os.environ['TZ'] = 'Asia/Shanghai' | |
time.tzset() | |
logging.basicConfig(level=logging.INFO, | |
format='%(asctime)s - %(levelname)s - %(message)s') | |
API_ENDPOINT = "https://api-st.siliconflow.cn/v1/user/info" | |
TEST_MODEL_ENDPOINT = "https://api-st.siliconflow.cn/v1/chat/completions" | |
MODELS_ENDPOINT = "https://api-st.siliconflow.cn/v1/models" | |
EMBEDDINGS_ENDPOINT = "https://api-st.siliconflow.cn/v1/embeddings" | |
IMAGE_ENDPOINT = "https://api-st.siliconflow.cn/v1/images/generations" | |
def requests_session_with_retries( | |
retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504) | |
): | |
session = requests.Session() | |
retry = Retry( | |
total=retries, | |
read=retries, | |
connect=retries, | |
backoff_factor=backoff_factor, | |
status_forcelist=status_forcelist, | |
) | |
adapter = HTTPAdapter( | |
max_retries=retry, | |
pool_connections=1000, | |
pool_maxsize=10000, | |
pool_block=False | |
) | |
session.mount("http://", adapter) | |
session.mount("https://", adapter) | |
return session | |
session = requests_session_with_retries() | |
app = Flask(__name__) | |
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1) | |
models = { | |
"text": [], | |
"free_text": [], | |
"embedding": [], | |
"free_embedding": [], | |
"image": [], | |
"free_image": [] | |
} | |
key_status = { | |
"invalid": [], | |
"free": [], | |
"unverified": [], | |
"valid": [] | |
} | |
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10000) | |
model_key_indices = {} | |
request_timestamps = [] | |
token_counts = [] | |
request_timestamps_day = [] | |
token_counts_day = [] | |
data_lock = threading.Lock() | |
def get_credit_summary(api_key): | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
max_retries = 3 | |
for attempt in range(max_retries): | |
try: | |
response = session.get(API_ENDPOINT, headers=headers, timeout=2) | |
response.raise_for_status() | |
data = response.json().get("data", {}) | |
total_balance = data.get("totalBalance", 0) | |
logging.info(f"获取额度,API Key:{api_key},当前额度: {total_balance}") | |
return {"total_balance": float(total_balance)} | |
except requests.exceptions.Timeout as e: | |
logging.error(f"获取额度信息失败,API Key:{api_key},尝试次数:{attempt+1}/{max_retries},错误信息:{e} (Timeout)") | |
if attempt >= max_retries - 1: | |
logging.error(f"获取额度信息失败,API Key:{api_key},所有重试次数均已失败 (Timeout)") | |
except requests.exceptions.RequestException as e: | |
logging.error(f"获取额度信息失败,API Key:{api_key},错误信息:{e}") | |
return None | |
FREE_MODEL_TEST_KEY = ( | |
"sk-bmjbjzleaqfgtqfzmcnsbagxrlohriadnxqrzfocbizaxukw" | |
) | |
FREE_IMAGE_LIST = [ | |
"stabilityai/stable-diffusion-3-5-large", | |
"black-forest-labs/FLUX.1-schnell", | |
"stabilityai/stable-diffusion-3-medium", | |
"stabilityai/stable-diffusion-xl-base-1.0", | |
"stabilityai/stable-diffusion-2-1" | |
] | |
def test_model_availability(api_key, model_name, model_type="chat"): | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
if model_type == "image": | |
return model_name in FREE_IMAGE_LIST | |
try: | |
endpoint = EMBEDDINGS_ENDPOINT if model_type == "embedding" else TEST_MODEL_ENDPOINT | |
payload = ( | |
{"model": model_name, "input": ["hi"]} | |
if model_type == "embedding" | |
else {"model": model_name, "messages": [{"role": "user", "content": "hi"}], "max_tokens": 5, "stream": False} | |
) | |
timeout = 10 if model_type == "embedding" else 5 | |
response = session.post( | |
endpoint, | |
headers=headers, | |
json=payload, | |
timeout=timeout | |
) | |
return response.status_code in [200, 429] | |
except requests.exceptions.RequestException as e: | |
logging.error( | |
f"测试{model_type}模型 {model_name} 可用性失败," | |
f"API Key:{api_key},错误信息:{e}" | |
) | |
return False | |
def process_image_url(image_url, response_format=None): | |
if not image_url: | |
return {"url": ""} | |
if response_format == "b64_json": | |
try: | |
response = session.get(image_url, stream=True) | |
response.raise_for_status() | |
image = Image.open(response.raw) | |
buffered = io.BytesIO() | |
image.save(buffered, format="PNG") | |
img_str = base64.b64encode(buffered.getvalue()).decode() | |
return {"b64_json": img_str} | |
except Exception as e: | |
logging.error(f"图片转base64失败: {e}") | |
return {"url": image_url} | |
return {"url": image_url} | |
def create_base64_markdown_image(image_url): | |
try: | |
response = session.get(image_url, stream=True) | |
response.raise_for_status() | |
image = Image.open(BytesIO(response.content)) | |
new_size = tuple(dim // 4 for dim in image.size) | |
resized_image = image.resize(new_size, Image.LANCZOS) | |
buffered = BytesIO() | |
resized_image.save(buffered, format="PNG") | |
base64_encoded = base64.b64encode(buffered.getvalue()).decode('utf-8') | |
markdown_image_link = f"![](data:image/png;base64,{base64_encoded})" | |
logging.info("Created base64 markdown image link.") | |
return markdown_image_link | |
except Exception as e: | |
logging.error(f"Error creating markdown image: {e}") | |
return None | |
def extract_user_content(messages): | |
user_content = "" | |
for message in messages: | |
if message["role"] == "user": | |
if isinstance(message["content"], str): | |
user_content += message["content"] + " " | |
elif isinstance(message["content"], list): | |
for item in message["content"]: | |
if isinstance(item, dict) and item.get("type") == "text": | |
user_content += item.get("text", "") + " " | |
return user_content.strip() | |
def get_siliconflow_data(model_name, data): | |
siliconflow_data = { | |
"model": model_name, | |
"prompt": data.get("prompt") or "", | |
} | |
if model_name == "black-forest-labs/FLUX.1-pro": | |
siliconflow_data.update({ | |
"width": max(256, min(1440, (data.get("width", 1024) // 32) * 32)), | |
"height": max(256, min(1440, (data.get("height", 768) // 32) * 32)), | |
"prompt_upsampling": data.get("prompt_upsampling", False), | |
"image_prompt": data.get("image_prompt"), | |
"steps": max(1, min(50, data.get("steps", 20))), | |
"guidance": max(1.5, min(5, data.get("guidance", 3))), | |
"safety_tolerance": max(0, min(6, data.get("safety_tolerance", 2))), | |
"interval": max(1, min(4, data.get("interval", 2))), | |
"output_format": data.get("output_format", "png") | |
}) | |
seed = data.get("seed") | |
if isinstance(seed, int) and 0 < seed < 9999999999: | |
siliconflow_data["seed"] = seed | |
else: | |
siliconflow_data.update({ | |
"image_size": data.get("image_size", "1024x1024"), | |
"prompt_enhancement": data.get("prompt_enhancement", False) | |
}) | |
seed = data.get("seed") | |
if isinstance(seed, int) and 0 < seed < 9999999999: | |
siliconflow_data["seed"] = seed | |
if model_name not in ["black-forest-labs/FLUX.1-schnell", "Pro/black-forest-labs/FLUX.1-schnell"]: | |
siliconflow_data.update({ | |
"batch_size": max(1, min(4, data.get("n", 1))), | |
"num_inference_steps": max(1, min(50, data.get("steps", 20))), | |
"guidance_scale": max(0, min(100, data.get("guidance_scale", 7.5))), | |
"negative_prompt": data.get("negative_prompt") | |
}) | |
valid_sizes = ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024", "960x1280", "720x1440", "720x1280"] | |
if "image_size" in siliconflow_data and siliconflow_data["image_size"] not in valid_sizes: | |
siliconflow_data["image_size"] = "1024x1024" | |
return siliconflow_data | |
def refresh_models(): | |
global models | |
models["text"] = get_all_models(FREE_MODEL_TEST_KEY, "chat") | |
models["embedding"] = get_all_models(FREE_MODEL_TEST_KEY, "embedding") | |
models["image"] = get_all_models(FREE_MODEL_TEST_KEY, "text-to-image") | |
models["free_text"] = [] | |
models["free_embedding"] = [] | |
models["free_image"] = [] | |
ban_models = [] | |
ban_models_str = os.environ.get("BAN_MODELS") | |
if ban_models_str: | |
try: | |
ban_models = json.loads(ban_models_str) | |
if not isinstance(ban_models, list): | |
logging.warning("环境变量 BAN_MODELS 格式不正确,应为 JSON 数组。") | |
ban_models = [] | |
except json.JSONDecodeError: | |
logging.warning("环境变量 BAN_MODELS JSON 解析失败,请检查格式。") | |
models["text"] = [model for model in models["text"] if model not in ban_models] | |
models["embedding"] = [model for model in models["embedding"] if model not in ban_models] | |
models["image"] = [model for model in models["image"] if model not in ban_models] | |
model_types = [ | |
("text", "chat"), | |
("embedding", "embedding"), | |
("image", "image") | |
] | |
for model_type, test_type in model_types: | |
with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as executor: | |
future_to_model = { | |
executor.submit( | |
test_model_availability, | |
FREE_MODEL_TEST_KEY, | |
model, | |
test_type | |
): model for model in models[model_type] | |
} | |
for future in concurrent.futures.as_completed(future_to_model): | |
model = future_to_model[future] | |
try: | |
is_free = future.result() | |
if is_free: | |
models[f"free_{model_type}"].append(model) | |
except Exception as exc: | |
logging.error(f"{model_type}模型 {model} 测试生成异常: {exc}") | |
for model_type in ["text", "embedding", "image"]: | |
logging.info(f"所有{model_type}模型列表:{models[model_type]}") | |
logging.info(f"免费{model_type}模型列表:{models[f'free_{model_type}']}") | |
def load_keys(): | |
global key_status | |
for status in key_status: | |
key_status[status] = [] | |
keys_str = os.environ.get("KEYS") | |
if not keys_str: | |
logging.warning("环境变量 KEYS 未设置。") | |
return | |
test_model = os.environ.get("TEST_MODEL", "Pro/google/gemma-2-9b-it") | |
unique_keys = list(set(key.strip() for key in keys_str.split(','))) | |
os.environ["KEYS"] = ','.join(unique_keys) | |
logging.info(f"加载的 keys:{unique_keys}") | |
def process_key_with_logging(key): | |
try: | |
key_type = process_key(key, test_model) | |
if key_type in key_status: | |
key_status[key_type].append(key) | |
return key_type | |
except Exception as exc: | |
logging.error(f"处理 KEY {key} 生成异常: {exc}") | |
return "invalid" | |
with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as executor: | |
futures = [executor.submit(process_key_with_logging, key) for key in unique_keys] | |
concurrent.futures.wait(futures) | |
for status, keys in key_status.items(): | |
logging.info(f"{status.capitalize()} KEYS: {keys}") | |
global invalid_keys_global, free_keys_global, unverified_keys_global, valid_keys_global | |
invalid_keys_global = key_status["invalid"] | |
free_keys_global = key_status["free"] | |
unverified_keys_global = key_status["unverified"] | |
valid_keys_global = key_status["valid"] | |
def process_key(key, test_model): | |
credit_summary = get_credit_summary(key) | |
if credit_summary is None: | |
return "invalid" | |
else: | |
total_balance = credit_summary.get("total_balance", 0) | |
if total_balance <= 0.03: | |
return "free" | |
else: | |
if test_model_availability(key, test_model): | |
return "valid" | |
else: | |
return "unverified" | |
def get_all_models(api_key, sub_type): | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
try: | |
response = session.get( | |
MODELS_ENDPOINT, | |
headers=headers, | |
params={"sub_type": sub_type} | |
) | |
response.raise_for_status() | |
data = response.json() | |
if ( | |
isinstance(data, dict) and | |
'data' in data and | |
isinstance(data['data'], list) | |
): | |
return [ | |
model.get("id") for model in data["data"] | |
if isinstance(model, dict) and "id" in model | |
] | |
else: | |
logging.error("获取模型列表失败:响应数据格式不正确") | |
return [] | |
except requests.exceptions.RequestException as e: | |
logging.error( | |
f"获取模型列表失败," | |
f"API Key:{api_key},错误信息:{e}" | |
) | |
return [] | |
except (KeyError, TypeError) as e: | |
logging.error( | |
f"解析模型列表失败," | |
f"API Key:{api_key},错误信息:{e}" | |
) | |
return [] | |
def determine_request_type(model_name, model_list, free_model_list): | |
if model_name in free_model_list: | |
return "free" | |
elif model_name in model_list: | |
return "paid" | |
else: | |
return "unknown" | |
def select_key(request_type, model_name): | |
if request_type == "free": | |
available_keys = ( | |
free_keys_global + | |
unverified_keys_global + | |
valid_keys_global | |
) | |
elif request_type == "paid": | |
available_keys = unverified_keys_global + valid_keys_global | |
else: | |
available_keys = ( | |
free_keys_global + | |
unverified_keys_global + | |
valid_keys_global | |
) | |
if not available_keys: | |
return None | |
current_index = model_key_indices.get(model_name, 0) | |
for _ in range(len(available_keys)): | |
key = available_keys[current_index % len(available_keys)] | |
current_index += 1 | |
if key_is_valid(key, request_type): | |
model_key_indices[model_name] = current_index | |
return key | |
else: | |
logging.warning( | |
f"KEY {key} 无效或达到限制,尝试下一个 KEY" | |
) | |
model_key_indices[model_name] = 0 | |
return None | |
def key_is_valid(key, request_type): | |
if request_type == "invalid": | |
return False | |
credit_summary = get_credit_summary(key) | |
if credit_summary is None: | |
return False | |
total_balance = credit_summary.get("total_balance", 0) | |
if request_type == "free": | |
return True | |
elif request_type == "paid" or request_type == "unverified": | |
return total_balance > 0 | |
else: | |
return False | |
def check_authorization(request): | |
authorization_key = os.environ.get("AUTHORIZATION_KEY") | |
if not authorization_key: | |
logging.warning("环境变量 AUTHORIZATION_KEY 未设置,此时无需鉴权即可使用,建议进行设置后再使用。") | |
return True | |
auth_header = request.headers.get('Authorization') | |
if not auth_header: | |
logging.warning("请求头中缺少 Authorization 字段。") | |
return False | |
if auth_header != f"Bearer {authorization_key}": | |
logging.warning(f"无效的 Authorization 密钥:{auth_header}") | |
return False | |
return True | |
scheduler = BackgroundScheduler() | |
scheduler.add_job(load_keys, 'interval', hours=1) | |
scheduler.remove_all_jobs() | |
scheduler.add_job(refresh_models, 'interval', hours=1) | |
def index(): | |
current_time = time.time() | |
one_minute_ago = current_time - 60 | |
one_day_ago = current_time - 86400 | |
with data_lock: | |
while request_timestamps and request_timestamps[0] < one_minute_ago: | |
request_timestamps.pop(0) | |
token_counts.pop(0) | |
rpm = len(request_timestamps) | |
tpm = sum(token_counts) | |
with data_lock: | |
while request_timestamps_day and request_timestamps_day[0] < one_day_ago: | |
request_timestamps_day.pop(0) | |
token_counts_day.pop(0) | |
rpd = len(request_timestamps_day) | |
tpd = sum(token_counts_day) | |
return jsonify({"rpm": rpm, "tpm": tpm, "rpd": rpd, "tpd": tpd}) | |
def list_models(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
detailed_models = [] | |
all_models = chain( | |
models["text"], | |
models["embedding"], | |
models["image"] | |
) | |
for model in all_models: | |
model_data = { | |
"id": model, | |
"object": "model", | |
"created": 1678888888, | |
"owned_by": "openai", | |
"permission": [], | |
"root": model, | |
"parent": None | |
} | |
detailed_models.append(model_data) | |
if "DeepSeek-R1" in model: | |
detailed_models.append({ | |
"id": model + "-thinking", | |
"object": "model", | |
"created": 1678888888, | |
"owned_by": "openai", | |
"permission": [], | |
"root": model + "-thinking", | |
"parent": None | |
}) | |
detailed_models.append({ | |
"id": model + "-openwebui", | |
"object": "model", | |
"created": 1678888888, | |
"owned_by": "openai", | |
"permission": [], | |
"root": model + "-openwebui", | |
"parent": None | |
}) | |
return jsonify({ | |
"success": True, | |
"data": detailed_models | |
}) | |
def billing_usage(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
daily_usage = [] | |
return jsonify({ | |
"object": "list", | |
"data": daily_usage, | |
"total_usage": 0 | |
}) | |
def billing_subscription(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
keys = valid_keys_global + unverified_keys_global | |
total_balance = 0 | |
with concurrent.futures.ThreadPoolExecutor( | |
max_workers=10000 | |
) as executor: | |
futures = [ | |
executor.submit(get_credit_summary, key) for key in keys | |
] | |
for future in concurrent.futures.as_completed(futures): | |
try: | |
credit_summary = future.result() | |
if credit_summary: | |
total_balance += credit_summary.get("total_balance", 0) | |
except Exception as exc: | |
logging.error(f"获取额度信息生成异常: {exc}") | |
return jsonify({ | |
"object": "billing_subscription", | |
"access_until": int(datetime(9999, 12, 31).timestamp()), | |
"soft_limit": 0, | |
"hard_limit": total_balance, | |
"system_hard_limit": total_balance, | |
"soft_limit_usd": 0, | |
"hard_limit_usd": total_balance, | |
"system_hard_limit_usd": total_balance | |
}) | |
def handsome_embeddings(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
data = request.get_json() | |
if not data or 'model' not in data: | |
return jsonify({"error": "Invalid request data"}), 400 | |
if data['model'] not in models["embedding"]: | |
return jsonify({"error": "Invalid model"}), 400 | |
model_name = data['model'] | |
request_type = determine_request_type( | |
model_name, | |
models["embedding"], | |
models["free_embedding"] | |
) | |
api_key = select_key(request_type, model_name) | |
if not api_key: | |
return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429 | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
try: | |
start_time = time.time() | |
response = requests.post( | |
EMBEDDINGS_ENDPOINT, | |
headers=headers, | |
json=data, | |
timeout=120 | |
) | |
if response.status_code == 429: | |
return jsonify(response.json()), 429 | |
response.raise_for_status() | |
end_time = time.time() | |
response_json = response.json() | |
total_time = end_time - start_time | |
try: | |
prompt_tokens = response_json["usage"]["prompt_tokens"] | |
embedding_data = response_json["data"] | |
except (KeyError, ValueError, IndexError) as e: | |
logging.error( | |
f"解析响应 JSON 失败: {e}, " | |
f"完整内容: {response_json}" | |
) | |
prompt_tokens = 0 | |
embedding_data = [] | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(prompt_tokens) | |
request_timestamps_day.append(time.time()) | |
token_counts_day.append(prompt_tokens) | |
return jsonify({ | |
"object": "list", | |
"data": embedding_data, | |
"model": model_name, | |
"usage": { | |
"prompt_tokens": prompt_tokens, | |
"total_tokens": prompt_tokens | |
} | |
}) | |
except requests.exceptions.RequestException as e: | |
return jsonify({"error": str(e)}), 500 | |
def handsome_images_generations(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
data = request.get_json() | |
if not data or 'model' not in data: | |
return jsonify({"error": "Invalid request data"}), 400 | |
if data['model'] not in models["image"]: | |
return jsonify({"error": "Invalid model"}), 400 | |
model_name = data.get('model') | |
request_type = determine_request_type( | |
model_name, | |
models["image"], | |
models["free_image"] | |
) | |
api_key = select_key(request_type, model_name) | |
if not api_key: | |
return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429 | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
response_data = {} | |
if "stable-diffusion" in model_name or model_name in ["black-forest-labs/FLUX.1-schnell", "Pro/black-forest-labs/FLUX.1-schnell","black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-pro"]: | |
siliconflow_data = get_siliconflow_data(model_name, data) | |
try: | |
start_time = time.time() | |
response = requests.post( | |
IMAGE_ENDPOINT, | |
headers=headers, | |
json=siliconflow_data, | |
timeout=120 | |
) | |
if response.status_code == 429: | |
return jsonify(response.json()), 429 | |
response.raise_for_status() | |
end_time = time.time() | |
response_json = response.json() | |
total_time = end_time - start_time | |
try: | |
images = response_json.get("images", []) | |
openai_images = [] | |
for item in images: | |
if isinstance(item, dict) and "url" in item: | |
image_url = item["url"] | |
print(f"image_url: {image_url}") | |
if data.get("response_format") == "b64_json": | |
try: | |
image_data = session.get(image_url, stream=True).raw | |
image = Image.open(image_data) | |
buffered = io.BytesIO() | |
image.save(buffered, format="PNG") | |
img_str = base64.b64encode(buffered.getvalue()).decode() | |
openai_images.append({"b64_json": img_str}) | |
except Exception as e: | |
logging.error(f"图片转base64失败: {e}") | |
openai_images.append({"url": image_url}) | |
else: | |
openai_images.append({"url": image_url}) | |
else: | |
logging.error(f"无效的图片数据: {item}") | |
openai_images.append({"url": item}) | |
response_data = { | |
"created": int(time.time()), | |
"data": openai_images | |
} | |
except (KeyError, ValueError, IndexError) as e: | |
logging.error( | |
f"解析响应 JSON 失败: {e}, " | |
f"完整内容: {response_json}" | |
) | |
response_data = { | |
"created": int(time.time()), | |
"data": [] | |
} | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(0) | |
request_timestamps_day.append(time.time()) | |
token_counts_day.append(0) | |
return jsonify(response_data) | |
except requests.exceptions.RequestException as e: | |
logging.error(f"请求转发异常: {e}") | |
return jsonify({"error": str(e)}), 500 | |
else: | |
return jsonify({"error": "Unsupported model"}), 400 | |
def handsome_chat_completions(): | |
if not check_authorization(request): | |
return jsonify({"error": "Unauthorized"}), 401 | |
data = request.get_json() | |
if not data or 'model' not in data: | |
return jsonify({"error": "Invalid request data"}), 400 | |
model_name = data['model'] | |
if model_name not in models["text"] and model_name not in models["image"]: | |
if "DeepSeek-R1" in model_name and (model_name.endswith("-openwebui") or model_name.endswith("-thinking")): | |
pass | |
else: | |
return jsonify({"error": "Invalid model"}), 400 | |
model_realname = model_name.replace("-thinking", "").replace("-openwebui", "") | |
request_type = determine_request_type( | |
model_realname, | |
models["text"] + models["image"], | |
models["free_text"] + models["free_image"] | |
) | |
api_key = select_key(request_type, model_name) | |
if not api_key: | |
return jsonify( | |
{ | |
"error": ( | |
"No available API key for this " | |
"request type or all keys have " | |
"reached their limits" | |
) | |
} | |
), 429 | |
headers = { | |
"Authorization": f"Bearer {api_key}", | |
"Content-Type": "application/json" | |
} | |
if "DeepSeek-R1" in model_name and ("thinking" in model_name or "openwebui" in model_name): | |
data['model'] = model_realname | |
start_time = time.time() | |
response = requests.post( | |
TEST_MODEL_ENDPOINT, | |
headers=headers, | |
json=data, | |
stream=data.get("stream", False), | |
timeout=120 | |
) | |
if response.status_code == 429: | |
return jsonify(response.json()), 429 | |
if data.get("stream", False): | |
def generate(): | |
if model_name.endswith("-openwebui"): | |
first_chunk_time = None | |
full_response_content = "" | |
reasoning_content_accumulated = "" | |
content_accumulated = "" | |
first_reasoning_chunk = True | |
for chunk in response.iter_lines(): | |
if chunk: | |
if first_chunk_time is None: | |
first_chunk_time = time.time() | |
full_response_content += chunk.decode("utf-8") | |
for line in chunk.decode("utf-8").splitlines(): | |
if line.startswith("data:"): | |
try: | |
chunk_json = json.loads(line.lstrip("data: ").strip()) | |
if "choices" in chunk_json and len(chunk_json["choices"]) > 0: | |
delta = chunk_json["choices"][0].get("delta", {}) | |
if delta.get("reasoning_content") is not None: | |
reasoning_chunk = delta["reasoning_content"] | |
if first_reasoning_chunk: | |
think_chunk = f"<" | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n" | |
think_chunk = f"think" | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n" | |
think_chunk = f">\n" | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n" | |
first_reasoning_chunk = False | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n" | |
if delta.get("content") is not None: | |
if not first_reasoning_chunk: | |
reasoning_chunk = f"\n</think>\n" | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n" | |
first_reasoning_chunk = True | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n" | |
except (KeyError, ValueError, json.JSONDecodeError) as e: | |
continue | |
end_time = time.time() | |
first_token_time = ( | |
first_chunk_time - start_time | |
if first_chunk_time else 0 | |
) | |
total_time = end_time - start_time | |
prompt_tokens = 0 | |
completion_tokens = 0 | |
for line in full_response_content.splitlines(): | |
if line.startswith("data:"): | |
line = line[5:].strip() | |
if line == "[DONE]": | |
continue | |
try: | |
response_json = json.loads(line) | |
if ( | |
"usage" in response_json and | |
"completion_tokens" in response_json["usage"] | |
): | |
completion_tokens += response_json[ | |
"usage" | |
]["completion_tokens"] | |
if ( | |
"usage" in response_json and | |
"prompt_tokens" in response_json["usage"] | |
): | |
prompt_tokens = response_json[ | |
"usage" | |
]["prompt_tokens"] | |
except ( KeyError,ValueError,IndexError) as e: | |
pass | |
user_content = "" | |
messages = data.get("messages", []) | |
for message in messages: | |
if message["role"] == "user": | |
if isinstance(message["content"], str): | |
user_content += message["content"] + " " | |
elif isinstance(message["content"], list): | |
for item in message["content"]: | |
if ( | |
isinstance(item, dict) and | |
item.get("type") == "text" | |
): | |
user_content += ( | |
item.get("text", "") + | |
" " | |
) | |
user_content = user_content.strip() | |
user_content_replaced = user_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated | |
response_content_replaced = response_content_replaced.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"输出token: {completion_tokens}, " | |
f"首字用时: {first_token_time:.4f}秒, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}, " | |
f"用户的内容: {user_content_replaced}, " | |
f"输出的内容: {response_content_replaced}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(prompt_tokens + completion_tokens) | |
yield "data: [DONE]\n\n" | |
return Response( | |
stream_with_context(generate()), | |
content_type="text/event-stream" | |
) | |
first_chunk_time = None | |
full_response_content = "" | |
reasoning_content_accumulated = "" | |
content_accumulated = "" | |
first_reasoning_chunk = True | |
for chunk in response.iter_lines(): | |
if chunk: | |
if first_chunk_time is None: | |
first_chunk_time = time.time() | |
full_response_content += chunk.decode("utf-8") | |
for line in chunk.decode("utf-8").splitlines(): | |
if line.startswith("data:"): | |
try: | |
chunk_json = json.loads(line.lstrip("data: ").strip()) | |
if "choices" in chunk_json and len(chunk_json["choices"]) > 0: | |
delta = chunk_json["choices"][0].get("delta", {}) | |
if delta.get("reasoning_content") is not None: | |
reasoning_chunk = delta["reasoning_content"] | |
reasoning_chunk = reasoning_chunk.replace('\n', '\n> ') | |
if first_reasoning_chunk: | |
reasoning_chunk = "> " + reasoning_chunk | |
first_reasoning_chunk = False | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n" | |
if delta.get("content") is not None: | |
if not first_reasoning_chunk: | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': '\n\n'}, 'index': 0}]})}\n\n" | |
first_reasoning_chunk = True | |
yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n" | |
except (KeyError, ValueError, json.JSONDecodeError) as e: | |
continue | |
end_time = time.time() | |
first_token_time = ( | |
first_chunk_time - start_time | |
if first_chunk_time else 0 | |
) | |
total_time = end_time - start_time | |
prompt_tokens = 0 | |
completion_tokens = 0 | |
for line in full_response_content.splitlines(): | |
if line.startswith("data:"): | |
line = line[5:].strip() | |
if line == "[DONE]": | |
continue | |
try: | |
response_json = json.loads(line) | |
if ( | |
"usage" in response_json and | |
"completion_tokens" in response_json["usage"] | |
): | |
completion_tokens += response_json[ | |
"usage" | |
]["completion_tokens"] | |
if ( | |
"usage" in response_json and | |
"prompt_tokens" in response_json["usage"] | |
): | |
prompt_tokens = response_json[ | |
"usage" | |
]["prompt_tokens"] | |
except (KeyError,ValueError,IndexError) as e: | |
pass | |
user_content = "" | |
messages = data.get("messages", []) | |
for message in messages: | |
if message["role"] == "user": | |
if isinstance(message["content"], str): | |
user_content += message["content"] + " " | |
elif isinstance(message["content"], list): | |
for item in message["content"]: | |
if ( | |
isinstance(item, dict) and | |
item.get("type") == "text" | |
): | |
user_content += ( | |
item.get("text", "") + | |
" " | |
) | |
user_content = user_content.strip() | |
user_content_replaced = user_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated | |
response_content_replaced = response_content_replaced.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"输出token: {completion_tokens}, " | |
f"首字用时: {first_token_time:.4f}秒, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}, " | |
f"用户的内容: {user_content_replaced}, " | |
f"输出的内容: {response_content_replaced}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(prompt_tokens + completion_tokens) | |
yield "data: [DONE]\n\n" | |
return Response( | |
stream_with_context(generate()), | |
content_type="text/event-stream" | |
) | |
else: | |
response.raise_for_status() | |
end_time = time.time() | |
response_json = response.json() | |
total_time = end_time - start_time | |
try: | |
prompt_tokens = response_json["usage"]["prompt_tokens"] | |
completion_tokens = response_json["usage"]["completion_tokens"] | |
response_content = "" | |
if model_name.endswith("-thinking") and "choices" in response_json and len(response_json["choices"]) > 0: | |
choice = response_json["choices"][0] | |
if "message" in choice: | |
if "reasoning_content" in choice["message"]: | |
reasoning_content = choice["message"]["reasoning_content"] | |
reasoning_content = reasoning_content.replace('\n', '\n> ') | |
reasoning_content = '> ' + reasoning_content | |
formatted_reasoning = f"{reasoning_content}\n" | |
response_content += formatted_reasoning + "\n" | |
if "content" in choice["message"]: | |
response_content += choice["message"]["content"] | |
elif model_name.endswith("-openwebui") and "choices" in response_json and len(response_json["choices"]) > 0: | |
choice = response_json["choices"][0] | |
if "message" in choice: | |
if "reasoning_content" in choice["message"]: | |
reasoning_content = choice["message"]["reasoning_content"] | |
response_content += f"<think>\n{reasoning_content}\n</think>\n" | |
if "content" in choice["message"]: | |
response_content += choice["message"]["content"] | |
except (KeyError, ValueError, IndexError) as e: | |
logging.error( | |
f"解析非流式响应 JSON 失败: {e}, " | |
f"完整内容: {response_json}" | |
) | |
prompt_tokens = 0 | |
completion_tokens = 0 | |
response_content = "" | |
user_content = "" | |
messages = data.get("messages", []) | |
for message in messages: | |
if message["role"] == "user": | |
if isinstance(message["content"], str): | |
user_content += message["content"] + " " | |
elif isinstance(message["content"], list): | |
for item in message["content"]: | |
if ( | |
isinstance(item, dict) and | |
item.get("type") == "text" | |
): | |
user_content += ( | |
item.get("text", "") + | |
" " | |
) | |
user_content = user_content.strip() | |
user_content_replaced = user_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
response_content_replaced = response_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"输出token: {completion_tokens}, " | |
f"首字用时: 0, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}, " | |
f"用户的内容: {user_content_replaced}, " | |
f"输出的内容: {response_content_replaced}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(prompt_tokens + completion_tokens) | |
formatted_response = { | |
"id": response_json.get("id", ""), | |
"object": "chat.completion", | |
"created": response_json.get("created", int(time.time())), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"message": { | |
"role": "assistant", | |
"content": response_content | |
}, | |
"finish_reason": "stop" | |
} | |
], | |
"usage": { | |
"prompt_tokens": prompt_tokens, | |
"completion_tokens": completion_tokens, | |
"total_tokens": prompt_tokens + completion_tokens | |
} | |
} | |
return jsonify(formatted_response) | |
if model_name in models["image"]: | |
if isinstance(data.get("messages"), list): | |
data = data.copy() | |
data["prompt"] = extract_user_content(data["messages"]) | |
siliconflow_data = get_siliconflow_data(model_name, data) | |
try: | |
start_time = time.time() | |
response = requests.post( | |
IMAGE_ENDPOINT, | |
headers=headers, | |
json=siliconflow_data, | |
stream=data.get("stream", False) | |
) | |
if response.status_code == 429: | |
return jsonify(response.json()), 429 | |
if data.get("stream", False): | |
def generate(): | |
try: | |
response.raise_for_status() | |
response_json = response.json() | |
images = response_json.get("images", []) | |
image_url = "" | |
if images and isinstance(images[0], dict) and "url" in images[0]: | |
image_url = images[0]["url"] | |
logging.info(f"Extracted image URL: {image_url}") | |
elif images and isinstance(images[0], str): | |
image_url = images[0] | |
logging.info(f"Extracted image URL: {image_url}") | |
markdown_image_link = create_base64_markdown_image(image_url) | |
if image_url: | |
chunk_size = 8192 | |
for i in range(0, len(markdown_image_link), chunk_size): | |
chunk = markdown_image_link[i:i + chunk_size] | |
chunk_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion.chunk", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"delta": { | |
"role": "assistant", | |
"content": chunk | |
}, | |
"finish_reason": None | |
} | |
] | |
} | |
yield f"data: {json.dumps(chunk_data)}\n\n".encode('utf-8') | |
else: | |
chunk_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion.chunk", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"delta": { | |
"role": "assistant", | |
"content": "Failed to generate image" | |
}, | |
"finish_reason": None | |
} | |
] | |
} | |
yield f"data: {json.dumps(chunk_data)}\n\n".encode('utf-8') | |
end_chunk_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion.chunk", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"delta": {}, | |
"finish_reason": "stop" | |
} | |
] | |
} | |
yield f"data: {json.dumps(end_chunk_data)}\n\n".encode('utf-8') | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(0) | |
request_timestamps_day.append(time.time()) | |
token_counts_day.append(0) | |
except requests.exceptions.RequestException as e: | |
logging.error(f"请求转发异常: {e}") | |
error_chunk_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion.chunk", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"delta": { | |
"role": "assistant", | |
"content": f"Error: {str(e)}" | |
}, | |
"finish_reason": None | |
} | |
] | |
} | |
yield f"data: {json.dumps(error_chunk_data)}\n\n".encode('utf-8') | |
end_chunk_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion.chunk", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"delta": {}, | |
"finish_reason": "stop" | |
} | |
] | |
} | |
yield f"data: {json.dumps(end_chunk_data)}\n\n".encode('utf-8') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"使用的模型: {model_name}" | |
) | |
yield "data: [DONE]\n\n".encode('utf-8') | |
return Response(stream_with_context(generate()), content_type='text/event-stream') | |
else: | |
response.raise_for_status() | |
end_time = time.time() | |
response_json = response.json() | |
total_time = end_time - start_time | |
try: | |
images = response_json.get("images", []) | |
image_url = "" | |
if images and isinstance(images[0], dict) and "url" in images[0]: | |
image_url = images[0]["url"] | |
logging.info(f"Extracted image URL: {image_url}") | |
elif images and isinstance(images[0], str): | |
image_url = images[0] | |
logging.info(f"Extracted image URL: {image_url}") | |
markdown_image_link = f"![image]({image_url})" | |
response_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"message": { | |
"role": "assistant", | |
"content": markdown_image_link if image_url else "Failed to generate image", | |
}, | |
"finish_reason": "stop", | |
} | |
], | |
} | |
except (KeyError, ValueError, IndexError) as e: | |
logging.error( | |
f"解析响应 JSON 失败: {e}, " | |
f"完整内容: {response_json}" | |
) | |
response_data = { | |
"id": f"chatcmpl-{uuid.uuid4()}", | |
"object": "chat.completion", | |
"created": int(time.time()), | |
"model": model_name, | |
"choices": [ | |
{ | |
"index": 0, | |
"message": { | |
"role": "assistant", | |
"content": "Failed to process image data", | |
}, | |
"finish_reason": "stop", | |
} | |
], | |
} | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(0) | |
request_timestamps_day.append(time.time()) | |
token_counts_day.append(0) | |
return jsonify(response_data) | |
except requests.exceptions.RequestException as e: | |
logging.error(f"请求转发异常: {e}") | |
return jsonify({"error": str(e)}), 500 | |
else: | |
try: | |
start_time = time.time() | |
response = requests.post( | |
TEST_MODEL_ENDPOINT, | |
headers=headers, | |
json=data, | |
stream=data.get("stream", False) | |
) | |
if response.status_code == 429: | |
return jsonify(response.json()), 429 | |
if data.get("stream", False): | |
def generate(): | |
first_chunk_time = None | |
full_response_content = "" | |
for chunk in response.iter_content(chunk_size=2048): | |
if chunk: | |
if first_chunk_time is None: | |
first_chunk_time = time.time() | |
full_response_content += chunk.decode("utf-8") | |
yield chunk | |
end_time = time.time() | |
first_token_time = ( | |
first_chunk_time - start_time | |
if first_chunk_time else 0 | |
) | |
total_time = end_time - start_time | |
prompt_tokens = 0 | |
completion_tokens = 0 | |
response_content = "" | |
for line in full_response_content.splitlines(): | |
if line.startswith("data:"): | |
line = line[5:].strip() | |
if line == "[DONE]": | |
continue | |
try: | |
response_json = json.loads(line) | |
if ( | |
"usage" in response_json and | |
"completion_tokens" in response_json["usage"] | |
): | |
completion_tokens = response_json[ | |
"usage" | |
]["completion_tokens"] | |
if ( | |
"choices" in response_json and | |
len(response_json["choices"]) > 0 and | |
"delta" in response_json["choices"][0] and | |
"content" in response_json[ | |
"choices" | |
][0]["delta"] | |
): | |
response_content += response_json[ | |
"choices" | |
][0]["delta"]["content"] | |
if ( | |
"usage" in response_json and | |
"prompt_tokens" in response_json["usage"] | |
): | |
prompt_tokens = response_json[ | |
"usage" | |
]["prompt_tokens"] | |
except ( | |
KeyError, | |
ValueError, | |
IndexError | |
) as e: | |
logging.error( | |
f"解析流式响应单行 JSON 失败: {e}, " | |
f"行内容: {line}" | |
) | |
user_content = extract_user_content(data.get("messages", [])) | |
user_content_replaced = user_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
response_content_replaced = response_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"输出token: {completion_tokens}, " | |
f"首字用时: {first_token_time:.4f}秒, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}, " | |
f"用户的内容: {user_content_replaced}, " | |
f"输出的内容: {response_content_replaced}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
token_counts.append(prompt_tokens+completion_tokens) | |
request_timestamps_day.append(time.time()) | |
token_counts_day.append(prompt_tokens+completion_tokens) | |
return Response( | |
stream_with_context(generate()), | |
content_type=response.headers['Content-Type'] | |
) | |
else: | |
response.raise_for_status() | |
end_time = time.time() | |
response_json = response.json() | |
total_time = end_time - start_time | |
try: | |
prompt_tokens = response_json["usage"]["prompt_tokens"] | |
completion_tokens = response_json[ | |
"usage" | |
]["completion_tokens"] | |
response_content = response_json[ | |
"choices" | |
][0]["message"]["content"] | |
except (KeyError, ValueError, IndexError) as e: | |
logging.error( | |
f"解析非流式响应 JSON 失败: {e}, " | |
f"完整内容: {response_json}" | |
) | |
prompt_tokens = 0 | |
completion_tokens = 0 | |
response_content = "" | |
user_content = extract_user_content(data.get("messages", [])) | |
user_content_replaced = user_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
response_content_replaced = response_content.replace( | |
'\n', '\\n' | |
).replace('\r', '\\n') | |
logging.info( | |
f"使用的key: {api_key}, " | |
f"提示token: {prompt_tokens}, " | |
f"输出token: {completion_tokens}, " | |
f"首字用时: 0, " | |
f"总共用时: {total_time:.4f}秒, " | |
f"使用的模型: {model_name}, " | |
f"用户的内容: {user_content_replaced}, " | |
f"输出的内容: {response_content_replaced}" | |
) | |
with data_lock: | |
request_timestamps.append(time.time()) | |
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]: | |
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"]) | |
else: | |
token_counts.append(0) | |
request_timestamps_day.append(time.time()) | |
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]: | |
token_counts_day.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"]) | |
else: | |
token_counts_day.append(0) | |
return jsonify(response_json) | |
except requests.exceptions.RequestException as e: | |
logging.error(f"请求转发异常: {e}") | |
return jsonify({"error": str(e)}), 500 | |
if __name__ == '__main__': | |
logging.info(f"环境变量:{os.environ}") | |
load_keys() | |
logging.info("程序启动时首次加载 keys 已执行") | |
scheduler.start() | |
logging.info("首次加载 keys 已手动触发执行") | |
refresh_models() | |
logging.info("首次刷新模型列表已手动触发执行") | |
app.run(debug=False,host='0.0.0.0',port=int(os.environ.get('PORT', 7860))) |