|
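"""Flask proxy that load-balances OpenAI-compatible requests across a pool
of upstream API keys, with scheduled key revalidation, model-list refresh,
and rolling per-minute / per-day request and token accounting."""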
import base64
import concurrent.futures
import io
import json
import logging
import os
import threading
import time
import uuid

import requests
|
from io import BytesIO |
|
from itertools import chain |
|
from PIL import Image |
|
from datetime import datetime |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
from flask import Flask, request, jsonify, Response, stream_with_context |
|
from werkzeug.middleware.proxy_fix import ProxyFix |
|
from requests.adapters import HTTPAdapter |
|
from urllib3.util.retry import Retry
|
|
|
os.environ['TZ'] = 'Asia/Shanghai' |
|
time.tzset() |
|
|
|
logging.basicConfig(level=logging.INFO, |
|
format='%(asctime)s - %(levelname)s - %(message)s') |
|
|
|
API_ENDPOINT = "https://api-st.siliconflow.cn/v1/user/info" |
|
TEST_MODEL_ENDPOINT = "https://api.openai.com/v1/chat/completions" |
|
MODELS_ENDPOINT = "https://api.openai.com/v1/models" |
|
EMBEDDINGS_ENDPOINT = "https://api-st.siliconflow.cn/v1/embeddings" |
|
IMAGE_ENDPOINT = "https://api-st.siliconflow.cn/v1/images/generations" |
|
|
|
def requests_session_with_retries( |
|
retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504) |
|
): |
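    """Build a requests.Session that retries transient upstream failures.

    Connect/read errors and HTTP 500/502/504 responses are retried with
    exponential backoff; the large connection pool matches the app's large
    thread pool so concurrent streaming requests are not starved.
    """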
|
session = requests.Session() |
|
retry = Retry( |
|
total=retries, |
|
read=retries, |
|
connect=retries, |
|
backoff_factor=backoff_factor, |
|
status_forcelist=status_forcelist, |
|
) |
|
adapter = HTTPAdapter( |
|
max_retries=retry, |
|
pool_connections=1000, |
|
pool_maxsize=10000, |
|
pool_block=False |
|
) |
|
session.mount("http://", adapter) |
|
session.mount("https://", adapter) |
|
return session |
|
|
|
session = requests_session_with_retries() |
|
|
|
app = Flask(__name__) |
|
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1) |
|
|
|
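# Model registry, populated by refresh_models(): the plain lists hold every
# model id reported upstream, and the "free_*" lists mark ids that
# determine_request_type() routes as "free" requests.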
models = { |
|
"text": [], |
|
"free_text": [], |
|
"embedding": [], |
|
"free_embedding": [], |
|
"image": [], |
|
"free_image": [] |
|
} |
|
|
|
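# API keys grouped by verification status; load_keys() clears and
# repopulates these lists from the KEYS environment variable.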
key_status = { |
|
"invalid": [], |
|
"free": [], |
|
"unverified": [], |
|
"valid": [] |
|
} |
|
|
|
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10000) |
|
model_key_indices = {} |
|
|
|
request_timestamps = [] |
|
token_counts = [] |
|
request_timestamps_day = [] |
|
token_counts_day = [] |
|
data_lock = threading.Lock() |
|
|
|
def extract_user_content(messages): |
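    """Concatenate the text of all user messages.

    Handles both plain-string content and the list-of-parts format,
    where only parts of type "text" contribute.
    """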
|
user_content = "" |
|
for message in messages: |
|
if message["role"] == "user": |
|
if isinstance(message["content"], str): |
|
user_content += message["content"] + " " |
|
elif isinstance(message["content"], list): |
|
for item in message["content"]: |
|
if isinstance(item, dict) and item.get("type") == "text": |
|
user_content += item.get("text", "") + " " |
|
return user_content.strip() |
|
|
|
def refresh_models(): |
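    """Refresh the cached text-model list using the first usable key."""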
|
global models |
|
|
|
|
|
    # Prefer verified keys and never probe the models endpoint with a key
    # already classified as invalid.
    first_valid_key = None
    for status in ("valid", "unverified", "free"):
        if key_status[status]:
            first_valid_key = key_status[status][0]
            break

    if first_valid_key:
        models["text"] = get_all_models(first_valid_key)
    else:
        logging.warning("No valid keys found to fetch models.")
        models["text"] = []
|
|
|
|
|
for model_type in ["text"]: |
|
        logging.info(f"All {model_type} models: {models[model_type]}")
|
|
|
def load_keys(): |
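    """Reload API keys from the KEYS environment variable.

    Each key is classified concurrently via process_key() and sorted into
    key_status plus the corresponding global key list.
    """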
|
global key_status |
|
for status in key_status: |
|
key_status[status] = [] |
|
|
|
keys_str = os.environ.get("KEYS") |
|
|
|
    # Log only the key count, not the keys themselves, to keep secrets out of logs.
    logging.info(
        f"KEYS environment variable supplies "
        f"{len(keys_str.split(',')) if keys_str else 0} key(s)."
    )
|
|
|
if not keys_str: |
|
        logging.warning("Environment variable KEYS is not set.")
|
return |
|
|
|
keys = keys_str.split(",") |
|
keys = [key.strip() for key in keys] |
|
|
|
global valid_keys_global, free_keys_global, unverified_keys_global |
|
valid_keys_global = [] |
|
free_keys_global = [] |
|
unverified_keys_global = [] |
|
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: |
|
futures = [executor.submit(process_key, key, "gpt-3.5-turbo") for key in keys] |
|
for key, future in zip(keys, futures): |
|
status = future.result() |
|
key_status[status].append(key) |
|
if status == "valid": |
|
valid_keys_global.append(key) |
|
elif status == "free": |
|
free_keys_global.append(key) |
|
elif status == "unverified": |
|
unverified_keys_global.append(key) |
|
logging.info(f"Key {key} status: {status}") |
|
|
|
def process_key(key, test_model): |
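    """Classify an API key as valid/free/unverified/invalid.

    Placeholder: every key is currently reported as "valid" without
    contacting the upstream API. A real implementation would probe
    TEST_MODEL_ENDPOINT with `test_model` and map the result to a status.
    """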
|
return "valid" |
|
|
|
def get_all_models(api_key): |
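    """Return the list of model ids visible to `api_key`, or [] on failure."""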
|
headers = { |
|
"Authorization": f"Bearer {api_key}", |
|
"Content-Type": "application/json" |
|
} |
|
try: |
|
        response = session.get(
            MODELS_ENDPOINT,
            headers=headers,
            timeout=10  # avoid hanging indefinitely on an unresponsive upstream
        )
|
response.raise_for_status() |
|
data = response.json() |
|
if ( |
|
isinstance(data, dict) and |
|
'data' in data and |
|
isinstance(data['data'], list) |
|
): |
|
return [ |
|
model.get("id") for model in data["data"] |
|
if isinstance(model, dict) and "id" in model |
|
] |
|
        else:
            logging.error("Failed to fetch model list: unexpected response format")
            return []
    except requests.exceptions.RequestException as e:
        logging.error(
            f"Failed to fetch model list, "
            f"API key: {api_key}, error: {e}"
        )
        return []
    except (KeyError, TypeError) as e:
        logging.error(
            f"Failed to parse model list, "
            f"API key: {api_key}, error: {e}"
        )
        return []
|
|
|
def determine_request_type(model_name, model_list, free_model_list): |
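    """Classify a request as "free", "paid", or "unknown" by model name."""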
|
if model_name in free_model_list: |
|
return "free" |
|
elif model_name in model_list: |
|
return "paid" |
|
else: |
|
return "unknown" |
|
|
|
def select_key(request_type, model_name): |
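    """Pick an API key for the request using per-model round-robin.

    Free requests prefer free keys; paid requests skip them. The rotation
    cursor is kept in model_key_indices, and keys rejected by
    key_is_valid() are skipped.
    """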
|
if request_type == "free": |
|
available_keys = ( |
|
free_keys_global + |
|
unverified_keys_global + |
|
valid_keys_global |
|
) |
|
elif request_type == "paid": |
|
available_keys = unverified_keys_global + valid_keys_global |
|
else: |
|
available_keys = ( |
|
free_keys_global + |
|
unverified_keys_global + |
|
valid_keys_global |
|
) |
|
|
|
if not available_keys: |
|
return None |
|
|
|
current_index = model_key_indices.get(model_name, 0) |
|
|
|
for _ in range(len(available_keys)): |
|
key = available_keys[current_index % len(available_keys)] |
|
current_index += 1 |
|
|
|
if key_is_valid(key, request_type): |
|
model_key_indices[model_name] = current_index |
|
return key |
|
else: |
|
            logging.warning(
                f"KEY {key} is invalid or has hit its limit; trying the next key"
            )
|
|
|
model_key_indices[model_name] = 0 |
|
return None |
|
|
|
def key_is_valid(key, request_type): |
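    """Report whether `key` may serve a request of `request_type`.

    Placeholder: always True. A real check would enforce per-key balance
    or rate limits depending on the request type.
    """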
|
return True |
|
|
|
def check_authorization(request): |
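    """Validate the Bearer token against the AUTHORIZATION_KEY env var.

    If AUTHORIZATION_KEY is unset the endpoints are open (a warning is
    logged); otherwise the Authorization header must equal
    "Bearer <AUTHORIZATION_KEY>".
    """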
|
authorization_key = os.environ.get("AUTHORIZATION_KEY") |
|
if not authorization_key: |
|
        logging.warning(
            "Environment variable AUTHORIZATION_KEY is not set; requests are "
            "served without authentication. Setting it is strongly recommended."
        )
|
return True |
|
|
|
auth_header = request.headers.get('Authorization') |
|
if not auth_header: |
|
        logging.warning("Missing Authorization header in request.")
|
return False |
|
|
|
if auth_header != f"Bearer {authorization_key}": |
|
        logging.warning(f"Invalid Authorization key: {auth_header}")
|
return False |
|
|
|
return True |
|
|
|
scheduler = BackgroundScheduler()
scheduler.add_job(load_keys, 'interval', hours=1)
scheduler.add_job(refresh_models, 'interval', hours=1)
|
|
|
@app.route('/') |
|
def index(): |
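    """Report rolling usage stats: requests/tokens per minute and per day."""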
|
current_time = time.time() |
|
one_minute_ago = current_time - 60 |
|
one_day_ago = current_time - 86400 |
|
|
|
with data_lock: |
|
while request_timestamps and request_timestamps[0] < one_minute_ago: |
|
request_timestamps.pop(0) |
|
token_counts.pop(0) |
|
|
|
rpm = len(request_timestamps) |
|
tpm = sum(token_counts) |
|
|
|
with data_lock: |
|
while request_timestamps_day and request_timestamps_day[0] < one_day_ago: |
|
request_timestamps_day.pop(0) |
|
token_counts_day.pop(0) |
|
|
|
rpd = len(request_timestamps_day) |
|
tpd = sum(token_counts_day) |
|
|
|
return jsonify({"rpm": rpm, "tpm": tpm, "rpd": rpd, "tpd": tpd}) |
|
|
|
@app.route('/handsome/v1/models', methods=['GET']) |
|
def list_models(): |
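    """Return all known models in the OpenAI /v1/models response shape."""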
|
if not check_authorization(request): |
|
return jsonify({"error": "Unauthorized"}), 401 |
|
|
|
detailed_models = [] |
|
|
|
all_models = chain( |
|
models["text"], |
|
models["embedding"], |
|
models["image"] |
|
) |
|
|
|
for model in all_models: |
|
detailed_models.append({ |
|
"id": model, |
|
"object": "model", |
|
"created": 1678888888, |
|
"owned_by": "openai", |
|
"permission": [], |
|
"root": model, |
|
"parent": None |
|
}) |
|
|
|
return jsonify({ |
|
"success": True, |
|
"data": detailed_models |
|
}) |
|
|
|
@app.route('/handsome/v1/chat/completions', methods=['POST']) |
|
def handsome_chat_completions(): |
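    """OpenAI-compatible chat completions endpoint.

    Validates the model, short-circuits test-style prompts with a canned
    reply, selects an upstream key, forwards the request (streaming or
    not), and records token usage for the stats endpoint.
    """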
|
if not check_authorization(request): |
|
return jsonify({"error": "Unauthorized"}), 401 |
|
|
|
data = request.get_json() |
|
logging.info(f"Request data: {data}") |
|
if not data or 'model' not in data: |
|
return jsonify({"error": "Invalid request data"}), 400 |
|
if data['model'] not in models["text"] and data['model'] not in models["image"]: |
|
return jsonify({"error": "Invalid model"}), 400 |
|
|
|
model_name = data['model'] |
|
|
|
request_type = determine_request_type( |
|
model_name, |
|
models["text"] + models["image"], |
|
models["free_text"] + models["free_image"] |
|
) |
|
|
|
user_content = extract_user_content(data.get("messages", [])) |
|
|
|
phrases_to_check = ["hello", "你好", "什么模型", "签到", "社工", "你是谁", "冷笑话", "只回答", "Netflix", "response", "A="] |
|
phrases_to_check_lower = [phrase.lower() for phrase in phrases_to_check] |
|
    canned_response_content = (
        "This is a free public API. All models are available and genuine; "
        "please do not run meaningless tests against them, and avoid using "
        "advanced models for questions that do not need them.\n"
        "Let's change the subject. Please do not run meaningless tests "
        "against the models, and avoid using advanced models for questions "
        "that do not need them."
    )
|
|
|
user_content_lower = user_content.lower() |
|
|
|
if user_content_lower == "hi": |
|
        logging.info('Intercepted a request (bare "hi").')
|
if data.get("stream", False): |
|
def generate_canned_stream(): |
|
message_data = { |
|
"choices": [ |
|
{ |
|
"delta": { |
|
"content": canned_response_content |
|
}, |
|
"index": 0, |
|
"finish_reason": "stop" |
|
} |
|
] |
|
} |
|
|
|
|
model_data = { |
|
"model_name": model_name |
|
} |
|
|
|
yield f"data: {json.dumps(message_data)}\n\n".encode("utf-8") |
|
yield f"data: {json.dumps(model_data)}\n\n".encode("utf-8") |
|
yield f"data: [DONE]\n\n".encode("utf-8") |
|
return Response( |
|
stream_with_context(generate_canned_stream()), |
|
content_type="text/event-stream" |
|
) |
|
else: |
|
canned_response = { |
|
"choices": [ |
|
{ |
|
"message": { |
|
"content": canned_response_content |
|
}, |
|
"index": 0, |
|
"finish_reason": "stop" |
|
} |
|
], |
|
"usage": { |
|
"prompt_tokens": 0, |
|
"completion_tokens": 0, |
|
"total_tokens": 0 |
|
}, |
|
"model_name": model_name |
|
} |
|
return jsonify(canned_response) |
|
elif any(phrase in user_content_lower for phrase in phrases_to_check_lower): |
|
        logging.info("Intercepted a request (matched a blocked phrase).")
|
if data.get("stream", False): |
|
def generate_canned_stream(): |
|
message_data = { |
|
"choices": [ |
|
{ |
|
"delta": { |
|
"content": canned_response_content |
|
}, |
|
"index": 0, |
|
"finish_reason": "stop" |
|
} |
|
] |
|
} |
|
|
|
model_data = { |
|
"model_name": model_name |
|
} |
|
|
|
yield f"data: {json.dumps(message_data)}\n\n".encode("utf-8") |
|
yield f"data: {json.dumps(model_data)}\n\n".encode("utf-8") |
|
yield f"data: [DONE]\n\n".encode("utf-8") |
|
return Response( |
|
stream_with_context(generate_canned_stream()), |
|
content_type="text/event-stream" |
|
) |
|
else: |
|
canned_response = { |
|
"choices": [ |
|
{ |
|
"message": { |
|
"content": canned_response_content |
|
}, |
|
"index": 0, |
|
"finish_reason": "stop" |
|
} |
|
], |
|
"usage": { |
|
"prompt_tokens": 0, |
|
"completion_tokens": 0, |
|
"total_tokens": 0 |
|
}, |
|
"model_name": model_name |
|
} |
|
return jsonify(canned_response) |
|
|
|
api_key = select_key(request_type, model_name) |
|
|
|
if not api_key: |
|
return jsonify( |
|
{ |
|
"error": ( |
|
"No available API key for this " |
|
"request type or all keys have " |
|
"reached their limits" |
|
) |
|
} |
|
), 429 |
|
|
|
headers = { |
|
"Authorization": f"Bearer {api_key}", |
|
"Content-Type": "application/json" |
|
} |
|
|
|
try: |
|
start_time = time.time() |
|
        response = session.post(
|
TEST_MODEL_ENDPOINT, |
|
headers=headers, |
|
json=data, |
|
stream=data.get("stream", False) |
|
) |
|
|
|
if response.status_code == 429: |
|
return jsonify(response.json()), 429 |
|
|
|
if data.get("stream", False): |
|
def generate(): |
|
                first_chunk_time = None
                raw_response = b""

                for chunk in response.iter_content(chunk_size=2048):
                    if chunk:
                        if first_chunk_time is None:
                            first_chunk_time = time.time()
                        raw_response += chunk
                        yield chunk

                # Decode once, after the loop: a multi-byte UTF-8 character
                # can be split across 2048-byte chunk boundaries.
                full_response_content = raw_response.decode(
                    "utf-8", errors="replace"
                )

                end_time = time.time()
                first_token_time = (
                    first_chunk_time - start_time
                    if first_chunk_time else 0
                )
                total_time = end_time - start_time
|
|
|
                response_content = ""
                prompt_tokens = 0
                completion_tokens = 0

                for line in full_response_content.splitlines():
                    if line.startswith("data:"):
                        line = line[5:].strip()
                        if line == "[DONE]":
                            continue
                        try:
                            response_json = json.loads(line)

                            # Some providers attach a usage block to the
                            # final stream chunk; pick it up when present so
                            # the token counters are not always zero.
                            usage = response_json.get("usage")
                            if usage:
                                prompt_tokens = usage.get("prompt_tokens", 0)
                                completion_tokens = usage.get("completion_tokens", 0)

                            for choice in response_json.get("choices", []):
                                if "delta" in choice and "content" in choice["delta"]:
                                    response_content += choice["delta"]["content"]
                                elif "message" in choice and "content" in choice["message"]:
                                    response_content += choice["message"]["content"]
                        except json.JSONDecodeError as e:
                            logging.error(f"JSON parse failed: {e}, line: {line}")
                        except (KeyError, IndexError, TypeError) as e:
                            logging.error(f"Malformed stream chunk: {e}, line: {line}")
|
|
|
user_content = extract_user_content(data.get("messages", [])) |
|
|
|
user_content_replaced = user_content.replace( |
|
'\n', '\\n' |
|
).replace('\r', '\\n') |
|
response_content_replaced = response_content.replace( |
|
'\n', '\\n' |
|
).replace('\r', '\\n') |
|
|
|
                logging.info(
                    f"key used: {api_key}, "
                    f"prompt tokens: {prompt_tokens}, "
                    f"completion tokens: {completion_tokens}, "
                    f"time to first chunk: {first_token_time:.4f}s, "
                    f"total time: {total_time:.4f}s, "
                    f"model: {model_name}, "
                    f"user content: {user_content_replaced}, "
                    f"response content: {response_content_replaced}"
                )
|
|
|
with data_lock: |
|
request_timestamps.append(time.time()) |
|
token_counts.append(prompt_tokens+completion_tokens) |
|
request_timestamps_day.append(time.time()) |
|
token_counts_day.append(prompt_tokens+completion_tokens) |
|
|
|
            return Response(
                stream_with_context(generate()),
                content_type=response.headers.get(
                    'Content-Type', 'text/event-stream'
                )
            )
|
else: |
|
response.raise_for_status() |
|
end_time = time.time() |
|
response_json = response.json() |
|
total_time = end_time - start_time |
|
|
|
            try:
                prompt_tokens = response_json["usage"]["prompt_tokens"]
                completion_tokens = response_json["usage"]["completion_tokens"]
                response_content = response_json["choices"][0]["message"]["content"]
            except (KeyError, ValueError, IndexError) as e:
                logging.error(
                    f"Failed to parse non-stream response JSON: {e}, "
                    f"full body: {response_json}"
                )
                prompt_tokens = 0
                completion_tokens = 0
                response_content = (
                    "This is a free public API. All models are available and "
                    "genuine; please do not run meaningless tests against "
                    "them, and avoid using advanced models for questions "
                    "that do not need them.\n"
                )
|
|
|
user_content = extract_user_content(data.get("messages", [])) |
|
|
|
user_content_replaced = user_content.replace( |
|
'\n', '\\n' |
|
).replace('\r', '\\n') |
|
response_content_replaced = response_content.replace( |
|
'\n', '\\n' |
|
).replace('\r', '\\n') |
|
|
|
            logging.info(
                f"key used: {api_key}, "
                f"prompt tokens: {prompt_tokens}, "
                f"completion tokens: {completion_tokens}, "
                f"time to first chunk: 0, "
                f"total time: {total_time:.4f}s, "
                f"model: {model_name}, "
                f"user content: {user_content_replaced}, "
                f"response content: {response_content_replaced}"
            )
|
            with data_lock:
                now = time.time()
                usage = response_json.get("usage", {})
                total_tokens = (
                    usage.get("prompt_tokens", 0)
                    + usage.get("completion_tokens", 0)
                )
                request_timestamps.append(now)
                token_counts.append(total_tokens)
                request_timestamps_day.append(now)
                token_counts_day.append(total_tokens)
|
|
|
response_json["choices"][0]["message"]["content"] = response_content |
|
return jsonify(response_json) |
|
|
|
except requests.exceptions.RequestException as e: |
|
        logging.error(f"Failed to forward request: {e}")
|
return jsonify({"error": str(e)}), 500 |
|
|
|
|
|
if __name__ == '__main__':
    # Do not log os.environ wholesale: it contains KEYS and AUTHORIZATION_KEY.
    logging.info("Service starting up")

    load_keys()
    logging.info("Initial key load finished")

    scheduler.start()
    logging.info("Background scheduler started")

    refresh_models()
    logging.info("Initial model list refresh finished")

    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
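# Example request (assuming AUTHORIZATION_KEY is set to the placeholder
# value "secret" and the server runs on the default port 7860):
#   curl http://localhost:7860/handsome/v1/models \
#     -H "Authorization: Bearer secret"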