yangtb24 committed on
Commit
8e3da9c
·
verified ·
1 Parent(s): f5287ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -1
app.py CHANGED
@@ -6,6 +6,7 @@ import json
6
  import random
7
  import uuid
8
  import concurrent.futures
 
9
  from datetime import datetime, timedelta
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
  from flask import Flask, request, jsonify, Response, stream_with_context
@@ -33,6 +34,10 @@ valid_keys_global = []
33
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=20)
34
  model_key_indices = {}
35
 
 
 
 
 
36
  def get_credit_summary(api_key):
37
  """
38
  使用 API 密钥获取额度信息。
@@ -394,7 +399,19 @@ scheduler.add_job(refresh_models, 'interval', hours=1)
394
 
395
  @app.route('/')
396
  def index():
397
- return "<h1>Welcome to SiliconFlow</h1>"
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
  @app.route('/check_tokens', methods=['POST'])
400
  def check_tokens():
@@ -616,6 +633,10 @@ def handsome_chat_completions():
616
  f"输出的内容: {response_content_replaced}"
617
  )
618
 
 
 
 
 
619
  return Response(
620
  stream_with_context(generate()),
621
  content_type=response.headers['Content-Type']
@@ -678,6 +699,13 @@ def handsome_chat_completions():
678
  f"用户的内容: {user_content_replaced}, "
679
  f"输出的内容: {response_content_replaced}"
680
  )
 
 
 
 
 
 
 
681
  return jsonify(response_json)
682
 
683
  except requests.exceptions.RequestException as e:
 
6
  import random
7
  import uuid
8
  import concurrent.futures
9
+ import threading
10
  from datetime import datetime, timedelta
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
  from flask import Flask, request, jsonify, Response, stream_with_context
 
34
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=20)
35
  model_key_indices = {}
36
 
37
+ request_timestamps = []
38
+ token_counts = []
39
+ data_lock = threading.Lock()
40
+
41
  def get_credit_summary(api_key):
42
  """
43
  使用 API 密钥获取额度信息。
 
399
 
400
  @app.route('/')
401
  def index():
402
+ current_time = time.time()
403
+ one_minute_ago = current_time - 60
404
+
405
+ with data_lock:
406
+ # Clean up old data
407
+ while request_timestamps and request_timestamps[0] < one_minute_ago:
408
+ request_timestamps.pop(0)
409
+ token_counts.pop(0)
410
+
411
+ rpm = len(request_timestamps)
412
+ tpm = sum(token_counts)
413
+
414
+ return jsonify({"rpm": rpm, "tpm": tpm})
415
 
416
  @app.route('/check_tokens', methods=['POST'])
417
  def check_tokens():
 
633
  f"输出的内容: {response_content_replaced}"
634
  )
635
 
636
+ with data_lock:
637
+ request_timestamps.append(time.time())
638
+ token_counts.append(prompt_tokens+completion_tokens)
639
+
640
  return Response(
641
  stream_with_context(generate()),
642
  content_type=response.headers['Content-Type']
 
699
  f"用户的内容: {user_content_replaced}, "
700
  f"输出的内容: {response_content_replaced}"
701
  )
702
+ with data_lock:
703
+ request_timestamps.append(time.time())
704
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
705
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
706
+ else:
707
+ token_counts.append(0)
708
+
709
  return jsonify(response_json)
710
 
711
  except requests.exceptions.RequestException as e: