Terry Zhuo committed
Commit 46e626f · 1 Parent(s): ed574fd
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
 import pandas as pd
 from datetime import datetime
 import time
-from azure_count_ip_data import count_files_per_ip
+from azure_count_ip_data import count_files_per_annotator
 from log_reader import RemoteLogReader
 import threading
 
@@ -36,12 +36,10 @@ def update_stats():
     try:
         # Initialize RemoteLogReader
        reader = RemoteLogReader()
-
-        # Get IP counts using Azure storage
-        ip_counts = count_files_per_ip(reader)
-
+        # Get annotator counts using Azure storage
+        annotator_counts = count_files_per_annotator(reader)
         # Convert to DataFrame for better display
-        df = pd.DataFrame(list(ip_counts.items()), columns=['Annotator', 'Battle Count'])
+        df = pd.DataFrame(list(annotator_counts.items()), columns=['Annotator', 'Battle Count'])
         df = df.sort_values('Battle Count', ascending=False)
 
         # Get current time
@@ -88,7 +86,7 @@ def create_ui():
 
     with gr.Blocks(title="Battle Count Statistics") as app:
         gr.Markdown("# Battle Count Statistics")
-        gr.Markdown("Displays the count of valid battles per IP address. Updates automatically every hour.")
+        gr.Markdown("Displays the count of valid battles per annotator. Updates automatically every hour.")
 
         with gr.Row():
            last_update = gr.Textbox(
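
The app-side change is a rename plus re-keying: the stats table is now keyed by annotator name rather than raw IP address. A minimal sketch of the new call path, assuming RemoteLogReader() constructs without arguments exactly as in update_stats() above:

    import pandas as pd

    from azure_count_ip_data import count_files_per_annotator
    from log_reader import RemoteLogReader

    reader = RemoteLogReader()
    # Returns a {annotator_name: valid_battle_count} dict, e.g. {"Max Tian": 12, ...}
    annotator_counts = count_files_per_annotator(reader)
    df = pd.DataFrame(list(annotator_counts.items()), columns=["Annotator", "Battle Count"])
    df = df.sort_values("Battle Count", ascending=False)
    print(df.to_string(index=False))
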
azure_count_ip_data.py CHANGED
@@ -6,7 +6,7 @@ from collections import defaultdict
 import shutil
 import re
 import argparse
-from typing import Dict, Set, Tuple, Optional
+from typing import Dict, Set, Tuple, Optional, List, Union
 from log_reader import RemoteLogReader
 
 # List of IP addresses we care about
@@ -16,24 +16,48 @@ WHITELIST_IPS_DICT = {
     "Kenneth Hamilton": ["109.245.193.97"],
     "Marek Suppa": ["158.195.18.232"],
     "Max Tian": ["2607:fea8:4f40:4b00:e5b9:9806:6b69:233b", "2607:fea8:4f40:4b00:bcef:571:6124:f01", "2607:fea8:7c9d:3800:d9c0:7295:3e2e:6287", "2607:fea8:7c9d:3800:fd51:2c91:c9e2:3c8"],
-    "Mengzhao Jia": ["66.254.231.49", "160.32.74.89"],
-    "Noah Ziems": ["2601:245:c500:92c0:633c:c0d2:dcc1:1f48", "2601:245:c500:92c0:961e:9ac7:e02:c266", "2601:245:c500:92c0:ae74:d1d5:ca3b:da90"],
+    "Mengzhao Jia": ["66.254.231.49"],
+    "Noah Ziems": ["2601:245:c500:92c0:633c:c0d2:dcc1:1f48", "2601:245:c500:92c0:961e:9ac7:e02:c266"],
     "Sabina A": ["175.196.44.217", "58.235.174.122", "14.52.175.55"],
-    "Wenhao Yu": ["2601:600:8d00:9510:1d77:b610:9358:f443", "2601:600:8d00:9510:513f:6c4e:5960:fdc7", "174.164.6.99"],
+    "Wenhao Yu": ["2601:600:8d00:9510:1d77:b610:9358:f443"],
     "Vaisakhi Mishra": ["74.90.222.68"],
     "Kumar Shridhar": ["129.132.145.250"],
     "Viktor Gal": ["2a02:169:3e9:0:6ce8:e76f:faed:c830"],
-    "Guangyu Song": ["70.50.179.57", "209.226.139.83"],
-    "Bhupesh Bishnoi": ["37.65.177.22", "195.220.58.237", "194.57.114.147", "195.220.58.234"],
+    "Guangyu Song": ["70.50.179.57"],
+    "Bhupesh Bishnoi": ["2a02:842a:24:5a01:8cd6:5b22:1189:6035", "192.168.1.8"],
     "Zheng Liu": ["128.143.71.67"],
-    "Ming Xu": ["2601:600:8d00:9510:185b:955d:275b:7685", "2601:600:8d00:9510:5150:468c:ab7d:518d"],
-    "Ayush Sunil Munot": ["10.145.76.56"],
-    "Saiteja Utpala": ["192.168.31.185"]
+    "Ming Xu": ["10.0.0.243"],
+    "Ayush Sunil Munot": ["10.145.76.56"]
+}
+
+# Username whitelist for each annotator
+WHITELIST_USERNAMES_DICT = {
+    "Chen Gong": [],
+    "Juyong Jiang": [],
+    "Kenneth Hamilton": [],
+    "Marek Suppa": [],
+    "Max Tian": [],
+    "Mengzhao Jia": [],
+    "Noah Ziems": [],
+    "Sabina A": [],
+    "Wenhao Yu": [],
+    "Vaisakhi Mishra": [],
+    "Kumar Shridhar": [],
+    "Viktor Gal": [],
+    "Guangyu Song": [],
+    "Bhupesh Bishnoi": [],
+    "Zheng Liu": [],
+    "Ming Xu": [],
+    "Ayush Sunil Munot": [],
+    "Terry Yue Zhuo": ["test"]
 }
 
 # Flatten IP list for backward compatibility
 WHITELIST_IPS = [ip for ips in WHITELIST_IPS_DICT.values() for ip in ips]
 
+# Flatten username list for backward compatibility
+WHITELIST_USERNAMES = [username for usernames in WHITELIST_USERNAMES_DICT.values() for username in usernames]
+
 logging.basicConfig(level=logging.WARNING)
 log = logging.getLogger(__name__)
 
@@ -47,6 +71,21 @@ def get_ip_from_jsonl(content: str) -> Optional[str]:
         log.error(f"Error extracting IP from content: {e}")
         return None
 
+def get_username_from_jsonl(content: str) -> Optional[str]:
+    """Extract username from the last line of a JSONL content if it's a vote"""
+    try:
+        lines = [line.strip() for line in content.split('\n') if line.strip()]
+        if not lines:
+            return None
+
+        last_line = json.loads(lines[-1])
+        if last_line.get('type') == 'vote':
+            return last_line.get('username')
+        return None
+    except Exception as e:
+        log.error(f"Error extracting username from content: {e}")
+        return None
+
 def get_chat_session_id(file_name: str, content: str = None) -> Optional[str]:
     """Extract chat_session_id based on the file location:
     - For files under conv_logs: extract from filename
@@ -82,52 +121,86 @@ def get_sandbox_session_ids(reader: 'RemoteLogReader', date_str: str) -> Set[str
 
     return session_ids
 
-def get_file_data(content: str) -> Tuple[Optional[str], bool]:
-    """Read file content and return IP and vote condition status"""
+def get_file_data(content: str) -> Tuple[Optional[str], Optional[str], bool]:
+    """Read file content and return IP, username, and vote condition status"""
     try:
         lines = [line.strip() for line in content.split('\n') if line.strip()]
         if not lines:
-            return None, False
+            return None, None, False
 
         # Get IP from first line
         try:
             first_line_data = json.loads(lines[0])
             ip = first_line_data.get('ip')
-            # Early return if IP is not in whitelist
-            if ip not in WHITELIST_IPS:
-                return None, False
         except json.JSONDecodeError:
             ip = None
 
-        # Check vote conditions from last line
+        # Early check if IP is in whitelist
+        ip_in_whitelist = ip in WHITELIST_IPS
+
+        # Check vote conditions from last line and get username if available
         try:
             last_line_data = json.loads(lines[-1])
-            feedback = last_line_data.get('feedback')
-            vote_conditions_met = (last_line_data.get('type') == 'vote' and
-                                 isinstance(feedback, dict) and
-                                 len(feedback) == 6)
-        except json.JSONDecodeError:
+            username = None
+
+            if last_line_data.get('type') == 'vote':
+                # Only try to get username if the key exists
+                if 'username' in last_line_data:
+                    username = last_line_data.get('username')
+
+                feedback = last_line_data.get('feedback')
+
+                # Check vote conditions: type is vote, feedback has 6 items, and at least 4 lines (2 rounds of chat)
+                vote_conditions_met = (
+                    isinstance(feedback, dict) and
+                    len(feedback) == 6 and
+                    len(lines) >= 4
+                )
+            else:
+                vote_conditions_met = False
+
+        except (json.JSONDecodeError, TypeError):
+            username = None
             vote_conditions_met = False
 
-        return ip, vote_conditions_met
+        # Check if username is in whitelist (if username exists)
+        username_in_whitelist = username in WHITELIST_USERNAMES if username else False
+
+        # Early return if neither IP nor username is in whitelist
+        if not (ip_in_whitelist or username_in_whitelist):
+            return ip, username, False
+
+        return ip, username, vote_conditions_met
     except Exception as e:
         log.error(f"Error processing file content: {e}")
-        return None, False
+        return None, None, False
+
+def get_annotator_name(ip: Optional[str], username: Optional[str]) -> Optional[str]:
+    """Get annotator name from IP or username"""
+    # Check IP first
+    if ip:
+        for name, ips in WHITELIST_IPS_DICT.items():
+            if ip in ips:
+                return name
+
+    # Check username if IP didn't match
+    if username:
+        for name, usernames in WHITELIST_USERNAMES_DICT.items():
+            if username in usernames:
+                return name
+
+    return None
 
-def count_files_per_ip(reader: 'RemoteLogReader', start_date_str: str = "2025_02_18") -> Dict[str, int]:
-    """Count files per name from the given start date"""
+def count_files_per_annotator(reader: 'RemoteLogReader', start_date_str: str = "2025_02_18") -> Dict[str, int]:
+    """Count files per annotator name from the given start date, considering both IP and username"""
     # Convert start date string to datetime
     start_date = datetime.strptime(start_date_str, "%Y_%m_%d")
     name_counts = defaultdict(int)
-
     try:
         # Get current date for iteration
         current_date = start_date
         today = datetime.now()
 
-        # Create reverse mapping of IP to name
-        ip_to_name = {ip: name for name, ips in WHITELIST_IPS_DICT.items() for ip in ips}
-
         while current_date <= today:
             date_str = current_date.strftime("%Y_%m_%d")
 
@@ -135,16 +208,18 @@ def count_files_per_ip(reader: 'RemoteLogReader', start_date_str: str = "2025_02
                 # Get conversation logs for battle_anony mode
                 conv_logs = reader.get_conv_logs(date_str)
                 battle_anony_logs = conv_logs.get('battle_anony', {})
-
                 # Process each conversation
                 for conv_id, messages in battle_anony_logs.items():
                     if messages:
                         # Convert messages to file content format
                         content = '\n'.join(json.dumps(msg) for msg in messages)
-                        ip, vote_conditions_met = get_file_data(content)
-                        if vote_conditions_met and ip and ip in ip_to_name:
-                            name = ip_to_name[ip]
-                            name_counts[name] += 1
+                        ip, username, vote_conditions_met = get_file_data(content)
+
+                        if vote_conditions_met:
+                            # Get annotator name from either IP or username
+                            annotator_name = get_annotator_name(ip, username)
+                            if annotator_name:
+                                name_counts[annotator_name] += 1
 
         except Exception as e:
             log.error(f"Error processing logs for date {date_str}: {e}")
@@ -169,9 +244,6 @@ def download_files_by_name(reader: 'RemoteLogReader', start_date_str: str = "202
     data_dir = os.path.join(os.getcwd(), "data")
     os.makedirs(data_dir, exist_ok=True)
 
-    # Create reverse mapping of IP to name
-    ip_to_name = {ip: name for name, ips in WHITELIST_IPS_DICT.items() for ip in ips}
-
     # Convert start date string to datetime
     start_date = datetime.strptime(start_date_str, "%Y_%m_%d")
 
@@ -199,11 +271,14 @@ def download_files_by_name(reader: 'RemoteLogReader', start_date_str: str = "202
                         # Convert messages to file content
                         content = '\n'.join(json.dumps(msg) for msg in messages)
                         ip = get_ip_from_jsonl(content)
+                        username = get_username_from_jsonl(content)
+
+                        # Get annotator name from either IP or username
+                        annotator_name = get_annotator_name(ip, username)
 
-                        if ip and ip in ip_to_name:
-                            name = ip_to_name[ip]
+                        if annotator_name:
                             # Create directory structure for this name
-                            name_dir = os.path.join(data_dir, name)
+                            name_dir = os.path.join(data_dir, annotator_name)
                             valid_dir = os.path.join(name_dir, "valid")
                             invalid_dir = os.path.join(name_dir, "invalid")
                             os.makedirs(valid_dir, exist_ok=True)
@@ -252,7 +327,7 @@ def main():
         download_files_by_name(reader, check_sandbox=args.sandbox_check)
 
     # Count and display statistics
-    name_counts = count_files_per_ip(reader)
+    name_counts = count_files_per_annotator(reader)
     print("\nFile counts per annotator:")
     for name, count in sorted(name_counts.items(), key=lambda x: x[1], reverse=True):
         print(f"Name: {name:<20} Count: {count}")
count_ip_data.py DELETED
@@ -1,288 +0,0 @@
1
- import os
2
- import logging
3
- from datetime import datetime, timedelta
4
- from urllib.parse import unquote
5
- import json
6
- from collections import defaultdict
7
- import smbclient
8
- import shutil
9
- import re
10
- import argparse
11
-
12
- # List of IP addresses we care about
13
- WHITELIST_IPS = [
14
- "199.111.212.5",
15
- "175.159.122.63",
16
- "109.245.193.97",
17
- "158.195.18.232",
18
- "2607:fea8:4f40:4b00:e5b9:9806:6b69:233b",
19
- "66.254.231.49",
20
- "129.74.154.194",
21
- "175.196.44.217",
22
- "2601:600:8d00:9510:1d77:b610:9358:f443",
23
- "74.90.222.68",
24
- "2a02:169:3e9:0:6ce8:e76f:faed:c830",
25
- "70.50.179.57",
26
- "2a02:842a:24:5a01:8cd6:5b22:1189:6035",
27
- "2408:8418:6390:7603:40b:555f:774:a05d"
28
- ]
29
-
30
- logging.basicConfig(level=logging.WARNING)
31
- log = logging.getLogger(__name__)
32
-
33
- def get_ip_from_jsonl(file_path):
34
- """Extract IP from the first line of a JSONL file"""
35
- try:
36
- with smbclient.open_file(file_path, mode='r') as f:
37
- first_line = f.readline()
38
- data = json.loads(first_line)
39
- return data.get('ip')
40
- except Exception as e:
41
- log.error(f"Error reading file {file_path}: {e}")
42
- return None
43
-
44
- def get_chat_session_id(file_path):
45
- """Extract chat_session_id based on the file location:
46
- - For files under conv_logs: extract from filename
47
- - For files under sandbox_logs: read from file content
48
- """
49
- try:
50
- if 'conv_logs' in file_path:
51
- # Extract from filename for conv_logs
52
- # Handle Windows UNC path format
53
- filename = file_path.split('\\')[-1] # Get the last component of the path
54
- match = re.match(r'conv-log-([a-f0-9]+)\.json', filename)
55
- if match:
56
- return match.group(1)
57
- elif 'sandbox_logs' in file_path:
58
- # Read from file content for sandbox_logs
59
- with smbclient.open_file(file_path, mode='r') as f:
60
- data = json.loads(f.read())
61
- return data['sandbox_state'].get('chat_session_id')
62
- return None
63
- except Exception as e:
64
- log.error(f"Error getting chat_session_id from {file_path}: {e}")
65
- return None
66
-
67
- def get_sandbox_session_ids(server, share, date_str):
68
- """Get all chat_session_ids from sandbox logs for a given date"""
69
- sandbox_folder = f"\\\\{server}\\{share}\\{date_str}\\sandbox_logs"
70
- session_ids = set()
71
-
72
- if not smbclient.path.exists(sandbox_folder):
73
- return session_ids
74
-
75
- try:
76
- for file_info in smbclient.scandir(sandbox_folder):
77
- if file_info.name.endswith('.json'):
78
- file_path = f"{sandbox_folder}\\{file_info.name}"
79
- session_id = get_chat_session_id(file_path)
80
- if session_id:
81
- session_ids.add(session_id)
82
- except Exception as e:
83
- log.error(f"Error scanning sandbox folder {sandbox_folder}: {e}")
84
-
85
- return session_ids
86
-
87
- def check_vote_conditions(file_path):
88
- """Check if the last line of the file has type:vote and feedback dict with 6 keys"""
89
- try:
90
- with smbclient.open_file(file_path, mode='r') as f:
91
- # Read all lines and get the last non-empty line
92
- lines = [line.strip() for line in f if line.strip()]
93
- if not lines:
94
- return False
95
- last_line = lines[-1]
96
- try:
97
- data = json.loads(last_line)
98
- feedback = data.get('feedback')
99
- return (data.get('type') == 'vote' and
100
- isinstance(feedback, dict) and
101
- len(feedback) == 6)
102
- except json.JSONDecodeError:
103
- return False
104
- except Exception as e:
105
- log.error(f"Error checking vote conditions in file {file_path}: {e}")
106
- return False
107
-
108
- def get_file_data(file_path):
109
- """Read file and return IP and vote condition status"""
110
- try:
111
- with smbclient.open_file(file_path, mode='r') as f:
112
- lines = [line.strip() for line in f if line.strip()]
113
- if not lines:
114
- return None, False
115
-
116
- # Get IP from first line
117
- try:
118
- first_line_data = json.loads(lines[0])
119
- ip = first_line_data.get('ip')
120
- # Early return if IP is not in whitelist
121
- if ip not in WHITELIST_IPS:
122
- return None, False
123
- except json.JSONDecodeError:
124
- ip = None
125
-
126
- # Check vote conditions from last line
127
- try:
128
- last_line_data = json.loads(lines[-1])
129
- feedback = last_line_data.get('feedback')
130
- vote_conditions_met = (last_line_data.get('type') == 'vote' and
131
- isinstance(feedback, dict) and
132
- len(feedback) == 6)
133
- except json.JSONDecodeError:
134
- vote_conditions_met = False
135
-
136
- return ip, vote_conditions_met
137
- except Exception as e:
138
- log.error(f"Error reading file {file_path}: {e}")
139
- return None, False
140
-
141
- def count_files_per_ip(smb_url, start_date_str="2025_02_18"):
142
- """Count files per IP address from the given start date"""
143
- # Remove 'smb://' prefix and parse URL components
144
- url = smb_url[6:]
145
- creds_server, share = url.split('/', 1)
146
- creds, server = creds_server.rsplit('@', 1)
147
- username, password = creds.split(':', 1)
148
- password = unquote(password)
149
-
150
- # Register the SMB session
151
- smbclient.register_session(server, username=username, password=password, port=8080)
152
-
153
- # Convert start date string to datetime
154
- start_date = datetime.strptime(start_date_str, "%Y_%m_%d")
155
- ip_counts = defaultdict(int)
156
-
157
- try:
158
- # Get current date for iteration
159
- current_date = start_date
160
- today = datetime.now()
161
-
162
- while current_date <= today:
163
- date_str = current_date.strftime("%Y_%m_%d")
164
- folder_path = f"\\\\{server}\\{share}\\{date_str}\\conv_logs\\battle_anony"
165
-
166
- try:
167
- # List all JSON files in the battle_anony folder
168
- if smbclient.path.exists(folder_path):
169
- for file_info in smbclient.scandir(folder_path, search_pattern="conv-log-*.json"):
170
- file_path = f"{folder_path}\\{file_info.name}"
171
- ip, vote_conditions_met = get_file_data(file_path)
172
- if vote_conditions_met and ip:
173
- ip_counts[ip] += 1
174
- except Exception as e:
175
- log.error(f"Error processing folder {date_str}: {e}")
176
-
177
- # Move to next day
178
- current_date += timedelta(days=1)
179
-
180
- except Exception as e:
181
- log.error(f"Error accessing SMB share: {e}")
182
-
183
- return dict(ip_counts)
184
-
185
- def download_files_by_ip(smb_url, start_date_str="2025_02_18", check_sandbox=True):
186
- """Download files and organize them by IP address
187
-
188
- Args:
189
- smb_url (str): The SMB URL to connect to
190
- start_date_str (str): The start date in YYYY_MM_DD format
191
- check_sandbox (bool): Whether to check for matching sandbox logs
192
- """
193
- # Remove 'smb://' prefix and parse URL components
194
- url = smb_url[6:]
195
- creds_server, share = url.split('/', 1)
196
- creds, server = creds_server.rsplit('@', 1)
197
- username, password = creds.split(':', 1)
198
- password = unquote(password)
199
-
200
- # Register the SMB session
201
- smbclient.register_session(server, username=username, password=password)
202
-
203
- # Create base data directory
204
- data_dir = os.path.join(os.getcwd(), "data")
205
- os.makedirs(data_dir, exist_ok=True)
206
-
207
- # Convert start date string to datetime
208
- start_date = datetime.strptime(start_date_str, "%Y_%m_%d")
209
-
210
- try:
211
- # Get current date for iteration
212
- current_date = start_date
213
- today = datetime.now()
214
-
215
- while current_date <= today:
216
- date_str = current_date.strftime("%Y_%m_%d")
217
- folder_path = f"\\\\{server}\\{share}\\{date_str}\\conv_logs\\battle_anony"
218
-
219
- # Get all sandbox session IDs for this date
220
- sandbox_session_ids = get_sandbox_session_ids(server, share, date_str) if check_sandbox else set()
221
- try:
222
- # List all JSON files in the battle_anony folder
223
- if smbclient.path.exists(folder_path):
224
- for file_info in smbclient.scandir(folder_path):
225
- # Skip macOS metadata files
226
- if file_info.name.startswith('._'):
227
- continue
228
- if file_info.name.endswith('.json'):
229
- file_path = f"{folder_path}\\{file_info.name}"
230
- ip = get_ip_from_jsonl(file_path)
231
- if ip:
232
- # Create directory structure for this IP
233
- ip_dir = os.path.join(data_dir, ip)
234
- valid_dir = os.path.join(ip_dir, "valid")
235
- invalid_dir = os.path.join(ip_dir, "invalid")
236
- os.makedirs(valid_dir, exist_ok=True)
237
- os.makedirs(invalid_dir, exist_ok=True)
238
-
239
- # Check if chat_session_id exists in sandbox logs
240
- if check_sandbox:
241
- chat_session_id = get_chat_session_id(file_path)
242
- has_sandbox = chat_session_id in sandbox_session_ids if chat_session_id else False
243
- target_dir = valid_dir if has_sandbox else invalid_dir
244
- else:
245
- # When sandbox checking is disabled, put everything in valid
246
- target_dir = valid_dir
247
-
248
- # Download the file
249
- local_file_path = os.path.join(target_dir, file_info.name)
250
- try:
251
- with smbclient.open_file(file_path, mode='rb') as remote_file:
252
- with open(local_file_path, 'wb') as local_file:
253
- shutil.copyfileobj(remote_file, local_file)
254
- log.info(f"Downloaded {file_info.name} to {target_dir}")
255
- except Exception as e:
256
- log.error(f"Error downloading file {file_info.name}: {e}")
257
-
258
- except Exception as e:
259
- log.error(f"Error processing folder {date_str}: {e}")
260
-
261
- # Move to next day
262
- current_date += timedelta(days=1)
263
-
264
- except Exception as e:
265
- log.error(f"Error accessing SMB share: {e}")
266
-
267
- def main():
268
- smb_url = os.getenv("SMB_URL")
269
-
270
- # Add argument parser for optional parameters
271
- parser = argparse.ArgumentParser(description='Download and organize conversation files by IP')
272
- parser.add_argument('--sandbox-check', action='store_true', help='Check for matching sandbox logs')
273
- parser.add_argument('--download', action='store_true', help='Enable file download')
274
- args = parser.parse_args()
275
-
276
- # Download files if enabled
277
- if args.download:
278
- print("\nDownloading files and organizing by IP address...")
279
- download_files_by_ip(smb_url, check_sandbox=args.sandbox_check)
280
-
281
- # Count and display statistics
282
- ip_counts = count_files_per_ip(smb_url)
283
- print("\nFile counts per IP address:")
284
- for ip, count in sorted(ip_counts.items(), key=lambda x: x[1], reverse=True):
285
- print(f"IP: {ip:<15} Count: {count}")
286
-
287
- if __name__ == "__main__":
288
- main()
data/battle_stats.csv ADDED
@@ -0,0 +1 @@
+Annotator,Battle Count
data/last_update.txt ADDED
@@ -0,0 +1 @@
+2025-03-12 17:35:20