File size: 2,949 Bytes
0ca99ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash

# Check the environment: backups need both HF_TOKEN and DATASET_ID.
# When either is missing or empty, run Halo directly with no backup support.
if [[ -z "$HF_TOKEN" || -z "$DATASET_ID" ]]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    # exec replaces this shell with the java process, so nothing after it in
    # this branch can run (a non-interactive shell exits if exec itself fails).
    # JVM_OPTS is unquoted so the shell word-splits it into separate arguments.
    exec java ${JVM_OPTS} -jar /opt/halo/halo.jar
fi

# Activate the Python virtual environment so that python3 resolves to the venv
# interpreter (presumably where huggingface_hub is installed - confirm against
# the image build).
source /opt/venv/bin/activate

#######################################
# Upload one backup file to the HuggingFace dataset repository.
# Globals:   HF_TOKEN (read)   - access token handed to HfApi
#            DATASET_ID (read) - dataset repository id
# Arguments: $1 - local path of the file to upload
#            $2 - path/name the file gets inside the repository
# Outputs:   success or error message on stdout
# Returns:   0 on success, 1 when the upload raises an exception
#######################################
upload_backup() {
  local file_path="$1"
  local file_name="$2"

  # The Python program is a single-quoted (literal) string: no shell value is
  # ever spliced into the source text, so quotes or backslashes in the token,
  # repo id or file names cannot break or inject code. File names travel as
  # argv; the token and repo id travel through the environment, which also
  # keeps the token out of the process argument list. The explicit VAR="$VAR"
  # prefixes make the values visible to python3 even if they are not exported.
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" python3 -c '
import os
import sys

from huggingface_hub import HfApi

file_path, file_name = sys.argv[1], sys.argv[2]
api = HfApi(token=os.environ["HF_TOKEN"])
try:
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=file_name,
        repo_id=os.environ["DATASET_ID"],
        repo_type="dataset",
    )
    print(f"Successfully uploaded {file_name}")
except Exception as e:
    print(f"Error uploading file: {str(e)}")
    # Report the failure to the caller instead of exiting 0.
    sys.exit(1)
' "$file_path" "$file_name"
}

#######################################
# Download the newest backup archive from the HuggingFace dataset repository
# and extract it into ~/.halo2.
# Globals:   HF_TOKEN (read)   - access token handed to HfApi
#            DATASET_ID (read) - dataset repository id
# Arguments: none
# Outputs:   status or error message on stdout
# Returns:   0 when a backup was restored or none exists,
#            1 when listing, downloading or extracting raises an exception
#######################################
download_latest_backup() {
  # The Python program is a single-quoted (literal) string; the token and repo
  # id are handed over through the environment instead of being spliced into
  # the source text, so special characters in them cannot break or inject code.
  # The explicit VAR="$VAR" prefixes make the values visible to python3 even if
  # they are not exported.
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" python3 -c '
import os
import sys
import tarfile
import tempfile

from huggingface_hub import HfApi

repo_id = os.environ["DATASET_ID"]
api = HfApi(token=os.environ["HF_TOKEN"])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith("halo_backup_") and f.endswith(".tar.gz")]

    if not backup_files:
        print("No backup files found")
        # SystemExit is not an Exception subclass, so this leaves with status 0.
        sys.exit()

    # Lexicographically greatest name; with timestamped names this is the newest.
    latest_backup = max(backup_files)

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type="dataset",
            local_dir=temp_dir,
        )

        if filepath and os.path.exists(filepath):
            # NOTE(review): extractall places members wherever the archive says;
            # this assumes the dataset only holds archives this script created.
            with tarfile.open(filepath, "r:gz") as tar:
                tar.extractall(os.path.expanduser("~/.halo2"))
            print(f"Successfully restored backup from {latest_backup}")

except Exception as e:
    print(f"Error downloading backup: {str(e)}")
    # Report the failure to the caller instead of exiting 0.
    sys.exit(1)
'
}

# On first start, fetch the most recent backup before anything else runs.
printf '%s\n' "Downloading latest backup from HuggingFace..."
download_latest_backup

#######################################
# Sync loop: periodically archive ~/.halo2 and hand the archive to
# upload_backup.
# Globals:   SYNC_INTERVAL (read) - pause between runs, passed to sleep;
#            7200 is used when it is unset, empty or not a valid duration
# Arguments: none
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   never returns (infinite loop); meant to be run in the background
#######################################
sync_data() {
    local -r default_interval=7200
    # Plain or decimal number with an optional s/m/h/d suffix.
    local -r interval_re='^[0-9]+([.][0-9]+)?[smhd]?$'
    local interval timestamp backup_file work_dir archive

    while true; do
        echo "Starting sync process at $(date)"

        if [[ -d ~/.halo2 ]]; then
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="halo_backup_${timestamp}.tar.gz"

            # Build the archive inside a private scratch directory rather than
            # under a predictable name directly in /tmp.
            if work_dir=$(mktemp -d); then
                archive="${work_dir}/${backup_file}"

                # Compress the data directory.
                tar -czf "$archive" -C ~/.halo2 .

                # The tar exit status is not treated as fatal; the upload is
                # gated only on a non-empty archive actually existing.
                if [[ -s "$archive" ]]; then
                    echo "Uploading backup to HuggingFace..."
                    upload_backup "$archive" "$backup_file"
                else
                    echo "Backup archive was not created, skipping upload" >&2
                fi

                rm -rf -- "${work_dir:?}"
            else
                echo "Could not create a temporary directory, skipping this backup" >&2
            fi
        else
            echo "Data directory does not exist yet, waiting for next sync..."
        fi

        # An invalid value would make sleep fail immediately and turn this
        # loop into a busy loop of back-to-back uploads, so fall back instead.
        interval=${SYNC_INTERVAL:-$default_interval}
        if [[ ! "$interval" =~ $interval_re ]]; then
            echo "Invalid SYNC_INTERVAL '${interval}', using ${default_interval} seconds" >&2
            interval=$default_interval
        fi
        echo "Next sync in ${interval} seconds..."
        sleep "$interval"
    done
}

# Start the sync loop as a background job.
sync_data &

# Start Halo. exec replaces this shell with the java process; JVM_OPTS is
# unquoted, so the shell word-splits it into separate arguments (and it
# contributes no argument at all when unset or empty).
exec java ${JVM_OPTS} -jar /opt/halo/halo.jar