wuhunfeng77 committed on
Commit
0ca99ad
·
verified ·
1 Parent(s): e4a89e2

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +114 -0
sync_data.sh ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash

# Check the environment: without both HF_TOKEN and DATASET_ID there is no
# backup target, so skip the backup machinery and start Halo directly.
if [[ -z "${HF_TOKEN:-}" || -z "${DATASET_ID:-}" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  # exec replaces this shell with the JVM, so nothing after it can run.
  # JVM_OPTS is intentionally unquoted so that it may carry several
  # whitespace-separated flags.
  # shellcheck disable=SC2086
  exec java ${JVM_OPTS} -jar /opt/halo/halo.jar
fi

# Activate the Python virtual environment used by the python3 calls in the
# backup functions.
source /opt/venv/bin/activate
12
+
#######################################
# Upload one backup archive to the HuggingFace dataset repository.
# Globals:
#   HF_TOKEN   (read) - access token handed to HfApi
#   DATASET_ID (read) - id of the dataset repository to upload into
# Arguments:
#   $1 - local path of the file to upload
#   $2 - name the file gets inside the repository
# Outputs:
#   Writes a success or error message to stdout.
# Returns:
#   0 on success, non-zero if python3 or the upload fails.
#######################################
upload_backup() {
  local file_path="$1"
  local file_name="$2"

  # Values travel through argv and the environment rather than being pasted
  # into the Python source, so quotes or other special characters in them
  # cannot break or alter the code. The HF_TOKEN= prefix makes the token
  # visible to python3 even when the shell variable is not exported.
  HF_TOKEN="$HF_TOKEN" python3 -c '
import os
import sys

from huggingface_hub import HfApi

file_path, file_name, repo_id = sys.argv[1:4]
api = HfApi(token=os.environ["HF_TOKEN"])
try:
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=file_name,
        repo_id=repo_id,
        repo_type="dataset",
    )
    print(f"Successfully uploaded {file_name}")
except Exception as e:
    print(f"Error uploading file: {str(e)}")
    # Report the failure to the calling shell instead of exiting 0.
    sys.exit(1)
' "$file_path" "$file_name" "$DATASET_ID"
}
39
+
#######################################
# Download the newest halo_backup_*.tar.gz from the HuggingFace dataset
# repository and extract it into ~/.halo2.
# Globals:
#   HF_TOKEN   (read) - access token handed to HfApi
#   DATASET_ID (read) - id of the dataset repository to read from
# Arguments:
#   None
# Outputs:
#   Writes a status or error message to stdout.
# Returns:
#   0 when a backup was restored or none exists; non-zero if python3, the
#   download or the extraction fails.
#######################################
download_latest_backup() {
  # The repo id goes through argv and the token through the environment, so
  # neither is pasted into the Python source. The HF_TOKEN= prefix makes the
  # token visible to python3 even when the shell variable is not exported.
  HF_TOKEN="$HF_TOKEN" python3 -c '
import os
import sys
import tarfile
import tempfile

from huggingface_hub import HfApi

repo_id = sys.argv[1]
api = HfApi(token=os.environ["HF_TOKEN"])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [
        f for f in files
        if f.startswith("halo_backup_") and f.endswith(".tar.gz")
    ]

    if not backup_files:
        print("No backup files found")
        # SystemExit is not an Exception subclass, so this leaves with status 0.
        sys.exit()

    # Same pick as sorted(backup_files)[-1]: the lexicographically last name.
    latest_backup = max(backup_files)

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type="dataset",
            local_dir=temp_dir,
        )

        if filepath and os.path.exists(filepath):
            with tarfile.open(filepath, "r:gz") as tar:
                # NOTE(review): extractall trusts the member paths stored in
                # the archive; consider passing an extraction filter if the
                # repository could ever hold archives from another source.
                tar.extractall(os.path.expanduser("~/.halo2"))
            print(f"Successfully restored backup from {latest_backup}")

except Exception as e:
    print(f"Error downloading backup: {str(e)}")
    # Report the failure to the calling shell instead of exiting 0.
    sys.exit(1)
' "$DATASET_ID"
}
79
+
# Download the latest backup on first start.
printf '%s\n' "Downloading latest backup from HuggingFace..."
download_latest_backup
83
+
#######################################
# Periodic sync loop: archive ~/.halo2 and upload it, forever.
# Each round creates /tmp/halo_backup_<timestamp>.tar.gz, passes it to
# upload_backup, deletes the local archive, then sleeps.
# Globals:
#   SYNC_INTERVAL (read/written) - seconds between rounds; defaults to 7200
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; an archive-failure message on stderr.
# Returns:
#   Never returns (infinite loop).
#######################################
sync_data() {
  local data_dir timestamp backup_file archive tar_status
  data_dir=~/.halo2

  while true; do
    echo "Starting sync process at $(date)"

    if [[ -d "$data_dir" ]]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="halo_backup_${timestamp}.tar.gz"
      archive="/tmp/${backup_file}"

      # Compress the data directory.
      tar_status=0
      tar -czf "$archive" -C "$data_dir" . || tar_status=$?

      # GNU tar exits 1 when a file changed while being read, which is
      # expected for a live data directory; only higher statuses are fatal.
      if (( tar_status > 1 )); then
        echo "Failed to create backup archive (tar exit ${tar_status}), skipping upload" >&2
      else
        echo "Uploading backup to HuggingFace..."
        upload_backup "$archive" "$backup_file"
      fi

      rm -f -- "$archive"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "$SYNC_INTERVAL"
  done
}
109
+
# Start the sync loop as a background job.
sync_data &

# Start Halo: exec replaces this shell with the JVM process.
# JVM_OPTS stays unquoted on purpose so that it splits into separate flags.
# shellcheck disable=SC2086
exec java $JVM_OPTS -jar /opt/halo/halo.jar