Caasi/Kexin HUANG
committed on
Commit
·
e711713
1
Parent(s):
afe1a8c
commit from kexinhuang
Browse files
- .DS_Store +0 -0
- models/config.json → config.json +0 -0
- models/configuration_internlm.py → configuration_internlm.py +0 -0
- infer.py +5 -3
- models/modeling_internlm.py → modeling_internlm.py +0 -0
- models/.DS_Store +0 -0
- models/pytorch_model-00001-of-00002.bin → pytorch_model-00001-of-00002.bin +0 -0
- models/pytorch_model-00002-of-00002.bin → pytorch_model-00002-of-00002.bin +0 -0
- models/pytorch_model.bin.index.json → pytorch_model.bin.index.json +0 -0
- models/special_tokens_map.json → special_tokens_map.json +0 -0
- models/tokenization_internlm.py → tokenization_internlm.py +0 -0
- models/tokenizer.model → tokenizer.model +0 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
models/config.json → config.json
RENAMED
File without changes
|
models/configuration_internlm.py → configuration_internlm.py
RENAMED
File without changes
|
infer.py
CHANGED
@@ -16,6 +16,10 @@ from models.modeling_internlm import InternLMForSequenceClassification
|
|
16 |
logger = logging.getLogger(__name__)
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
|
|
|
|
|
|
|
|
|
19 |
def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
|
20 |
tokenized_data = None
|
21 |
logger.info(f"Tokenizing {data_type} data...")
|
@@ -89,8 +93,7 @@ def generate(args):
|
|
89 |
|
90 |
categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
|
91 |
|
92 |
-
|
93 |
-
model = InternLMForSequenceClassification.from_pretrained(args.model_name_or_path, use_cache=False)
|
94 |
collate_fn = DataCollatorWithPadding(tokenizer)
|
95 |
model.to('cuda')
|
96 |
model.eval()
|
@@ -151,7 +154,6 @@ def generate_score(data_path):
|
|
151 |
|
152 |
if __name__ == '__main__':
|
153 |
parser = argparse.ArgumentParser()
|
154 |
-
parser.add_argument('--model_name_or_path', type=str, default='./models')
|
155 |
parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
|
156 |
parser.add_argument('--max_length', type=int, default=512)
|
157 |
parser.add_argument('--val_bsz_per_gpu', type=int, default=16)
|
|
|
16 |
logger = logging.getLogger(__name__)
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
|
19 |
+
tokenizer = InternLMTokenizer.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
|
20 |
+
model = InternLMForSequenceClassification.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
|
21 |
+
|
22 |
+
|
23 |
def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
|
24 |
tokenized_data = None
|
25 |
logger.info(f"Tokenizing {data_type} data...")
|
|
|
93 |
|
94 |
categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
|
95 |
|
96 |
+
|
|
|
97 |
collate_fn = DataCollatorWithPadding(tokenizer)
|
98 |
model.to('cuda')
|
99 |
model.eval()
|
|
|
154 |
|
155 |
if __name__ == '__main__':
|
156 |
parser = argparse.ArgumentParser()
|
|
|
157 |
parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
|
158 |
parser.add_argument('--max_length', type=int, default=512)
|
159 |
parser.add_argument('--val_bsz_per_gpu', type=int, default=16)
|
models/modeling_internlm.py → modeling_internlm.py
RENAMED
File without changes
|
models/.DS_Store
DELETED
Binary file (6.15 kB)
|
|
models/pytorch_model-00001-of-00002.bin → pytorch_model-00001-of-00002.bin
RENAMED
File without changes
|
models/pytorch_model-00002-of-00002.bin → pytorch_model-00002-of-00002.bin
RENAMED
File without changes
|
models/pytorch_model.bin.index.json → pytorch_model.bin.index.json
RENAMED
File without changes
|
models/special_tokens_map.json → special_tokens_map.json
RENAMED
File without changes
|
models/tokenization_internlm.py → tokenization_internlm.py
RENAMED
File without changes
|
models/tokenizer.model → tokenizer.model
RENAMED
File without changes
|