commit from kexinhuang

Files changed (12)

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

models/config.json → config.json RENAMED Viewed

File without changes

models/configuration_internlm.py → configuration_internlm.py RENAMED Viewed

File without changes

infer.py CHANGED Viewed

@@ -16,6 +16,10 @@ from models.modeling_internlm import InternLMForSequenceClassification
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
     tokenized_data = None
     logger.info(f"Tokenizing {data_type} data...")
@@ -89,8 +93,7 @@ def generate(args):
     categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
-    tokenizer = InternLMTokenizer.from_pretrained(args.model_name_or_path)
-    model = InternLMForSequenceClassification.from_pretrained(args.model_name_or_path, use_cache=False)
     collate_fn = DataCollatorWithPadding(tokenizer)
     model.to('cuda')
     model.eval()
@@ -151,7 +154,6 @@ def generate_score(data_path):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model_name_or_path', type=str, default='./models')
     parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
     parser.add_argument('--max_length', type=int, default=512)
     parser.add_argument('--val_bsz_per_gpu', type=int, default=16)

 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
+tokenizer = InternLMTokenizer.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
+model = InternLMForSequenceClassification.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
 def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
     tokenized_data = None
     logger.info(f"Tokenizing {data_type} data...")
     categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
     collate_fn = DataCollatorWithPadding(tokenizer)
     model.to('cuda')
     model.eval()
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
     parser.add_argument('--max_length', type=int, default=512)
     parser.add_argument('--val_bsz_per_gpu', type=int, default=16)

models/modeling_internlm.py → modeling_internlm.py RENAMED Viewed

File without changes

models/.DS_Store DELETED Viewed

Binary file (6.15 kB)

models/pytorch_model-00001-of-00002.bin → pytorch_model-00001-of-00002.bin RENAMED Viewed

File without changes

models/pytorch_model-00002-of-00002.bin → pytorch_model-00002-of-00002.bin RENAMED Viewed

File without changes

models/pytorch_model.bin.index.json → pytorch_model.bin.index.json RENAMED Viewed

File without changes

models/special_tokens_map.json → special_tokens_map.json RENAMED Viewed

File without changes

models/tokenization_internlm.py → tokenization_internlm.py RENAMED Viewed

File without changes

models/tokenizer.model → tokenizer.model RENAMED Viewed

File without changes