diff --git "a/index/docstore.json" "b/index/docstore.json" new file mode 100644--- /dev/null +++ "b/index/docstore.json" @@ -0,0 +1 @@ +{"docstore/data": {"3e9bf844-0a4e-4de1-8be3-8a00f47f9be1": {"__data__": {"id_": "3e9bf844-0a4e-4de1-8be3-8a00f47f9be1", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "13560595-482a-49aa-a22e-445536e10517", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "e17811e7213cf8210798fac0f1cdd533bbdb0c93d1d79ee941cc306fb0e97a7f", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "d950eb15-82e3-4c1c-b8bb-d5a7249aadae", "node_type": "1", "metadata": {}, "hash": "9361f3141db96314f35b9d7266cfa9e85487d4f1f7800286de2823bf579c157f", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "arXiv:2105.07464v6 [cs.CL] 1 Sep 2021\n\n FEW-NERD: A Few-shot Named Entity Recognition Dataset\n Ning Ding1, Pengjun Xie2, Hai-Tao Zheng33\u2217, Xiaobin Wang2,\n Xu Han1,3\u2217 , Guangwei Xu2\u2217, Yulin Chen\u2020, Zhiyuan Liu1\u2020\n 1Department of Computer Science and Technology, Tsinghua University\n 2Alibaba Group, 3Shenzhen International Graduate School, Tsinghua University\n {dingn18, yl-chen17, hanxu17}@mails.tsinghua.edu.cn\n {kunka.xgw, xuanjie.wxb, chengchen.xpj}@alibaba-inc.com\n {zheng.haitao}@sz.tsinghua.edu.cn, {liuzy}@tsinghua.edu.cn\n https://ningding97.github.io/fewnerd/\n\n Abstract\n Recently, considerable literature has grown up\n\n\n around the theme of few-shot named entity\n recognition (NER), but little published bench-\n\n mark data specifically focused on the practical\n and challenging task. Current approaches col-\n lect existing supervised NER datasets and re-\n organize them into the few-shot setting for em-\n pirical study. These strategies conventionally\n aim to recognize coarse-grained entity types\n\n\n with few examples, while in practice, most\n unseen entity types are fine-grained. In this\n\n paper, we present FEW-NERD, a large-scale\n\n\n human-annotated few-shot NER dataset with\n\n a hierarchy of 8 coarse-grained and 66 fine-\n grained entity types. FEW-NERD consists of\n 188,238 sentences from Wikipedia, 4,601,160\n words are included and each is annotated as\n context or a part of a two-level entity type.\n To the best of our knowledge, this is the first\n few-shot NER dataset and the largest human-\n crafted NER dataset. We construct bench-\n mark tasks with different emphases to com-\n prehensively assess the generalization capabil-\n ity of models. Extensive empirical results and\n analysis show that FEW-NERD is challeng-\n ing and the problem requires further research.\n ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 2789, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "d950eb15-82e3-4c1c-b8bb-d5a7249aadae": {"__data__": {"id_": "d950eb15-82e3-4c1c-b8bb-d5a7249aadae", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "13560595-482a-49aa-a22e-445536e10517", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "e17811e7213cf8210798fac0f1cdd533bbdb0c93d1d79ee941cc306fb0e97a7f", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "3e9bf844-0a4e-4de1-8be3-8a00f47f9be1", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "576796b57e689b22de15d34b82d5053b9d5bdf4a99467e23748720950f46b830", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "We make FEW-NERD public at https://\n ningding97.github.io/fewnerd/. 1\n1 Introduction\nNamed entity recognition (NER), as a fundamental\ntask in information extraction, aims to locate and\nclassify named entities from unstructured natural\nlanguage. A considerable number of approaches\nequipped with deep neural networks have shown\npromising performance (Chiu and Nichols, 2016)\non fully supervised NER. Notably, pre-trained lan-\nguage models (e.g., BERT (Devlin et al., 2019a))\n \u2217 equal contributions\n \u2020 corresponding authors\n 1The baselines are available at https://github.\ncom/thunlp/Few-NERD\n AirportOtherPaintingFilmMusicBroadcast\n OtherLibraryHotelHospital TrainShipGameCarSoftwareFoodOther\n War SoldierScholar\n Disaster AirplanePolitician\n Election Other\n Director\n BuildingArt Athlete\n Event\n Product Artist/author\n Person\n GPE Actor\n Location\n OrganizationMISC LivingthingMedical\n LanguageLaw\n EDUdegreeGod\n ChemicalCurrencyDisease\n AstronomyBiology\n Award\n PoliticalpartySportsleagueSportsteam\n ShowORG\n BodiesofwaterIsland GovernmentReligion\n MountainOtherPark MediaOther\n Transit\n Company\n Education\n\n Figure 1: An overview of FEW-NERD. The inner cir-\n cle represents the coarse-grained entity types and the\n outer circle represents the fine-grained entity types,\n some types are denoted by abbreviations.\n\n\n\nwith an additional classifier achieve significant suc-\n cess on this task and gradually become the base\n paradigm. Such studies demonstrate that deep mod-\n els could yield remarkable results accompanied by\n a large amount of annotated corpora.\n With the emerging of knowledge from various\n domains, named entities, especially ones that need\n professional knowledge to understand, are diffi-\n cult to be manually annotated on a large scale.\n Under this circumstance, studying NER systems\n that could learn unseen entity types with few ex-\n amples, i.e., few-shot NER, plays a critical role\n in this area. There is a growing body of litera-\n ture that recognizes the importance of few-shot\n NER and contributes to the task (Hofer et al., 2018;\n Fritzler et al., 2019; Yang and Katiyar, 2020; Li\n et al., 2020a; Huang et al., 2020). Unfortunately,\n there is still no dataset specifically designed for", "mimetype": "text/plain", "start_char_idx": 2789, "end_char_idx": 6880, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "89d6be11-da41-4dd7-899f-1340a92c4cd2": {"__data__": {"id_": "89d6be11-da41-4dd7-899f-1340a92c4cd2", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "19de4927-f29a-4b3f-8694-79ef720fc706", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9801241be9f2302d87263774a128abb94c2e9fa41f952966c8a9af0d2a7e86e", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "3a410009-58ad-4f35-9627-bfaa50dd56d8", "node_type": "1", "metadata": {}, "hash": "4c97d26a250d3d4ce5e994115b6ac715f8f24719ea9e0fae9bb36e126a07a358", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "few-shot NER. Hence, these methods collect pre-\nviously proposed supervised NER datasets and re-\norganize them into a few-shot setting. Common\noptions of datasets include OntoNotes (Weischedel\net al., 2013), CoNLL\u201903 (Tjong Kim Sang, 2002),\nWNUT\u201917 (Derczynski et al., 2017), etc. These\nresearch efforts of few-shot learning for named\nentities mainly face two challenges: First, most\ndatasets used for few-shot learning have only 4-\n18 coarse-grained entity types, making it hard to\nconstruct an adequate variety of \u201cN-way\u201d meta-\ntasks and learn correlation features. And in real-\nity, we observe that most unseen entities are fine-\ngrained. Second, because of the lack of benchmark\ndatasets, the settings of different works are inconsis-\ntent (Huang et al., 2020; Yang and Katiyar, 2020),\nleading to unclear comparisons. To sum up, these\nmethods make promising contributions to few-shot\nNER, nevertheless, a specific dataset is urgently\nneeded to provide a unified benchmark dataset for\nrigorous comparisons.\n\n To alleviate the above challenges, we present a\nlarge-scale human-annotated few-shot NER dataset,\nFEW-NERD , which consists of 188.2k sentences\nextracted from the Wikipedia articles and 491.7k\nentities are manually annotated by well-trained an-\nnotators (Section 4.3). To the best of our knowl-\nedge, FEW-NERD is the first dataset specially con-\nstructed for few-shot NER and also one of the\nlargest human-annotated NER dataset (statistics\nin Section 5.1). We carefully design an annota-\ntion schema of 8 coarse-grained entity types and\n66 fine-grained entity types by conducting several\npre-annotation rounds. (Section 4.1). In contrast,\nas the most widely-used NER datasets, CoNLL\nhas 4 entity types, WNUT\u201917 has 6 entity types\nand OntoNotes has 18 entity types (7 of them are\nvalue types). The variety of entity types makes\nFEW-NERD contain rich contextual features with\na finer granularity for better evaluation of few-\nshot NER. The distribution of the entity types in\nFEW-NERD is shown in Figure 1, more details are\nreported in Section 5.1. We conduct an analysis of\nthe mutual similarities among all the entity types\nof FEW-NERD to study knowledge transfer (Sec-\ntion 5.2). The results show that our dataset can\nprovide sufficient correlation information between\ndifferent entity types for few-shot learning.\n For benchmark settings, we design three tasks\non the basis of FEW-NERD , including a stan-\ndard supervised task (FEW-NERD (SUP)) and two\n few-shot tasks ( FEW-NERD-INTRA) and FEW-\n NRTD (INTER)), for more details see Section 6.\n FEW-NERD (SUP), FEW-NERD (INTRA), and\n FEW-NERD (INTER) assess instance-level gener-\n alization, type-level generalization and knowledge\n transfer of NER methods, respectively. We im-\n plement models based on the recent state-of-the-\n art approaches and evaluate them on FEW-NERD\n(Section 7). ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 2862, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "3a410009-58ad-4f35-9627-bfaa50dd56d8": {"__data__": {"id_": "3a410009-58ad-4f35-9627-bfaa50dd56d8", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "19de4927-f29a-4b3f-8694-79ef720fc706", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9801241be9f2302d87263774a128abb94c2e9fa41f952966c8a9af0d2a7e86e", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "89d6be11-da41-4dd7-899f-1340a92c4cd2", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "cb4d43621c35c5b8f1ff16218a1e21d6f654171eea545a23bef5b185dcae6c5d", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "08a4de1b-58e0-4975-a68c-99b215ddca75", "node_type": "1", "metadata": {}, "hash": "14e4f82f60f388fa669cf79d4ab333062b043b8a018d325a887feee503968e1d", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "And empirical results show that\n FEW-NERD is challenging on all these three set-\n tings. We also conduct sets of subsidiary experi-\n ments to analyze promising directions of few-shot\n NER. ", "mimetype": "text/plain", "start_char_idx": 2862, "end_char_idx": 3051, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "08a4de1b-58e0-4975-a68c-99b215ddca75": {"__data__": {"id_": "08a4de1b-58e0-4975-a68c-99b215ddca75", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "19de4927-f29a-4b3f-8694-79ef720fc706", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9801241be9f2302d87263774a128abb94c2e9fa41f952966c8a9af0d2a7e86e", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "3a410009-58ad-4f35-9627-bfaa50dd56d8", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "973f10c7404cc1f6eb4b7419ecb9f9b65f7f4a1960933a0d9e77ae416d60afb4", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Hopefully, the research of few-shot NER\n could be further facilitated by FEW-NERD.\n\n 2 Related Work\n\nAs a pivotal task of information extraction, NER\n is essential for a wide range of technologies (Cui\n et al., 2017; Li et al., 2019b; Ding et al., 2019; Shen\n et al., 2020). And a considerable number of NER\n datasets have been proposed over the years. For\n example, CoNLL\u201903 (Tjong Kim Sang, 2002) is re-\n garded as one of the most popular datasets, which is\n curated from Reuters News and includes 4 coarse-\n grained entity types. Subsequently, a series of NER\n datasets from various domains are proposed (Bala-\n suriya et al., 2009; Ritter et al., 2011; Weischedel\n et al., 2013; Stubbs and Uzuner, 2015; Derczynski\n et al., 2017). These datasets formulate a sequence\n labeling task and most of them contain 4-18 entity\n types. Among them, due to the high quality and\n size, OntoNotes 5.0 (Weischedel et al., 2013) is\n considered as one of the most widely used NER\n datasets recently.\n As approaches equipped with deep neural net-\nworks have shown satisfactory performance on\n NER with sufficient supervision (Lample et al.,\n 2016; Ma and Hovy, 2016), few-shot NER has\n received increasing attention (Hofer et al., 2018;\n Fritzler et al., 2019; Yang and Katiyar, 2020; Li\n et al., 2020a). Few-shot NER is a considerably\n challenging and practical problem that could facil-\n itate the understanding of textual knowledge for\n neural model (Huang et al., 2020). Due to the lack\n of specific benchmarks of few-shot NER, current\n methods collect existing NER datasets and use dif-\n ferent few-shot settings. To provide a benchmark\n that could comprehensively assess the generaliza-\n tion of models under few examples, we annotate\n FEW-NERD. To make the dataset practical and\n close to reality, we adopt a fine-grained schema of", "mimetype": "text/plain", "start_char_idx": 3051, "end_char_idx": 4882, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "703bb83a-4aea-4eb3-85a6-086d25555ccb": {"__data__": {"id_": "703bb83a-4aea-4eb3-85a6-086d25555ccb", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7a282a7e-fbe9-4d98-a3ca-17e66c5b5818", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "9ff787cc5d317b04ad7ea97478454a58f4d9d955a18ef033743f48266cabc9ed", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "f6c2c3db-ba3c-489e-9459-e6b4f579286b", "node_type": "1", "metadata": {}, "hash": "18f27a3e388a6377a52748c1e10ebabcd503f8e045e24ab7b9b845f67d9291ee", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "entity annotation, which is inspired and modified with dense entities. Thus, as shown in Algorithm 1\nfrom previous fine-grained entity recognition stud- we adopt a N -way K\u223c2K-shot setting in our pa-\nies (Ling and Weld, 2012; Gillick et al., 2014; Choi per, the primary principle of which is to ensure that\net al., 2018; Ringland et al., 2019). each class in S contain K\u223c2K examples, effec-\n tively alleviating the limitations of sampling.\n3 Problem Formulation\n3.1 Named Entity Recognition Algorithm 1: Greedy N -way K\u223c2K-shot\n sampling algorithm\nNER is normally formulated as a sequence labeling Input: Dataset X , Label set Y, N , K\nproblem. Specifically, for an input sequence of Output: output result\ntokens x = {x1, x2, ..., x}, NER aims to assignt 1 S \u2190 \u2205; // Init the support set\neach token xi a label yi \u2208 Y to indicate either the // Init the count of entity types\ntoken is a part of a named entity (such as Person, 2 for i = 1 to N do\nOrganization, Location) or not belong to 3 Count[i] = 0 ;\nany entities (denoted as O class), Y being a set of\npre-defined entity-types. 4 repeat\n 5 Randomly sample (x, y) \u2208 X ;\n3.2 Few-shot Named Entity Recognition 6 Compute |Count| and Counti after\nN -way K-shot learning is conducted by iteratively update ;\nconstructing episodes. For each episode in train- 7 if |Count| > N or \u2203Count[i] > 2K\ning, N classes (N -way) and K examples (K-shot) then\nfor each class are sampled to build a support set 8 Continue ;\nStrain = {x(i), y(i)}i=1 N \u2217K, and K\u2032 examples for 9 elseS = S \u22c3(x, y) ;\neach of N classes are sampled to construct a query 10\nset Qtrain = {x(j), y(j)}j=1 \u2032, and S \u22c2 Q = \u2205.N \u2217K 11 Update Counti ;\nFew-shot learning systems are trained by predict- 12 until Counti \u2265 K for i = 1 to N;\ning labels of query set Qtrain with the information\nof support set Strain. The supervision of Strain and\nQtrain are available in training. In the testing pro- 4 Collection of FEW-NERD\ncedure, all the classes are unseen in the training\nphase, and by using few labeled examples of sup- 4.1 Schema of Entity Types\nport set Stest, few-shot learning systems need to The primary goal of FEW-NERD is to construct a\n(S \u22c2 Q = \u2205). However, in the sequence labeling\n make predictions of the unlabeled query set Qtest fine-grained dataset that could specifically be used\n in the few-shot NER scenario. Hence, schemas\nproblem like NER, a sentence may contain multiple of traditional NER datasets such as CoNLL\u201903,\nentities from different classes. And it is imperative OntoNotes that only contain 4-18 coarse-grained\nto sample examples in sentence-level since contex- types could not meet the requirements. The schema\ntual information is crucial for sequence labeling of FEW-NERD is inspired by FIGER (Ling and\nproblems, especially for NER. Thus the sampling Weld, 2012), which contains 112 entity tags with\nis more difficult than conventional classification good coverage. On this basis, we make some mod-\ntasks like relation extraction (Han et al., 2018). ifications according to the practical situation. It is\n Some previous works (Yang and Katiyar, 2020; worth noting that FEW-NERD focuses on named\nLi et al., 2020a) use greedy-based sampling strate- entities, omitting value/numerical/time/date entity\ngies to iteratively judge if a sentence could be types (Weischedel et al., 2013; Ringland et al.,\nadded into the support set, but the limitation be- 2019) like Cardinal, Day, Percent, etc.\ncomes gradually strict during the sampling. For First, we modify the FIGER schema into a\nexample, when it comes to a 5-way 5-shot setting, two-level hierarchy to incorporate simple do-\nif the support set already had 4 classes with 5 exam- main information (Gillick et al., 2014). The\nples and 1 class with 4 examples, the next sampled coarse-grained types are {Person, Location,\nsentence must only contain the specific one entity Organization, Art, Building, Product,\nto strictly meet the requirement of 5 way 5 shot. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 6523, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "f6c2c3db-ba3c-489e-9459-e6b4f579286b": {"__data__": {"id_": "f6c2c3db-ba3c-489e-9459-e6b4f579286b", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7a282a7e-fbe9-4d98-a3ca-17e66c5b5818", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "9ff787cc5d317b04ad7ea97478454a58f4d9d955a18ef033743f48266cabc9ed", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "703bb83a-4aea-4eb3-85a6-086d25555ccb", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c1fea1f263e861918b8164dd5899ca02120368c82ba7b941e44eca96b94f63f8", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "It Event, Miscellaneous }. Then we statisti-\nis not suitable for FEW-NERD since it is annotated cally count the frequency of entity types in the", "mimetype": "text/plain", "start_char_idx": 6523, "end_char_idx": 6759, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "547f541a-ed82-4d22-af00-51a95dc3f0e1": {"__data__": {"id_": "547f541a-ed82-4d22-af00-51a95dc3f0e1", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "48384cbb-f501-4add-bb32-198c5bc033a8", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "4c2347194cf32acf5b01836de9186cd908d8886adcabbe90c5ec0e8fdfdcafdd", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "5bd4a82d-022c-47e6-9bbb-8bdeef20f515", "node_type": "1", "metadata": {}, "hash": "4f7a69ed7a63d5b5d88d8e0c864f4c8853a1ff280af54e1480ad198bda04f518", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " automatically annotated FIGER. By removing en-\n tity types with low frequency, there are 80 fine-\n grained types remaining. Finally, to ensure the\n practicality of the annotation process, we conduct\n rounds of pre-annotation and make further mod-\n ifications to the schema. For example, we com-\n bine the types of Country, Province/State,\n City, Restrict into a class GPE, since it is\n difficult to distinguish these types only based on\n context (especially GPEs at different times). For\n\n another example, we create a Person-Scholar\n type, because in the pre-annotation step, we found\n that there are numerous person entities that express\n the semantics of research, such as mathematician,\n physicist, chemist, biologist, paleontologist, but\n the Figer schema does not define this kind of entity\n type. We also conduct rounds of manual denoising\n to select types with truly high frequency.\n Consequently, the finalized schema of FEW-\n NERD includes 8 coarse-grained types and 66\n fine-grained types, which is detailedly shown ac-\n companied by selected examples in Appendix.\n\n 4.2 Paragraph Selection\n\nThe raw corpus we use is the entire Wikipedia\n dump in English, which has been widely used in\n constructions of NLP datasets (Han et al., 2018;\nYang et al., 2018; Wang et al., 2020). Wikipedia\n contains a large variety of entities and rich contex-\n tual information for each entity.\n FEW-NERD is annotated in paragraph-level,\n and it is crucial to effectively select paragraphs\nwith sufficient entity information. Moreover, the\n category distribution of the data is expected to\n be balanced since the data is applied in a few-\n shot scenario. It is also a key difference between\n FEW-NERD and previous NER datasets, whose\n entity distributions are usually considerably uneven.\n In order to do so, we construct a dictionary for each\n fine-grained type by automatically collecting entity\n mentions annotated in FIGER, then the dictionaries\n are manually denoised. We develop a search engine\n to retrieve paragraphs including entity mentions of\n the distant dictionary. For each entity, we choose\n10 paragraphs and construct a candidate set. Then,\n for each fine-grained class, we randomly select\n1000 paragraphs for manual annotation. Eventu-\n ally, 66,000 paragraphs are selected, consisting of\n 66 fine-grained entity types, and each paragraph\n contains an average of 61.3 tokens.\n ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 2398, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "5bd4a82d-022c-47e6-9bbb-8bdeef20f515": {"__data__": {"id_": "5bd4a82d-022c-47e6-9bbb-8bdeef20f515", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "48384cbb-f501-4add-bb32-198c5bc033a8", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "4c2347194cf32acf5b01836de9186cd908d8886adcabbe90c5ec0e8fdfdcafdd", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "547f541a-ed82-4d22-af00-51a95dc3f0e1", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "4f67430b5ae0a067bfe8d8399de04e224234e324f5e4e2ca18e032a9ab9275f4", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "de0a20d6-b6dc-4ff3-8b6e-f6ad19472b08", "node_type": "1", "metadata": {}, "hash": "e923b7225a47572e82d13980d6e364dd38b01d009812324d8c368525add1c594", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Paragraph\n London[Art-Music] is the fifth al-\n bum by the British[Loc-GPE] rock band\n Jesus Jones[Org-ShowOrg] in 2001 through\n Koch Records[Org-Company]. Following the com-\n mercial failure of 1997\u2019s \u201dAlready[Art-Music]\u201d\n which led to the band and EMI[Org-Company] part-\n ing ways, the band took a hiatus before regathering\n for the recording of \u201dLondon[Art-Music]\u201d for\n Koch/Mi5 Recordings, with a more alternative\n rock approach as opposed to the techno sounds\n on their previous albums. The album had low-key\n promotion, initially only being released in the\n United States[Loc-GPE]. ", "mimetype": "text/plain", "start_char_idx": 2398, "end_char_idx": 3085, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "de0a20d6-b6dc-4ff3-8b6e-f6ad19472b08": {"__data__": {"id_": "de0a20d6-b6dc-4ff3-8b6e-f6ad19472b08", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "48384cbb-f501-4add-bb32-198c5bc033a8", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "4c2347194cf32acf5b01836de9186cd908d8886adcabbe90c5ec0e8fdfdcafdd", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "5bd4a82d-022c-47e6-9bbb-8bdeef20f515", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "7431dd7f03f5d180517e133d870a0219c77f32d2552a9b6218cce714a9867162", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Two EP\u2019s were released\n from the album, \u201dNowhere Slow[Art-Music]\u201d and\n \u201dIn the Face Of All This[Art-Music]\u201d.\n\n Table 1: An annotated case of FEW-NERD\n\n 4.3 Human Annotation\n As named entities are expected to be context-\n dependent, annotation of named entities is com-\n plicated, especially with such a large number of\n entity types. For example, shown in Table 1,\n\u201cLondon is the fifth album by the British rock\n band Jesus Jones..\u201d, where London should be an-\n notated as an entity of Art-Music rather than\n Location-GPE. Such a situation requires that\n the annotator has basic linguistic training and can\n make reasonable judgments based on the context.\n Annotators of FEW-NERD include 70 annota-\n tors and 10 experienced experts. All the annotators\n have linguistic knowledge and are instructed with\n detailed and formal annotation principles. Each\n paragraph is independently annotated by two well-\n trained annotators. Then, an experienced expert\n goes over the paragraph for possible wrong or omis-\n sive annotations, and make the final decision. With\n 70 annotators participated, each annotator spends\n an average of 32 hours during the annotation pro-\n cess. We ensure that all the annotators are fairly\n compensated by market price according to their\n workload (the number of examples per hour). The\n data is annotated and submitted in batches, and\n each batch contains 1000\u223c3000 sentences. To en-\n sure the quality of FEW-NERD , for each batch\n of data, we randomly select 10% sentences and\n conduct double-checking. If the accuracy of the an-\n notation is lower than 95 % (measured in sentence-\n level), the batch will be re-annotated. Furthermore,\n we calculate the Cohen\u2019s Kappa (Cohen, 1960) to\n measure the aggreements between two annotators,", "mimetype": "text/plain", "start_char_idx": 3085, "end_char_idx": 4866, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "39abd0c8-e1f5-4ee3-8da1-537353646ec6": {"__data__": {"id_": "39abd0c8-e1f5-4ee3-8da1-537353646ec6", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "297587ad-1bff-4027-9c1e-7b5732f0d283", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "63f1d00453bd4c3bd83a691d994f62f1c05e4d9e110966dee0998fbd8a27b3c0", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "ec59971c-cf54-40e2-9a55-c5de0cdbea76", "node_type": "1", "metadata": {}, "hash": "6c9ea7767d7a070032e2a32ebc07b5b3fac0e8ae19e07604840c27e16a867207", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Art Building Event Loc Org MISC Person Product\n\n the result is 76.44%, which indicates a high degree 0\n 1\n 2 z 400\n 3\n\n of consistency. 4\n 5\n\n 6 300\n 7\n\n 8\n 9\n\n 10\n\n 11 200\n 12\n 13\n\n\n\n\n 5 Data Analysis 14\n 15\n 16\n 17 100\n 18\n\n 19\n 20\n\n 21 0\n\n\n\n\n 5.1 Size and Distribution of FEW-NERD 22\n 23\n 24\n 25\n 26\n\n 27\n FEW-NERD is not only the first few-shot dataset 28\n 29\n\n 30\n\n 31\n 32\n\n\n\n\n\n\n\n for NER, but it also is one of the biggest human- 33\n 34\n 35\n 36\n 37\n\n\n\n\n\n\n\n annotated NER datasets. We report the the statistics 38\n 39\n 40\n 41\n 42\n\n\n\n\n of the number of sentences, tokens, entity types and 43\n 44\n 45\n 46\n 47\n\n\n\n\n\n\n\n entities of FEW-NERD and several widely-used 48\n 49\n 50\n 51\n 52\n\n\n\n\n\n\n\n NER datasets in Table 2, including CoNLL\u201903, 53\n 54\n 55\n 56\n 57\n\n\n\n\n WikiGold, OntoNotes 5.0, WNUT\u201917 and I2B2. 58\n 59\n 60\n 61\n 62\n\n\n\n\n We observe that although OntoNotes and I2B2 are 63\n 64\n 65\n x\n considered as large-scale datasets, FEW-NERD is\n significantly larger than all these datasets. More- Figure 2: A heat map to illustrate knowledge correla-\n over, FEW-NERD contains more entity types and tions among type in FEW-NERD, each small colored\n annotated entities. As introduced in Section 4.2, square represents the similarity of two entity types.\n FEW-NERD is designed for few-shot learningy0\n\n and the distribution could not be severely uneven.123ties, most of them across coarse-grained types share45Hence, we balance the dataset by selecting para-678little correlations due to distinct contextual features.\n9\n10\n\n graphs through a distant dictionary. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 24634, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "ec59971c-cf54-40e2-9a55-c5de0cdbea76": {"__data__": {"id_": "ec59971c-cf54-40e2-9a55-c5de0cdbea76", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "297587ad-1bff-4027-9c1e-7b5732f0d283", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "63f1d00453bd4c3bd83a691d994f62f1c05e4d9e110966dee0998fbd8a27b3c0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "39abd0c8-e1f5-4ee3-8da1-537353646ec6", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "946e0fda6aa5de3de03554f4cd8f69a4c4c4cc39c9c6c3c0d18ab2f3d2397579", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "ce1695d1-7872-48ae-8589-5b5ed5355234", "node_type": "1", "metadata": {}, "hash": "7eea7485e9ce6bab3c3997dfa4a651cba7e8435dca7180317dc7ada601918b86", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "The data distri-111213This result is consistent with intuition. Moreover,1415\n\n bution is illustrated in Figure 1, where Location161718it inspires our benchmark-setting from the perspec-1920(especially GPE) and Person are entity types with212223tive of knowledge transfer (see Section 6.2).\n24\n25\n\n the most examples. Although utilizing a distant2627282930dictionary to balance the entity types could not313233\n34\n35\n produce a fully balanced data distribution, it still 6 Benchmark Settings3637383940ensures that each fine-grained type has a sufficient414243\n44\n45\n number of examples for few-shot learning. We collect and manually annotate 188,238 sen-4647484950tences with 66 fine-grained entity types in to-515253\n\n 5.2 Knowledge Correlations among Types5455tal, which makes FEW-NERD one of the largest5657585960\n Knowledge transfer is crucial for few-shot learn-\n relations among all the entity types of FEW-NERD ,\nwe conduct an empirical study about entity type\n similarities in this section. We train a BERT-Tagger\n(details in Section 7.1) of 70% arbitrarily selected\n data on FEW-NERD and use 10% data to select the\n model with best performance (it is actually the set-\n ting of FEW-NERD (SUP) in Section 6.1). After\n obtaining a contextualized encoder, we produce en-\n tity mention representations of the remaining 20%\n data of FEW-NERD. Then, for each fine-grained\n types, we randomly select 100 instances of entity\n embeddings. ", "mimetype": "text/plain", "start_char_idx": 24634, "end_char_idx": 26795, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "ce1695d1-7872-48ae-8589-5b5ed5355234": {"__data__": {"id_": "ce1695d1-7872-48ae-8589-5b5ed5355234", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "297587ad-1bff-4027-9c1e-7b5732f0d283", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "63f1d00453bd4c3bd83a691d994f62f1c05e4d9e110966dee0998fbd8a27b3c0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "ec59971c-cf54-40e2-9a55-c5de0cdbea76", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "7164356aa9e8f2df54101a489109a375b5e85b1cb43ad0fb15ccd21f4ecc4ffe", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "We mutually compute the dot product\n among entity embeddings for each type two by two\n and average them to obtain the similarities among\n types, which is illustrated in Figure 2. We observe\n that entity types shared identical coarse-grained\n types typically have larger similarities, resulting in\n an easier knowledge transfer. In contrast, although\n some of the fine-grained types have large similari-\n\n human-annotated NER datasets. To comprehen-6162636465ing (Li et al., 2019a). To explore the knowledge cor-sively exploit such rich information of entities and\n contexts, as well as evaluate the generalization of\n models from different perspectives, we construct\n three tasks based on FEW-NERD (Statistics are\n reported in Table 3).\n\n 6.1 Standard Supervised NER\n\n FEW-NERD (SUP) We first adopt a standard su-\n pervised setting for NER by randomly splitting\n 70% data as the training data, 10% as the validation\n data and 20% as the testing data. In this setting,\n the training set, dev set, and test set contain the\nwhole 66 entity types. Although the supervised\n setting is not the ultimate goal of the construction\n of FEW-NERD, it is still meaningful to assess the\n instance-level generalization for NER models. As\n shown in Section 6.2, due to the large number of\n entity types, FEW-NERD is very challenging even\n in a standard supervised setting.", "mimetype": "text/plain", "start_char_idx": 26795, "end_char_idx": 28280, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "5a2138f4-d397-4d63-9cac-d45d9fe4de7e": {"__data__": {"id_": "5a2138f4-d397-4d63-9cac-d45d9fe4de7e", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "12b98149-050c-4c93-81db-ea720272647e", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b2c3b4fde5e3823dcb32f15b37d9702a9a73c0b6c0aa3c2e38e38431ce7159d5", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "a2435907-a143-49c8-b483-ee3e8a02ba74", "node_type": "1", "metadata": {}, "hash": "5612a97dd51458cf4337501a25a225772f991690c117abe21e7cf830660c28a7", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Datasets # Sentences # Tokens # Entities # Entity Types Domain\n CoNLL\u201903 (Tjong Kim Sang, 2002) 22.1k 301.4k 35.1k 4 Newswire\n WikiGold (Balasuriya et al., 2009) 1.7k 39k 3.6k 4 General\n OntoNotes (Weischedel et al., 2013) 103.8k 2067k 161.8k 18 General\n WNUT\u201917 (Derczynski et al., 2017) 4.7k 86.1k 3.1k 6 SocialMedia\n I2B2 (Stubbs and Uzuner, 2015) 107.9k 805.1k 28.9k 23 Medical\n FEW-NERD 188.2k 4601.2k 491.7k 66 General\n\n Table 2: Statistics of FEW-NERD and multiple widely used NER datasets. For CoNLL\u201903, WikiGold, and I2B2,\n we report the statistics in the original paper. For OntoNotes 5.0 (LDC2013T19), we download and count all the data\n (English) annotated by the NER labels, some works use different split of OntoNotes 5.0 and may report different\n statistics. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 1479, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "a2435907-a143-49c8-b483-ee3e8a02ba74": {"__data__": {"id_": "a2435907-a143-49c8-b483-ee3e8a02ba74", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "12b98149-050c-4c93-81db-ea720272647e", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b2c3b4fde5e3823dcb32f15b37d9702a9a73c0b6c0aa3c2e38e38431ce7159d5", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "5a2138f4-d397-4d63-9cac-d45d9fe4de7e", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "1a6d32e20e86e7ff943edededb2390c3598451572dfb8a49fa89230f4d3c598c", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "b3793ecc-96fc-4f50-bc61-21be9868e23b", "node_type": "1", "metadata": {}, "hash": "418385c5f9279f5de08b8b2d3dd2ffdec12c13f3c4ff1b2fa39f93b5eddeaa12", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "For WNUT\u201917, we download and count all the data.\n\n6.2 Few-shot NER\nThe core intuition of few-shot learning is to learn\nnew classes from few examples. Hence, we first\nsplit the overall entity set (denoted as E) into three\nmutually disjoint subsets, respectively denoted\nas Etrain , Edev, Etest, and Etrain\u22c3 Edev\u22c3 Etest = E,\nEtrain\u22c2Edev\u22c2 Etest = \u2205. ", "mimetype": "text/plain", "start_char_idx": 1479, "end_char_idx": 1829, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "b3793ecc-96fc-4f50-bc61-21be9868e23b": {"__data__": {"id_": "b3793ecc-96fc-4f50-bc61-21be9868e23b", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "12b98149-050c-4c93-81db-ea720272647e", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b2c3b4fde5e3823dcb32f15b37d9702a9a73c0b6c0aa3c2e38e38431ce7159d5", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "a2435907-a143-49c8-b483-ee3e8a02ba74", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "8dae53115baed88ef4682b02b0e2d08e94e24a4984ce6b08396678c1a7c97ce7", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Note that all the entity\n\ntypes are fine-grained types. Under this circum-\nstance, instances in train, dev and test datasets only\nconsist of instances with entities in Etrain, Edev, Etest\nrespectively. However, NER is a sequence labeling\nproblem, and it is possible that a sentence contains\nseveral different entities. To avoid the observation\nof new entity types in the training phase, we replace\nthe labels of entities that belong to Etest with O in\nthe training set. Similarly, in the test set, entities\nthat belongs to Etrain and Edev are also replaced by\nO. Based on this setting, we develop two few-shot\nNER tasks adopting different splitting strategies.\nFEW-NERD (INTRA) Firstly, we construct\nEtrain, Edev and Etest according to the coarse-grained\ntypes. In other words, all the entities in differ-\nent sets belong to different coarse-grained types.\nIn the basis of the principle that we should re-\nplace as few as possible entities with O, we\nassign all the fine-grained entity types belong-\ning to People, MISC, Art, Product to\nEtrain, all the fine-grained entity types belonging\nto Event, Building to Edev, and all the fine-\ngrained entity types belonging to ORG, LOC to\nEtest, respectively. Based on Figure 2, in this set-\nting, the training set, dev set and test set share little\nknowledge, making it a difficult benchmark.\nFEW-NERD (INTER) In this task, although all\nthe fine-grained entity types are mutually disjoint\nin Etrain, Edev, the coarse-grained types are shared.\nSpecifically, we roughly assign 60% fine-grained\ntypes of all the 8 coarse-grained types to Etrain, 20%\nto Edev and 20% Etest, respectively. The intuition of\n Split #Train #Dev #Test\n FEW-NERD (SUP) 131,767 18,824 37,648\n FEW-NERD (INTRA) 99,519 19,358 44,059\n FEW-NERD (INTER) 130,112 18,817 14,007\n\nTable 3: Statistics of train, dev and test sets for three\ntasks of FEW-NERD. We remove the sentences with\nno entities for the few-shot benchmarks.\n\nthis setting is to explore if the coarse information\nwill affect the prediction of new entities.\n\n7 Experiments\n\n7.1 Models\nRecent studies show that pre-trained language mod-\nels with deep transformers (e.g., BERT (Devlin\net al., 2019a)) have become a strong encoder for\nNER (Li et al., 2020b). We thus follow the em-\npirical settings and use BERT as the backbone en-\ncoder in our experiments. We denote the parame-\nters as \u03b8 and the encoder as f\u03b8. Given a sequence\nx = {x1, ..., xn}, for each token xi, the encoder\nproduces contextualized representations as:\n\n h = [h1, ..., hn] = f\u03b8([x1, ..., xn]). (1)\n\nSpecifically, we implement four BERT-based mod-\nels for supervised and few-shot NER, which\nare BERT-Tagger (Devlin et al., 2019b), Proto-\nBERT (Snell et al., 2017), NNShot (Yang and\nKatiyar, 2020) and StructShot (Yang and Katiyar,\n2020).\nBERT-Tagger As stated in Section 6.1, we\nconstruct a standard supervised task based on\nFEW-NERD , thus we implement a simple but\nstrong baseline BERT-Tagger for supervised NER.\nBERT-Tagger is built by adding a linear classifier\non top of BERT and trained with a cross-entropy\nobjective under a full supervision setting.", "mimetype": "text/plain", "start_char_idx": 1829, "end_char_idx": 5108, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "c33b63d5-7341-40f1-9016-43201810afd5": {"__data__": {"id_": "c33b63d5-7341-40f1-9016-43201810afd5", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "5c930767-ef2a-434e-91d1-780d7a9deb81", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "951fa020d243279fd40ee2632cbff6d12455ac75bd998f140cc10a0c50148355", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "ecacd21e-1829-48fa-95ab-5c90846e8dd3", "node_type": "1", "metadata": {}, "hash": "0dfd5f3bd973fdb3811f8e1e9d7862cd89fb6c3011ccff576ce2dd533f7d0fab", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " ProtoBERT Inspired by achievements of meta- Datasets P R F1\n learning approaches (Finn et al., 2017; Snell et al., CoNLL\u201903 90.62 92.07 91.34\n 2017; Ding et al., 2021) on few-shot learning. OntoNotes 5.0 90.00 88.24 89.11\nThe first baseline model we implement is Proto- FEW-NERD (SUP) 65.56 (\u2193) 68.78 (\u2193) 67.13 (\u2193)\n BERT, which is a method based on prototypical\n network (Snell et al., 2017) with a backbone of Table 4: Results of BERT-Tagger on previous NER\n BERT (Devlin et al., 2019a) encoder. This ap- datasets and the supervised setting of FEW-NERD.\n proach derives a prototype z for each entity type\n by computing the average of the embeddings of the p(y|x) and solve the problem:\n tokens that share the same entity type. The compu- T\n tation is conducted in support set S. For the i-th y\u2217 = arg max\u220fp(yt|x) \u00d7 p(yt|yt\u22121). ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 2341, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "ecacd21e-1829-48fa-95ab-5c90846e8dd3": {"__data__": {"id_": "ecacd21e-1829-48fa-95ab-5c90846e8dd3", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "5c930767-ef2a-434e-91d1-780d7a9deb81", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "951fa020d243279fd40ee2632cbff6d12455ac75bd998f140cc10a0c50148355", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "c33b63d5-7341-40f1-9016-43201810afd5", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "3d5e80599a4375c7886c2967fcf8a54badb3513ff488d9a03b295b8074e1fa7a", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "95509f41-b5f0-4bc4-ba2c-886ad18a6046", "node_type": "1", "metadata": {}, "hash": "2aba92c7724e7aa235b41f83c8177397425cb235110b489a8de524232eb60d41", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "(5)\n type, the prototype is denoted as zi and the support y t=1\n set is S,i \u2211f\u03b8(x).\n zi = 1i|S|x\u2208Si (2) acknowledged baseline that could produceTosum up, BERT-Tagger is a well-\n pronounced results on supervised NER. Proto-\nWhile in the query set Q, for each token x \u2208 Q, BERT, and NNShot & StructShot respectively use\nwe firstly compute the distance between x and all prototype-level and token-level similarity scores to\n the prototypes. We use the l-2 distance as the met- tackle the few-shot NER problem. These baselines\n ric function d(f\u03b8(x), z) = ||f\u03b8(x) \u2212 z||22. Then, are strong and representative models of the NER\n through the distances between x and all other pro- task. For implementation details, please refer to\n totypes, we compute the prediction probability of Appendix.\n ", "mimetype": "text/plain", "start_char_idx": 2341, "end_char_idx": 4328, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "95509f41-b5f0-4bc4-ba2c-886ad18a6046": {"__data__": {"id_": "95509f41-b5f0-4bc4-ba2c-886ad18a6046", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "5c930767-ef2a-434e-91d1-780d7a9deb81", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "951fa020d243279fd40ee2632cbff6d12455ac75bd998f140cc10a0c50148355", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "ecacd21e-1829-48fa-95ab-5c90846e8dd3", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "959bf9577e6f311a7c7757d259611276d0f23bc8487222cd6678c94fb44efeba", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "x over all types. In the training step, parameters We evaluate models by considering query sets\n are updated in each meta-task. In the testing step, Qtest of test episodes. We calculate the precision\n the prediction is the label of the nearest prototype (P), recall (R) and micro F1-score over all test\n to x. That is, for a support set SY with types of Y episodes. Instead of the popular BIO schema, we\nand a query x, the prediction process is given as utilize the IO schema in our experiments, using\n y\u2217 = arg mindy(x), I-type to denote all the tokens of a named entity\n y\u2208Y (3) and O to denote other tokens.\n dy(x) = d(f\u03b8(x), zy). 7.2 The Overall Results\n NNShot & StructShot NNShot and Struct- We evaluate all baseline models on the three bench-\n Shot (Yang and Katiyar, 2020) are the state-of-the- mark settings introduced in Section 6, including\n art methods based on token-level nearest neighbor FEW-NERD (SUP), FEW-NERD (INTRA) and\n classification. In our experiments, we use BERT FEW-NERD (INTER).\n as the backbone encoder to produce contextualized Supervised NER As mentioned in Section 6.1,\n representations for fair comparison. Different from we first split the FEW-NERD as a standard super-\n the prototype-based method, NNShot determines vised NER dataset. As shown in Table 4, BERT-\n the tag of one query based on the token-level dis- Tagger yields promising results on the two widely\n tance, which is computed as d(f\u03b8(x), f\u03b8(x\u2032)) = used supervised datasets. The F1-score is 91.34%,\n ||f\u03b8(x) \u2212 f\u03b8(x\u2032)||22. Hence, for a support set SY 89.11%, respectively. However, the model suffers\nwith type of Y and a query x, a grave drop in the performance on FEW-NERD\n y\u2217 = arg mindy(x), (SUP) because the number of types of FEW-NERD\n y\u2208Y (SUP) is larger than others. The results indicate\n dy(x) = minyd(f\u03b8(x), f\u03b8(x\u2032)). (4) that FEW-NERD is challenging in the supervised\n x\u2032\u2208S setting and worth studying.\n We further analyze the performance of different\n With the identical basic structure as NNShot, entity types (see Figure 3). We find that the model\n StructShot adopts an additional Viterbi decoder achieves the best performance on the Person type\n during the inference phase (Hou et al., 2020) (not and yields the worst performance on the Product\n in training phase), where we estimate a transition type. And almost for all the coarse-grained types,\n distribution p(y\u2032|y) and an emission distribution the Coarse-Other type has the lowest F1-score.", "mimetype": "text/plain", "start_char_idx": 4328, "end_char_idx": 10047, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "b778bdc3-b7ac-4222-b5f9-8e068507f3a6": {"__data__": {"id_": "b778bdc3-b7ac-4222-b5f9-8e068507f3a6", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "fc3045bc-4ea0-4c56-aa91-9689fa1cee0d", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "595935693e299e435c4fa70872784da0511641c8bb4e8489e41fc9743f946167", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "810ba2d6-65c6-4378-91c4-4ba38f087746", "node_type": "1", "metadata": {}, "hash": "b42f80987ce6c5ac642f3fdf7ee3893520cb187b60aa749b96b1461ba338bd44", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " 100\n Art - 77% Building - 67% Event - 67% Location - 79% Organization - 73% Person - 85% Product - 60% Miscellaneous - 63%\n\n 80\n\n 60\n\n 40\n\n 20\n\n 0\n\n BroadcastPaintingFilmMusicWrittenAirportHotelArt-OtherHospitalLibrarySportsTheaterDisasterAttackElectionProtestGPESportsWaterMountainTransitIslandLoc-otherCompanyNewspaperPark EducationOrg-OtherPartyShowReligion ActorAthletePer-OtherSoldierCarFoodAuthorDirectorPoliticianAirplaneScholar GameShipSoftwareAstronomyChemicalDegreeTrainWeaponAwardBiologyCurrencyDiseaseLanguageMedicalGod Law\n Restaurant Event-other SportsTeam Livingthing\n Government SportsLeague Product-Other\n Building-Other\n\n Figure 3: F1-scores of different entity types on FEW-NERD (SUP), we report the average performance of each\n coarse-grained entity type on the legends.\n\n FEW-NERD(INTRA)\n Model 5 way 1\u223c2 shot 5 way 5\u223c10 shot 10 way 1\u223c2 shot 10 way 5\u223c10 shot\n P R F1 P R F1 P R F1 P R F1\n Proto 15.97\u00b10.61 29.66\u00b11.39 20.76\u00b10.84 36.34\u00b11.33 51.32\u00b10.45 42.54\u00b10.94 11.33\u00b10.57 22.47\u00b10.49 15.05\u00b10.44 29.39\u00b10.27 44.51\u00b11.00 35.40\u00b10.13\n NNShot 24.15\u00b10.35 27.65\u00b11.63 25.78\u00b10.91 32.91\u00b10.62 40.19\u00b11.22 36.18\u00b10.79 16.25\u00b10.22 20.90\u00b11.38 18.27\u00b10.41 24.86\u00b10.30 30.49\u00b10.96 27.38\u00b10.53\n Struct 32.99\u00b10.76 27.85\u00b10.98 30.21\u00b10.90 46.78\u00b11.00 32.06\u00b12.17 38.00\u00b11.29 26.05\u00b10.53 17.65\u00b11.34 21.03\u00b11.13 40.88\u00b10.83 19.52\u00b10.49 26.42\u00b10.60\n\n Table 5: Performance of state-of-art models on FEW-NERD (INTRA).\n\n FEW-NERD(INTER)\n Model 5 way 1\u223c2 shot 5 way 5\u223c10 shot 10 way 1\u223c2 shot 10 way 5\u223c10 shot\n P R F1 P R F1 P R F1 P R F1\n Proto 32.04\u00b11.75 49.30\u00b10.68 38.83\u00b11.49 52.54\u00b11.32 66.76\u00b11.01 58.79\u00b10.44 26.02\u00b11.32 43.17\u00b10.92 32.45\u00b10.79 46.38\u00b10.42 61.60\u00b10.36 52.92\u00b10.37\n NNShot 42.57\u00b11.27 53.09\u00b10.54 47.24\u00b11.00 51.03\u00b10.63 61.15\u00b10.63 55.64\u00b10.63 34.36\u00b10.24 44.76\u00b10.33 38.87\u00b10.21 44.96\u00b12.69 55.25\u00b12.77 49.57\u00b12.73\n Struct 53.89\u00b10.78 50.02\u00b10.62 51.88\u00b10.69 62.12\u00b10.41 53.21\u00b10.91 57.32\u00b10.63 47.07\u00b10.15 40.16\u00b10.12 43.34\u00b10.10 57.61\u00b11.87 43.54\u00b13.70 49.57\u00b13.08\n\n Table 6: Performance of state-of-art models on FEW-NERD (INTER).\n\nThis is because the semantics of such fine-grained FEW-NERD (INTER) than FEW-NERD (INTRA) ,\n types are relatively sparse and difficult to be recog- and the latter is regarded as a more difficult task as\n nized. A natural intuition is that the performance of we analyze in Section 5.2 and Section 6, it splits the\n each entity type is related to the portion of the type. data according to the coarse-grained entity types,\n But surprisingly, we find that they are not linearly which means entity types between the training set\n correlated. For examples, the model performs very and test set share less knowledge.\nwell on the Art type, although this type represents In a horizontal comparison, consistent with in-\n only a small fraction of FEW-NERD. tuition, almost all the methods produce the worst\n Few-shot NER For the few-shot benchmarks, results on 10 way 1\u223c2 shot and achieve the best\nwe adopt 4 sampling settings, which are 5 way performance on 5 way 5\u223c10. In the comparison\n1\u223c2 shot, 5 way 5\u223c10 shot, 10 way 1\u223c2 shot, across models, ProtoBERT generally achieves bet-\n and 10 way 5\u223c10 shot. Intuitively, 10 way 1\u223c2 ter performance than NNShot and StructShot, es-\n shot is the hardest setting because it has the largest pecially in 5\u223c10 shot setting where calculation by\n number of entity types and the fewest number of prototype may differ more from calculation by en-\n examples, and similarly, 5 way 5\u223c10 shot is the tity. StructShot has seen a large improvement in\n easiest setting. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 11590, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "810ba2d6-65c6-4378-91c4-4ba38f087746": {"__data__": {"id_": "810ba2d6-65c6-4378-91c4-4ba38f087746", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "fc3045bc-4ea0-4c56-aa91-9689fa1cee0d", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "595935693e299e435c4fa70872784da0511641c8bb4e8489e41fc9743f946167", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "b778bdc3-b7ac-4222-b5f9-8e068507f3a6", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "a593b49223ff84672127a243d4951709bb632f75b3dec73238a7928fce3fe317", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "All results of FEW-NERD (INTRA) precision in FEW-NERD (INTRA) . It shows that\n and FEW-NERD (INTER) are reported in Table 5 Viterbi decoder at the inference stage can help re-\n and Table 6 respectively. Overall, we observe move false positive predictions when knowledge\n that the previous state-of-the-art methods equipped transfer is hard. It is also observed that NNShot and\n by BERT encoder could not yield promising re- StructShot may suffer from the instability of the\n sults on FEW-NERD . From a perspective of nearest neighbor mechanism in the training phase,\n high level, models generally perform better on and prototypical models are more stable because", "mimetype": "text/plain", "start_char_idx": 11590, "end_char_idx": 13597, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "27c32a2f-d0a1-4540-90a2-aed3847dc7e4": {"__data__": {"id_": "27c32a2f-d0a1-4540-90a2-aed3847dc7e4", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "f748fdf06ccfc10a5aa0f6be1fb318b5c10741f653d841242c0454432291994a", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b", "node_type": "1", "metadata": {}, "hash": "a7910a341863b0ad922932bd832aaf0a68e61920b5a15730883d7ef8e3d7ae92", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Models Span Error Type Error\n FP FN Within Outer\n ProtoNet 4.29% 2.17% 3.87% 5.35%\n NNShot 3.87% 3.67% 3.86% 6.90%\n StructShot 2.84% 4.45% 3.94% 5.56%\n\nTable 7: Error analysis of 5 way 5\u223c10 shot on\nFEW-NERD (INTER), \u201cWithin\u201d indicates \u201cwithin the\ncoarse types\u201d and \u201cOuter\u201d is \u201couter the coarse types\u201d.\nthe calculation of prototypes essentially serves as\nregularization.\n\n7.3 Error Analysis\nWe conduct error analysis to explore the challenges\nof FEW-NERD , the results are reported in Table 7.\nWe choose the setting of FEW-NERD (INTER) be-\ncause the test set contains all the coarse-grained\ntypes. We analyze the errors of models from two\nperspectives. Span Error denotes the misclassify-\ning in token-level classification. If an O token is\nmisclassified as a part of entity, i.e., I-type, it is\nan FP case, and if a token with the type I-type is\nmisclassified to O, it is FN. Type Error indicates the\nmisclassification of entity types when the spans are\ncorrectly classified. A \u201cWithin\u201d error represents\nthe entity is misclassified to another type within the\nsame coarse-grained type, while \u201cOuter\u201d denotes\nthe entity is misclassified to another type in a dif-\nferent coarse-grained type. As the statistics of type\nerrors may be impacted by the sampled episodes\nin testing, we conduct 5 rounds of experiments and\nreport the average results. The results demonstrate\nthat the token-level accuracy is not that low since\nmost O tokens could be detected. But an entity men-\ntion is considered to be wrong if one token is wrong,\nwhich becomes the main reason for the challenge\nof FEW-NERD . If an entity span could be accu-\nrately detected, the models could yield relatively\ngood performance on entity typing, indicating the\neffectiveness of metric learning.\n\n8 Conclusion and Future Work\nWe propose FEW-NERD , a large-scale few-shot\nNER dataset with fine-grained entity types. This\nis the first few-shot NER dataset and also one\nof the largest human-annotated NER dataset.\nFEW-NERD provides three unified benchmarks\nto assess approaches of few-shot NER and could\nfacilitate future research in this area. By imple-\n\nmenting state-of-the-art methods, we carry out a se-\nries of experiments on FEW-NERD , demonstrating\nthat few-shot NER remains a challenging problem\nand worth exploring. In the future, we will extend\nFEW-NERD by adding cross-domain annotations,\ndistant annotations, and finer-grained entity types.\nFEW-NERD also has the potential to advance the\nconstruction of continual knowledge graphs.\n\nAcknowledgements\nThis research is supported by National Natu-\nral Science Foundation of China (Grant No.\n61773229 and 6201101015), National Key Re-\nsearch and Development Program of China\n(No. 2020AAA0106501), Alibaba Innovation Re-\nsearch (AIR) programme, the General Research\nProject (Grand No. JCYJ20190813165003837,\nNo.JCYJ20190808182805919), and Overseas Co-\noperation Research Fund of Graduate School at\nTsinghua University (Grant No. HW2018002). ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 3095, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b": {"__data__": {"id_": "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "f748fdf06ccfc10a5aa0f6be1fb318b5c10741f653d841242c0454432291994a", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "27c32a2f-d0a1-4540-90a2-aed3847dc7e4", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bbabec9c4778e4f90d334b2020e61df2ae1bdc85d03e854ff28e0da66e1a8dcf", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "9cc52dba-eaee-481f-b340-5c0a400c28e7", "node_type": "1", "metadata": {}, "hash": "2a7b6c7d5494f767cf87d1e36d1c3fab50bb293e1a876d3cd0b47a333693080a", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Fi-\nnally, we thank the valuable help of Ronny, Xiaozhi,\nZiyu and comments of anonymous reviewers.\nEthical Considerations\n\nIn this paper, we present a human-annotated\ndataset, FEW-NERD , for few-shot learning in\nNER. We describe the details of the collection pro-\ncess and conditions, the compensation of annota-\ntors, the measurements to ensure the quality in the\nmain text. The corpus of the dataset is publicly ob-\ntained from Wikipedia and we have not modified or\ninterfered with the content. FEW-NERD is likely\nto directly facilitate the research of few-shot NER,\nand further increase the progress of the construction\nof large-scale knowledge graphs (KGs). Models\nand systems built on FEW-NERD may contribute\nto construct KGs in various domains, including\nbiomedical, financial, and legal fields, and further\npromote the development of NLP applications on\nspecific domains. FEW-NERD is annotated in En-\nglish, thus the dataset may mainly facilitate NLP\nresearch in English. For the sake of energy saving,\nwe will not only open source the dataset and the\ncode, but also release the checkpoints of our mod-\nels from the experiments to reduce unnecessary\ncarbon emission.\n\nReferences\n\nDominic Balasuriya, Nicky Ringland, Joel Nothman,\n Tara Murphy, and James R. Curran. ", "mimetype": "text/plain", "start_char_idx": 3095, "end_char_idx": 4369, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "9cc52dba-eaee-481f-b340-5c0a400c28e7": {"__data__": {"id_": "9cc52dba-eaee-481f-b340-5c0a400c28e7", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "f748fdf06ccfc10a5aa0f6be1fb318b5c10741f653d841242c0454432291994a", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "15380f84e88453210db0a8c7ab5cc42009ff66f68d4e66e029d3d1fcf38efd86", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2009. Named\n entity recognition in Wikipedia. In Proceedings of\n the 2009 Workshop on The People\u2019s Web Meets NLP:\n Collaboratively Constructed Semantic Resources", "mimetype": "text/plain", "start_char_idx": 4369, "end_char_idx": 4536, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "f1116f47-ab33-4225-bb26-ddc62fe95589": {"__data__": {"id_": "f1116f47-ab33-4225-bb26-ddc62fe95589", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "e72dac24-34a6-4159-818b-d6f023d89f0c", "node_type": "1", "metadata": {}, "hash": "2dae6087e6c22f27561d177b46f2291d4b30e3204ce4d725d2530af957449325", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " (People\u2019s Web), pages 10\u201318, Suntec, Singapore. As-\n sociation for Computational Linguistics.\n\n Jason P.C. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 114, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "e72dac24-34a6-4159-818b-d6f023d89f0c": {"__data__": {"id_": "e72dac24-34a6-4159-818b-d6f023d89f0c", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "f1116f47-ab33-4225-bb26-ddc62fe95589", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "1bbeb825aa73acd4f2bda14f92aebcaf9ef109db74efb2e92cd7058f95df94f1", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "00f9b9f2-a717-4ccb-a263-c9c92e3a0604", "node_type": "1", "metadata": {}, "hash": "28d30c7f4af4a30790aa08955d1e377b1d7c49bdbff3510ebba78f4cae663a01", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Chiu and Eric Nichols. ", "mimetype": "text/plain", "start_char_idx": 114, "end_char_idx": 137, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "00f9b9f2-a717-4ccb-a263-c9c92e3a0604": {"__data__": {"id_": "00f9b9f2-a717-4ccb-a263-c9c92e3a0604", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "e72dac24-34a6-4159-818b-d6f023d89f0c", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "e91fed0f32692abd2070a7bf73e855efebac7c6516b221b1f5ef89283bcad78b", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "0b352382-f3d6-4693-8571-1762bd92e288", "node_type": "1", "metadata": {}, "hash": "fa068ddcf54a1b84dcb277c7d02d80f5ddf7aac4d6517165af8f07df295c728f", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2016. Named entity\n recognition with bidirectional LSTM-CNNs. Trans-\n actions of the Association for Computational Lin-\n guistics, 4:357\u2013370.\n\nEunsol Choi, Omer Levy, Yejin Choi, and Luke Zettle-\n moyer. 2018. Ultra-fine entity typing. In Proceed-\n ings of the 56th Annual Meeting of the Association\n for Computational Linguistics (Volume 1: Long Pa-\n pers), pages 87\u201396, Melbourne, Australia. Associa-\n tion for Computational Linguistics.\nJacob Cohen. 1960. A coefficient of agreement for\n\n nominal scales. Educational and psychological mea-\n surement, 20(1):37\u201346.\n Wanyun Cui, Yanghua Xiao, Haixun Wang, Yangqiu\n Song, Seung-won Hwang, and Wei Wang. ", "mimetype": "text/plain", "start_char_idx": 137, "end_char_idx": 823, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "0b352382-f3d6-4693-8571-1762bd92e288": {"__data__": {"id_": "0b352382-f3d6-4693-8571-1762bd92e288", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "00f9b9f2-a717-4ccb-a263-c9c92e3a0604", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "1dd73aff08b1e4d79f5ebb9551934a92650bfe7bffcbc212780a9f2d886731eb", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "812846d5-bd57-4218-8039-072d4826c457", "node_type": "1", "metadata": {}, "hash": "9fb0aae4b9f16a6c7e601e74f9730da32b3dee7f43a75a8be3640461813686e7", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2017.\n Kbqa: learning question answering over qa cor-\n pora and knowledge bases. In Proceedings of 43rd\n Very Large Data Base Conference Endowment, vol-\n ume 10.\n Leon Derczynski, Eric Nichols, Marieke van Erp, and\n Nut Limsopatham. 2017. Results of the WNUT2017\n shared task on novel and emerging entity recogni-\n tion. In Proceedings of the 3rd Workshop on Noisy\n User-generated Text, pages 140\u2013147, Copenhagen,\n Denmark. Association for Computational Linguis-\n tics.\n Jacob Devlin, Ming-Wei Chang, Kenton Lee, and\n Kristina Toutanova. 2019a. BERT: Pre-training of\n deep bidirectional transformers for language under-\n standing. In Proceedings of the 2019 Conference\n of the North American Chapter of the Association\n for Computational Linguistics: Human Language\n Technologies, Volume 1 (Long and Short Papers),\n pages 4171\u20134186, Minneapolis, Minnesota. Associ-\n ation for Computational Linguistics.\n Jacob Devlin, Ming-Wei Chang, Kenton Lee, and\n Kristina Toutanova. 2019b. BERT: Pre-training of\n deep bidirectional transformers for language under-\n standing. In Proceedings of the 2019 Conference\n of the North American Chapter of the Association\n for Computational Linguistics: Human Language\n Technologies, Volume 1 (Long and Short Papers),\n pages 4171\u20134186, Minneapolis, Minnesota. Associ-\n ation for Computational Linguistics.\n Ning Ding, Ziran Li, Zhiyuan Liu, Haitao Zheng,\n and Zibo Lin. 2019. Event detection with trigger-\n aware lattice neural network. In Proceedings of\n the 2019 Conference on Empirical Methods in Natu-\n ral Language Processing and the 9th International\n Joint Conference on Natural Language Process-\n ing (EMNLP-IJCNLP), pages 347\u2013356, Hong Kong,\n China. Association for Computational Linguistics.\n Ning Ding, Xiaobin Wang, Yao Fu, Guangwei Xu, Rui\n Wang, Pengjun Xie, Ying Shen, Fei Huang, Hai-Tao\n Zheng, and Rui Zhang. 2021. Prototypical repre-\n sentation learning for relation extraction. In Inter-\n national Conference on Learning Representations.\n\nChelsea Finn, Pieter Abbeel, and Sergey Levine. 2017.\n Model-agnostic meta-learning for fast adaptation of\n deep networks. In Proceedings of the 34th Inter-\n national Conference on Machine Learning, ICML\n 2017, Sydney, NSW, Australia, 6-11 August 2017,\n volume 70 of Proceedings of Machine Learning Re-\n search, pages 1126\u20131135. PMLR.\n\nAlexander Fritzler, Varvara Logacheva, and Maksim\n Kretov. 2019. Few-shot classification in named en-\n ACM/SIGAPP Symposium on Applied Computing\n tity recognition task. In Proceedings of the 34th,\n pages 993\u20131000.\n\n", "mimetype": "text/plain", "start_char_idx": 823, "end_char_idx": 3555, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "812846d5-bd57-4218-8039-072d4826c457": {"__data__": {"id_": "812846d5-bd57-4218-8039-072d4826c457", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "0b352382-f3d6-4693-8571-1762bd92e288", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "f36c6ca5eebedf6abb5b59826751b9a025960535bfd565d7fd44080971aefc81", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "c52e3f4a-332f-4829-9c57-c42ad62c4c61", "node_type": "1", "metadata": {}, "hash": "2dc6df9854cf3a239010ffb66377ecfcc59fd09c6f6fc747a252d72e8f255a86", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Dan Gillick, Nevena Lazic, Kuzman Ganchev, Jesse\n Kirchner, and David Huynh. ", "mimetype": "text/plain", "start_char_idx": 3555, "end_char_idx": 3635, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "c52e3f4a-332f-4829-9c57-c42ad62c4c61": {"__data__": {"id_": "c52e3f4a-332f-4829-9c57-c42ad62c4c61", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "7d05593a-cace-44c8-ad7c-15b475ca0267", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "bd293015dce9ca2a346361ea4af1ca08e03356003311fea5e5c30f67f50034e0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "812846d5-bd57-4218-8039-072d4826c457", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "8afe57d020b291f7d2c074a8cfbea21e5d91fa2a6ec737f129bf199d39f7c070", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2014. Context-\n dependent fine-grained entity type tagging. arXiv\n preprint arXiv:1412.1820.\nXu Han, Hao Zhu, Pengfei Yu, Ziyun Wang, Yuan\n Yao, Zhiyuan Liu, and Maosong Sun. 2018. FewRel:\n A large-scale supervised few-shot relation classifica-\n tion dataset with state-of-the-art evaluation. In Pro-\n ceedings of the 2018 Conference on Empirical Meth-\n ods in Natural Language Processing, pages 4803\u2013\n 4809, Brussels, Belgium. Association for Computa-\n tional Linguistics.\nMaximilian Hofer, Andrey Kormilitzin, Paul Goldberg,\n and Alejo Nevado-Holgado. 2018. Few-shot learn-\n ing for named entity recognition in medical text.\n arXiv preprint arXiv:1811.05468.\nYutai Hou, Wanxiang Che, Yongkui Lai, Zhihan Zhou,\n Yijia Liu, Han Liu, and Ting Liu. 2020. Few-shot\n slot tagging with collapsed dependency transfer and\n label-enhanced task-adaptive projection network. In\n Proceedings of the 58th Annual Meeting of the Asso-\n ciation for Computational Linguistics, pages 1381\u2013\n 1393, Online. Association for Computational Lin-\n guistics.\n\nJiaxin Huang, Chunyuan Li, Krishan Subudhi, Damien\n Jose, Shobana Balakrishnan, Weizhu Chen, Baolin\n Peng, Jianfeng Gao, and Jiawei Han. 2020. Few-\n shot named entity recognition: A comprehensive\n study. arXiv preprint arXiv:2012.14978.\nGuillaume Lample, Miguel Ballesteros, Sandeep Sub-\n ramanian, Kazuya Kawakami, and Chris Dyer. 2016.\n Neural architectures for named entity recognition.\n In Proceedings of the 2016 Conference of the North\n American Chapter of the Association for Computa-\n tional Linguistics: Human Language Technologies,\n pages 260\u2013270, San Diego, California. Association\n for Computational Linguistics.\nAoxue Li, Tiange Luo, Zhiwu Lu, Tao Xiang, and Li-\n wei Wang. 2019a. Large-scale few-shot learning:\n Knowledge transfer with class hierarchy. In IEEE", "mimetype": "text/plain", "start_char_idx": 3635, "end_char_idx": 5561, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "e32886ff-2b1a-422c-b95b-e421bd43419f": {"__data__": {"id_": "e32886ff-2b1a-422c-b95b-e421bd43419f", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b8b680e22151226fad595439f435384144884a056595224b163178d8e549e603", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "fbb1da9d-8adb-456b-a269-3544ffe0f8c3", "node_type": "1", "metadata": {}, "hash": "5ab2d48e5650a9d4f146033564432a9f1cad466fcc1f391b86e7fc8b8fc4feec", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Conference on Computer Vision and Pattern Recog-\n nition, CVPR 2019, Long Beach, CA, USA, June 16-\n 20, 2019, pages 7212\u20137220. Computer Vision Foun-\n dation / IEEE.\n\n Jing Li, Billy Chiu, Shanshan Feng, and Hao Wang.\n ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 234, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "fbb1da9d-8adb-456b-a269-3544ffe0f8c3": {"__data__": {"id_": "fbb1da9d-8adb-456b-a269-3544ffe0f8c3", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b8b680e22151226fad595439f435384144884a056595224b163178d8e549e603", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "e32886ff-2b1a-422c-b95b-e421bd43419f", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "e0846d4696ffe6313c11e374b1bba89f9cac0be7bb6b7a5a01858b2b136efddd", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "5b74caa6-0e1a-4998-8fce-bc485614f693", "node_type": "1", "metadata": {}, "hash": "23766ac4f90b54b4e76856b4cc8ea2fb739af601e40e81609bdaacbf6efdc6aa", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2020a. Few-shot named entity recognition via meta-\n learning. IEEE Transactions on Knowledge and\n Data Engineering.\n\n Xiaoya Li, Jingrong Feng, Yuxian Meng, Qinghong\n Han, Fei Wu, and Jiwei Li. 2020b. A unified MRC\n framework for named entity recognition. In Pro-\n ceedings of the 58th Annual Meeting of the Asso-\n ciation for Computational Linguistics, pages 5849\u2013\n 5859, Online. Association for Computational Lin-\n guistics.\nZiran Li, Ning Ding, Zhiyuan Liu, Haitao Zheng,\n and Ying Shen. ", "mimetype": "text/plain", "start_char_idx": 234, "end_char_idx": 756, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "5b74caa6-0e1a-4998-8fce-bc485614f693": {"__data__": {"id_": "5b74caa6-0e1a-4998-8fce-bc485614f693", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b8b680e22151226fad595439f435384144884a056595224b163178d8e549e603", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "fbb1da9d-8adb-456b-a269-3544ffe0f8c3", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "9c1acae81d6cf06908c3d8764c2a1ce3939120549a1ea21483b3414c13017bed", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "ae5d7634-5d34-44d1-a4e7-8d200469f0db", "node_type": "1", "metadata": {}, "hash": "8796dabca3ae60633d853ab117ed2e2b842cf6e5db9116b0d8c53f4f6c41e879", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2019b. Chinese relation extraction\n with multi-grained information and external linguis-\n tic knowledge. In Proceedings of the 57th Annual\n Meeting of the Association for Computational Lin-\n guistics, pages 4377\u20134386, Florence, Italy. Associa-\n tion for Computational Linguistics.\n\n Xiao Ling and Daniel S. Weld. 2012. Fine-grained en-\n tity recognition. In Proceedings of the Twenty-Sixth\n AAAI Conference on Artificial Intelligence, July 22-\n 26, 2012, Toronto, Ontario, Canada. AAAI Press.\n Ilya Loshchilov and Frank Hutter. ", "mimetype": "text/plain", "start_char_idx": 756, "end_char_idx": 1308, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "ae5d7634-5d34-44d1-a4e7-8d200469f0db": {"__data__": {"id_": "ae5d7634-5d34-44d1-a4e7-8d200469f0db", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b8b680e22151226fad595439f435384144884a056595224b163178d8e549e603", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "5b74caa6-0e1a-4998-8fce-bc485614f693", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "fdfb36f9efb828c887c3928c11563f0abed35d6b5a617e8ea570abd17c2d6464", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "51714cff-a266-4cf3-96f1-bbb555068ce9", "node_type": "1", "metadata": {}, "hash": "191eb860d6e58621b5eeec9b6aa6eb96c8d0dfd21ca58783d63626170959bb07", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2019. Decoupled\n weight decay regularization. In International Con-\n ference on Learning Representations.\n\nXuezhe Ma and Eduard Hovy. 2016. End-to-end\n sequence labeling via bi-directional LSTM-CNNs-\n CRF. In Proceedings of the 54th Annual Meeting of\n the Association for Computational Linguistics (Vol-\n ume 1: Long Papers), pages 1064\u20131074, Berlin, Ger-\n many. Association for Computational Linguistics.\nAdam Paszke, Sam Gross, Francisco Massa, Adam\n Lerer, James Bradbury, Gregory Chanan, Trevor\n Killeen, Zeming Lin, Natalia Gimelshein, Luca\n Antiga, Alban Desmaison, Andreas K\u00a8opf, Edward\n Yang, Zachary DeVito, Martin Raison, Alykhan Te-\n jani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang,\n Junjie Bai, and Soumith Chintala. 2019. Py-\n torch: An imperative style, high-performance deep\n learning library. In Advances in Neural Informa-\n tion Processing Systems 32: Annual Conference\n on Neural Information Processing Systems 2019,\n NeurIPS 2019, December 8-14, 2019, Vancouver,\n BC, Canada, pages 8024\u20138035.\n\n Nicky Ringland, Xiang Dai, Ben Hachey, Sarvnaz\n Karimi, Cecile Paris, and James R. Curran. 2019.\n NNE: A dataset for nested named entity recognition\n in English newswire. In Proceedings of the 57th An-\n nual Meeting of the Association for Computational\n Linguistics, pages 5176\u20135181, Florence, Italy. Asso-\n ciation for Computational Linguistics.\nAlan Ritter, Sam Clark, Mausam, and Oren Etzioni.\n 2011. Named entity recognition in tweets: An ex-\n perimental study. In Proceedings of the 2011 Con-\n ference on Empirical Methods in Natural Language\n Processing, pages 1524\u20131534, Edinburgh, Scotland,\n UK. Association for Computational Linguistics.\n\nYing Shen, Ning Ding, Hai-Tao Zheng, Yaliang Li,\n and Min Yang. 2020. Modeling relation paths for\n knowledge graph completion. IEEE Transactions\n on Knowledge and Data Engineering.\nJake Snell, Kevin Swersky, and Richard S. Zemel.\n 2017. Prototypical networks for few-shot learning.\n In Advances in Neural Information Processing Sys-\n tems 30: Annual Conference on Neural Informa-\n tion Processing Systems 2017, December 4-9, 2017,\n Long Beach, CA, USA, pages 4077\u20134087.\nAmber Stubbs and \u00a8Ozlem Uzuner. 2015. Annotating\n longitudinal clinical narratives for de-identification:\n The 2014 i2b2/uthealth corpus. Journal of biomedi-\n cal informatics, 58:S20\u2013S29.\n\nErik F. Tjong Kim Sang. ", "mimetype": "text/plain", "start_char_idx": 1308, "end_char_idx": 3804, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "51714cff-a266-4cf3-96f1-bbb555068ce9": {"__data__": {"id_": "51714cff-a266-4cf3-96f1-bbb555068ce9", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "b8b680e22151226fad595439f435384144884a056595224b163178d8e549e603", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "ae5d7634-5d34-44d1-a4e7-8d200469f0db", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "f8eb07f417ebf2d68de8a444127f40066c8f8311545bb7cf8fa2a85315239053", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2002. Introduction to the\n CoNLL-2002 shared task: Language-independent\n named entity recognition. In COLING-02: The\n 6th Conference on Natural Language Learning 2002\n (CoNLL-2002).\n\nXiaozhi Wang, Ziqi Wang, Xu Han, Wangyi Jiang,Rong Han, Zhiyuan Liu, Juanzi Li, Peng Li, Yankai\n Lin, and Jie Zhou. 2020. MAVEN: A Massive Gen-\n eral Domain Event Detection Dataset. In Proceed-\n ings of the 2020 Conference on Empirical Methods\n in Natural Language Processing (EMNLP), pages\n 1652\u20131671, Online. Association for Computational\n Linguistics.\nRalph Weischedel, Martha Palmer, Mitchell Marcus,\n Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Ni-\n anwen Xue, Ann Taylor, Jeff Kaufman, Michelle\n Franchini, et al. 2013. Ontonotes release 5.0\n ldc2013t19. Linguistic Data Consortium, Philadel-\n phia, PA, 23.\nThomas Wolf, Lysandre Debut, Victor Sanh, Julien\n Chaumond, Clement Delangue, Anthony Moi, Pier-\n ric Cistac, Tim Rault, Remi Louf, Morgan Funtow-\n icz, Joe Davison, Sam Shleifer, Patrick von Platen,\n Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu,\n Teven Le Scao, Sylvain Gugger, Mariama Drame,\n Quentin Lhoest, and Alexander Rush. 2020. Trans-\n formers: State-of-the-art natural language process-\n ing. In Proceedings of the 2020 Conference on Em-\n pirical Methods in Natural Language Processing:\n System Demonstrations, pages 38\u201345, Online. Asso-\n ciation for Computational Linguistics.\n\nYi Yang and Arzoo Katiyar. 2020. Simple and effective\n few-shot named entity recognition with structured\n nearest neighbor learning. In Proceedings of the\n 2020 Conference on Empirical Methods in Natural\n Language Processing (EMNLP), pages 6365\u20136375,\n Online. Association for Computational Linguistics.", "mimetype": "text/plain", "start_char_idx": 3804, "end_char_idx": 5579, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "22d3e563-46d5-4e6a-a7d5-84b175421878": {"__data__": {"id_": "22d3e563-46d5-4e6a-a7d5-84b175421878", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "003a1c4d-0a7c-41a7-a905-8606e7d9e8d7", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "ad0e18b971bb4ab9c8187f0bb04337219fce5dc6ea6e8318b639d54ec4120184", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "04883a01-7aeb-46c7-ab74-6fa9337c61ee", "node_type": "1", "metadata": {}, "hash": "55aa69047f8e3909256c5458ebfa758fc460ce43edfe33236b07b60402cabefb", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Zhilin Yang, Peng Qi, Saizheng Zhang, Yoshua Bengio,\n William Cohen, Ruslan Salakhutdinov, and Christo-\n pher D. Manning. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 126, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "04883a01-7aeb-46c7-ab74-6fa9337c61ee": {"__data__": {"id_": "04883a01-7aeb-46c7-ab74-6fa9337c61ee", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "003a1c4d-0a7c-41a7-a905-8606e7d9e8d7", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "ad0e18b971bb4ab9c8187f0bb04337219fce5dc6ea6e8318b639d54ec4120184", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "22d3e563-46d5-4e6a-a7d5-84b175421878", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "ca7ccbc67094b071ae54681cfe5a52dd20523d7e9f4dc33f993608e3202a94b6", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "2018. HotpotQA: A dataset\n for diverse, explainable multi-hop question answer-\n ing. In Proceedings of the 2018 Conference on Em-\n pirical Methods in Natural Language Processing,\n pages 2369\u20132380, Brussels, Belgium. Association\n for Computational Linguistics.", "mimetype": "text/plain", "start_char_idx": 126, "end_char_idx": 408, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "b7178a9a-baa5-4df6-bf34-fe7e2076eb3f": {"__data__": {"id_": "b7178a9a-baa5-4df6-bf34-fe7e2076eb3f", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9cc67b545e7102c022d100a5ebde2ae77b4cd3823b2fdfa28afd9e7eca78373", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "5a13abdf-cef2-4d15-a4c6-2678fd859672", "node_type": "1", "metadata": {}, "hash": "f317051bb1e81164671436f9177d43d02200e199ff23099fb2826ab5291bfafc", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " A Data Details B Implementation Details\n A.1 Processing All the four models use BERTbase (Devlin et al.,\nWe use the dump2 of English Wikipedia, and ex- 2019a) and the backbone encoder and initial-\n tract the raw text by WikiExtractor3. NLTK lan- ized with the corresponding pre-trained uncased\n guage tool4 is used for word and sentence tok- weights6. The hidden size is 768, and the\n number of layers and heads are 12. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 1658, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "5a13abdf-cef2-4d15-a4c6-2678fd859672": {"__data__": {"id_": "5a13abdf-cef2-4d15-a4c6-2678fd859672", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9cc67b545e7102c022d100a5ebde2ae77b4cd3823b2fdfa28afd9e7eca78373", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "b7178a9a-baa5-4df6-bf34-fe7e2076eb3f", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "d2e2e7a04514266dfa807951883842eb702bf4c7ef1babd02dd7c1a89c77e42f", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "310337fe-3f15-42a8-a1fd-8a9bfc87f6a4", "node_type": "1", "metadata": {}, "hash": "e37bef9d9618bebaa14b5891b0c7db2793b16a917ffcfb43e1e7355fe7844552", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Models\n enization in the preprocessing stage. As stated are implemented by Pytorch framework7 (Paszke\n in Section 4.2, we develope a search engine to et al., 2019) and Huggingface transformers8 (Wolf\n index and select paragraphs with key words in dis- et al., 2020). BERT models are optimized by\n tant dictionaries. If the search is performed with AdamW9 (Loshchilov and Hutter, 2019) with the\n linear operations, the calculation process will be learning rate of 1e-4. We evaluate our implemen-\n extremely slow, instead, we adopt a search engine tations of NNShot and StructShot on the datasets\nwith Lucene5 to conduct effective indexing and used in the original paper, producing similar results.\n searching. For supervised NER, the batch size is 8, and we\n train BERT-Tagger for 70000 steps and evaluate\n A.2 More Details of the Schema it on the test set. For 5 way 1\u223c2 and 5\u223c10 shot\nAs stated in Section 4.1, we use FIGER (Ling and settings, the batch sizes are 16 and 4, and for 10\nWeld, 2012) as the start point and conduct rounds of way 1\u223c2 and 5\u223c10 shot settings, the batch sizes\n make a series of modifications. Despite the modifi- are 8 and 1. We train 12000 episodes and use 500\n cations mentioned in Section 4.1, we also conduct episodes of the dev set to select the best model,\n manual denoising of the automatically annotated and test it on 5000 episodes of the test set. Most\n data of FIER. For each entity type and the cor- hyper-parameters are from original settings. We\n responding automatically annotated mentions, we manually tune the hyper-parameter \u03c4 in Viterbi for\n randomly select 500 mentions and compute the StructShot, and the value for 1\u223c2 settings shot is\n accuracy to obtain the real frequency. For exam- 0.320, for 5\u223c10 shot settings is 0.434. All the ex-\n ple, statistics report that cemetery is a type with periments are conducted with CUDA on NVIDIA\n high frequency. However, a plenty number of the Tesla V100 GPUs. With 2 GPUs used, the average\n mentions labeled as cemetery are actually GPE. time to train 10000 episodes is 135 minutes. The\n Similarly, engineer is also affected by noise. ", "mimetype": "text/plain", "start_char_idx": 1658, "end_char_idx": 7692, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "310337fe-3f15-42a8-a1fd-8a9bfc87f6a4": {"__data__": {"id_": "310337fe-3f15-42a8-a1fd-8a9bfc87f6a4", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c9cc67b545e7102c022d100a5ebde2ae77b4cd3823b2fdfa28afd9e7eca78373", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "5a13abdf-cef2-4d15-a4c6-2678fd859672", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "4a97f8e28500fa2c16bf6a826663c16c49a54e60f0f8628619b859fb20781d54", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "number of parameters of the models is 120M.\n A.3 Interface C Entity Types\nThe interface in shown in Figure 4, where anno- As introduced in Section 4.1 in main text,\n tators could expediently select entity spans and FEW-NERD is manually annotated with 8 coarse-\n annotate the corresponding coarse and fine types. grained and 66 fine-grained entity types, and we\nAnd annotators could check the current annotation list all the types in Table 8. The schema is designed\n information on the interface. under practical situation, we hope the schema could\n help to better understand FEW-NERD . Note that\n Save AlI Confirm AII Deliver AII Task Summary Guideline Add Query Query List ORG is the abbreviation of Organization, and\n Annotation\n Annotation\n Entities MISC is the abbreviation of Miscellaneous.\n organization sportsteam\n person athlete person athlete person athlete\n On March 4, 2008_ Gilbert surpassed Paul Coffey and Marc- Oilers Nashville Predators\n person athlete\n organization sporsteam Gilbert Paul Coffey Marc-Andre Bergeron Dan Ellis\n AndreBergeron for the Oilers' franchise record for most goals scored by rookie defencemen with his tenth goal on the power play\n Reference\n against the Nashville Predators goaltender Dan Ellis athleteorganization sporsteampelson Reference\n person-athlete\n Figure 4: Screeshot of the interface used to annotate\n FEW-NERD.\n 6https://github.com/google-research/\n 2https://dumps.wikimedia.org/enwiki/ bert7https://pytorch.org\n 3https://github.com/attardi/ 8https://github.com/huggingface/\nwikiextractor4https://www.nltk.org transformers\n 5https://lucene.apache.org/ 9https://www.fast.ai/2018/07/02/\n adam-weight-decay/#adamw", "mimetype": "text/plain", "start_char_idx": 7692, "end_char_idx": 13204, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "91ed48ed-da65-4f77-98c0-99f800d0db39": {"__data__": {"id_": "91ed48ed-da65-4f77-98c0-99f800d0db39", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "d658722730c057e212c837b778ba32953eae565ad65c58e2936b7cf1b7c851d0", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "5998e668-1c0b-4446-ba84-6386fe51b607", "node_type": "1", "metadata": {}, "hash": "c4b192940978fdd36510941059d370d29caa71987793f34201bb0472461dca9d", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Coarse Type Fine Type\n GPE\n Body of Water\nLocation\n Island\n Mountain\n\n Park\n\n Road/Transit\n\n Other\n\n Actor\n Artist/Author\n Person Athlete\n Director\n Politician\n\n Scholar\n\n Soldier\n\n Other\n Company\n\n Education\n\n ORG Government\n Media\n\n Political/party\n Religion\n\n Sports League\n Sports Team\n Show ORG\n\n Other\n Airport\n Hospital\n\n Building Hotel\n\n Library\n\n Restaurant\n\n Sports Facility\n Theater\nExample\nThe company moved to a new office in Las Vegas, Nevada.\nThe Finke River normally drains into the Simpson Desert to the north west\nof the Macumba.\nAn invading army of Teutonic Knights conquered Gotland in 1398.\nC.G.E. ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 1215, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "5998e668-1c0b-4446-ba84-6386fe51b607": {"__data__": {"id_": "5998e668-1c0b-4446-ba84-6386fe51b607", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "d658722730c057e212c837b778ba32953eae565ad65c58e2936b7cf1b7c851d0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "91ed48ed-da65-4f77-98c0-99f800d0db39", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "8642d18aff05ba41c66b6502118f6cd10945423f7b5edb3c11e6a9783eae2a27", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "06456051-5542-40dd-9ddd-87258d76aa23", "node_type": "1", "metadata": {}, "hash": "a2e0ef7ef3d82054483dbf0249c544a5ee8dfc469082cdc0ac8c46fcd12d7908", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Mannerheim met Thubten Gyatso in Wutai Shan during the course of\nhis expedition from Turkestan to Peking.\nVictoria Park contains examples of work by several architects including\nAlfred Waterhouse (Xaverian College).\nThe thirty-first race of the 1951 season was held on October 7 at the one-mile\ndirt Occoneechee Speedway.\nHerodotus (7.59) reports that Doriscus was the first place Xerxes the Great\nstopped to review his troops.\nThe first performance of any work of Gustav Holst given in that capital.\nA film adaption was made by Arne Bornebusch in 1936.\nSmith was named co-Player of the Week in the Big Ten on offense.\nMargin for Error is a 1943 American drama film directed by Otto Preminger.\nThen-President Gloria Macapagal Arroyo led the inauguration rites of the\nfacility on August 19, 2002.\nJeffery Westbrook and Robert Tarjan (1992) developed an efficient data\nstructure for this problem based on disjoint-set data structures.\nSadowski was promoted to general, and took command of the freshly created\nFortified Area of Silesia.\nIn Albany, Doane planned a cathedral like those in England.\nA Vocaloid voicebank developed and distributed by Yamaha Corporation for\nVocaloid 4.\nLong volunteer coached the offensive line for Briarcrest Christian School\nfor 9 seasons.\nIt was constructed using the savings of the Quezon provincial government.\nHe was the Editor in Chief of Grenada\u2019s national newspaper \u201dThe Free West\nIndian\u201d.\nStanley Norman Evans was a British industrialist and Labour Party politician.\nD\u2019Souza was born on 10 November 1985 into a Goan Catholic family in\nGoa, India.\nHis strong performances convinced him that he was ready for the NBA.\n", "mimetype": "text/plain", "start_char_idx": 1215, "end_char_idx": 2867, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "06456051-5542-40dd-9ddd-87258d76aa23": {"__data__": {"id_": "06456051-5542-40dd-9ddd-87258d76aa23", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "d658722730c057e212c837b778ba32953eae565ad65c58e2936b7cf1b7c851d0", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "5998e668-1c0b-4446-ba84-6386fe51b607", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "ce00a88f68c0ad1319ad7e09222473578e04f19a9e6109de5ded3c9c1d9d14b5", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "The Pirates won the game and the World Series with Oldham on the mound.\nStanding in the Way of Control is the third studio album by American indie\nrock band Gossip.\nHe is the Creative Director of the Oliver Sacks Foundation.\nThe city is served by the Sir Seretse Khama International Airport.\nThen he did residency in ophthalmology at Farabi Eye Hospital from 1979\nto 1982.\nNick also played at the regular Sunday evening sessions that were held at\nthe Ramada Inn in Schenectady.\nRMIT University Library consists of six academic branch libraries in Aus-\ntralia and Vietnam.\nThe first Panda Express restaurant opened in Galleria II in the same year, on\nlevel 3 near Bloomingdale\u2019s.\nThis was the last year that the Razorbacks would play in Barnhill Arena.\nFrom 1954, she became a guest singer at the Vienna State Opera.", "mimetype": "text/plain", "start_char_idx": 2867, "end_char_idx": 3682, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "492b4f97-d056-4cde-bbf6-d2fa2a5b21b0": {"__data__": {"id_": "492b4f97-d056-4cde-bbf6-d2fa2a5b21b0", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c152e3c4ee6611a83b375fd6b7975ca60011353e30fd0e38986be3a95948c53c", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "fa1b2e06-8569-4c40-b557-50ab94a0728d", "node_type": "1", "metadata": {}, "hash": "8ccc7359253962eb9eb679631bc797c0fa109dcf61efd4760e05a82bc4b78894", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Other Eissler designated Masson to succeed him as Director of the Sigmund Freud\n Archives after his and Anna Freud\u2019s death.\n Music \u201dGet Right\u201d is a song recorded by American singer Jennifer Lopez for her\n fourth studio album.\n Art Film Margin for Error is a 1943 American drama film directed by Otto Preminger.\n Written Art The Count is a text adventure written by Scott Adams and published by\n Adventure International in 1979.\n Broadcast In the fall of 1957, Mitchell starred in ABC\u2019s \u201dThe Guy Mitchell Show\u201d.\n Painting His painting \u2019Rooftops\u2019 has been in the collection of the City of London\n Corporation since 1989.\n Other Kirwan appeared on stage at the Chichester Festival Theatre in a Jeremy\n Herrin production of Uncle Vanya.\n Airplane The Royal Norwegian Air Force\u2019s 330 Squadron operates a Westland Sea\n King search and rescue helicopter out of Flor\u00f8.\n Car The BYD Tang plug-in hybrid SUV was the top selling plug-in car with\nProduct 31,405 units delivered.\n Food The words \u201dTime to make the donuts\u201d are printed on the side of Dunkin\u2019\n Donuts boxes in memory of Michael Vale/Fred the Baker.\n Game Team Andromeda wanted to create a fully 3D arcade game, having worked\n on similar games such as \u201dOut Run\u201d which were not truly 3D.\n Ship As night fell, Marine Corps General Holland Smith studied reports aboard\n the command ship \u201dEldorado\u201d.\n Software It allows communication between the Wolfram Mathematica kernel and\n front-end.\n Train On 9 June 1929, railcar No. 220 \u201dWaterwitch\u201d overran signals at Marshgate\n Junction.\n Weapon Mannerheim gave Tibet\u2019s spiritual pontiff a Browning revolver and showed\n him how to reload the weapon.\n Other Rhinestone is as artificial and synthetic a concoction as has ever made its\n way to the screen.\n Attack It was on this route that Tecumseh was killed at the Battle of the Thames on\n October 5, 1813.\n Event Election At the 1935 United Kingdom general election, McGleenan stood in Armagh\n as an Independent Republican.\n Natural Disaster He was originally from Chicago, but moved to Japan after the Second Great\n Kanto earthquake that all but decimated Japan\u2019s infrastructure.\n ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 3013, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "fa1b2e06-8569-4c40-b557-50ab94a0728d": {"__data__": {"id_": "fa1b2e06-8569-4c40-b557-50ab94a0728d", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c152e3c4ee6611a83b375fd6b7975ca60011353e30fd0e38986be3a95948c53c", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "492b4f97-d056-4cde-bbf6-d2fa2a5b21b0", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "9275d481485002e742d430b0789ca6e90589eaa44b188a7790fecf04ffb61047", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "f70535c7-2605-4c2f-b0fc-4e390501a1e4", "node_type": "1", "metadata": {}, "hash": "2b89d7b9dc93a020bc5669695aec18eee0be614c2dae21179ba633dfc5f96f09", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Protest In 1832, following the failed Polish November Uprising, the Dominican\n monastery was sequestrated.\n Sports Event Carle received a new defense partner when the Flyers traded for Chris\n Pronger at the 2009 NHL Entry Draft.\n Other One of TMG\u2019s first performances was in September 1972 at the Waitara\n Festival.\n ", "mimetype": "text/plain", "start_char_idx": 3013, "end_char_idx": 3476, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "f70535c7-2605-4c2f-b0fc-4e390501a1e4": {"__data__": {"id_": "f70535c7-2605-4c2f-b0fc-4e390501a1e4", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "c152e3c4ee6611a83b375fd6b7975ca60011353e30fd0e38986be3a95948c53c", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "fa1b2e06-8569-4c40-b557-50ab94a0728d", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "9ee325e155102138302e74b1241fdaae467b551764014d87255c4702e4eff852", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Astronomy He discovered a number of double stars and took many photographs of Mars.\n Award He was awarded the Bialik Prize eight years later for these efforts.\n Biology Estradiol valerate is rapidly hydrolyzed into estradiol in the intestines.\n Chemistry It was the first gas manufacturer in Kuwait to provide industrial gases such\n MISC as oxygen and nitrogen to the local petroleum industry.\n Currency Total investment has been 19 billion Norwegian krone.\n Disease The 2020 competition was cancelled as part of the effort to minimize the\n COVID-19 pandemic.\n Educational Degree Sigurlaug enrolled into the medical department of the University of Iceland\n and graduated as a Medical Doctor in 2010.\n God Originally a farmer, Viking Ragnar Lothbrok claims to be descended from\n the god Odin.", "mimetype": "text/plain", "start_char_idx": 3476, "end_char_idx": 4528, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "76c294c4-2bf8-4452-9ad4-beb68c0848c3": {"__data__": {"id_": "76c294c4-2bf8-4452-9ad4-beb68c0848c3", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "c5f79cf4-c754-4363-b83f-4f33f84bb398", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "cae5d206ee6b291b2b022af2d4ceddd1925842a410b5abb435403ea4d737176c", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "c82a6593-cdd2-458f-915a-b0cbba22ba2a", "node_type": "1", "metadata": {}, "hash": "557f6f478ab11d50cc1b3ebd07fe2701a98d8cdd00c02544412bab67167f4460", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": " Language The play was translated into English by Michael Hofmann and published in\n 1987 by Hamish Hamilton.\n Law Four of his five policy recommendations were incorporated into the U.S.\n ", "mimetype": "text/plain", "start_char_idx": 0, "end_char_idx": 408, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}, "c82a6593-cdd2-458f-915a-b0cbba22ba2a": {"__data__": {"id_": "c82a6593-cdd2-458f-915a-b0cbba22ba2a", "embedding": null, "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "c5f79cf4-c754-4363-b83f-4f33f84bb398", "node_type": "4", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "cae5d206ee6b291b2b022af2d4ceddd1925842a410b5abb435403ea4d737176c", "class_name": "RelatedNodeInfo"}, "2": {"node_id": "76c294c4-2bf8-4452-9ad4-beb68c0848c3", "node_type": "1", "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}, "hash": "eba346f03d76d266510635e125d822a6ac398628de7719f7ee3791db2145d514", "class_name": "RelatedNodeInfo"}}, "metadata_template": "{key}: {value}", "metadata_separator": "\n", "text": "Federal Financial Law of 1966.\n Living Thing Schistura horai is a species of ray-finned fish in the stone loach genus\n \u201dSchistura\u201d.\n Medical Precious Blood Hospital offers specialist outpatient and inpatient services in\n General medicine.\n\nTable 8: All the coarse-grained and fine-grained entity types in FEW-NERD, we only highlight the entities with\nthe corresponding entity types in \u201cExample\u201d.", "mimetype": "text/plain", "start_char_idx": 408, "end_char_idx": 1017, "metadata_seperator": "\n", "text_template": "{metadata_str}\n\n{content}", "class_name": "TextNode"}, "__type__": "1"}}, "docstore/metadata": {"3e9bf844-0a4e-4de1-8be3-8a00f47f9be1": {"doc_hash": "576796b57e689b22de15d34b82d5053b9d5bdf4a99467e23748720950f46b830", "ref_doc_id": "13560595-482a-49aa-a22e-445536e10517"}, "d950eb15-82e3-4c1c-b8bb-d5a7249aadae": {"doc_hash": "c1f96e27b46409fd7c6f3cac110b6b12f040b98823a535ffba4579f5196168ac", "ref_doc_id": "13560595-482a-49aa-a22e-445536e10517"}, "89d6be11-da41-4dd7-899f-1340a92c4cd2": {"doc_hash": "cb4d43621c35c5b8f1ff16218a1e21d6f654171eea545a23bef5b185dcae6c5d", "ref_doc_id": "19de4927-f29a-4b3f-8694-79ef720fc706"}, "3a410009-58ad-4f35-9627-bfaa50dd56d8": {"doc_hash": "973f10c7404cc1f6eb4b7419ecb9f9b65f7f4a1960933a0d9e77ae416d60afb4", "ref_doc_id": "19de4927-f29a-4b3f-8694-79ef720fc706"}, "08a4de1b-58e0-4975-a68c-99b215ddca75": {"doc_hash": "886f24d810ac240a26ce195301ff78bf68c2e691894a7442daf75bbd3b93bb2c", "ref_doc_id": "19de4927-f29a-4b3f-8694-79ef720fc706"}, "703bb83a-4aea-4eb3-85a6-086d25555ccb": {"doc_hash": "c1fea1f263e861918b8164dd5899ca02120368c82ba7b941e44eca96b94f63f8", "ref_doc_id": "7a282a7e-fbe9-4d98-a3ca-17e66c5b5818"}, "f6c2c3db-ba3c-489e-9459-e6b4f579286b": {"doc_hash": "b019f6b69c45a12b54f0253a7ef02aa219ff58969e4136031fb9d8284e5b2151", "ref_doc_id": "7a282a7e-fbe9-4d98-a3ca-17e66c5b5818"}, "547f541a-ed82-4d22-af00-51a95dc3f0e1": {"doc_hash": "4f67430b5ae0a067bfe8d8399de04e224234e324f5e4e2ca18e032a9ab9275f4", "ref_doc_id": "48384cbb-f501-4add-bb32-198c5bc033a8"}, "5bd4a82d-022c-47e6-9bbb-8bdeef20f515": {"doc_hash": "7431dd7f03f5d180517e133d870a0219c77f32d2552a9b6218cce714a9867162", "ref_doc_id": "48384cbb-f501-4add-bb32-198c5bc033a8"}, "de0a20d6-b6dc-4ff3-8b6e-f6ad19472b08": {"doc_hash": "3255d025003acd326f1bc88fdaa9176eb886b6027ee323e73b9f687405d0ce07", "ref_doc_id": "48384cbb-f501-4add-bb32-198c5bc033a8"}, "39abd0c8-e1f5-4ee3-8da1-537353646ec6": {"doc_hash": "946e0fda6aa5de3de03554f4cd8f69a4c4c4cc39c9c6c3c0d18ab2f3d2397579", "ref_doc_id": "297587ad-1bff-4027-9c1e-7b5732f0d283"}, "ec59971c-cf54-40e2-9a55-c5de0cdbea76": {"doc_hash": "7164356aa9e8f2df54101a489109a375b5e85b1cb43ad0fb15ccd21f4ecc4ffe", "ref_doc_id": "297587ad-1bff-4027-9c1e-7b5732f0d283"}, "ce1695d1-7872-48ae-8589-5b5ed5355234": {"doc_hash": "ba3a7cfe46d943d4be44eed67ff6bd9acc215493ddfeac55bec780ddd5d933de", "ref_doc_id": "297587ad-1bff-4027-9c1e-7b5732f0d283"}, "5a2138f4-d397-4d63-9cac-d45d9fe4de7e": {"doc_hash": "1a6d32e20e86e7ff943edededb2390c3598451572dfb8a49fa89230f4d3c598c", "ref_doc_id": "12b98149-050c-4c93-81db-ea720272647e"}, "a2435907-a143-49c8-b483-ee3e8a02ba74": {"doc_hash": "8dae53115baed88ef4682b02b0e2d08e94e24a4984ce6b08396678c1a7c97ce7", "ref_doc_id": "12b98149-050c-4c93-81db-ea720272647e"}, "b3793ecc-96fc-4f50-bc61-21be9868e23b": {"doc_hash": "8a7a3f8bb5b2063ab717850df1411b14083f16386a2a6360d7d756256bd366c4", "ref_doc_id": "12b98149-050c-4c93-81db-ea720272647e"}, "c33b63d5-7341-40f1-9016-43201810afd5": {"doc_hash": "3d5e80599a4375c7886c2967fcf8a54badb3513ff488d9a03b295b8074e1fa7a", "ref_doc_id": "5c930767-ef2a-434e-91d1-780d7a9deb81"}, "ecacd21e-1829-48fa-95ab-5c90846e8dd3": {"doc_hash": "959bf9577e6f311a7c7757d259611276d0f23bc8487222cd6678c94fb44efeba", "ref_doc_id": "5c930767-ef2a-434e-91d1-780d7a9deb81"}, "95509f41-b5f0-4bc4-ba2c-886ad18a6046": {"doc_hash": "063045c6ffb3241c763f647f10411ec81558a4e1c93cdabb503301e008d0febe", "ref_doc_id": "5c930767-ef2a-434e-91d1-780d7a9deb81"}, "b778bdc3-b7ac-4222-b5f9-8e068507f3a6": {"doc_hash": "a593b49223ff84672127a243d4951709bb632f75b3dec73238a7928fce3fe317", "ref_doc_id": "fc3045bc-4ea0-4c56-aa91-9689fa1cee0d"}, "810ba2d6-65c6-4378-91c4-4ba38f087746": {"doc_hash": "5c86f26215998964aa4f80733a31ff48d71834cefbc0e7c508652bddf358767a", "ref_doc_id": "fc3045bc-4ea0-4c56-aa91-9689fa1cee0d"}, "27c32a2f-d0a1-4540-90a2-aed3847dc7e4": {"doc_hash": "bbabec9c4778e4f90d334b2020e61df2ae1bdc85d03e854ff28e0da66e1a8dcf", "ref_doc_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708"}, "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b": {"doc_hash": "15380f84e88453210db0a8c7ab5cc42009ff66f68d4e66e029d3d1fcf38efd86", "ref_doc_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708"}, "9cc52dba-eaee-481f-b340-5c0a400c28e7": {"doc_hash": "e57cd03791979380484f8ad4b23c0ac1689681fe0a3a8a838cb94995b2e71224", "ref_doc_id": "69aa87b9-95d8-46bd-b46f-f3948d0a5708"}, "f1116f47-ab33-4225-bb26-ddc62fe95589": {"doc_hash": "1bbeb825aa73acd4f2bda14f92aebcaf9ef109db74efb2e92cd7058f95df94f1", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "e72dac24-34a6-4159-818b-d6f023d89f0c": {"doc_hash": "e91fed0f32692abd2070a7bf73e855efebac7c6516b221b1f5ef89283bcad78b", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "00f9b9f2-a717-4ccb-a263-c9c92e3a0604": {"doc_hash": "1dd73aff08b1e4d79f5ebb9551934a92650bfe7bffcbc212780a9f2d886731eb", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "0b352382-f3d6-4693-8571-1762bd92e288": {"doc_hash": "f36c6ca5eebedf6abb5b59826751b9a025960535bfd565d7fd44080971aefc81", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "812846d5-bd57-4218-8039-072d4826c457": {"doc_hash": "8afe57d020b291f7d2c074a8cfbea21e5d91fa2a6ec737f129bf199d39f7c070", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "c52e3f4a-332f-4829-9c57-c42ad62c4c61": {"doc_hash": "999e3a4b02d38c963706e1d05333b4277f2012e56ef909d182fa73e9e77e030e", "ref_doc_id": "7d05593a-cace-44c8-ad7c-15b475ca0267"}, "e32886ff-2b1a-422c-b95b-e421bd43419f": {"doc_hash": "e0846d4696ffe6313c11e374b1bba89f9cac0be7bb6b7a5a01858b2b136efddd", "ref_doc_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00"}, "fbb1da9d-8adb-456b-a269-3544ffe0f8c3": {"doc_hash": "9c1acae81d6cf06908c3d8764c2a1ce3939120549a1ea21483b3414c13017bed", "ref_doc_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00"}, "5b74caa6-0e1a-4998-8fce-bc485614f693": {"doc_hash": "fdfb36f9efb828c887c3928c11563f0abed35d6b5a617e8ea570abd17c2d6464", "ref_doc_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00"}, "ae5d7634-5d34-44d1-a4e7-8d200469f0db": {"doc_hash": "f8eb07f417ebf2d68de8a444127f40066c8f8311545bb7cf8fa2a85315239053", "ref_doc_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00"}, "51714cff-a266-4cf3-96f1-bbb555068ce9": {"doc_hash": "51f2db76684508d27aa171fd8830fe03652e44af930add124e60d9816a15b89c", "ref_doc_id": "44bfd932-fee6-4074-a125-50f4c9a9ec00"}, "22d3e563-46d5-4e6a-a7d5-84b175421878": {"doc_hash": "ca7ccbc67094b071ae54681cfe5a52dd20523d7e9f4dc33f993608e3202a94b6", "ref_doc_id": "003a1c4d-0a7c-41a7-a905-8606e7d9e8d7"}, "04883a01-7aeb-46c7-ab74-6fa9337c61ee": {"doc_hash": "03eb769ba7629a8ef408fb471a560359be549810df1dbc93e6d6b84ee2947526", "ref_doc_id": "003a1c4d-0a7c-41a7-a905-8606e7d9e8d7"}, "b7178a9a-baa5-4df6-bf34-fe7e2076eb3f": {"doc_hash": "d2e2e7a04514266dfa807951883842eb702bf4c7ef1babd02dd7c1a89c77e42f", "ref_doc_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba"}, "5a13abdf-cef2-4d15-a4c6-2678fd859672": {"doc_hash": "4a97f8e28500fa2c16bf6a826663c16c49a54e60f0f8628619b859fb20781d54", "ref_doc_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba"}, "310337fe-3f15-42a8-a1fd-8a9bfc87f6a4": {"doc_hash": "8c24e3e71eb1d3868096bed9e6509ab02650f568637014c55491d9bf890eb80e", "ref_doc_id": "a4ada096-e175-47ff-9fc4-b609c217c6ba"}, "91ed48ed-da65-4f77-98c0-99f800d0db39": {"doc_hash": "8642d18aff05ba41c66b6502118f6cd10945423f7b5edb3c11e6a9783eae2a27", "ref_doc_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080"}, "5998e668-1c0b-4446-ba84-6386fe51b607": {"doc_hash": "ce00a88f68c0ad1319ad7e09222473578e04f19a9e6109de5ded3c9c1d9d14b5", "ref_doc_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080"}, "06456051-5542-40dd-9ddd-87258d76aa23": {"doc_hash": "ae01a3026568bf3de3ae7aa19abc61054c0fbc8416c250b57616a83b36ee7794", "ref_doc_id": "df5c2ab9-c043-48d1-b030-1e78c26fe080"}, "492b4f97-d056-4cde-bbf6-d2fa2a5b21b0": {"doc_hash": "9275d481485002e742d430b0789ca6e90589eaa44b188a7790fecf04ffb61047", "ref_doc_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2"}, "fa1b2e06-8569-4c40-b557-50ab94a0728d": {"doc_hash": "9ee325e155102138302e74b1241fdaae467b551764014d87255c4702e4eff852", "ref_doc_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2"}, "f70535c7-2605-4c2f-b0fc-4e390501a1e4": {"doc_hash": "f119230e0539df52ca6e9b72b018498680cc81cac108492673c48b0c459b7a0d", "ref_doc_id": "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2"}, "76c294c4-2bf8-4452-9ad4-beb68c0848c3": {"doc_hash": "eba346f03d76d266510635e125d822a6ac398628de7719f7ee3791db2145d514", "ref_doc_id": "c5f79cf4-c754-4363-b83f-4f33f84bb398"}, "c82a6593-cdd2-458f-915a-b0cbba22ba2a": {"doc_hash": "a66614eb078d9801404b651cb8d4987412accf5d8b356598b1060bf1396a1fa1", "ref_doc_id": "c5f79cf4-c754-4363-b83f-4f33f84bb398"}}, "docstore/ref_doc_info": {"13560595-482a-49aa-a22e-445536e10517": {"node_ids": ["3e9bf844-0a4e-4de1-8be3-8a00f47f9be1", "d950eb15-82e3-4c1c-b8bb-d5a7249aadae"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "19de4927-f29a-4b3f-8694-79ef720fc706": {"node_ids": ["89d6be11-da41-4dd7-899f-1340a92c4cd2", "3a410009-58ad-4f35-9627-bfaa50dd56d8", "08a4de1b-58e0-4975-a68c-99b215ddca75"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "7a282a7e-fbe9-4d98-a3ca-17e66c5b5818": {"node_ids": ["703bb83a-4aea-4eb3-85a6-086d25555ccb", "f6c2c3db-ba3c-489e-9459-e6b4f579286b"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "48384cbb-f501-4add-bb32-198c5bc033a8": {"node_ids": ["547f541a-ed82-4d22-af00-51a95dc3f0e1", "5bd4a82d-022c-47e6-9bbb-8bdeef20f515", "de0a20d6-b6dc-4ff3-8b6e-f6ad19472b08"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "297587ad-1bff-4027-9c1e-7b5732f0d283": {"node_ids": ["39abd0c8-e1f5-4ee3-8da1-537353646ec6", "ec59971c-cf54-40e2-9a55-c5de0cdbea76", "ce1695d1-7872-48ae-8589-5b5ed5355234"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "12b98149-050c-4c93-81db-ea720272647e": {"node_ids": ["5a2138f4-d397-4d63-9cac-d45d9fe4de7e", "a2435907-a143-49c8-b483-ee3e8a02ba74", "b3793ecc-96fc-4f50-bc61-21be9868e23b"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "5c930767-ef2a-434e-91d1-780d7a9deb81": {"node_ids": ["c33b63d5-7341-40f1-9016-43201810afd5", "ecacd21e-1829-48fa-95ab-5c90846e8dd3", "95509f41-b5f0-4bc4-ba2c-886ad18a6046"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "fc3045bc-4ea0-4c56-aa91-9689fa1cee0d": {"node_ids": ["b778bdc3-b7ac-4222-b5f9-8e068507f3a6", "810ba2d6-65c6-4378-91c4-4ba38f087746"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "69aa87b9-95d8-46bd-b46f-f3948d0a5708": {"node_ids": ["27c32a2f-d0a1-4540-90a2-aed3847dc7e4", "c2ae573a-cfd8-4747-a7c2-ce1d55a0484b", "9cc52dba-eaee-481f-b340-5c0a400c28e7"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "7d05593a-cace-44c8-ad7c-15b475ca0267": {"node_ids": ["f1116f47-ab33-4225-bb26-ddc62fe95589", "e72dac24-34a6-4159-818b-d6f023d89f0c", "00f9b9f2-a717-4ccb-a263-c9c92e3a0604", "0b352382-f3d6-4693-8571-1762bd92e288", "812846d5-bd57-4218-8039-072d4826c457", "c52e3f4a-332f-4829-9c57-c42ad62c4c61"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "44bfd932-fee6-4074-a125-50f4c9a9ec00": {"node_ids": ["e32886ff-2b1a-422c-b95b-e421bd43419f", "fbb1da9d-8adb-456b-a269-3544ffe0f8c3", "5b74caa6-0e1a-4998-8fce-bc485614f693", "ae5d7634-5d34-44d1-a4e7-8d200469f0db", "51714cff-a266-4cf3-96f1-bbb555068ce9"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "003a1c4d-0a7c-41a7-a905-8606e7d9e8d7": {"node_ids": ["22d3e563-46d5-4e6a-a7d5-84b175421878", "04883a01-7aeb-46c7-ab74-6fa9337c61ee"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "a4ada096-e175-47ff-9fc4-b609c217c6ba": {"node_ids": ["b7178a9a-baa5-4df6-bf34-fe7e2076eb3f", "5a13abdf-cef2-4d15-a4c6-2678fd859672", "310337fe-3f15-42a8-a1fd-8a9bfc87f6a4"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "df5c2ab9-c043-48d1-b030-1e78c26fe080": {"node_ids": ["91ed48ed-da65-4f77-98c0-99f800d0db39", "5998e668-1c0b-4446-ba84-6386fe51b607", "06456051-5542-40dd-9ddd-87258d76aa23"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "b927cf40-69a0-4d7a-9d6f-768c72cbf5d2": {"node_ids": ["492b4f97-d056-4cde-bbf6-d2fa2a5b21b0", "fa1b2e06-8569-4c40-b557-50ab94a0728d", "f70535c7-2605-4c2f-b0fc-4e390501a1e4"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}, "c5f79cf4-c754-4363-b83f-4f33f84bb398": {"node_ids": ["76c294c4-2bf8-4452-9ad4-beb68c0848c3", "c82a6593-cdd2-458f-915a-b0cbba22ba2a"], "metadata": {"file_path": "C:\\MAIN\\it\\projects\\vs\\ds_rag\\data\\2105.07464v6.pdf", "file_name": "2105.07464v6.pdf", "file_type": "application/pdf", "file_size": 843730, "creation_date": "2024-12-05", "last_modified_date": "2024-11-14"}}}} \ No newline at end of file