emanuelaboros committed on
Commit
f4e99e2
·
1 Parent(s): 8458bdc

url --> wk_url

Browse files
Files changed (1) hide show
  1. app.py +1 -72
app.py CHANGED
@@ -23,77 +23,6 @@ nel_pipeline = pipeline(
23
  print("Model loaded successfully!")
24
 
25
 
26
- def get_wikipedia_page_props(input_str: str):
27
- """
28
- Retrieves the QID for a given Wikipedia page name from the specified language Wikipedia.
29
- If the request fails, it falls back to using the OpenRefine Wikidata API.
30
-
31
- Args:
32
- input_str (str): The input string in the format "page_name >> language".
33
-
34
- Returns:
35
- str: The QID or "NIL" if the QID is not found.
36
- """
37
- try:
38
- # Preprocess the input string
39
- page_name, language = input_str.split(" >> ")
40
- page_name = page_name.strip()
41
- language = language.strip()
42
- except ValueError:
43
- return "Invalid input format. Use 'page_name >> language'."
44
-
45
- wikipedia_url = f"https://{language}.wikipedia.org/w/api.php"
46
- wikipedia_params = {
47
- "action": "query",
48
- "prop": "pageprops",
49
- "format": "json",
50
- "titles": page_name,
51
- }
52
-
53
- qid = "NIL"
54
- try:
55
- # Attempt to fetch from Wikipedia API
56
- response = requests.get(wikipedia_url, params=wikipedia_params)
57
- response.raise_for_status()
58
- data = response.json()
59
-
60
- if "pages" in data["query"]:
61
- page_id = list(data["query"]["pages"].keys())[0]
62
-
63
- if "pageprops" in data["query"]["pages"][page_id]:
64
- page_props = data["query"]["pages"][page_id]["pageprops"]
65
-
66
- if "wikibase_item" in page_props:
67
- return page_props["wikibase_item"]
68
- else:
69
- return qid
70
- else:
71
- return qid
72
- except Exception as e:
73
- return qid
74
-
75
-
76
- def get_wikipedia_title(qid, language="en"):
77
- url = f"https://www.wikidata.org/w/api.php"
78
- params = {
79
- "action": "wbgetentities",
80
- "format": "json",
81
- "ids": qid,
82
- "props": "sitelinks/urls",
83
- "sitefilter": f"{language}wiki",
84
- }
85
-
86
- response = requests.get(url, params=params)
87
- data = response.json()
88
-
89
- try:
90
- title = data["entities"][qid]["sitelinks"][f"{language}wiki"]["title"]
91
- url = data["entities"][qid]["sitelinks"][f"{language}wiki"]["url"]
92
- return title, url
93
- except KeyError:
94
- return "NIL", "None"
95
-
96
-
97
  def disambiguate_sentence(sentence):
98
  # Generate model outputs for the sentence
99
  linked_entity = nel_pipeline(sentence)
@@ -105,7 +34,7 @@ def disambiguate_sentence(sentence):
105
  <strong>Entity:</strong> {linked_entity['surface']} <br>
106
  <strong>Wikidata QID:</strong> {linked_entity['wkd_id']} <br>
107
  <strong>Wikipedia Title:</strong> {linked_entity['wkpedia_pagename']} <br>
108
- <a href="{linked_entity['url']}" target="_blank">Wikipedia Page</a>
109
  </div>
110
  """
111
  return entity_info
 
23
  print("Model loaded successfully!")
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def disambiguate_sentence(sentence):
27
  # Generate model outputs for the sentence
28
  linked_entity = nel_pipeline(sentence)
 
34
  <strong>Entity:</strong> {linked_entity['surface']} <br>
35
  <strong>Wikidata QID:</strong> {linked_entity['wkd_id']} <br>
36
  <strong>Wikipedia Title:</strong> {linked_entity['wkpedia_pagename']} <br>
37
+ <a href="{linked_entity['wkpedia_url']}" target="_blank">Wikipedia Page</a>
38
  </div>
39
  """
40
  return entity_info