Update beispiele
Signed-off-by: Christopher Arndt <chris@chrisarndt.de>
This commit is contained in:
parent
eab71dd490
commit
205c83790c
|
@ -0,0 +1,34 @@
|
|||
"""Collect the links listed in the "See also" section of a Wikipedia article.

Usage: pass an article path relative to ``base_url`` (for example
``wiki/Monty_Python``) as the first command line argument.  Without an
argument a default article is fetched.

The script prints a list of ``(title, absolute_url)`` tuples.  The list is
empty when the page could not be fetched or has no "See also" anchor; in
both cases a warning is written to stderr.
"""
import sys
from urllib.parse import urljoin

import requests
import lxml.html

base_url = "https://en.wikipedia.org/"

# TODO: also accept a complete URL on the command line and split it with
# urllib.parse.urlsplit() instead of always prefixing base_url.
if len(sys.argv) >= 2:
    url = base_url + sys.argv[1]
else:
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

# Without a timeout requests waits indefinitely on an unresponsive server.
resp = requests.get(url, timeout=10)

see_also = []

if resp.status_code == 200:
    tree = lxml.html.fromstring(resp.text)

    # Look the anchor up by id only, so the lookup does not depend on which
    # tag carries it.  The result may be empty, hence no blind [0] indexing.
    anchors = tree.xpath("//*[@id='See_also']")

    if anchors:
        # The element carrying the id sits inside the heading element; the
        # section content consists of the following siblings of that heading.
        see_also_heading = anchors[0].getparent()
        heading_kind = (see_also_heading.tag, see_also_heading.get("class"))

        for sibling in see_also_heading.itersiblings():
            # A sibling with the same tag and class as our heading is taken
            # to be the heading of the next section: stop there so that links
            # from later sections are not reported as "See also" entries.
            if (sibling.tag, sibling.get("class")) == heading_kind:
                break

            # Only direct list items of the sibling are considered; anchors
            # without an href attribute are skipped by the XPath predicate.
            for link in sibling.xpath("li/a[@href]"):
                title = link.get("title")
                # urljoin resolves the (usually site-relative) link against
                # the URL the page was actually served from, which avoids the
                # doubled slash produced by plain string concatenation.
                see_also.append((title, urljoin(resp.url, link.get("href"))))
    else:
        print(f"Warning: no 'See also' section found in {url}",
              file=sys.stderr)
else:
    print(f"Warning: request for {url} returned status {resp.status_code}",
          file=sys.stderr)

print(see_also)
|
|
@ -24,11 +24,13 @@ try:
|
|||
print(f"Warning: could not parse line {i+1}: {exc}")
|
||||
else:
|
||||
try:
|
||||
items = [int(item) for item in raw_data.split(",")] # split raw data at commas
|
||||
# split raw data at commas
|
||||
items = [int(item.strip()) for item in raw_data.split(",")]
|
||||
except (ValueError, TypeError) as exc:
|
||||
print(f"Warning: could not parse data on line {i+1}: {exc}")
|
||||
|
||||
else:
|
||||
data[name.strip()] = items
|
||||
|
||||
except OSError as exc:
|
||||
print(f"Could not open file {filename}: {exc}")
|
||||
|
||||
|
|
Loading…
Reference in New Issue