Update beispiele
Signed-off-by: Christopher Arndt <chris@chrisarndt.de>
This commit is contained in:
parent
eab71dd490
commit
205c83790c
|
@ -0,0 +1,34 @@
|
||||||
|
import sys
from urllib.parse import urljoin

import lxml.html
import requests
|
||||||
|
|
||||||
|
# Print the "See also" links of a Wikipedia article as (title, URL) tuples.
#
# Usage: pass an article path (e.g. "wiki/Monty_Python") or a full URL as the
# first command line argument; without an argument the article on the Python
# programming language is used.

base_url = "https://en.wikipedia.org/"

if len(sys.argv) >= 2:
    # urljoin() copes with relative paths, absolute paths and full URLs, so
    # "wiki/Foo", "/wiki/Foo" and "https://host/wiki/Foo" are all accepted.
    url = urljoin(base_url, sys.argv[1])
else:
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

# Without a timeout an unresponsive server would block the script forever.
resp = requests.get(url, timeout=10)

see_also = []

if resp.status_code == 200:
    tree = lxml.html.fromstring(resp.text)
    headlines = tree.xpath("//span[@id='See_also']")

    if headlines:
        # The span sits inside the heading element; the link lists are
        # looked up among the siblings that follow that heading.
        see_also_heading = headlines[0].getparent()

        for sibling in see_also_heading.itersiblings():
            # Stop at the next element with the heading's tag so that lists
            # of later sections are not collected as well (assumes sections
            # are delimited by sibling headings of the same tag -- confirm
            # against the page markup).
            if sibling.tag == see_also_heading.tag:
                break

            for link in sibling.xpath("li/a"):
                see_also_url = link.get("href")
                if see_also_url is None:
                    # Anchor without a target: nothing to link to.
                    continue

                # Resolve the link against the page URL instead of
                # concatenating strings, which produced "//wiki/..." URLs.
                see_also.append((link.get("title"), urljoin(url, see_also_url)))
    else:
        print(f"No 'See also' section found in {url}", file=sys.stderr)
else:
    print(
        f"Request for {url} failed with status {resp.status_code}",
        file=sys.stderr,
    )

print(see_also)
|
|
@ -24,11 +24,13 @@ try:
|
||||||
print(f"Warning: could not parse line {i+1}: {exc}")
|
print(f"Warning: could not parse line {i+1}: {exc}")
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
items = [int(item) for item in raw_data.split(",")] # split raw data at commas
|
# split raw data at commas
|
||||||
|
items = [int(item.strip()) for item in raw_data.split(",")]
|
||||||
except (ValueError, TypeError) as exc:
|
except (ValueError, TypeError) as exc:
|
||||||
print(f"Warning: could not parse data on line {i+1}: {exc}")
|
print(f"Warning: could not parse data on line {i+1}: {exc}")
|
||||||
|
else:
|
||||||
|
data[name.strip()] = items
|
||||||
|
|
||||||
data[name.strip()] = items
|
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
print(f"Could not open file {filename}: {exc}")
|
print(f"Could not open file {filename}: {exc}")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue