Update beispiele

Signed-off-by: Christopher Arndt <chris@chrisarndt.de>
This commit is contained in:
Christopher Arndt 2024-05-08 16:20:55 +02:00
parent eab71dd490
commit 205c83790c
2 changed files with 38 additions and 2 deletions

34
beispiele/http1.py Normal file
View File

@ -0,0 +1,34 @@
"""Print the "See also" links of a Wikipedia article as (title, URL) tuples.

Usage: http1.py [PAGE]

PAGE may be a path relative to the Wikipedia base URL (for example
``wiki/Monty_Python``) or a complete URL. Without an argument a default
article is fetched.
"""
import sys
from urllib.parse import urljoin

import lxml.html
import requests

base_url = "https://en.wikipedia.org/"
default_url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

if len(sys.argv) >= 2:
    # urljoin() copes with a leading slash in the argument (plain string
    # concatenation would yield "//") and leaves a complete URL unchanged.
    url = urljoin(base_url, sys.argv[1])
else:
    url = default_url

# Without a timeout an unresponsive server would block the script forever.
resp = requests.get(url, timeout=10)
see_also = []

if resp.status_code == 200:
    tree = lxml.html.fromstring(resp.text)
    # Match on the id alone, regardless of which element carries it.
    anchors = tree.xpath("//*[@id='See_also']")

    if anchors:
        see_also_heading = anchors[0].getparent()

        # Collect the links of every list that follows the heading element.
        for sibling in see_also_heading.itersiblings():
            for link in sibling.xpath("li/a"):
                href = link.get("href")
                if not href:
                    # <a> element without a target, nothing to resolve.
                    continue

                # Resolve the (usually relative) link target against the
                # URL the page was actually served from.
                see_also.append((link.get("title"), urljoin(resp.url, href)))
    else:
        print(f"Warning: no 'See also' section found in {url}", file=sys.stderr)
else:
    print(f"Warning: request for {url} returned HTTP status {resp.status_code}",
          file=sys.stderr)

print(see_also)

View File

@ -24,11 +24,13 @@ try:
print(f"Warning: could not parse line {i+1}: {exc}") print(f"Warning: could not parse line {i+1}: {exc}")
else: else:
try: try:
items = [int(item) for item in raw_data.split(",")] # split raw data at commas # split raw data at commas
items = [int(item.strip()) for item in raw_data.split(",")]
except (ValueError, TypeError) as exc: except (ValueError, TypeError) as exc:
print(f"Warning: could not parse data on line {i+1}: {exc}") print(f"Warning: could not parse data on line {i+1}: {exc}")
else:
data[name.strip()] = items data[name.strip()] = items
except OSError as exc: except OSError as exc:
print(f"Could not open file {filename}: {exc}") print(f"Could not open file {filename}: {exc}")