diff --git a/beispiele/http1.py b/beispiele/http1.py new file mode 100644 index 0000000..4f4fd67 --- /dev/null +++ b/beispiele/http1.py @@ -0,0 +1,34 @@ +import sys +import requests +import lxml.html + +base_url = "https://en.wikipedia.org/" +if len(sys.argv) >= 2: + url = base_url + sys.argv[1] +else: + url = "https://en.wikipedia.org/wiki/Python_(programming_language)" + +resp = requests.get(url) + +see_also = [] + +if resp.status_code == 200: + html = resp.text + #print(html[:100]) + tree = lxml.html.fromstring(html) + + see_also_heading = tree.xpath("//span[@id='See_also']")[0].getparent() + + for sibling in see_also_heading.itersiblings(): + hrefs = sibling.xpath("li/a") + + for href in hrefs: + title = href.get("title") + see_also_url = href.get("href") + #print(title, see_also_url) + ## TODO + ## Parse URL from command line with urllib.parse.urlsplit() + ## and concatenate base URL and relative URL from link + see_also.append((title, base_url + see_also_url)) + +print(see_also) \ No newline at end of file diff --git a/beispiele/readfile6.py b/beispiele/readfile6.py index efabef5..afa3a93 100644 --- a/beispiele/readfile6.py +++ b/beispiele/readfile6.py @@ -24,11 +24,13 @@ try: print(f"Warning: could not parse line {i+1}: {exc}") else: try: - items = [int(item) for item in raw_data.split(",")] # split raw data at commas + # split raw data at commas + items = [int(item.strip()) for item in raw_data.split(",")] except (ValueError, TypeError) as exc: print(f"Warning: could not parse data on line {i+1}: {exc}") + else: + data[name.strip()] = items - data[name.strip()] = items except OSError as exc: print(f"Could not open file {filename}: {exc}")