"""Print the "See also" links of an English Wikipedia article.

Usage::

    python <script> [PATH_OR_URL]

``PATH_OR_URL`` is resolved against ``base_url`` (for example
``wiki/Monty_Python`` or ``/wiki/Monty_Python``).  Without an argument the
article on the Python programming language is fetched.  The result is printed
as a list of ``(title, absolute_url)`` tuples.
"""
import sys
from urllib.parse import urljoin

import lxml.html
import requests

base_url = "https://en.wikipedia.org/"

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` may block forever.
_REQUEST_TIMEOUT = 10

_HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6")


def _heading_level(element):
    """Return the heading level (1-6) of *element*, or None if it is not one.

    Recognises plain ``<h1>``..``<h6>`` elements as well as ``<div>`` wrappers
    carrying a ``mw-headingN`` class.
    """
    tag = element.tag
    if tag in _HEADING_TAGS:
        return int(tag[1])
    if tag == "div":
        for css_class in (element.get("class") or "").split():
            suffix = css_class[len("mw-heading"):]
            if css_class.startswith("mw-heading") and suffix.isdigit():
                return int(suffix)
    return None


def _extract_see_also(html, page_url):
    """Return ``(title, absolute_url)`` tuples for the "See also" section.

    Args:
        html: Markup of the article page.
        page_url: URL the page was loaded from; relative hrefs are resolved
            against it.

    Returns:
        A list of tuples, empty when the page has no element with the id
        ``See_also``.  ``title`` is None for links without a ``title``
        attribute.
    """
    tree = lxml.html.fromstring(html)
    # Match any element carrying the id, not only <span>, so the lookup does
    # not depend on which tag holds the section anchor.
    anchors = tree.xpath("//*[@id='See_also']")
    if not anchors:
        return []
    heading = anchors[0].getparent()
    if heading is None:
        return []

    section_level = _heading_level(heading)
    links = []
    for sibling in heading.itersiblings():
        level = _heading_level(sibling)
        # Stop at the next section; otherwise lists of later sections
        # (e.g. "External links") would be reported as "See also" entries.
        if level is not None and (section_level is None or level <= section_level):
            break
        for link in sibling.xpath("li/a"):
            href = link.get("href")
            if href is None:
                continue
            # urljoin avoids the doubled slash that plain concatenation of
            # "https://en.wikipedia.org/" and "/wiki/..." produces.
            links.append((link.get("title"), urljoin(page_url, href)))
    return links


if len(sys.argv) >= 2:
    url = urljoin(base_url, sys.argv[1])
else:
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

resp = requests.get(url, timeout=_REQUEST_TIMEOUT)

see_also = []
if resp.status_code == 200:
    # resp.url reflects redirects, so relative links resolve correctly.
    see_also = _extract_see_also(resp.text, resp.url)
else:
    print(
        "Request for {} failed with HTTP status {}".format(url, resp.status_code),
        file=sys.stderr,
    )

print(see_also)