# File-viewer header (not part of the program): Python, 34 lines, 958 B
import sys
from urllib.parse import urljoin

import lxml.html
import requests


# Scrape the "See also" links of a Wikipedia article and print them as a
# list of (title, absolute URL) tuples.
#
# Usage: the optional first command-line argument names the page to fetch,
# either as a path relative to ``base_url`` (e.g. ``wiki/Monty_Python``) or
# as a full URL. Without an argument the Python article is used.

# Root against which a page path given on the command line is resolved.
base_url = "https://en.wikipedia.org/"

if len(sys.argv) >= 2:
    # urljoin accepts "wiki/Foo", "/wiki/Foo" and absolute URLs alike; plain
    # string concatenation produced "//" or a mangled URL for the latter two.
    url = urljoin(base_url, sys.argv[1])
else:
    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"

# Without a timeout a stalled connection would block the script forever.
resp = requests.get(url, timeout=10)

# Collected (title, absolute URL) pairs; stays empty on any failure.
see_also = []

# Tags treated as the start of a new section when walking the siblings.
heading_tags = ("h1", "h2", "h3", "h4", "h5", "h6")

if resp.status_code == 200:
    html = resp.text
    tree = lxml.html.fromstring(html)

    # Match any element carrying the id (not only <span>) so the lookup does
    # not depend on which tag the page attaches the section anchor to.
    anchors = tree.xpath("//*[@id='See_also']")
    see_also_heading = anchors[0].getparent() if anchors else None

    if see_also_heading is None:
        print("No 'See also' section found in " + url, file=sys.stderr)
    else:
        for sibling in see_also_heading.itersiblings():
            # Stop at the next heading (or an element directly wrapping one)
            # so links from later sections are not collected as well.
            if sibling.tag in heading_tags or sibling.xpath("|".join(heading_tags)):
                break

            for link in sibling.xpath("li/a"):
                see_also_url = link.get("href")
                if see_also_url is None:
                    # An <a> without href has nothing to link to.
                    continue
                title = link.get("title")
                # Resolve against the fetched page so relative, root-relative
                # and absolute hrefs all become valid absolute URLs.
                see_also.append((title, urljoin(url, see_also_url)))

print(see_also)