Compare commits
No commits in common. "7f2c84f53d9c0ad5e18ae24e9e512e61fdfecc47" and "a7cf6aed5f440d4c058198393d25fe79103641fe" have entirely different histories.
7f2c84f53d
...
a7cf6aed5f
|
@ -1,5 +0,0 @@
|
||||||
record 1: 100, 300, 50, 200
|
|
||||||
record 2 300, 450, 50, 100
|
|
||||||
record 3: 150, 200, 0, 150
|
|
||||||
record 4: 350, 200, 5O, 250
|
|
||||||
record 5: 50, 100, 200, 300
|
|
|
@ -1,5 +0,0 @@
|
||||||
record 1: 100, 300, 50, 200
|
|
||||||
record 2: 300, 450, 50, 100
|
|
||||||
record 3: 150, 200, 0, 150
|
|
||||||
record 4: 350, 200, 50, 250
|
|
||||||
record 5: 50, 100, 200, 300
|
|
|
@ -8,7 +8,7 @@ with open(filename, "r") as fileobj:
|
||||||
max_linelength = 0
|
max_linelength = 0
|
||||||
longest_line = -1
|
longest_line = -1
|
||||||
|
|
||||||
for i, line in enumerate(fileobj):
|
for i, line in enumerate(data):
|
||||||
if len(line) > max_linelength:
|
if len(line) > max_linelength:
|
||||||
max_linelength = len(line)
|
max_linelength = len(line)
|
||||||
longest_line = i + 1
|
longest_line = i + 1
|
||||||
|
|
|
@ -2,15 +2,12 @@
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
if len(sys.argv) >= 2:
|
filename = sys.argv[1]
|
||||||
filename = sys.argv[1]
|
|
||||||
else:
|
|
||||||
filename = "data.txt"
|
|
||||||
|
|
||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
with open(filename, "r") as fileobj:
|
with open(filename, "r") as fileobj:
|
||||||
for i, line in enumerate(fileobj):
|
for i, line in enumerate(data):
|
||||||
line = line.strip() # remove whitespace from start/end of line
|
line = line.strip() # remove whitespace from start/end of line
|
||||||
|
|
||||||
if line.startswith('#'):
|
if line.startswith('#'):
|
||||||
|
|
|
@ -1,29 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if len(sys.argv) >= 2:
|
|
||||||
filename = sys.argv[1]
|
|
||||||
else:
|
|
||||||
filename = "data.txt"
|
|
||||||
|
|
||||||
data = {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(filename, "r") as fileobj:
|
|
||||||
for i, line in enumerate(fileobj):
|
|
||||||
line = line.strip() # remove whitespace from start/end of line
|
|
||||||
|
|
||||||
if line.startswith('#'):
|
|
||||||
# ignore comment lines
|
|
||||||
continue
|
|
||||||
|
|
||||||
name, raw_data = line.split(":", 1) # split line at first colon
|
|
||||||
items = raw_data.split(",") # split raw data at commas
|
|
||||||
|
|
||||||
data[name.strip()] = items
|
|
||||||
except OSError as exc:
|
|
||||||
print(f"Could not open file {filename}: {exc}")
|
|
||||||
|
|
||||||
for key in data:
|
|
||||||
print(key, data[key])
|
|
|
@ -1,36 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if len(sys.argv) >= 2:
|
|
||||||
filename = sys.argv[1]
|
|
||||||
else:
|
|
||||||
filename = "data.txt"
|
|
||||||
|
|
||||||
data = {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(filename, "r") as fileobj:
|
|
||||||
for i, line in enumerate(fileobj):
|
|
||||||
line = line.strip() # remove whitespace from start/end of line
|
|
||||||
|
|
||||||
if line.startswith('#'):
|
|
||||||
# ignore comment lines
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
name, raw_data = line.split(":", 1) # split line at first colon
|
|
||||||
except ValueError as exc:
|
|
||||||
print(f"Warning: could not parse line {i+1}: {exc}")
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
items = [int(item) for item in raw_data.split(",")] # split raw data at commas
|
|
||||||
except (ValueError, TypeError) as exc:
|
|
||||||
print(f"Warning: could not parse data on line {i+1}: {exc}")
|
|
||||||
|
|
||||||
data[name.strip()] = items
|
|
||||||
except OSError as exc:
|
|
||||||
print(f"Could not open file {filename}: {exc}")
|
|
||||||
|
|
||||||
for key in data:
|
|
||||||
print(key, data[key])
|
|
|
@ -1,52 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import urllib.parse as urlparse
|
|
||||||
|
|
||||||
import requests
|
|
||||||
import lxml.html
|
|
||||||
|
|
||||||
|
|
||||||
def find_see_also_links(tree):
|
|
||||||
|
|
||||||
# get heading element of "See also" section using XPath
|
|
||||||
see_also_heading = tree.xpath("//span[@id='See_also']")[0].getparent()
|
|
||||||
|
|
||||||
# find following UL element, which contains the LI elements with the actual links
|
|
||||||
for sibling in see_also_heading.itersiblings():
|
|
||||||
hrefs = sibling.xpath("li/a")
|
|
||||||
|
|
||||||
if hrefs:
|
|
||||||
return [(href.text, href.get("href")) for href in hrefs]
|
|
||||||
|
|
||||||
if sibling.tag.startswith("h"):
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
sys.exit("Usage: wp_crawler.py <url>")
|
|
||||||
|
|
||||||
url = sys.argv[1]
|
|
||||||
url_parts = urlparse.urlsplit(url)
|
|
||||||
base_url = urlparse.urlunsplit(list(url_parts[:3]) + ['', ''])
|
|
||||||
|
|
||||||
resp = requests.get(url)
|
|
||||||
|
|
||||||
if not resp.status_code == 200:
|
|
||||||
sys.exit(f"Could not retrive URL '{url}'. Aborting.")
|
|
||||||
tree = lxml.html.fromstring(resp.text)
|
|
||||||
|
|
||||||
links = find_see_also_links(tree)
|
|
||||||
|
|
||||||
if links:
|
|
||||||
for title, url in links:
|
|
||||||
if not url.startswith("https://"):
|
|
||||||
if url.startswith("/"):
|
|
||||||
url = urlparse.urljoin(base_url, url)
|
|
||||||
|
|
||||||
print(title, url)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
Loading…
Reference in New Issue