Update beispiele
Signed-off-by: Christopher Arndt <chris@chrisarndt.de>
This commit is contained in:
		
							parent
							
								
									eab71dd490
								
							
						
					
					
						commit
						205c83790c
					
				
							
								
								
									
										34
									
								
								beispiele/http1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								beispiele/http1.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,34 @@
 | 
				
			|||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					import lxml.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					base_url = "https://en.wikipedia.org/"
 | 
				
			||||||
 | 
					if len(sys.argv) >= 2:
 | 
				
			||||||
 | 
					    url = base_url + sys.argv[1]
 | 
				
			||||||
 | 
					else:
 | 
				
			||||||
 | 
					    url = "https://en.wikipedia.org/wiki/Python_(programming_language)"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					resp = requests.get(url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					see_also = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if resp.status_code == 200:
 | 
				
			||||||
 | 
					    html = resp.text
 | 
				
			||||||
 | 
					    #print(html[:100])
 | 
				
			||||||
 | 
					    tree = lxml.html.fromstring(html)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    see_also_heading = tree.xpath("//span[@id='See_also']")[0].getparent()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for sibling in see_also_heading.itersiblings():
 | 
				
			||||||
 | 
					        hrefs = sibling.xpath("li/a")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for href in hrefs:
 | 
				
			||||||
 | 
					            title = href.get("title")
 | 
				
			||||||
 | 
					            see_also_url = href.get("href")
 | 
				
			||||||
 | 
					            #print(title, see_also_url)
 | 
				
			||||||
 | 
					            ## TODO
 | 
				
			||||||
 | 
					            ## Parse URL from command line with urlllib.parse.urlsplit()
 | 
				
			||||||
 | 
					            ## and concatenate base URL and relative URL from link
 | 
				
			||||||
 | 
					            see_also.append((title, base_url + see_also_url))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					print(see_also)
 | 
				
			||||||
@ -24,11 +24,13 @@ try:
 | 
				
			|||||||
                print(f"Warning: could not parse line {i+1}: {exc}")
 | 
					                print(f"Warning: could not parse line {i+1}: {exc}")
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    items = [int(item) for item in raw_data.split(",")]          # split raw data at commas
 | 
					                    # split raw data at commas
 | 
				
			||||||
 | 
					                    items = [int(item.strip()) for item in raw_data.split(",")]
 | 
				
			||||||
                except (ValueError, TypeError) as exc:
 | 
					                except (ValueError, TypeError) as exc:
 | 
				
			||||||
                    print(f"Warning: could not parse data on line {i+1}: {exc}")
 | 
					                    print(f"Warning: could not parse data on line {i+1}: {exc}")
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    data[name.strip()] = items
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            data[name.strip()] = items
 | 
					 | 
				
			||||||
except OSError as exc:
 | 
					except OSError as exc:
 | 
				
			||||||
    print(f"Could not open file {filename}: {exc}")
 | 
					    print(f"Could not open file {filename}: {exc}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user