2019独角兽企业重金招聘Python工程师标准>>>
话不多说，直接上代码，很简单，很容易看懂。
import requests
from bs4 import BeautifulSoup
import random


def get_ip_list(ip_url='http://www.xicidaili.com/nn/', timeout=10):
    """Scrape a proxy listing page and return the proxies as URL strings.

    The page is expected to contain a table with id ``ip_list``; in each
    data row the second cell is read as the IP address and the third cell
    as the port.

    Args:
        ip_url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a dead listing site cannot hang the caller forever.

    Returns:
        A list of strings shaped like ``http://ip_number:port_number``.
        The list is empty when the page contains no usable rows.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection failure, timeout, ...).
    """
    print("正在获取代理列表...")
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/67.0.3396.99 Safari/537.36"
        )
    }
    wb_data = requests.get(ip_url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    raw_list = soup.select('#ip_list tr')

    mature_list = []
    # The first row is the table header; skip it by position rather than by
    # comparing every row's markup against it.
    for row in raw_list[1:]:
        td_list = row.find_all('td')
        # Rows without both an IP cell and a port cell cannot yield a proxy.
        if len(td_list) < 3:
            continue
        # Format expected by the ``proxies`` argument:
        # http://ip_number:port_number
        ip_info = (
            'http://' + td_list[1].text.strip() + ':' + td_list[2].text.strip()
        )
        mature_list.append(ip_info)

    print("代理列表抓取成功")
    return mature_list


def get_random_ip(ip_list):
    """Pick one proxy at random and wrap it in a ``proxies`` mapping.

    Args:
        ip_list: Non-empty sequence of proxy URLs, such as the list returned
            by ``get_ip_list``.

    Returns:
        A dict ``{'http': <proxy url>}`` that can be passed as the
        ``proxies`` argument of a ``requests`` call.

    Raises:
        IndexError: If ``ip_list`` is empty (raised by ``random.choice``).
    """
    print("正在设置随机代理...")
    proxy_ip = random.choice(ip_list)  # pick one entry at random
    proxies = {'http': proxy_ip}
    print("代理设置成功.")
    return proxies