Compare commits
4 Commits
3339925f6a
...
e4c5e12a4e
Author | SHA1 | Date | |
---|---|---|---|
e4c5e12a4e | |||
f45daea457 | |||
7b61554f65 | |||
dcb5daf014 |
92
ips.py
Normal file
92
ips.py
Normal file
@ -0,0 +1,92 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# 1. Fetch the proxy IP list
def get_proxy_list():
    """Download the proxy listing page and parse it into proxy records.

    Returns:
        list[dict]: One ``{"ip": str, "port": str, "type": str}`` record per
        table row that has enough cells. Empty when the page contains no
        ``<table id="ip_list">``.

    Raises:
        requests.RequestException: If the listing page cannot be fetched
            (connection failure, timeout, ...).
    """
    # Browser-like User-Agent so the request looks like a normal page visit.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    # The query string had been corrupted to "akey×pan=5": the "&times" prefix
    # of "&timespan" was decoded as the HTML entity for the multiplication sign.
    # NOTE(review): "api" and "akey" carry no values here -- presumably the
    # account credentials must be filled in; confirm against the provider docs.
    url = "http://www.zdopen.com/ShortProxy/GetIP/?api&akey&timespan=5&type=1"
    # Always bound the wait; without a timeout a stalled server hangs the script.
    response = requests.get(url, headers=headers, timeout=10)

    # Parse the page and locate the proxy table.
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", {"id": "ip_list"})
    if table is None:
        # find() returns None when the table is absent (error page, API text
        # response, layout change); report "no proxies" instead of crashing.
        return []

    proxy_list = []
    for tr in table.find_all("tr"):
        td_list = tr.find_all("td")
        # Cells 1, 2 and 5 are read below, so a usable row needs at least six
        # <td> cells; header rows (no <td>) and short rows are skipped.
        if len(td_list) < 6:
            continue
        proxy_list.append({
            "ip": td_list[1].text.strip(),
            "port": td_list[2].text.strip(),
            "type": td_list[5].text.strip(),
        })
    return proxy_list
|
||||
|
||||
|
||||
# 2. Check whether a proxy is usable
def verify_proxy(proxy):
    """Return True if http://www.baidu.com answers 200 through ``proxy``.

    Args:
        proxy: Either a proxy record with ``"ip"`` and ``"port"`` keys (the
            shape produced by ``get_proxy_list``) or a ready-made ``requests``
            proxies mapping keyed by scheme (``{"http": ..., "https": ...}``).

    Returns:
        bool: True when the request succeeds with status 200, False on any
        other status or on a request failure.
    """
    # Browser-like User-Agent so the request looks like a normal page visit.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    # requests selects a proxy by URL scheme ("http"/"https"/"all"). A raw
    # {"ip", "port", "type"} record has none of those keys, so it would be
    # silently ignored and the check would succeed over a direct connection.
    if proxy and "ip" in proxy and "port" in proxy:
        address = "http://{}:{}".format(proxy["ip"], proxy["port"])
        proxies = {"http": address, "https": address}
    else:
        proxies = proxy

    # Request the target page and judge by the status code.
    url = "http://www.baidu.com"
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=5)
    except (requests.RequestException, ValueError):
        # Unreachable, slow or malformed proxy: treat as unusable. Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        return False
    return response.status_code == 200
|
||||
|
||||
|
||||
# 3. Filter a proxy list down to the usable entries
def test_proxy_list(proxy_list):
    """Return the entries of ``proxy_list`` accepted by ``verify_proxy``.

    Args:
        proxy_list: Iterable of proxy entries; each one is handed to
            ``verify_proxy`` unchanged.

    Returns:
        list: The accepted entries, in their original order.
    """
    return [candidate for candidate in proxy_list if verify_proxy(candidate)]
|
||||
|
||||
|
||||
# 4. Send a request through a proxy
def send_request(url, headers, proxy, timeout=10):
    """GET ``url`` through ``proxy`` and return the response body as text.

    Args:
        url: Address to fetch.
        headers: HTTP headers to send with the request.
        proxy: ``requests`` proxies mapping keyed by scheme
            (``{"http": ..., "https": ...}``).
        timeout: Seconds to wait for the server before giving up. Added with a
            default so existing three-argument callers keep working; pass
            ``None`` to wait indefinitely as before.

    Returns:
        str: The decoded response body (``response.text``).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a dead proxy would block this call forever.
    response = requests.get(url, headers=headers, proxies=proxy, timeout=timeout)
    return response.text
|
||||
|
||||
|
||||
# Program entry point
if __name__ == "__main__":
    # Fetch the candidate proxies, then keep only the ones that pass the check.
    proxy_list = get_proxy_list()
    valid_proxy_list = test_proxy_list(proxy_list)

    # Print the usable proxies.
    print("有效代理IP列表:")
    for proxy in valid_proxy_list:
        print(proxy)

    # Indexing [0] below would raise IndexError when nothing verified, so stop
    # with an explicit message instead of a traceback.
    if not valid_proxy_list:
        raise SystemExit("No usable proxy found; aborting.")

    # Send a request through the first usable proxy.
    url = "http://www.baidu.com"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }
    chosen = valid_proxy_list[0]
    # The scheme in a proxy URL describes how to talk to the proxy itself.
    # "https://" would request a TLS connection to the proxy, so the same
    # plain-HTTP address is used for both target schemes.
    proxy_address = "http://" + chosen["ip"] + ":" + chosen["port"]
    proxy = {
        "http": proxy_address,
        "https": proxy_address,
    }
    response = send_request(url, headers, proxy)
    print(response)
|
@ -1,11 +1,32 @@
|
||||
import csv
|
||||
|
||||
import requests
|
||||
from fake_useragent import UserAgent
|
||||
import pandas as pd
|
||||
import random
|
||||
|
||||
|
||||
# ----------------爬取各城市链接
|
||||
# Build the proxy pool
# Holds one "ip:port" string per non-empty data row of ips.csv.
ip_port_list = []
# NOTE(review): proxy credentials are hard-coded in source control; consider
# loading them from the environment or an untracked config file.
username = 'd3347396121'
password = 'ufinmek6'
# Read the CSV file.
with open('ips.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # Skip the header row; tolerate a completely empty file.
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; joining that would add an
            # empty entry and later produce an invalid proxy URL.
            continue
        # A single-column "ip:port" row passes through unchanged; separate
        # ip and port columns are joined with ":". No scheme is added here.
        ip_port_list.append(':'.join(row))
# Pick one proxy for the whole run (raises IndexError if the pool is empty).
random_proxy = random.choice(ip_port_list)
# The same authenticated plain-HTTP proxy URL serves both target schemes.
proxy_url = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": random_proxy}
proxies_dict = {
    "http": proxy_url,
    "https": proxy_url,
}
|
||||
|
||||
|
||||
|
||||
# ----------------爬取各城市链接------------
|
||||
ua = UserAgent()
|
||||
headers = {
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
@ -20,7 +41,7 @@ url = 'http://www.nmc.cn/f/rest/province' # 替换成实际的URL
|
||||
# public
|
||||
# Send a GET request and return the response object
def get_response(url, timeout=10):
    """GET ``url`` through the module-level proxy and return the response.

    Uses the module-level ``headers`` and ``proxies_dict``.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Has a
            default so existing one-argument callers keep working.

    Returns:
        requests.Response: The response object, whatever its status code.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Only the proxied request is issued. The earlier un-proxied call was
    # redundant: its result was overwritten immediately, and it sent the
    # request from the local IP anyway.
    return requests.get(url, headers=headers, proxies=proxies_dict, timeout=timeout)
|
||||
|
||||
|
||||
@ -35,7 +56,7 @@ def find_code_by_name(json_data, target_name):
|
||||
|
||||
def get_cityCode(target_name,url):
|
||||
# 发送GET请求获取JSON数据
|
||||
response = get_response(url)
|
||||
response = get_response(url,)
|
||||
if response.status_code == 200:
|
||||
json_data = response.json()
|
||||
code = find_code_by_name(json_data, target_name)
|
||||
@ -95,4 +116,6 @@ def get_citys():
|
||||
df = pd.DataFrame(citys_urllist, columns=['city_url'])
|
||||
df.to_csv('cities_urls.csv', index=False)
|
||||
|
||||
|
||||
|
||||
get_citys()
|
||||
|
401
sobear/ips.csv
Normal file
401
sobear/ips.csv
Normal file
@ -0,0 +1,401 @@
|
||||
IP:Port
|
||||
222.79.58.14:21907
|
||||
180.105.65.180:16865
|
||||
122.234.33.147:16076
|
||||
221.229.212.170:40026
|
||||
183.152.65.141:21709
|
||||
218.86.67.198:22935
|
||||
221.229.212.170:40688
|
||||
219.150.218.53:40777
|
||||
114.232.89.48:18416
|
||||
221.131.165.73:27415
|
||||
119.102.157.94:17402
|
||||
219.150.218.21:25002
|
||||
218.95.37.135:40335
|
||||
218.95.37.135:40123
|
||||
219.150.218.53:40254
|
||||
221.229.212.170:40554
|
||||
218.95.37.135:40089
|
||||
219.150.218.53:40074
|
||||
219.150.218.53:40703
|
||||
221.229.212.170:40181
|
||||
218.95.37.135:40456
|
||||
49.79.1.76:18154
|
||||
218.95.37.135:40427
|
||||
223.113.54.130:27015
|
||||
221.229.212.170:40548
|
||||
27.189.130.244:21592
|
||||
221.229.212.170:40066
|
||||
219.150.218.53:40593
|
||||
219.150.218.53:40562
|
||||
115.210.72.43:18940
|
||||
125.113.134.211:15419
|
||||
223.113.54.130:27215
|
||||
218.95.37.135:40443
|
||||
218.95.37.135:40172
|
||||
223.113.54.130:27115
|
||||
119.102.158.73:17599
|
||||
219.150.218.53:40276
|
||||
221.229.212.170:40313
|
||||
221.229.212.170:40199
|
||||
221.229.212.170:40401
|
||||
119.102.12.61:23958
|
||||
114.218.86.118:18583
|
||||
218.95.37.135:40137
|
||||
221.131.165.73:27015
|
||||
221.229.212.170:40472
|
||||
123.97.62.165:20001
|
||||
116.208.198.78:15007
|
||||
221.229.212.170:40283
|
||||
219.150.218.53:40774
|
||||
223.113.54.130:27420
|
||||
221.131.165.71:27015
|
||||
49.76.17.162:18359
|
||||
218.95.37.135:40383
|
||||
219.150.218.53:40123
|
||||
125.87.89.90:20342
|
||||
218.95.37.11:25007
|
||||
119.41.204.41:21594
|
||||
125.113.61.162:19268
|
||||
114.232.88.255:17348
|
||||
125.113.143.193:17273
|
||||
27.189.134.75:20272
|
||||
183.133.74.133:16208
|
||||
119.102.157.127:19775
|
||||
115.209.84.134:22192
|
||||
119.41.204.198:22265
|
||||
27.189.128.130:22315
|
||||
183.143.174.179:21197
|
||||
222.211.147.124:19323
|
||||
114.229.139.0:21515
|
||||
60.185.41.78:15365
|
||||
119.102.156.215:20433
|
||||
119.102.158.245:18020
|
||||
219.150.218.53:40412
|
||||
221.131.165.71:27115
|
||||
219.150.218.53:40445
|
||||
60.185.212.100:15026
|
||||
221.229.212.170:40198
|
||||
171.41.131.39:15804
|
||||
221.131.165.71:27215
|
||||
221.229.212.170:40588
|
||||
221.229.212.170:40276
|
||||
221.229.212.170:40354
|
||||
219.150.218.53:40151
|
||||
221.229.212.170:40338
|
||||
123.160.10.195:25002
|
||||
223.113.54.130:27315
|
||||
221.131.165.73:27315
|
||||
218.95.37.251:25002
|
||||
119.102.153.220:23329
|
||||
218.95.37.11:25234
|
||||
117.92.49.124:18230
|
||||
117.86.82.5:18153
|
||||
221.229.212.170:40673
|
||||
36.40.195.111:16858
|
||||
221.229.212.170:40576
|
||||
219.150.218.53:40550
|
||||
221.229.212.170:40329
|
||||
221.229.212.170:40058
|
||||
218.95.37.11:25137
|
||||
122.232.239.178:17032
|
||||
218.95.37.135:40474
|
||||
221.229.212.170:40358
|
||||
27.189.131.205:18674
|
||||
183.133.71.215:19479
|
||||
183.165.245.48:19805
|
||||
222.89.70.65:25002
|
||||
117.86.5.151:17677
|
||||
219.150.218.53:40176
|
||||
221.229.212.170:40482
|
||||
182.106.136.210:25005
|
||||
221.229.212.170:40308
|
||||
114.232.91.168:22201
|
||||
114.229.220.46:15154
|
||||
218.95.37.135:40457
|
||||
121.226.90.228:16793
|
||||
219.150.218.21:25001
|
||||
218.95.37.135:40313
|
||||
219.150.218.53:40362
|
||||
106.122.201.250:19357
|
||||
219.150.218.53:40345
|
||||
114.229.139.132:19932
|
||||
222.89.70.65:25001
|
||||
175.155.177.3:16923
|
||||
219.150.218.53:40581
|
||||
221.131.165.73:27216
|
||||
218.95.37.135:40120
|
||||
221.229.212.174:25002
|
||||
218.95.37.135:40265
|
||||
219.150.218.53:40328
|
||||
219.150.218.53:40516
|
||||
221.131.165.73:27215
|
||||
221.229.212.173:25001
|
||||
218.95.37.135:40332
|
||||
221.229.212.170:40225
|
||||
219.150.218.53:40085
|
||||
182.106.136.210:25064
|
||||
218.95.37.135:40056
|
||||
221.229.212.170:40374
|
||||
221.229.212.170:40526
|
||||
219.150.218.53:40046
|
||||
218.95.37.135:40112
|
||||
218.95.37.135:40136
|
||||
219.150.218.53:40339
|
||||
219.150.218.53:40556
|
||||
219.150.218.53:40501
|
||||
218.95.37.135:40230
|
||||
49.89.33.217:17682
|
||||
218.95.37.135:40207
|
||||
221.229.212.170:40489
|
||||
218.95.37.251:25003
|
||||
221.229.212.170:40363
|
||||
123.160.10.195:25003
|
||||
221.131.165.73:27115
|
||||
218.95.37.135:40013
|
||||
218.95.37.135:40130
|
||||
221.227.233.94:18601
|
||||
115.195.250.21:23555
|
||||
218.95.37.135:40439
|
||||
219.150.218.53:40418
|
||||
218.95.37.135:40111
|
||||
219.150.218.53:40341
|
||||
171.41.128.10:15942
|
||||
218.95.37.11:25001
|
||||
221.229.212.170:40413
|
||||
171.41.148.214:17035
|
||||
182.106.136.210:25017
|
||||
42.51.49.179:23855
|
||||
219.150.218.53:40269
|
||||
218.95.37.135:40217
|
||||
218.95.37.135:40024
|
||||
42.51.45.30:15789
|
||||
119.102.47.218:15796
|
||||
218.95.37.11:25152
|
||||
119.41.198.8:16547
|
||||
218.95.37.11:25003
|
||||
219.150.218.53:40294
|
||||
114.229.244.54:17454
|
||||
221.229.212.170:40049
|
||||
116.208.206.94:20414
|
||||
114.103.81.11:20471
|
||||
222.89.70.171:25001
|
||||
219.150.218.53:40230
|
||||
221.227.143.26:20584
|
||||
219.150.218.21:25003
|
||||
106.122.231.65:18706
|
||||
222.89.70.65:25003
|
||||
218.95.37.251:25163
|
||||
218.95.37.251:25001
|
||||
115.207.101.67:21632
|
||||
115.207.101.175:20363
|
||||
182.106.136.210:25052
|
||||
218.95.37.11:25002
|
||||
219.150.218.53:40486
|
||||
221.229.212.174:25001
|
||||
117.86.82.107:16909
|
||||
219.150.218.53:40223
|
||||
49.84.27.154:17065
|
||||
182.106.136.210:25180
|
||||
182.106.136.210:25057
|
||||
182.106.136.210:25154
|
||||
221.229.212.170:40652
|
||||
221.131.165.73:27054
|
||||
221.131.165.73:27012
|
||||
222.184.193.159:16131
|
||||
219.150.218.53:40331
|
||||
27.189.133.157:22812
|
||||
218.95.37.11:25004
|
||||
221.229.212.170:40368
|
||||
223.247.47.66:17983
|
||||
117.92.148.132:22132
|
||||
221.131.165.71:27108
|
||||
221.131.165.71:27094
|
||||
221.131.165.71:27252
|
||||
221.131.165.71:27041
|
||||
221.131.165.73:27347
|
||||
221.131.165.73:27283
|
||||
223.113.54.130:27069
|
||||
219.150.218.53:40562
|
||||
114.103.80.8:22900
|
||||
221.131.165.71:27275
|
||||
49.89.33.164:16158
|
||||
221.131.165.73:27125
|
||||
171.41.150.166:22737
|
||||
221.131.165.73:27334
|
||||
183.165.251.228:21430
|
||||
220.188.57.226:19390
|
||||
221.131.165.71:27236
|
||||
221.229.212.170:40045
|
||||
49.89.34.71:16578
|
||||
114.239.150.126:19611
|
||||
221.131.165.73:27209
|
||||
223.113.54.130:27054
|
||||
60.184.41.110:20693
|
||||
218.95.37.135:40173
|
||||
221.131.165.73:27321
|
||||
125.106.193.102:15499
|
||||
223.113.54.130:27039
|
||||
221.131.165.73:27442
|
||||
223.113.54.130:27095
|
||||
221.131.165.71:27086
|
||||
221.131.165.71:27038
|
||||
221.131.165.71:27107
|
||||
221.131.165.73:27356
|
||||
223.113.54.130:27330
|
||||
221.131.165.71:27002
|
||||
221.131.165.73:27424
|
||||
218.95.37.135:40075
|
||||
221.131.165.73:27178
|
||||
221.131.165.73:27450
|
||||
219.150.218.53:40445
|
||||
219.150.218.53:40214
|
||||
123.180.173.133:17040
|
||||
221.131.165.73:27009
|
||||
221.131.165.73:27475
|
||||
221.131.165.73:27190
|
||||
221.131.165.71:27265
|
||||
221.131.165.73:27446
|
||||
221.131.165.71:27010
|
||||
219.150.218.53:40254
|
||||
221.131.165.73:27333
|
||||
117.69.31.206:23029
|
||||
171.41.131.146:18979
|
||||
123.180.174.176:20891
|
||||
114.106.135.42:19089
|
||||
119.41.207.37:17523
|
||||
180.105.37.229:23544
|
||||
171.41.128.100:15619
|
||||
122.232.236.6:15969
|
||||
115.207.101.141:22614
|
||||
125.106.192.19:15170
|
||||
183.133.73.17:21682
|
||||
221.234.31.24:16598
|
||||
117.92.148.174:21957
|
||||
114.103.80.243:23729
|
||||
125.106.193.40:20756
|
||||
125.87.92.138:22719
|
||||
125.105.229.93:23747
|
||||
125.106.192.120:22461
|
||||
61.175.144.3:22480
|
||||
125.105.226.212:22185
|
||||
117.83.72.85:19534
|
||||
171.41.150.48:19600
|
||||
223.245.213.80:22674
|
||||
49.72.20.77:23058
|
||||
221.229.212.170:40137
|
||||
219.150.218.53:40516
|
||||
218.95.37.135:40441
|
||||
221.229.212.170:40464
|
||||
218.95.37.135:40308
|
||||
125.105.228.8:19239
|
||||
125.106.195.229:20029
|
||||
218.95.37.135:40154
|
||||
116.26.6.138:20546
|
||||
115.207.100.90:16144
|
||||
119.41.204.56:16433
|
||||
219.150.218.53:40067
|
||||
218.95.37.135:40246
|
||||
223.113.54.130:27358
|
||||
61.175.144.218:23800
|
||||
218.86.73.197:22935
|
||||
114.229.202.204:16917
|
||||
118.113.245.198:17674
|
||||
115.207.100.99:17189
|
||||
219.150.218.53:40123
|
||||
219.150.218.53:40627
|
||||
125.106.192.30:15512
|
||||
125.106.193.241:20139
|
||||
125.106.194.236:18802
|
||||
115.207.101.79:20210
|
||||
27.189.134.65:23363
|
||||
219.150.218.53:40547
|
||||
218.90.69.96:16287
|
||||
221.131.165.71:27011
|
||||
125.121.73.71:15274
|
||||
125.106.193.57:22951
|
||||
117.92.154.154:16006
|
||||
122.232.236.69:17900
|
||||
220.188.57.222:20617
|
||||
219.150.218.53:40164
|
||||
219.150.218.53:40151
|
||||
221.229.212.170:40472
|
||||
221.131.165.71:27241
|
||||
221.229.212.170:40498
|
||||
111.224.11.229:16883
|
||||
60.184.41.125:18147
|
||||
125.105.229.192:17088
|
||||
221.131.165.73:27166
|
||||
221.131.165.71:27123
|
||||
223.113.54.130:27031
|
||||
125.106.195.4:16701
|
||||
180.121.145.198:18505
|
||||
221.234.31.221:17423
|
||||
119.41.195.84:23936
|
||||
125.106.194.40:18740
|
||||
221.131.165.73:27003
|
||||
171.41.128.237:17249
|
||||
223.113.54.130:27082
|
||||
221.131.165.73:27148
|
||||
221.131.165.73:27222
|
||||
49.89.32.35:23837
|
||||
218.95.37.11:25005
|
||||
221.229.212.170:40541
|
||||
221.229.212.170:40640
|
||||
219.150.218.53:40190
|
||||
121.226.11.211:18979
|
||||
171.41.150.37:23030
|
||||
221.131.165.71:27046
|
||||
221.131.165.71:27008
|
||||
221.234.31.138:17713
|
||||
125.106.193.5:17559
|
||||
125.106.195.7:19555
|
||||
115.207.101.132:18655
|
||||
114.237.245.74:19512
|
||||
221.229.212.170:40670
|
||||
221.229.212.170:40607
|
||||
219.150.218.53:40453
|
||||
219.150.218.53:40328
|
||||
219.150.218.53:40703
|
||||
219.150.218.53:40507
|
||||
125.105.225.220:17382
|
||||
125.105.230.182:20842
|
||||
219.150.218.53:40412
|
||||
219.150.218.53:40593
|
||||
221.229.212.170:40697
|
||||
125.106.195.159:19687
|
||||
221.229.212.170:40136
|
||||
219.150.218.53:40046
|
||||
122.232.237.96:16797
|
||||
114.229.138.29:16762
|
||||
122.232.236.228:19998
|
||||
221.229.212.170:40304
|
||||
221.229.212.170:40117
|
||||
115.207.101.135:15190
|
||||
221.229.212.170:40387
|
||||
218.95.37.135:40472
|
||||
202.101.213.79:19003
|
||||
221.229.212.170:40095
|
||||
116.208.198.64:20652
|
||||
114.106.172.24:21705
|
||||
219.150.218.53:40089
|
||||
116.1.7.230:19455
|
||||
223.113.54.130:27018
|
||||
221.131.165.71:27254
|
||||
221.131.165.73:27232
|
||||
117.82.114.228:16890
|
||||
221.229.212.170:40615
|
||||
221.229.212.170:40071
|
||||
221.229.212.170:40122
|
||||
218.95.37.251:25154
|
||||
220.188.37.149:20099
|
||||
219.150.218.53:40466
|
||||
221.131.165.71:27287
|
||||
180.121.190.58:18162
|
||||
218.95.37.135:40442
|
||||
218.95.37.135:40489
|
||||
218.95.37.135:40401
|
||||
218.95.37.11:25231
|
||||
219.150.218.53:40255
|
||||
125.105.228.92:21174
|
||||
222.89.70.171:25002
|
|
Loading…
Reference in New Issue
Block a user