开头
你可能经常会遇到"随机图片"接口:每次访问都会返回一张随机图片。如果碰到精品图库,你肯定想把里面的图片全部爬取下来吧?下面提供一个带去重功能的 Python 程序。
代码
from requests import get
import time
# Load the image links collected by earlier runs so they can be skipped.
# url.txt stores one link per line; if it is missing or unreadable we simply
# start with an empty history.
try:
    with open('url.txt', 'r', encoding='utf-8') as f:
        urllist = f.read().splitlines()
except (OSError, UnicodeDecodeError):
    # Only I/O and decoding failures mean "no usable history"; a bare
    # ``except`` would also swallow Ctrl+C and genuine programming errors.
    urllist = []
print(urllist)
def GetPic(api='https://bbs.1ove.club', timeout=2):
    """Request the site to crawl and return the URL the request ended up at.

    Args:
        api: Address to request. Defaults to the site this script crawls.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The URL of the final response (after any redirects that requests
        followed), or ``None`` if that URL is empty.

    Raises:
        Any exception raised by ``requests.get`` (for example on timeout or
        connection failure) propagates to the caller.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
    # Only the resolved URL is needed, so stream=True avoids downloading the
    # response body, and the ``with`` block releases the connection promptly.
    with get(api, headers=headers, timeout=timeout, stream=True) as response:
        url = response.url
    return url if url else None
def init(max_repeats=30, max_new=3000):
    """Collect new image links until the source appears to be exhausted.

    Repeatedly calls ``GetPic()``. Every link not seen before is printed,
    appended to the module-level ``urllist`` and appended to ``url.txt``.
    The loop stops after ``max_repeats`` consecutive attempts that yielded no
    new link (duplicate, empty result or failed request), or once ``max_new``
    new links have been collected in this run.

    Args:
        max_repeats: Consecutive unproductive attempts allowed before stopping.
        max_new: Maximum number of new links to collect in this run.
    """
    # Imported locally because the file only imports ``get`` from requests.
    from requests import RequestException

    # A set gives O(1) duplicate checks; ``urllist`` is still kept up to date
    # for any other code that reads it.
    seen = set(urllist)
    repeats = 0
    collected = 0
    while repeats < max_repeats and collected < max_new:
        try:
            url = GetPic()
        except RequestException as err:
            # A single timeout/connection error should not abort the whole
            # run; count it as an unproductive attempt so that a dead site
            # still terminates the loop.
            repeats += 1
            print("请求失败,已连续未获得新图" + str(repeats) + "次: " + str(err))
            continue
        if not url:
            # GetPic may return None; there is nothing to record.
            repeats += 1
            continue
        if url in seen:
            repeats += 1
            print("重复次数" + str(repeats) + ",重复的: " + url)
            continue
        print(url)
        seen.add(url)
        urllist.append(url)
        # Append immediately so already-collected links survive an interruption.
        with open('url.txt', 'a', encoding='utf-8') as f:
            f.write(url + '\n')
        collected += 1
        repeats = 0
# Start the crawl loop. This call sits at module level, so it runs whenever
# the file is executed (and also if it is imported).
init()
来几张随机图
素材来源:https://hostloc.com/thread-889676-2-1.html