Python 爬虫:10 分钟就能学会的教程

今天录一个简单的 Python 爬虫教程,超级简单,只需要按步骤操作就可以!

10分钟就要学会哦!

演示代码:

# Scrape meme images from doutula.com listing pages and save them under img/.
# Fetch web pages
import requests
# Regular expressions
import re
# Filesystem helpers (used to create the output directory)
import os

# open() does not create missing directories, so make sure the target exists
# before the first download instead of failing on the first write.
os.makedirs('img', exist_ok=True)

# Visit listing pages 1..2520 inclusive (the same pages the original
# increment-then-fetch counter loop covered).
for page in range(1, 2521):
    try:
        r = requests.get('http://www.doutula.com/photo/list/?page=%d' % page, timeout=30)
    except requests.RequestException as err:
        # One unreachable page should not abort the whole crawl.
        print("第%d页请求失败:%s" % (page, err))
        continue
    htmltext = r.text

    # Isolate the first <ul class="list-group"> section; skip the page when the
    # markup is absent rather than crashing on an empty match list.
    sections = re.findall(r'<ul class="list-group">(.*?)</ul>', htmltext, re.S)
    if not sections:
        print("第%d页未找到图片列表,跳过" % page)
        continue
    html = sections[0]

    # Each match is a (data-original URL, alt text) pair taken from a lazy-loaded <img> tag.
    imgurl = re.findall(r'<img referrerpolicy="no-referrer" src="//www.doutula.com/img/loader.gif" style="width: 100%; height: 100%;" data-original="(.*?)" alt="(.*?)" class="img-responsive lazy image_dta"', html, re.S)

    for img in imgurl:
        title = img[1]
        # The alt text comes straight from the page; replace characters that
        # are path separators or invalid in file names so open() cannot fail
        # on (or escape from) the img/ directory.
        safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title)
        try:
            imgget = requests.get(img[0], timeout=30)
        except requests.RequestException as err:
            print("下载失败:%s (%s)" % (title, err))
            continue
        with open('img/%s.jpg' % safe_title, 'wb') as openimg:
            openimg.write(imgget.content)

        print("正在下载:%s" % title)
#爬美女
import requests
import re

def guturl(num):
    """Download the images listed on one gallery page of suibianlu.com.

    Requests ``https://www.suibianlu.com/meitu_<num>/``, takes the first
    ``<ul class="list-meizitu ...">`` section of the response, and saves each
    ``<img src>`` found there to ``img/<alt text>.jfif``.

    Args:
        num: Value substituted into the page URL.

    Returns:
        None. Pages that cannot be fetched or that lack the expected list
        markup are reported and skipped; individual image failures are
        reported and do not stop the remaining downloads.
    """
    import os

    try:
        # Issue a GET request for the gallery page.
        r = requests.get('https://www.suibianlu.com/meitu_%s/' % num, timeout=30)
    except requests.RequestException as err:
        print("页面请求失败:%s (%s)" % (num, err))
        return
    # Force the response encoding before decoding the body as text.
    r.encoding = 'utf-8'
    a = r.text

    sections = re.findall(r'<ul class="list-meizitu border pd5 mb10 clearfix">.*?</ul>', a, re.S)
    if not sections:
        # Previously an IndexError; a page without the list is simply skipped.
        print("页面未找到图片列表,跳过:%s" % num)
        return
    html = sections[0]

    urll = re.findall(r'<img src="(.*?)"', html, re.S)
    tilate = re.findall(r'alt="(.*?)"', html, re.S)

    # open() does not create missing directories.
    os.makedirs('img', exist_ok=True)

    # zip pairs the i-th src with the i-th alt exactly as the manual index did,
    # but stops cleanly if the page has fewer alt attributes than src attributes.
    for img, aaa in zip(urll, tilate):
        # Replace characters that are path separators or invalid in file names.
        safe_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', aaa)
        try:
            imgres = requests.get(img, timeout=30)
        except requests.RequestException as err:
            print("下载失败:%s (%s)" % (aaa, err))
            continue
        try:
            with open('img/%s.jfif' % safe_name, 'wb') as aff:
                aff.write(imgres.content)
        except OSError as err:
            # Keep the best-effort behaviour, but report the failure instead
            # of hiding it behind a bare except.
            print("保存失败:%s (%s)" % (aaa, err))
            continue
        print("正在下载:%s" % aaa)
# def drrik():
#     for inn in range(15):
#         os.mkdir('img/美图文件%s'%inn)
def ru():
    """Call guturl once for each page number from 0 through 13, in order."""
    page = 0
    while page < 14:
        guturl(page)
        page += 1

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    ru()
上一篇

基于WordPress博客的微信小程序版幼苗Seedling小程序