Contents

  • urllib
  • request
  • BeautifulSoup
  • os.path
  • with-as
  • open()

Rambling

I'll fill this in once I've more or less sorted things out

School's about to start, so never mind, I won't

Come to think of it, why learn scraping at all? The trigger was running into this random anime-image API


# Download avatars on a loop, saving each one as avatar/{timestamp}.jpg
import requests
import os
import time
c = os.path.dirname(__file__)           # keep the avatar folder next to this script
os.makedirs(os.path.join(c, 'avatar'), exist_ok=True)
url = 'https://api.yimian.xyz/img?type=head'
while True:
    x = requests.get(url)
    b = int(time.time())
    a = os.path.join(c, 'avatar', '{b}.jpg'.format(b=b))
    with open(a, 'wb') as f:            # binary mode, since it's image data
        f.write(x.content)
    print('{b}.jpg'.format(b=b))
    time.sleep(1)                       # timestamps are whole seconds, so don't overwrite
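
The contents list up top mentions urllib, with-as, and open() but never got its write-up, so here's a minimal sketch of the same download done with just the standard library. The API URL and avatar folder come from the script above; the rest is my own filler, not anything from the original notes:

# Same avatar download, but with the standard library's urllib instead of requests
import os
import time
import urllib.request

url = 'https://api.yimian.xyz/img?type=head'
os.makedirs('avatar', exist_ok=True)
with urllib.request.urlopen(url) as resp:
    body = resp.read()                  # raw image bytes
path = os.path.join('avatar', '{t}.jpg'.format(t=int(time.time())))
with open(path, 'wb') as f:             # with-as closes the file for you
    f.write(body)
print(path)
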
# Scrape direct links to the article cover images on leao.blog
import requests
from bs4 import BeautifulSoup
pagenum = 0
while pagenum < 5:
    pagenum += 1
    # a URL is not a filesystem path, so build it with format(), not os.path.join
    url = 'https://leao.blog/page/{pagenum}'.format(pagenum=pagenum)
    req = requests.get(url)
    soup = BeautifulSoup(req.text, 'lxml')
    data = soup.select('#brand-waterfall > div > a > div > img')
    # print the src of up to the first five cover images on each page
    n = -1
    while n < 4:
        n += 1
        try:
            print(data[n].get('src'))
        except IndexError:              # fewer than five images on this page
            pass
    else:
        pass
else:
    pass
# Was it really necessary to write out all those pass statements?
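
And since the whole point was direct links, feeding them back into a downloader is the obvious next step. A rough sketch, assuming each src holds a full absolute URL (if the theme lazy-loads images via data-src or uses relative paths, this needs adjusting); the covers folder name is made up:

# Download the cover images found above into covers/
import os
import requests
from bs4 import BeautifulSoup

os.makedirs('covers', exist_ok=True)
for pagenum in range(1, 6):
    url = 'https://leao.blog/page/{p}'.format(p=pagenum)
    soup = BeautifulSoup(requests.get(url).text, 'lxml')
    for img in soup.select('#brand-waterfall > div > a > div > img'):
        src = img.get('src')
        if not src:
            continue
        name = os.path.basename(src.split('?')[0])  # filename taken from the URL itself
        with open(os.path.join('covers', name), 'wb') as f:
            f.write(requests.get(src).content)
        print(name)
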

I'll slip away now, before a certain someone starts picking this apart