This code doesn't scrape any results.

"""
    功能:爬取豆瓣2018年度电影榜单
"""


import requests
from bs4 import BeautifulSoup


def get_film_list_info(film_list_address):
    """
        获取电影榜单的具体信息(电影名称和豆瓣评分)
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
        "Host": "movie.douban.com"
    }
    # Include the browser-like headers in the request.
    r = requests.get(film_list_address, headers=headers, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')
    div_list = soup.find_all('div', {'class': 'Bbvlm'})

    film_list_info = []
    # These class names look like build-generated (hashed) CSS classes, so they may
    # change whenever the page is redeployed.
    for i, div_content in enumerate(div_list[:10]):
        if i == 0:
            # The Top 1 entry uses different markup from the other nine entries.
            film_name = div_content.find('span', {'class': '_3j9RA'}).text
            film_score = div_content.find('div', {'class': '_2YEJY'}).text
        else:
            div_content_li = div_content.find('li', {'class': '_111mb'})
            film_name = div_content_li.find('a', {'class': '_1RaNl'}).text
            film_score = div_content_li.find('span', {'class': 'kegxb'}).text
        film_list_info.append((film_name, film_score))
    return film_list_info


def get_all_film_lists():
    """
        Collect every movie list (name and address) on the 2018 year-in-review page.
    """
    # url = 'https://movie.douban.com/annual/2018?source=navigation#'
    # film_list = []
    # r = requests.get(url,timeout=30)
    # soup = BeautifulSoup(r.text,'lxml')
    #
    # film_list_div = soup.find_all('div',{'class':''})
    # film_list_link_list = film_list_div.find_all('a')
    # for film_list_link in film_list_link_list:
    #     film_list_name = film_list_link.text
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
        "Host": "movie.douban.com"
    }
    film_list_list = []

    for i in range(64):
        # The '#' + str(i) suffix is a URL fragment; requests does not send it to the
        # server, so every iteration actually fetches the same HTML document.
        url = 'https://movie.douban.com/annual/2018?source=navigation#' + str(i)
        r = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(r.text, 'html.parser')

        list_div = soup.find('div', {'class': 'Rz9z5'})
        if list_div is None:
            continue
        film_list_name = list_div.text
        film_list_list.append((film_list_name, url))

    return film_list_list


def main():
    """
        主函数
    """

    film_list_list = get_all_film_lists()

    # header = ['List name', 'Top 1', 'Top 2', 'Top 3', 'Top 4', 'Top 5', 'Top 6', 'Top 7', 'Top 8', 'Top 9', 'Top 10']
    # with open('film_lists.csv', 'w', encoding='utf-8', newline='') as f:
    #     writer = csv.writer(f)
    #     writer.writerow(header)
    #     for i, film_list in enumerate(film_list_list):
    #         if (i + 1) % 10 == 0:
    #             print('Processed {} of {} records.'.format(i + 1, len(film_list_list)))

    for film_list_name, film_list_address in film_list_list:
        film_list_info = get_film_list_info(film_list_address)
        print(film_list_name, film_list_info)


if __name__ == '__main__':
    main()
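
A quick sanity check on the raw response helps narrow this down. Below is a minimal sketch that reuses the same headers; the class names are the ones from the code above (they look like build-generated hashes, which is an assumption), and the second part only illustrates that the '#' + str(i) suffix is a URL fragment that requests never sends to the server:

import requests
from requests import Request

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
    "Host": "movie.douban.com"
}

# 1) Do the class names the scraper relies on appear in the static HTML at all?
#    If the page builds its list content with client-side JavaScript, they will not,
#    and soup.find()/find_all() will always come back empty.
r = requests.get('https://movie.douban.com/annual/2018?source=navigation',
                 headers=headers, timeout=30)
print(r.status_code, len(r.text))
for cls in ('Rz9z5', 'Bbvlm', '_3j9RA'):
    print(cls, cls in r.text)

# 2) The fragment is stripped from the request target, so all 64 loop iterations
#    ask the server for exactly the same document.
prepared = Request('GET', 'https://movie.douban.com/annual/2018?source=navigation#63').prepare()
print(prepared.path_url)  # '/annual/2018?source=navigation' -- no fragment

If the class names never show up in the static HTML, the list content is presumably rendered in the browser, and requests + BeautifulSoup alone will not see it; that part is an assumption, not something verified against the live page.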

ORCA - Fourth-year CS undergraduate, University of Chinese Academy of Sciences


Where exactly is the problem...?
