The Algorithms logo
The Algorithms
About | Donate

Netflix Scraper

G

Netflix Scraper

This script fetches the list of categories from Netflix and then, for a category chosen by the user, gathers its sub-categories and the movies listed under each sub-category.

from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
def make_soup(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may wait indefinitely on a server
            that never answers.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def browseCategory(category, data):
    """Scrape the sub-categories and movies of one selected category.

    Args:
        category: 1-based number of the category to open.
        data: Sequence of category rows; index 2 of each row is read as
            the URL of that category's page.

    Returns:
        A tuple ``(subCategories, subCategory_details, count)`` where
        ``subCategories`` is the list of sub-category headings,
        ``subCategory_details`` is a parallel list holding, for each
        heading, a list of ``[number, title, link]`` movie rows, and
        ``count`` is the number of sub-categories collected.

    Raises:
        IndexError: If ``category`` is not between 1 and ``len(data)``.
    """
    # Reject 0 and negative numbers explicitly: plain indexing with
    # ``category - 1`` would silently wrap around to a different row.
    if not 1 <= category <= len(data):
        raise IndexError(
            'category must be between 1 and {}, got {}'.format(len(data), category)
        )
    category_url = data[category - 1][2]

    subCategories = []
    subCategory_details = []
    soup = make_soup(category_url)
    for card in soup.find_all('section', {'class': 'nm-collections-row'}):
        heading = card.find('h1')
        if heading is None:
            # A row without a heading is not a sub-category; skip it.
            continue

        movie_list = []
        for movie in card.find_all('li'):
            title_tag = movie.find('span', {'class': 'nm-collections-title-name'})
            link_tag = movie.find('a')
            if title_tag is None or link_tag is None:
                # Skip list items that lack a title or an anchor.
                continue
            # Movies are numbered from 1 in the order they are kept.
            movie_list.append(
                [len(movie_list) + 1, title_tag.text, link_tag.get('href')]
            )

        subCategories.append(heading.text)
        subCategory_details.append(movie_list)

    return subCategories, subCategory_details, len(subCategories)
def getCategories(base_url):
    """Collect the category rows found on the page at ``base_url``.

    Every ``section`` element with class ``nm-collections-row`` that has
    both a ``span`` with class ``nm-collections-row-name`` and an anchor
    contributes one row; sections missing either are skipped.

    Returns:
        A list of ``[number, title, url]`` rows, numbered from 1 in page
        order.
    """
    page = make_soup(base_url)
    result = []
    for section in page.find_all('section', {'class': 'nm-collections-row'}):
        name_tag = section.find('span', {'class': 'nm-collections-row-name'})
        if name_tag is None:
            continue
        anchor = section.find('a')
        if anchor is None:
            continue
        # The running number is simply the row's 1-based position.
        result.append([len(result) + 1, name_tag.text, anchor.get('href')])
    return result
def main():
    """Interactively browse one Netflix category from the command line.

    Prints the categories scraped from the start page, asks the user to
    pick one by number, then prints every sub-category of that category
    followed by a table of its movies.
    """
    netflix_url = "https://www.netflix.com/in/browse/genre/839338"
    categories = getCategories(netflix_url)
    if not categories:
        # With nothing to choose from, no answer to the prompt is valid.
        print("No categories found.")
        return

    print("Please select one of the category")
    # Build the frame straight from the row lists. Routing them through
    # np.array() yields a 1-D array for an empty list, which pandas
    # rejects against three column names.
    columns = ['Sr.No', 'Title', 'link']
    df = pd.DataFrame(categories, columns=columns)
    print(df.to_string(index=False))

    # Keep asking until the answer is a number naming a listed category.
    while True:
        try:
            choice = int(input('\n\n Please Enter your Choice: \n'))
        except ValueError:
            print("Please enter a number.")
            continue
        if 1 <= choice <= len(categories):
            break
        print("Please enter a number between 1 and {}.".format(len(categories)))

    subCategories, movieList, count = browseCategory(choice, categories)
    for i in range(count):
        print(subCategories[i], '\n\n')
        subCategory_df = pd.DataFrame(movieList[i], columns=columns)
        print(subCategory_df.to_string(index=False))
        print("\n\n\n")
    
# Run the interactive scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()