The Algorithms logo
The Algorithms
AboutDonate

ScrapNewsfromIndiaToday

G
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
def make_soup(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        A ``BeautifulSoup`` tree of the response text, built with the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # Without an explicit timeout requests.get may block forever on a server
    # that accepts the connection but never responds.
    response = requests.get(url, timeout=timeout)
    # The HTTP status is not inspected here: whatever body came back is parsed.
    return BeautifulSoup(response.text, 'html.parser')
# Listing page to scrape, and the site root used to resolve article links.
url = 'https://www.indiatoday.in/top-stories'
indiatoday = 'https://www.indiatoday.in'

# One <div> per story card on the listing page.
# NOTE(review): the 'catagory-listing' spelling is kept verbatim; presumably it
# matches the site's own CSS class name -- confirm against the live markup.
top_stories = make_soup(url).find_all('div', {'class': 'catagory-listing'})

# Rows of [title, brief, list of article paragraphs, image url].
articles_list = []
for story in top_stories:
    link = story.find('a')
    # A card without a usable link cannot be followed to its article page.
    if link is None or not link.get('href'):
        continue
    title = link.text

    # Cards are not guaranteed to carry an image or a summary paragraph;
    # fall back to None / '' rather than crashing on a missing tag.
    image_tag = story.find('img')
    image = image_tag.get('src') if image_tag is not None else None
    brief_tag = story.find('p')
    brief = brief_tag.text if brief_tag is not None else ''

    # urljoin resolves site-relative hrefs against the root and leaves
    # already-absolute hrefs intact, unlike plain string concatenation.
    story_soup = make_soup(urljoin(indiatoday, link['href']))

    # Collect the text of every <p> inside each 'description' container.
    article = []
    for description in story_soup.find_all('div', {'class': 'description'}):
        article.extend(paragraph.text for paragraph in description.find_all('p'))

    articles_list.append([title, brief, article, image])

# One row per scraped story.
df = pd.DataFrame(articles_list, columns=['Title', 'Brief Intro', 'Paragraph', 'Image Url'])