Podcast Scraping: 99 Percent Invisible

Today in desert island podcast scraping:

99 Percent Invisible

import os.path
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def download(href, title, extension="mp3", dirname='.', timeout=30):
    """Stream the file at ``href`` into ``dirname`` and return its local path.

    The file is saved as ``<title>.<extension>``. Any ``/`` in that name is
    replaced with ``-`` so the title cannot introduce extra path components.
    ``dirname`` is created first when it does not exist.

    Args:
        href: URL to fetch.
        title: Base name of the saved file.
        extension: File extension, without the leading dot.
        dirname: Directory the file is written to.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            server cannot block the caller indefinitely.

    Returns:
        The path of the file that was written.

    Raises:
        requests.RequestException: on connection problems or timeouts, and
            (as ``requests.HTTPError``) when the server answers with an
            error status.
    """
    filename = "%s.%s" % (title, extension)
    filename = filename.replace("/", "-")
    # todo, path management
    local_filename = os.path.join(dirname, filename)
    if not os.path.exists(dirname):
        print("making dir %s" % dirname)
        # exist_ok: tolerate the directory appearing between check and create.
        os.makedirs(dirname, exist_ok=True)
    # The context manager releases the streamed connection even on errors.
    with requests.get(href, stream=True, timeout=timeout) as r:
        # Fail before opening the file so an HTTP error page is never
        # saved under an audio file name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
# Walk the paginated episode archive and download every episode that exposes
# a download link, pausing between downloads to go easy on the server.
BASE_URL = "https://99percentinvisible.org"
FIRST_ARCHIVE_PAGE = 1
LAST_ARCHIVE_PAGE = 28  # last archive page that gets scraped (inclusive)

for page_number in range(FIRST_ARCHIVE_PAGE, LAST_ARCHIVE_PAGE + 1):
    archive_page = requests.get("%s/episodes/page/%s" % (BASE_URL, page_number))
    soup = BeautifulSoup(archive_page.content, 'html.parser')
    for ep_row in soup.find_all('article', attrs={"class": "post-block post episode"}):
        ep_page_link = ep_row.find('a', attrs={"class": "download"})
        title_tag = ep_row.find('h3', attrs={"class": "post-title"})
        number_tag = ep_row.find('h4', attrs={"class": "post-label"})
        # find() returns None for a missing element; skip such entries
        # instead of aborting the whole run with an AttributeError.
        if ep_page_link is None or title_tag is None or number_tag is None:
            print("skipping archive entry without download link, title or number")
            continue
        raw_href = ep_page_link.attrs.get('href')
        if not raw_href:
            print("skipping archive entry whose download link has no href")
            continue
        # Strip surrounding whitespace: both values end up in the file name.
        title = title_tag.text.replace('Episode ', '').strip()
        number = number_tag.text.replace('Episode ', '').strip()
        # urljoin copes with both site-relative and absolute hrefs.
        href = urljoin(BASE_URL, raw_href)
        print(raw_href, number, title)
        print('->', title, href)
        try:
            download(href, "%s. %s" % (number, title))
        except requests.RequestException as err:
            # One broken episode should not stop the remaining downloads.
            print("download failed for %s: %s" % (href, err))
        time.sleep(1)

27th January 2018

Comments and Messages

I won't ever give out your email address. I don't publish comments but if you'd like to write to me then you could use this form.