Desert Island Two: Planet Money

Desert Island Media Prep Day Two: Planet Money.



#https://www.npr.org/sections/money/127413729/planet-money/archive?date=12-31-2017
# Note the date at the end of the URL: the last day of a month selects that month's archive page.
# The episode numbering is weird, so I'm going to make one folder per date to keep things in roughly date order.
import time
import requests
from bs4 import BeautifulSoup
import os.path
def download(href, title, extension="mp3", dirname='.'):
    """Stream the resource at ``href`` into ``dirname`` and return its path.

    The file is named ``<title>.<extension>``; any ``/`` in that name is
    replaced with ``-`` so the title cannot be read as a sub-path.
    ``dirname`` is created first when it does not exist yet.

    Args:
        href: URL of the file to fetch.
        title: Base name for the saved file.
        extension: File extension, without the leading dot.
        dirname: Directory to save into. An empty string means the current
            working directory.

    Returns:
        The path of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    filename = ("%s.%s" % (title, extension)).replace("/", "-")
    # todo, path management
    # Guard against '' because os.makedirs('') raises instead of being a no-op.
    if dirname and not os.path.exists(dirname):
        print("making dir %s" % dirname)
        os.makedirs(dirname)
    local_filename = os.path.join(dirname, filename)
    # Using the response as a context manager releases the connection even
    # when the body is not fully consumed (e.g. a write error mid-stream).
    with requests.get(href, stream=True) as r:
        # Fail loudly rather than saving an HTTP error page under an mp3 name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
# Month-end dates to scrape; each value is passed as the archive page's
# ``date`` query parameter and doubles as the download folder name.
# Uncomment older entries to fetch more of the back catalogue.
dates = [
    '01-31-2018',
    '12-31-2017',
    '11-30-2017',
    '10-31-2017',
#    '09-30-2017',
#    '08-31-2017',
#    '07-31-2017',
#    '06-30-2017',
#    '05-31-2017',
#    '04-30-2017',
#    '03-31-2017',
#    '02-28-2017',
#    '01-31-2017',
]
for d in dates:
    archive_page = requests.get("https://www.npr.org/sections/money/127413729/planet-money/archive?date=%s" % d)
    soup = BeautifulSoup(archive_page.content, 'html.parser')
    for ep_row in soup.find_all('article', attrs={"class": "item"}):
        ep_page_link = ep_row.find('a')
        heading = ep_row.find('h2')
        if ep_page_link is None or heading is None:
            # Without both a link and a heading there is nothing to fetch or name.
            print('skipping archive item without a link or title')
            continue
        title = heading.text.replace('Episode ', '')
        ep_url = ep_page_link.attrs['href']
        print(ep_url)
        ep_page = requests.get(ep_url)
        # Parse the episode page itself: the download link is looked up there,
        # not on the archive listing.
        ep_soup = BeautifulSoup(ep_page.content, 'html.parser')
        module = ep_soup.find('div', attrs={"class": "audio-module"})
        # Walk down to the download anchor one step at a time so a page that
        # lacks any of the pieces is skipped instead of aborting the whole run.
        download_item = None
        if module is not None:
            download_item = module.find('li', attrs={'class': "audio-tool-download"})
        download_link = download_item.find('a') if download_item is not None else None
        if download_link is None:
            print('-> no download link for', title)
        else:
            href = download_link.attrs['href']
            print('->', title, href)
            download(href, title, dirname=d)
        # Pause between episodes to stay polite to the server.
        time.sleep(1)



Comments and Messages

I won't ever give out your email address. I don't publish comments, but if you'd like to write to me, you can use this form.

Issac Kelly