In [1]:
from lec_utils import *
# For illustration purposes only.
def err():
    raise ValueError('😭😭😭 I just deleted all of your files! 😭😭😭')

Lecture 10¶

APIs, SQL, and Spreadsheets¶

EECS 398-003: Practical Data Science, Fall 2024¶

practicaldsc.org • github.com/practicaldsc/fa24

Announcements 📣¶

  • Homework 4 is due tonight. Watch this video before submitting!
    TL;DR: Make sure hw04.ipynb is smaller than 1 MB before submitting.
  • The Midterm Exam is in a few weeks – start working through old exam problems here.
    You can even see an example old exam PDF.
  • Looking for sources of data, or other supplemental resources? Look at our updated Resources page!

Agenda¶

  • Example: Scraping the Happening @ Michigan page.
  • APIs and JSON.
  • Generalized table manipulation.
    • SQL.
    • Google Sheets.

Activity

Consider the following HTML document, which represents a webpage containing the top few songs with the most streams on Spotify today in Canada.

<head>
    <title>3*Canada-2022-06-04</title>
</head>
<body>
    <h1>Spotify Top 3 - Canada</h1>
    <table>
        <tr class='heading'>
            <th>Rank</th>
            <th>Artist(s)</th> 
            <th>Song</th>
        </tr>
        <tr class=1>
            <td>1</td>
            <td>Harry Styles</td> 
            <td>As It Was</td>
        </tr>
        <tr class=2>
            <td>2</td>
            <td>Jack Harlow</td> 
            <td>First Class</td>
        </tr>
        <tr class=3>
            <td>3</td>
            <td>Kendrick Lamar</td> 
            <td>N95</td>
        </tr>
    </table>
</body>
  • Part 1: How many leaf nodes are there in the DOM tree of the previous document — that is, how many nodes have no children?
  • Part 2: What does the following line of code evaluate to?
len(soup.find_all("td"))
  • Part 3: What does the following line of code evaluate to?
soup.find("tr").get("class")

Example: Scraping the Happening @ Michigan page¶


Example: Scraping the Happening @ Michigan page¶

  • Our goal is to create a DataFrame with the information about each event at events.umich.edu.
Today's event page will look different, since I wrote this part of the lecture a few days ago!
In [2]:
import requests
from bs4 import BeautifulSoup
In [3]:
res = requests.get('https://events.umich.edu') 
res
Out[3]:
<Response [200]>
In [4]:
soup = BeautifulSoup(res.text)
  • Let's start by opening the page in Chrome, right clicking on the page, and clicking "Inspect".
    As we can see, the HTML is much more complicated this time – this is usually the case for real websites!

Identifying <div>s¶

  • It's not easy identifying which <div>s we want. The Inspect tool makes this easier, but it's good to verify that find_all is finding the right number of elements.
In [5]:
divs = soup.find_all(class_='col-xs-12') 
In [6]:
len(divs)
Out[6]:
107
  • Again, let's deal with one <div> at a time. First, we should extract the title of the event.
In [7]:
divs[0]
Out[7]:
<div class="col-xs-12 col-sm-4 col-md-4 col-lg-2 flex no-pad">
                        
<div class="event-listing-grid event-single">
<time class="time-banner" datetime="2024-09-26 8:00"><i class="fa fa-clock-o"></i> Sep 26, 2024 8:00am</time>





    
 <div class="list-image">
                                <a alt="MiCAN Summit" href="/event/123300" style="background:url(https://events.umich.edu/media/cache/event_list_2x/media/attachments/2024/07/event_123300_original-1.jpeg) center center no-repeat; background-size:cover; position:absolute; width:100%;height:100%; top:0px;left:0px;" title="MiCAN Summit">
          </a>
     </div>
 
 <div class="event-info">
    <div class="event-title"><h3>
       <a href="/event/123300" title="2024 Michigan Climate Summit">
    2024 Michigan Climate Summit
    </a></h3>
                <h4>2024 MiCAN Summit</h4>
                </div>
  <ul class="event-details">
    
    <li class="item">
        <a href="/list?filter=locations:71" title="Rackham Graduate School (Horace H.)"><i class="fa fa-location-arrow fa-fw"></i><span> Rackham Graduate School...</span></a>
    </li>
             
                     <li class="item"><a href="/group/3454" title="School for Environment and Sustainability"><i class="fa fa-group fa-fw"></i><span>
        School for Environment and...
    </span></a></li>
            
        
    <li class="item"><a href="/list?filter=alltypes:5"><i class="fa fa-list fa-fw"></i><span> Conference / Symposium </span></a></li>
    
                          
         <li class="item"><a href="https://www.miclimateaction.org/2024_michigan_climate_summit">
                     <i class="fa fa-link fa-fw"></i>
                  
         <span>RSVP</span>
         </a></li>
    
    

	

 </ul>   

<!--
    <p>
    This year&#039;s Summit is all about Climate Civics! Together, attendees will explore the intersections between civic engagement, social justice,...
    (
        2024-09-26 8:00am
    )
    </p>
-->




 </div>

</div>
                    </div>
In [8]:
divs[0].find('div', class_='event-title').find('a').get('title') 
Out[8]:
'2024 Michigan Climate Summit'
  • The time and location, too.
In [9]:
divs[0].find('time').get('datetime') 
Out[9]:
'2024-09-26 8:00'
In [10]:
divs[0].find('ul').find('a').get('title') 
Out[10]:
'Rackham Graduate School (Horace H.)'

Parsing a single event, and then every event¶

  • As before, we'll implement a function that takes in a BeautifulSoup object corresponding to a single <div> and returns a dictionary with the relevant information about that event.
In [11]:
def process_event(div):
    title = div.find('div', class_='event-title').find('a').get('title')
    location = div.find('ul').find('a').get('title')
    time = pd.to_datetime(div.find('time').get('datetime')) # Good idea!
    return {'title': title, 'time': time, 'location': location}
In [12]:
process_event(divs[12])
Out[12]:
{'title': "An expert's guide to breaking into Architecture, Engineering & Construction",
 'time': Timestamp('2024-09-26 12:00:00'),
 'location': ''}
  • Now, we can call it on every <div> in divs.
    Remember, we already ran divs = soup.find_all(class_='col-xs-12').
In [13]:
row_list = []
for div in divs:
    try:
        row_list.append(process_event(div))
    except Exception as e:
        print(e)
'NoneType' object has no attribute 'find'
In [14]:
events = pd.DataFrame(row_list) 
events.head()
Out[14]:
title time location
0 2024 Michigan Climate Summit 2024-09-26 08:00:00 Rackham Graduate School (Horace H.)
1 LSA Transfer Information Session 2024-09-26 09:00:00 Virtual
2 Earthfest 2024 2024-09-26 10:00:00 Diag - Central Campus
3 FAA Aviation Safety STEM Career Symposium-Expl... 2024-09-26 10:00:00
4 Internship Lab 2024-09-26 11:00:00
  • Now, events is a DataFrame, like any other!
In [15]:
# Which events are in-person today?
events[~events['location'].isin(['Virtual', ''])] 
Out[15]:
title time location
0 2024 Michigan Climate Summit 2024-09-26 08:00:00 Rackham Graduate School (Horace H.)
2 Earthfest 2024 2024-09-26 10:00:00 Diag - Central Campus
6 Mini Museum: The Sum of Small Parts Student Ex... 2024-09-26 11:00:00 Stamps Gallery, 201 South Division Street Ann ...
... ... ... ...
103 Markley Hall (2024-2025) (Housing) 2024-09-26 20:00:00 South Lounge
104 NPHC Council Meetings - Fall 2024 2024-09-26 20:00:00 1443 Washtenaw Ave
105 Women's Soccer vs Rutgers 2024-09-26 20:00:00 U-M Soccer Stadium

56 rows × 3 columns

Web data in practice¶

  • The spread of true and false news online, a study by Vosoughi et al., compared how true and false news spread via X (Twitter):

There is worldwide concern over false news and the possibility that it can influence political, economic, and social well-being. To understand how false news spreads, Vosoughi et al. used a data set of rumor cascades on Twitter from 2006 to 2017. About 126,000 rumors were spread by ∼3 million people. False news reached more people than the truth; the top 1% of false news cascades diffused to between 1000 and 100,000 people, whereas the truth rarely diffused to more than 1000 people. Falsehood also diffused faster than the truth. The degree of novelty and the emotional reactions of recipients may be responsible for the differences observed.

  • To conduct this study, the authors used the X API for accessing tweets and web-scraped fact-checking websites to verify whether news was false or not.

APIs and JSON¶


Recap: Scraping vs. APIs¶

  • There are two ways to programmatically access data on the internet: either by scraping, or through an API.
  • Scraping is the act of emulating a web browser to access a page's HTML source code.
    When scraping, you get back data as HTML and have to parse that HTML to extract the information you want. Parse means to "extract meaning from a sequence of symbols".
  • An application programming interface, or API, is a service that makes data directly available to the user in a convenient fashion. Usually, APIs give us data back as JSON objects.
    APIs are made by organizations that host data. For example, X (formerly known as Twitter) has an API, as does OpenAI, the creators of ChatGPT.
  • To understand how to use an API, we must learn how to work with JSON objects.

JSON¶

  • JSON stands for JavaScript Object Notation. It is a lightweight format for storing and transferring data.
  • It is:
    • very easy for computers to read and write.
    • moderately easy for programmers to read and write by hand.
    • meant to be generated and parsed.
  • Most modern languages have an interface for working with JSON objects.
    JSON objects resemble Python dictionaries (but are not the same!).
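For instance, here's a minimal sketch of the difference, using Python's built-in json module: JSON spells booleans and the empty value differently than Python does.

import json

d = {'ok': True, 'score': None}
s = json.dumps(d)   # Serialize the dictionary to a JSON string.
s                   # '{"ok": true, "score": null}'
json.loads(s) == d  # True: parsing the string recovers the dictionary.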

JSON data types¶

Type Description
String Anything inside double quotes.
Number Any number (no difference between ints and floats).
Boolean true and false.
Null JSON's empty value, denoted by null.
Array Like Python lists.
Object A collection of key-value pairs, like dictionaries. Keys must be strings, values can be anything (even other objects).

See json-schema.org for more details.

Example JSON object¶

See data/family.json.

In [16]:
!cat data/family.json
{
    "name": "Grandma",
    "age": 94,
    "children": [
        {
        "name": "Dad",
        "age": 60,
        "children": [{"name": "Me", "age": 24}, 
                     {"name": "Brother", "age": 22}]
        },
        {
        "name": "My Aunt",
        "children": [{"name": "Cousin 1", "age": 34}, 
                     {"name": "Cousin 2", "age": 36, "children": 
                        [{"name": "Cousin 2 Jr.", "age": 2}]
                     }
                    ]
        }
    ]
}
In [17]:
import json
with open('data/family.json', 'r') as f:
    family_str = f.read()
    family_tree = json.loads(family_str)
In [18]:
family_tree
Out[18]:
{'name': 'Grandma',
 'age': 94,
 'children': [{'name': 'Dad',
   'age': 60,
   'children': [{'name': 'Me', 'age': 24}, {'name': 'Brother', 'age': 22}]},
  {'name': 'My Aunt',
   'children': [{'name': 'Cousin 1', 'age': 34},
    {'name': 'Cousin 2',
     'age': 36,
     'children': [{'name': 'Cousin 2 Jr.', 'age': 2}]}]}]}
In [19]:
family_tree['children'][1]['children'][0]['age'] 
Out[19]:
34

Reference Slide¶

Aside: eval¶

  • eval, which stands for "evaluate", is a function built into Python.
  • It takes in a string containing a Python expression and evaluates it in the current context.
In [20]:
x = 4
eval('x + 5')
Out[20]:
9
  • It seems like eval can do the same thing that json.loads does...
In [21]:
eval(family_str)
Out[21]:
{'name': 'Grandma',
 'age': 94,
 'children': [{'name': 'Dad',
   'age': 60,
   'children': [{'name': 'Me', 'age': 24}, {'name': 'Brother', 'age': 22}]},
  {'name': 'My Aunt',
   'children': [{'name': 'Cousin 1', 'age': 34},
    {'name': 'Cousin 2',
     'age': 36,
     'children': [{'name': 'Cousin 2 Jr.', 'age': 2}]}]}]}
  • But you should almost never use eval.
In [22]:
with open('data/evil_family.json', 'r') as f:
    evil_family_str = f.read()
eval(evil_family_str)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[22], line 3
      1 with open('data/evil_family.json', 'r') as f:
      2     evil_family_str = f.read()
----> 3 eval(evil_family_str)

File <string>:6

Cell In[1], line 4, in err()
      3 def err():
----> 4     raise ValueError('😭😭😭 I just deleted all of your files! 😭😭😭')

ValueError: 😭😭😭 I just deleted all of your files! 😭😭😭
  • Oh no! Since evil_family.json, which could have been downloaded from the internet, contained malicious code, we've now lost all of our files.
  • This happened because eval evaluates all parts of the input string as if it were Python code.
  • You never need to do this – instead, use the .json() method of a response object, or use the json library.

Reference Slide¶

Using the json module¶

  • json.load(f) loads JSON from a file object.
  • json.loads(s) loads JSON from a string (the trailing s stands for "string").
In [23]:
json.loads(evil_family_str)
---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
Cell In[23], line 1
----> 1 json.loads(evil_family_str)

File ~/miniforge3/envs/pds/lib/python3.10/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    341     s = s.decode(detect_encoding(s), 'surrogatepass')
    343 if (cls is None and object_hook is None and
    344         parse_int is None and parse_float is None and
    345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
    347 if cls is None:
    348     cls = JSONDecoder

File ~/miniforge3/envs/pds/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
    332 def decode(self, s, _w=WHITESPACE.match):
    333     """Return the Python representation of ``s`` (a ``str`` instance
    334     containing a JSON document).
    335 
    336     """
--> 337     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    338     end = _w(s, end).end()
    339     if end != len(s):

File ~/miniforge3/envs/pds/lib/python3.10/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
    353     obj, end = self.scan_once(s, idx)
    354 except StopIteration as err:
--> 355     raise JSONDecodeError("Expecting value", s, err.value) from None
    356 return obj, end

JSONDecodeError: Expecting value: line 6 column 17 (char 84)
  • Since util.err() is not a string in JSON (there are no quotes around it), json.loads is not able to parse it as a JSON object.
  • This "safety check" is intentional.

Reference Slide¶

Key takeaways¶

  • Never trust data from an unfamiliar site.
  • Never use eval on "raw" data that you didn't create!
  • The JSON data format needs to be parsed, not evaluated as a dictionary. It was designed with safety in mind!

Aside: pd.read_json¶

  • pandas also has a built-in read_json function.
In [24]:
with open('data/family.json', 'r') as f:
    family_df = pd.read_json(f)
family_df
Out[24]:
name age children
0 Grandma 94 {'name': 'Dad', 'age': 60, 'children': [{'name...
1 Grandma 94 {'name': 'My Aunt', 'children': [{'name': 'Cou...
  • It only makes sense to use it, though, when you have a JSON file that has some sort of tabular structure. Our family tree example does not.
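If you do need a DataFrame out of nested JSON, one option (a sketch, not the only approach) is pd.json_normalize, which turns a list of records into rows:

# One row per child of Grandma; missing keys become NaN, and the
# nested 'children' lists stay as list-valued cells.
pd.json_normalize(family_tree['children'])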

API terminology¶

  • A URL, or uniform resource locator, describes the location of a website or resource.
  • API requests are just GET/POST requests to a specially maintained URL.
  • As an example, we'll look at the Pokémon API.
  • All requests are made to:
        https://pokeapi.co/api/v2/{endpoint}/{name}
  • For example, to learn about Pikachu, we use the pokemon endpoint with name pikachu.

      https://pokeapi.co/api/v2/pokemon/pikachu
    
  • Or, to learn about all water-type Pokémon, we use the type endpoint with name water.

      https://pokeapi.co/api/v2/type/water
    

API requests¶

  • To illustrate, let's make a GET request to learn more about Pikachu.
In [25]:
def request_pokemon(name):
    url = f'https://pokeapi.co/api/v2/pokemon/{name}'
    return requests.get(url)
res = request_pokemon('pikachu')
res
Out[25]:
<Response [200]>
  • Remember, the 200 status code is good! Let's take a look at the content:
In [26]:
res.content[:1000]
Out[26]:
b'{"abilities":[{"ability":{"name":"static","url":"https://pokeapi.co/api/v2/ability/9/"},"is_hidden":false,"slot":1},{"ability":{"name":"lightning-rod","url":"https://pokeapi.co/api/v2/ability/31/"},"is_hidden":true,"slot":3}],"base_experience":112,"cries":{"latest":"https://raw.githubusercontent.com/PokeAPI/cries/main/cries/pokemon/latest/25.ogg","legacy":"https://raw.githubusercontent.com/PokeAPI/cries/main/cries/pokemon/legacy/25.ogg"},"forms":[{"name":"pikachu","url":"https://pokeapi.co/api/v2/pokemon-form/25/"}],"game_indices":[{"game_index":84,"version":{"name":"red","url":"https://pokeapi.co/api/v2/version/1/"}},{"game_index":84,"version":{"name":"blue","url":"https://pokeapi.co/api/v2/version/2/"}},{"game_index":84,"version":{"name":"yellow","url":"https://pokeapi.co/api/v2/version/3/"}},{"game_index":25,"version":{"name":"gold","url":"https://pokeapi.co/api/v2/version/4/"}},{"game_index":25,"version":{"name":"silver","url":"https://pokeapi.co/api/v2/version/5/"}},{"game_index":'

Working with JSON objects¶

  • The response we got back looks like JSON. We can extract the JSON from this response with the .json() method (or by passing res.text to json.loads).
In [27]:
pikachu = res.json()
pikachu
Out[27]:
In [28]:
pikachu.keys()
Out[28]:
dict_keys(['abilities', 'base_experience', 'cries', 'forms', 'game_indices', 'height', 'held_items', 'id', 'is_default', 'location_area_encounters', 'moves', 'name', 'order', 'past_abilities', 'past_types', 'species', 'sprites', 'stats', 'types', 'weight'])
In [29]:
pikachu['weight']
Out[29]:
60
In [30]:
pikachu['abilities'][1]['ability']['name']
Out[30]:
'lightning-rod'

Invalid GET requests¶

  • Let's try a GET request for 'wolverine'.
In [31]:
request_pokemon('wolverine')
Out[31]:
<Response [404]>
  • We receive a 404 error, since there is no Pokemon named 'wolverine'!
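When writing code that makes many requests, it's good practice to check for failures before trying to parse the body. A minimal sketch:

res = request_pokemon('wolverine')
res.status_code          # 404.
res.ok                   # False, since 4xx/5xx status codes indicate errors.
# res.raise_for_status() # Or, raise an HTTPError when the request failed.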

More on APIs¶

  • We accessed the Pokémon API by making requests directly. But some APIs also have Python wrappers, which allow you to make requests by calling Python functions.
    request_pokemon is essentially a wrapper for (a small part of) the Pokémon API.
  • Some APIs will require you to create an API key, and send that key as part of your request; there's a sketch after this list.
    See Homework 5 for an example!
  • Many of the APIs you'll use are "REST" APIs. Learn more about RESTful APIs here.
    REST stands for "Representational State Transfer." One of the key properties of a RESTful API is that servers don't store any information about previous requests, or who is making them.
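A common pattern for sending an API key is to include it in a request header. The sketch below is hypothetical: the URL, header name, and key are all placeholders, and every API documents its own authentication scheme.

import requests

API_KEY = 'your-key-here'  # Hypothetical; in practice, keep keys out of notebooks.
res = requests.get(
    'https://api.example.com/v1/data',  # Placeholder URL.
    headers={'Authorization': f'Bearer {API_KEY}'},
)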

Generalized table manipulation¶


Representations of tabular data¶

  • In this class, we've worked with DataFrames in pandas.
  • When we say pandas DataFrame, we're talking about the pandas API for its DataFrame objects.
  • When we say "DataFrame", we're referring to a general way to represent data.
    DataFrames organize data into rows and columns, with labels for both rows and columns.
  • There are many other ways to work with data tables, each of which has its own pros and cons.
    Some examples include R data frames, SQL databases, spreadsheets in Google Sheets/Excel, or even matrices from linear algebra.

Alternatives¶

  • Below, we discuss pros and cons of (some of) the ways we can work with tabular data.
Platform Pros ✅ Cons ❌
pandas DataFrames Works well with the Python ecosystem (for visualization, machine learning, domain-specific purposes, etc.), extremely flexible, reproducible steps Steep learning curve (need to know Python too) and messy code, easy to make destructive in-place modifications, no persistence (everything starts from a CSV file)
R data frames Designed specifically for data science so statistics and visualizations are easy; reproducible steps R isn't as general-purpose as Python, no persistence
SQL Scalable, good for maintaining many large, important datasets with many concurrent users Requires lots of infrastructure, advanced operations can be challenging
Spreadsheets Widespread use, very easy to get started, easy for sharing Steps aren't reproducible, advanced operations can be challenging
  • A common workflow is to load a subset of data in from a database system into pandas, then do further cleaning and visualization.
  • Another is to load and clean data in pandas, then store it in a database system for others to use.

Relational algebra¶

  • Relational algebra captures common data operations between many data table systems.
    We won't test you on relational algebra syntax, but if you'd like to learn more, take EECS 484: Database Management Systems.
  • For example, the following expression describes a calculation in relational algebra:

    $$\pi_{\text{int\_rate}}(\sigma_{\text{state} = \text{"MI"}}(\text{loans}))$$


    $\pi$ stands for "project," i.e. selecting columns. $\sigma$ stands for "select," i.e. selecting rows.

  • In pandas, we'd implement this expression as follows:
loans.loc[loans['state'] == 'MI', 'int_rate']
  • How would we implement it in SQL? Or a spreadsheet?
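As a preview, here's a sketch of the SQL version, assuming the loans data lives in a SQL table named loans:

SELECT int_rate FROM loans
WHERE state = 'MI';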

Comparing pandas, SQL, and Google Sheets¶

  • To show you how the tabular manipulation skills you've learned in pandas generalize to other systems, we will answer a few questions in all three of the above platforms.
  • First, we'll work through our analysis in pandas, and then in SQL, and finally, in a Google Sheets document.

Overview: Top 200 streams¶

  • Our dataset contains the number of streams for the top 200 songs on Spotify, for the week leading up to September 19th.
    We downloaded it from here; this was the most recent week available for download.
  • Let's load it in as a pandas DataFrame.
In [32]:
charts = pd.read_csv('data/regional-global-weekly-2024-09-19.csv') 
charts
Out[32]:
rank uri artist_names track_name ... peak_rank previous_rank weeks_on_chart streams
0 1 spotify:track:2plbrEY59IikOBgBGLjaoe Lady Gaga, Bruno Mars Die With A Smile ... 1 1 5 76502673
1 2 spotify:track:6dOtVTDdiauQNBQEDOtlAB Billie Eilish BIRDS OF A FEATHER ... 1 2 18 54756808
2 3 spotify:track:5G2f63n7IPVPPjfNIGih7Q Sabrina Carpenter Taste ... 3 3 4 48790619
... ... ... ... ... ... ... ... ... ...
197 198 spotify:track:3EaJDYHA0KnX88JvDhL9oa Steve Lacy Dark Red ... 66 200 118 8901006
198 199 spotify:track:0gEyKnHvgkrkBM6fbeHdwK The Cranberries Linger ... 199 -1 2 8867800
199 200 spotify:track:7iUtQNMRB8ZkKC4AmEuCJC Myke Towers LA FALDA ... 24 185 36 8848312

200 rows × 9 columns

In [33]:
charts.columns 
Out[33]:
Index(['rank', 'uri', 'artist_names', 'track_name', 'source', 'peak_rank',
       'previous_rank', 'weeks_on_chart', 'streams'],
      dtype='object')
  • As an aside, we can play Spotify songs directly in our notebooks!
In [34]:
def show_spotify(uri):
    # The track ID is everything after the last ':' in the URI.
    code = uri[uri.rfind(':')+1:]
    # Spotify's embed player for that track.
    src = f"https://open.spotify.com/embed/track/{code}"
    width = 400
    height = 75
    display(IFrame(src, width, height))
In [35]:
my_uri = charts.loc[charts['track_name'] == 'Yellow', 'uri'].iloc[0] 
my_uri
Out[35]:
'spotify:track:3AJwUDP919kvQ9QcozQPxg'
In [36]:
show_spotify(my_uri)

Complete using pandas.
Task 1: Find the total number of streams of songs by Sabrina Carpenter.

In [37]:
task_1 = charts.loc[charts['artist_names'] == 'Sabrina Carpenter', 'streams'].sum()
task_1
Out[37]:
212960137

Complete using pandas.
Task 2: Find the total number of streams per artist, sorted by number of streams in descending order. Only show the top 5 artists.

In [38]:
task_2 = (
    charts
    .groupby('artist_names')
    ['streams']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)
task_2
Out[38]:
artist_names
Sabrina Carpenter        212960137
Billie Eilish            107797821
Chappell Roan             92990557
Linkin Park               89935206
Lady Gaga, Bruno Mars     76502673
Name: streams, dtype: int64

Complete using pandas.
Task 3: Find the artist with the lowest average number of streams, among artists with at least 5 songs in the Top 200.

In [39]:
task_3 = (
    charts
    .groupby('artist_names')
    .filter(lambda df: df.shape[0] >= 5) # Keep only artists with at least 5 songs.
    .groupby('artist_names')
    ['streams']
    .mean()
    .sort_values()
    .head(1)
)
task_3
Out[39]:
artist_names
Bruno Mars    1.24e+07
Name: streams, dtype: float64

Complete using pandas.
Task 4: Find the number of songs with a higher ranking this week than last week.

In [40]:
task_4 = charts[charts['rank'] > charts['previous_rank']].shape[0]
task_4
Out[40]:
119

Complete using pandas.
Task 5: charts_old contains the Top 200 songs in the previous week.

Find the song with the largest increase in streams between last week and this week, among songs that were in the Top 200 in both weeks.

In [41]:
charts_old = pd.read_csv('data/regional-global-weekly-2024-09-12.csv')
charts_old.head()
Out[41]:
rank uri artist_names track_name ... peak_rank previous_rank weeks_on_chart streams
0 1 spotify:track:2plbrEY59IikOBgBGLjaoe Lady Gaga, Bruno Mars Die With A Smile ... 1 1 4 74988392
1 2 spotify:track:6dOtVTDdiauQNBQEDOtlAB Billie Eilish BIRDS OF A FEATHER ... 1 2 17 56949755
2 3 spotify:track:5G2f63n7IPVPPjfNIGih7Q Sabrina Carpenter Taste ... 3 3 3 49847514
3 4 spotify:track:7tI8dRuH2Yc6RuoTjxo4dU Jimin Who ... 1 4 8 45431448
4 5 spotify:track:2qSkIjg1o9h3YT9RAgYN75 Sabrina Carpenter Espresso ... 1 5 22 44982843

5 rows × 9 columns

In [42]:
with_old = (
    charts[['uri', 'track_name', 'artist_names', 'streams']]
    .merge(charts_old[['uri', 'streams',]], on='uri', suffixes=('_new', '_old'))
)
task_5 = (
    with_old
    .assign(change=with_old['streams_new'] - with_old['streams_old'])
    .sort_values('change', ascending=False)
    [['track_name', 'artist_names', 'change']]
    .head(1)
)
task_5
Out[42]:
track_name artist_names change
5 Good Luck, Babe! Chappell Roan 3217539

Complete using pandas.
Task 6: Find the 4 songs with the most artists.

In [43]:
task_6 = (
    charts
    # Counting ', ' gives (number of artists - 1), which is fine for ranking.
    .assign(num_artists=charts['artist_names'].str.count(', '))
    .sort_values('num_artists', ascending=False)
    .head(4)
)
task_6
Out[43]:
rank uri artist_names track_name ... previous_rank weeks_on_chart streams num_artists
45 46 spotify:track:1BJJbSX6muJVF2AK7uH1x4 Adam Port, Stryv, Keinemusik, Orso, Malachiii Move ... 31 14 15987047 4
186 187 spotify:track:22skzmqfdWrjJylampe0kt Macklemore & Ryan Lewis, Macklemore, Ryan Lewi... Can't Hold Us (feat. Ray Dalton) ... 163 132 9050984 3
194 195 spotify:track:28drn6tQo95MRvO0jQEo5C Future, Metro Boomin, Travis Scott, Playboi Carti Type Shit ... -1 25 8942475 3
177 178 spotify:track:4QNpBfC0zvjKqPJcyqBy9W Pitbull, AFROJACK, Ne-Yo, Nayer Give Me Everything (feat. Nayer) ... 165 19 9265696 3

4 rows × 10 columns

SQL¶


Overview: SQL¶

  • SQL stands for "Structured Query Language". It is the standard language for database manipulation.
    Each database system – for instance, MySQL, SQLite, DuckDB, or PostgreSQL – has its own slightly different version of SQL, with features specific to that system.
  • SQL is a declarative language.
    In SQL, you just describe what you want calculated, not how you want it calculated. It's the database engine's job to figure out how to process your query.
SELECT artist_names, SUM(streams) AS total_streams FROM charts
GROUP BY artist_names
ORDER BY total_streams DESC
LIMIT 5;


One of the SQL queries we'll write shortly.

  • We'll work with two SQL database platforms: SQLite and DuckDB.
    SQLite comes pre-installed on most systems. DuckDB is open-source, and integrates with pandas really well.

Example SQL syntax¶

  • All code we write in SQL is referred to as a "query", and all SQL queries follow the same general template:
(The diagram of the template isn't reproduced here; a rough sketch follows below.)
  • W3Schools has examples of SQL syntax.
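A rough sketch of that general template. The pieces in angle brackets are placeholders, and every clause other than SELECT ... FROM is optional:

SELECT <columns> FROM <table>
WHERE <condition on rows>
GROUP BY <columns>
HAVING <condition on groups>
ORDER BY <columns>
LIMIT <number of rows>;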

Connecting to a database using sqlite3¶

  • sqlite3 comes pre-installed on most operating systems.
  • We'll answer our first few tasks by working with sqlite3 in the command-line.
  • To follow along:
    1. Open your Terminal.
    2. cd to the lec10/data folder.
    3. Run sqlite3 spotify.db.
  • These steps will open a sqlite3 interpreter, connected to the spotify.db database.
    A database file can contain multiple tables. This one contains two: charts and charts_old.
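Once you're inside the interpreter, two commands help you look around: .tables lists the tables in spotify.db, and a quick SELECT peeks at the first few rows of charts. A small sketch (.tables is a sqlite3 dot-command, not standard SQL):

.tables
SELECT * FROM charts LIMIT 3;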

Complete using SQL.
Task 1: Find the total number of streams of songs by Sabrina Carpenter.

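The original slide shows the query being run in the sqlite3 interpreter. As a sketch, here's one query that computes the answer:

SELECT SUM(streams) FROM charts
WHERE artist_names = 'Sabrina Carpenter';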
In [44]:
task_1
Out[44]:
212960137

Complete using SQL.
Task 2: Find the total number of streams per artist, sorted by number of streams in descending order. Only show the top 5 artists.

In the sqlite3 interpreter, we run the query we saw a few slides ago:

SELECT artist_names, SUM(streams) AS total_streams FROM charts
GROUP BY artist_names
ORDER BY total_streams DESC
LIMIT 5;
In [45]:
task_2
Out[45]:
artist_names
Sabrina Carpenter        212960137
Billie Eilish            107797821
Chappell Roan             92990557
Linkin Park               89935206
Lady Gaga, Bruno Mars     76502673
Name: streams, dtype: int64

Aside: DuckDB¶

  • Instead of having to run all of our SQL queries in the Terminal using a .db file, we can use DuckDB, which allows us to execute queries in a notebook, directly against pandas DataFrames!
  • To use DuckDB, pip install it:
In [46]:
!pip install duckdb
Requirement already satisfied: duckdb in /Users/surajrampure/miniforge3/envs/pds/lib/python3.10/site-packages (1.1.1)
In [47]:
import duckdb
  • Then, use the run_sql function, defined below, to execute SQL queries using the DataFrames in our notebook as SQL tables.
In [48]:
def run_sql(query_str, as_df=False):
    # DuckDB can refer to DataFrames defined in the notebook as if
    # they were tables ("replacement scans").
    out = duckdb.query(query_str)
    if as_df:
        # Convert DuckDB's output to a pandas DataFrame.
        return out.to_df()
    else:
        return out
In [49]:
run_sql('''
SELECT artist_names, SUM(streams) AS total_streams FROM charts
GROUP BY artist_names
ORDER BY total_streams DESC
LIMIT 5;
''')
Out[49]:
┌───────────────────────┬───────────────┐
│     artist_names      │ total_streams │
│        varchar        │    int128     │
├───────────────────────┼───────────────┤
│ Sabrina Carpenter     │     212960137 │
│ Billie Eilish         │     107797821 │
│ Chappell Roan         │      92990557 │
│ Linkin Park           │      89935206 │
│ Lady Gaga, Bruno Mars │      76502673 │
└───────────────────────┴───────────────┘
  • We can even ask for the output back as a DataFrame!
    This means that you can combine both SQL operations and pandas operations.
In [50]:
run_sql('''
SELECT artist_names, SUM(streams) AS total_streams FROM charts
GROUP BY artist_names
ORDER BY total_streams DESC
LIMIT 5;
''', as_df=True)
Out[50]:
artist_names total_streams
0 Sabrina Carpenter 2.13e+08
1 Billie Eilish 1.08e+08
2 Chappell Roan 9.30e+07
3 Linkin Park 8.99e+07
4 Lady Gaga, Bruno Mars 7.65e+07

Complete using SQL.
Task 3: Find the artist with the lowest average number of streams, among artists with at least 5 songs in the Top 200.

In [51]:
run_sql('''
SELECT artist_names, AVG(streams) as avg_streams FROM charts
GROUP BY artist_names
HAVING COUNT(*) >= 5
ORDER BY avg_streams
LIMIT 1;
''')
Out[51]:
┌──────────────┬─────────────┐
│ artist_names │ avg_streams │
│   varchar    │   double    │
├──────────────┼─────────────┤
│ Bruno Mars   │  12411488.8 │
└──────────────┴─────────────┘
In [52]:
task_3
Out[52]:
artist_names
Bruno Mars    1.24e+07
Name: streams, dtype: float64

Complete using SQL.
Task 4: Find the number of songs with a higher ranking this week than last week.

In [53]:
run_sql('''
SELECT COUNT(*) as num_songs FROM (
    SELECT * FROM charts
    WHERE rank > previous_rank
)
''')
Out[53]:
┌───────────┐
│ num_songs │
│   int64   │
├───────────┤
│       119 │
└───────────┘
In [54]:
task_4
Out[54]:
119

Complete using SQL.
Task 5: charts_old contains the Top 200 songs in the previous week.

Find the song with the largest increase in streams between last week and this week, among songs that were in the Top 200 in both weeks.

In [55]:
run_sql('''
SELECT track_name, artist_names, (new_streams - old_streams) AS change
FROM (
    SELECT charts.uri, 
           charts.track_name, 
           charts.artist_names, 
           charts.streams AS new_streams, 
           charts_old.uri, 
           charts_old.streams AS old_streams
    FROM charts
    INNER JOIN charts_old ON charts.uri = charts_old.uri
) AS merged
ORDER BY change DESC
LIMIT 1;
''', as_df=True)
Out[55]:
track_name artist_names change
0 Good Luck, Babe! Chappell Roan 3217539
In [56]:
task_5
Out[56]:
track_name artist_names change
5 Good Luck, Babe! Chappell Roan 3217539

Complete using SQL.
Task 6: Find the 4 songs with the most artists.
Fun fact: the syntax used in our solution doesn't exist in SQLite, but does exist in DuckDB.

In [57]:
run_sql('''
SELECT track_name, artist_names, array_length(str_split(artist_names, ', ')) AS num_artists
FROM charts
ORDER BY num_artists DESC
LIMIT 4;
''', as_df=True)
Out[57]:
track_name artist_names num_artists
0 Move Adam Port, Stryv, Keinemusik, Orso, Malachiii 5
1 Give Me Everything (feat. Nayer) Pitbull, AFROJACK, Ne-Yo, Nayer 4
2 Can't Hold Us (feat. Ray Dalton) Macklemore & Ryan Lewis, Macklemore, Ryan Lewi... 4
3 Type Shit Future, Metro Boomin, Travis Scott, Playboi Carti 4
In [58]:
task_6
Out[58]:
rank uri artist_names track_name ... previous_rank weeks_on_chart streams num_artists
45 46 spotify:track:1BJJbSX6muJVF2AK7uH1x4 Adam Port, Stryv, Keinemusik, Orso, Malachiii Move ... 31 14 15987047 4
186 187 spotify:track:22skzmqfdWrjJylampe0kt Macklemore & Ryan Lewis, Macklemore, Ryan Lewi... Can't Hold Us (feat. Ray Dalton) ... 163 132 9050984 3
194 195 spotify:track:28drn6tQo95MRvO0jQEo5C Future, Metro Boomin, Travis Scott, Playboi Carti Type Shit ... -1 25 8942475 3
177 178 spotify:track:4QNpBfC0zvjKqPJcyqBy9W Pitbull, AFROJACK, Ne-Yo, Nayer Give Me Everything (feat. Nayer) ... 165 19 9265696 3

4 rows × 10 columns

Google Sheets¶


Overview: Google Sheets¶

  • While we're big fans of writing code in this class, in the business world, spreadsheets are (still) the most common tool for tabular data manipulation.
  • Spreadsheets are great for showing information directly to someone else, and are easy to share.
  • Microsoft Excel is widely popular, but Google Sheets is increasingly common as well, and you've likely used it before.
  • Follow along by opening this Google Sheet and going to File > Make a copy.

Complete using Google Sheets.
Task 1: Find the total number of streams of songs by Sabrina Carpenter.

Relevant functions: FILTER, SUMIF.
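As a sketch, one way to do this with SUMIF. The column letters and row range are assumptions about our copy of the sheet: artist_names in column C, streams in column I, and data in rows 2 through 201.

=SUMIF(C2:C201, "Sabrina Carpenter", I2:I201)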

In [59]:
task_1
Out[59]:
212960137

Complete using Google Sheets.
Task 2: Find the total number of streams per artist, sorted by number of streams in descending order. Only show the top 5 artists.

Relevant functions: UNIQUE, SUMIF.

In [60]:
task_2
Out[60]:
artist_names
Sabrina Carpenter        212960137
Billie Eilish            107797821
Chappell Roan             92990557
Linkin Park               89935206
Lady Gaga, Bruno Mars     76502673
Name: streams, dtype: int64

Complete using Google Sheets.
Task 3: Find the artist with the lowest average number of streams, among artists with at least 5 songs in the Top 200.

Relevant functions: AVERAGEIF.

In [61]:
task_3
Out[61]:
artist_names
Bruno Mars    1.24e+07
Name: streams, dtype: float64

Complete using Google Sheets.
Task 4: Find the number of songs with a higher ranking this week than last week.

Relevant functions: IF.
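As a sketch following the hint (again assuming rank in column A, previous_rank in column G, and data in rows 2 through 201): IF marks each song whose rank changed in this direction, and SUM counts the marks.

=SUM(ARRAYFORMULA(IF(A2:A201 > G2:G201, 1, 0)))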

In [62]:
task_4
Out[62]:
119

Complete using Google Sheets.
Task 5: charts_old contains the Top 200 songs in the previous week.

Find the song with the largest increase in streams between last week and this week, among songs that were in the Top 200 in both weeks.

Relevant functions: FILTER.

In [63]:
task_5
Out[63]:
track_name artist_names change
5 Good Luck, Babe! Chappell Roan 3217539

Complete using Google Sheets.
Task 6: Find the 4 songs with the most artists.

Relevant functions: SPLIT, LEN, SUBSTITUTE.

In [64]:
task_6
Out[64]:
rank uri artist_names track_name ... previous_rank weeks_on_chart streams num_artists
45 46 spotify:track:1BJJbSX6muJVF2AK7uH1x4 Adam Port, Stryv, Keinemusik, Orso, Malachiii Move ... 31 14 15987047 4
186 187 spotify:track:22skzmqfdWrjJylampe0kt Macklemore & Ryan Lewis, Macklemore, Ryan Lewi... Can't Hold Us (feat. Ray Dalton) ... 163 132 9050984 3
194 195 spotify:track:28drn6tQo95MRvO0jQEo5C Future, Metro Boomin, Travis Scott, Playboi Carti Type Shit ... -1 25 8942475 3
177 178 spotify:track:4QNpBfC0zvjKqPJcyqBy9W Pitbull, AFROJACK, Ne-Yo, Nayer Give Me Everything (feat. Nayer) ... 165 19 9265696 3

4 rows × 10 columns

Key takeaways¶

  • The DataFrame manipulation techniques we've learned about over the past month generalize to other systems that you might be exposed to.
  • In 20 years, pandas may not exist, but grouping, pivoting, querying, etc. are all concepts that will still be useful.

What's next?¶

  • Next week, we'll learn how to extract information from messy text data.
  • Then, after the Midterm Exam, we'll switch our focus to machine learning!