L12 - Data Collection and Structured Data 2¶

Scraping Ethics; APIs; Reshaping and Merging¶

Announcements¶

  • Code along with me again today!

Goals:¶

  • Understand how to responsibly and ethically use web scraping to collect datasets.
  • Know how to make basic use of an API to fetch data.
  • Know how to reshape tables from long to wide and from wide to long format.
  • Know how to join tables using left, right, inner, and outer joins.

Scraping Ethics¶

  • What does it cost you to scrape a website?

  • What does it cost the person/company/entity of the website being scraped?

  • Should anyone be able to scrape anything?

  • Should we do a Data Ethics reading on web scraping in the AI era?

Scraping Etiquette¶

Things to keep in mind:

  • First, ask: can you get the data without scraping?
    • If the service provides downloadable datasets or an API, use these instead of scraping.
  • Scraping public data from websites is generally OK (but I'm not a lawyer and this is not legal advice).
  • Most websites will have Terms of Service or Terms of Use. Violating these may be illegal (but I am not a lawyer and this is not legal advice).
    • Most sites will also have a robots.txt which specifies how and what non-human users may access. Example: https://www.wwu.edu/robots.txt
  • Don't redistribute data without permission.
  • Rate limit your scraping requests - wait at least 1 second between requests for a typical webpage; 5-10 seconds is better.
    • robots.txt may specify a rate limit; respect it.
    • If you don't rate limit, you are indistinguishable from a denial-of-service attack.
    • If pages are large or involve database queries on the backend, it's polite to wait even longer between requests.
  • Always save results instead of re-requesting (see the sketch after this list).
    • Save the raw results early, before doing much analysis, so you can change your analysis without re-scraping.
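
Here's a minimal sketch of what polite scraping can look like in practice: rate-limited requests, with each page saved to disk so it never has to be re-requested. The URLs and filenames are hypothetical placeholders.

In [ ]:
import time
from pathlib import Path
import requests

# hypothetical list of pages to scrape
urls = [f"https://example.com/page/{i}" for i in range(1, 4)]

cache_dir = Path("scraped_pages")
cache_dir.mkdir(exist_ok=True)

for i, url in enumerate(urls):
    out_path = cache_dir / f"page_{i}.html"
    if out_path.exists():
        continue                      # already saved - don't re-request
    resp = requests.get(url)
    out_path.write_text(resp.text)
    time.sleep(5)                     # wait between requests; longer for heavy pages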

Using APIs¶

Scraping is a workaround; APIs (application programming interfaces) are designed to respond to programmatic requests for data.

There are many different sorts of APIs, but the gist of how to use them is:

  • Construct a URL that encodes the parameters of your request (see the sketch after this list)
  • Visit that URL in code (e.g. via Python's requests) or some other tool (e.g. the curl Unix command)
    • You can use a web browser, but the response generally isn't a displayable webpage.
  • Get back a response in some kind of structured data format - often JSON or XML.
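
To make the first bullet concrete, here's a tiny sketch of what "encoding the parameters" into a URL looks like, using a made-up endpoint; the query string is the part after the ?:

In [ ]:
from urllib.parse import urlencode

# hypothetical endpoint and parameters, just to show the encoded URL
base = "https://api.example.com/weather"
params = {"q": "Seattle", "units": "metric"}
print(base + "?" + urlencode(params))
# prints: https://api.example.com/weather?q=Seattle&units=metric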

API Demo¶

OpenWeatherMap.org has an API with a generous free usage tier - you can make up to 1000 requests per day for free.

API Keys and Security¶

To use it, you need an API key - you can register and get your own key at openweathermap.org.

API keys should be treated like passwords - they are a secret; if I give you mine, you can burn through my 1000-request quota as quickly as you like.

Even worse, many (most?) APIs are paid, either by subscription with limits or per-call. Leaking your API key could be equivalent to leaking your credit card!

I've stored mine in a file to avoid including it plaintext in this notebook:

In [60]:
API_KEY = open("/cluster/home/wehrwes/Documents/openweathermap_api_key.txt").read().strip()
BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
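
Another common approach (a sketch, not what's used in this notebook) is to keep the key in an environment variable, set outside the notebook, so it never appears in your code or in files you might share:

In [ ]:
import os

# hypothetical: assumes you've run something like
#   export OPENWEATHERMAP_API_KEY=...
# in your shell before starting the notebook
env_key = os.environ.get("OPENWEATHERMAP_API_KEY")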
In [61]:
import time
import json
import requests
import pandas as pd
import seaborn as sns

A Basic API request with Python¶

In [62]:
def get_weather_json(city):
    params = {
        'q': city,
        'appid': API_KEY,
        'units': 'metric'
    }
    return requests.get(BASE_URL, params=params).json()

Let's try it out:

In [63]:
seattle = get_weather_json("Seattle")
print(seattle)
{'coord': {'lon': -122.3321, 'lat': 47.6062}, 'weather': [{'id': 803, 'main': 'Clouds', 'description': 'broken clouds', 'icon': '04d'}], 'base': 'stations', 'main': {'temp': 9.89, 'feels_like': 8.06, 'temp_min': 8.46, 'temp_max': 11.04, 'pressure': 1026, 'humidity': 84, 'sea_level': 1026, 'grnd_level': 1016}, 'visibility': 10000, 'wind': {'speed': 3.6, 'deg': 200}, 'clouds': {'all': 75}, 'dt': 1761592252, 'sys': {'type': 2, 'id': 2009669, 'country': 'US', 'sunrise': 1761576372, 'sunset': 1761613193}, 'timezone': -25200, 'id': 5809844, 'name': 'Seattle', 'cod': 200}

This is an example of another structured data format called JSON (JavaScript Object Notation).

Similar to a Python dictionary, a JSON object represents a collection of key-value pairs. They can be nested to represent hierarchical structure similar to what we saw with XML; many people find the syntax to be a little nicer than XML.

The response came back from the API as text, but requests has already parsed it into a Python dict for us. Python's json module converts between JSON text and Python objects; here we'll use it to pretty-print the response:

In [64]:
print(json.dumps(seattle, indent=2))
{
  "coord": {
    "lon": -122.3321,
    "lat": 47.6062
  },
  "weather": [
    {
      "id": 803,
      "main": "Clouds",
      "description": "broken clouds",
      "icon": "04d"
    }
  ],
  "base": "stations",
  "main": {
    "temp": 9.89,
    "feels_like": 8.06,
    "temp_min": 8.46,
    "temp_max": 11.04,
    "pressure": 1026,
    "humidity": 84,
    "sea_level": 1026,
    "grnd_level": 1016
  },
  "visibility": 10000,
  "wind": {
    "speed": 3.6,
    "deg": 200
  },
  "clouds": {
    "all": 75
  },
  "dt": 1761592252,
  "sys": {
    "type": 2,
    "id": 2009669,
    "country": "US",
    "sunrise": 1761576372,
    "sunset": 1761613193
  },
  "timezone": -25200,
  "id": 5809844,
  "name": "Seattle",
  "cod": 200
}
In [ ]:
# save the response to a file so we don't need to re-request it
with open("seattle_json.csv", "w") as f:
    json.dump(seattle, f)
In [ ]:
# if you didn't make the API call, use this to load up a sample JSON result:
seattle = requests.get("https://facultyweb.cs.wwu.edu/~wehrwes/courses/data311_25f/lectures/L12/seattle_json.csv").json()

Since it's just a dict, we can access things using familiar dictionary indexing syntax:

In [67]:
# get the temperature
seattle["main"]["temp"]
Out[67]:
9.89

Exercise: get the wind speed

In [69]:
seattle["wind"]["speed"]
Out[69]:
3.6

Now let's use this to build a little dataset of weather for a few cities:

In [70]:
cities = ["Seattle", "Los Angeles", "London", "Paris", "Tokyo", "Sydney", "New York"]

# Fetch current weather for multiple cities
data = {}
for city in cities:
    data[city] = get_weather_json(city)

    time.sleep(1)
In [71]:
data_table = []
for city, resp in data.items():
    data_table.append({
            'city': city,
            'temp': resp['main']['temp'],
            'feels_like': resp['main']['feels_like'],
            'humidity': resp['main']['humidity']
        })
In [72]:
df_orig = pd.DataFrame(data_table)
df_orig
Out[72]:
city temp feels_like humidity
0 Seattle 9.90 8.07 84
1 Los Angeles 21.16 21.19 71
2 London 11.75 10.81 70
3 Paris 11.01 10.23 79
4 Tokyo 15.73 14.77 54
5 Sydney 13.06 12.22 69
6 New York 13.30 11.83 44

Reshaping Data Tables: Long format vs Wide Format¶

When dealing with tabular data, there's a notion of "wide" vs "long" format.

The table we've built is considered wide because each property occupies a column:

In [73]:
df_wide = df_orig.set_index('city')
df_wide
Out[73]:
temp feels_like humidity
city
Seattle 9.90 8.07 84
Los Angeles 21.16 21.19 71
London 11.75 10.81 70
Paris 11.01 10.23 79
Tokyo 15.73 14.77 54
Sydney 13.06 12.22 69
New York 13.30 11.83 44

If we want to plot one column, for example, this is great:

In [74]:
sns.barplot(df_wide["temp"]);
[bar plot: temp by city]

If we want to plot them all alongside each other, Seaborn doesn't do what we really want:

In [75]:
sns.barplot(data=df_wide);
[bar plot of the wide-format table - not grouped the way we want]

Sometimes it's useful to convert to long format, where the values all live in one column and the properties (formerly column names) are a categorical column.

In pandas, we do this with melt.

Here, the parameters are:

  • id_vars: the column(s) that identify each row; this can be more than one column!
  • value_vars: the columns to melt (by default, every column not in id_vars)
  • var_name: the name of the categorical column that will contain the melted column names
  • value_name: the name of the column that will contain the values of the properties in the var_name column. Sound confusing? Example:
In [84]:
df_long = df_orig.melt(id_vars=['city'],
                       value_vars=["temp", "feels_like", "humidity"],
                       var_name='metric', value_name='value')
df_long
Out[84]:
city metric value
0 Seattle temp 9.90
1 Los Angeles temp 21.16
2 London temp 11.75
3 Paris temp 11.01
4 Tokyo temp 15.73
5 Sydney temp 13.06
6 New York temp 13.30
7 Seattle feels_like 8.07
8 Los Angeles feels_like 21.19
9 London feels_like 10.81
10 Paris feels_like 10.23
11 Tokyo feels_like 14.77
12 Sydney feels_like 12.22
13 New York feels_like 11.83
14 Seattle humidity 84.00
15 Los Angeles humidity 71.00
16 London humidity 70.00
17 Paris humidity 79.00
18 Tokyo humidity 54.00
19 Sydney humidity 69.00
20 New York humidity 44.00

Why would we want this? It might be more flexible for certain kinds of analysis (think groupby -> aggregate, maybe).

Also, Seaborn:

In [77]:
sns.barplot(data=df_long, x='city', y='value', hue='metric');
[grouped bar plot: value by city, colored by metric]

If you have long data and want wide data, you can use pivot:

In [82]:
# long to wide
df_long.pivot(index="city", columns="metric", values="value")
Out[82]:
metric feels_like humidity temp
city
London 10.81 70.0 11.75
Los Angeles 21.19 71.0 21.16
New York 11.83 44.0 13.30
Paris 10.23 79.0 11.01
Seattle 8.07 84.0 9.90
Sydney 12.22 69.0 13.06
Tokyo 14.77 54.0 15.73

Exercise: tips dataset¶

In [85]:
tips = sns.load_dataset('tips')
tips.head()
Out[85]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

Exercise: Use melt to stack total_bill and tip into one column so you can plot both on the same chart with different colors. Leave the other columns alone.

In [90]:
tips_long = tips.melt(id_vars=["day"],
                      value_vars=["total_bill", "tip"],
                      var_name="amount_type", value_name="dollars")
tips_long
Out[90]:
day amount_type dollars
0 Sun total_bill 16.99
1 Sun total_bill 10.34
2 Sun total_bill 21.01
3 Sun total_bill 23.68
4 Sun total_bill 24.59
... ... ... ...
483 Sat tip 5.92
484 Sat tip 2.00
485 Sat tip 2.00
486 Sat tip 1.75
487 Thur tip 3.00

488 rows × 3 columns

In [87]:
sns.barplot(data=tips_long, x='day', y='dollars', hue='amount_type');
[grouped bar plot: dollars by day, colored by amount_type]

Exercise / Example: Flights dataset¶

In [91]:
flights = sns.load_dataset('flights')
flights
Out[91]:
year month passengers
0 1949 Jan 112
1 1949 Feb 118
2 1949 Mar 132
3 1949 Apr 129
4 1949 May 121
... ... ... ...
139 1960 Aug 606
140 1960 Sep 508
141 1960 Oct 461
142 1960 Nov 390
143 1960 Dec 432

144 rows × 3 columns

This dataset comes in long format!

In [92]:
sns.lineplot(data=flights, x="month", y="passengers", hue="year");
[line plot: passengers by month, one line per year]

Exercise: Convert it to wide format using pivot. If we use year as the index, make one column per month, and have the table contain passenger counts for each year/month pair, we can plot a heatmap of the passenger counts over the years.

In [93]:
#flights_wide = ...
flights_wide
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[93], line 2
      1 #flights_wide = ...
----> 2 flights_wide

NameError: name 'flights_wide' is not defined
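One possible solution (a sketch):

In [ ]:
# year becomes the index, months become columns, cells hold passenger counts
flights_wide = flights.pivot(index="year", columns="month", values="passengers")
flights_wide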
In [ ]:
sns.heatmap(flights_wide, annot=True, fmt='d')

Merging Tables¶

Sometimes you have multiple tables with complementary information - this will happen in Lab 5, where we'll scrape two different sites for different info about (some of) the same movies.

How do we combine tables together? pd.merge.

In [94]:
employees = pd.DataFrame({
    'emp_id': [1, 2, 3, 4],
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'dept_id': [10, 20, 10, 30]
})
employees
Out[94]:
emp_id name dept_id
0 1 Alice 10
1 2 Bob 20
2 3 Charlie 10
3 4 Diana 30
In [95]:
departments = pd.DataFrame({
    'dept_id': [10, 20, 40],
    'dept_name': ['Engineering', 'Sales', 'Marketing']
})
departments
Out[95]:
dept_id dept_name
0 10 Engineering
1 20 Sales
2 40 Marketing
In [96]:
# INNER JOIN - Only employees with valid departments
employees.merge(departments, on='dept_id', how='inner')
Out[96]:
emp_id name dept_id dept_name
0 1 Alice 10 Engineering
1 2 Bob 20 Sales
2 3 Charlie 10 Engineering
In [97]:
# LEFT JOIN - All employees, even if department missing
employees.merge(departments, on='dept_id', how='left')
Out[97]:
emp_id name dept_id dept_name
0 1 Alice 10 Engineering
1 2 Bob 20 Sales
2 3 Charlie 10 Engineering
3 4 Diana 30 NaN
In [98]:
# RIGHT JOIN - All departments, even if no employees
employees.merge(departments, on='dept_id', how='right')
Out[98]:
emp_id name dept_id dept_name
0 1.0 Alice 10 Engineering
1 3.0 Charlie 10 Engineering
2 2.0 Bob 20 Sales
3 NaN NaN 40 Marketing
In [99]:
# OUTER JOIN - Everything from both tables
employees.merge(departments, on='dept_id', how='outer')
Out[99]:
emp_id name dept_id dept_name
0 1.0 Alice 10 Engineering
1 3.0 Charlie 10 Engineering
2 2.0 Bob 20 Sales
3 4.0 Diana 30 NaN
4 NaN NaN 40 Marketing

Exercise: I have two CSV files from my CSCI 141 class; one is from the start-of-quarter survey, and one is the final gradebook.

On the start-of-quarter survey (in a dataframe survey), students reported how many months of programming experience they had. Some students added the class late, and are not present in the survey results.

The gradebook (in a DataFrame grades) has all the grades for the quarter. Some students who dropped the course after the first week are not included in the gradebook.

Both files have the following columns:

  • Name (in Last, First format)
  • W number
  • Email address

I want to analyze the relationship between programming experience and final grades, so I can run something like

sns.scatterplot(data=combined, x="Months Experience", y="Final Grade")

How should I construct the combined dataframe that will enable this?
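
One possible approach (a sketch; the survey and gradebook each miss some students, so an inner join keeps exactly the students who appear in both - the only ones we can analyze anyway). The W number is probably the safest key, since names and emails are more likely to be formatted inconsistently. The "Months Experience" and "Final Grade" column names are taken from the plotting call above and are assumptions about the files:

In [ ]:
# inner join: keep only students present in both the survey and the gradebook
combined = survey.merge(grades, on="W number", how="inner")
sns.scatterplot(data=combined, x="Months Experience", y="Final Grade")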