Announcements:
Goals:
& and | (beware the precedence!)
def data_url(city_name):
    base_url = "https://facultyweb.cs.wwu.edu/~wehrwes/courses/csci141_21s/fp/data/"
    return base_url + city_name + ".csv"
Game Plan:
HourlyDryBulbTemperature and HourlyPrecipitation to floats
DATE from object/string to a datetime
import pandas as pd
def data_url(city_name):
    """Return the URL of the course-hosted CSV data file for `city_name`.

    The result is the fixed data-directory URL followed by `city_name`
    and a ".csv" suffix, e.g. data_url("WA_Bellingham") ends in
    "/fp/data/WA_Bellingham.csv".
    """
    csv_name = city_name + ".csv"
    return "https://facultyweb.cs.wwu.edu/~wehrwes/courses/csci141_21s/fp/data/" + csv_name
# Download the Bellingham, WA table. low_memory=False asks pandas to parse the
# file in one pass instead of in chunks (see the read_csv documentation).
bham = pd.read_csv(data_url("WA_Bellingham"), low_memory=False)

# HourlyDryBulbTemperature arrives as text; some entries end in "s".
# Strip that suffix first, then parse what is left as floats.
temps = bham["HourlyDryBulbTemperature"].str.rstrip("s")
temps = temps.astype(float)
bham["HourlyDryBulbTemperature"] = temps

# HourlyPrecipitation is text as well: substitute "0" for every "T",
# drop any trailing "s", and only then convert to floats.
precips = bham["HourlyPrecipitation"].str.replace("T", "0")
precips = precips.str.rstrip("s")
bham["HourlyPrecipitation"] = precips.astype(float)

# Summarize the dtypes and non-null counts of the three columns of interest.
bham[["DATE", "HourlyDryBulbTemperature", "HourlyPrecipitation"]].info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 11348 entries, 0 to 11347 Data columns (total 3 columns): DATE 11348 non-null object HourlyDryBulbTemperature 10982 non-null float64 HourlyPrecipitation 9296 non-null float64 dtypes: float64(2), object(1) memory usage: 266.0+ KB
DATE from object/string to a datetime
bham["DATE"] = pd.to_datetime(bham["DATE"])
# Display the date, temperature and precipitation columns (all rows).
bham.loc[:, ["DATE", "HourlyDryBulbTemperature", "HourlyPrecipitation"]]
| DATE | HourlyDryBulbTemperature | HourlyPrecipitation | |
|---|---|---|---|
| 0 | 2020-01-01 00:14:00 | 53.0 | NaN | 
| 1 | 2020-01-01 00:24:00 | 53.0 | NaN | 
| 2 | 2020-01-01 00:53:00 | 53.0 | 0.00 | 
| 3 | 2020-01-01 01:53:00 | 52.0 | 0.00 | 
| 4 | 2020-01-01 02:53:00 | 52.0 | 0.00 | 
| 5 | 2020-01-01 03:53:00 | 52.0 | 0.00 | 
| 6 | 2020-01-01 04:53:00 | 51.0 | NaN | 
| 7 | 2020-01-01 05:53:00 | 50.0 | NaN | 
| 8 | 2020-01-01 06:53:00 | 49.0 | 0.00 | 
| 9 | 2020-01-01 07:53:00 | 49.0 | 0.00 | 
| 10 | 2020-01-01 08:53:00 | 49.0 | 0.00 | 
| 11 | 2020-01-01 09:53:00 | 50.0 | 0.00 | 
| 12 | 2020-01-01 10:53:00 | 50.0 | 0.00 | 
| 13 | 2020-01-01 11:53:00 | 51.0 | 0.00 | 
| 14 | 2020-01-01 12:53:00 | 50.0 | 0.00 | 
| 15 | 2020-01-01 13:53:00 | 52.0 | NaN | 
| 16 | 2020-01-01 14:53:00 | 50.0 | 0.00 | 
| 17 | 2020-01-01 15:53:00 | 50.0 | 0.00 | 
| 18 | 2020-01-01 16:53:00 | 50.0 | 0.00 | 
| 19 | 2020-01-01 17:53:00 | 49.0 | 0.00 | 
| 20 | 2020-01-01 18:53:00 | 48.0 | 0.00 | 
| 21 | 2020-01-01 19:53:00 | 46.0 | 0.00 | 
| 22 | 2020-01-01 20:53:00 | 45.0 | 0.00 | 
| 23 | 2020-01-01 21:27:00 | 45.0 | NaN | 
| 24 | 2020-01-01 21:53:00 | 45.0 | 0.00 | 
| 25 | 2020-01-01 22:53:00 | 44.0 | 0.00 | 
| 26 | 2020-01-01 23:53:00 | 44.0 | 0.00 | 
| 27 | 2020-01-01 23:59:00 | NaN | NaN | 
| 28 | 2020-01-02 00:53:00 | 44.0 | 0.00 | 
| 29 | 2020-01-02 01:53:00 | 44.0 | 0.00 | 
| ... | ... | ... | ... | 
| 11318 | 2020-12-31 06:53:00 | 41.0 | 0.02 | 
| 11319 | 2020-12-31 07:10:00 | 41.0 | 0.00 | 
| 11320 | 2020-12-31 07:53:00 | 41.0 | 0.00 | 
| 11321 | 2020-12-31 08:53:00 | 41.0 | 0.00 | 
| 11322 | 2020-12-31 09:18:00 | 42.0 | NaN | 
| 11323 | 2020-12-31 09:39:00 | 42.0 | NaN | 
| 11324 | 2020-12-31 09:53:00 | 42.0 | 0.00 | 
| 11325 | 2020-12-31 10:53:00 | 43.0 | 0.00 | 
| 11326 | 2020-12-31 11:53:00 | 43.0 | 0.00 | 
| 11327 | 2020-12-31 12:53:00 | 44.0 | 0.00 | 
| 11328 | 2020-12-31 13:53:00 | 45.0 | 0.00 | 
| 11329 | 2020-12-31 14:53:00 | 45.0 | NaN | 
| 11330 | 2020-12-31 15:53:00 | 45.0 | 0.01 | 
| 11331 | 2020-12-31 16:53:00 | 44.0 | 0.01 | 
| 11332 | 2020-12-31 16:56:00 | 44.0 | NaN | 
| 11333 | 2020-12-31 17:15:00 | 45.0 | 0.00 | 
| 11334 | 2020-12-31 17:38:00 | 45.0 | 0.00 | 
| 11335 | 2020-12-31 17:53:00 | 45.0 | 0.01 | 
| 11336 | 2020-12-31 17:55:00 | 45.0 | 0.00 | 
| 11337 | 2020-12-31 18:02:00 | 45.0 | 0.00 | 
| 11338 | 2020-12-31 18:09:00 | 45.0 | 0.00 | 
| 11339 | 2020-12-31 18:16:00 | 45.0 | 0.00 | 
| 11340 | 2020-12-31 18:43:00 | 45.0 | 0.00 | 
| 11341 | 2020-12-31 18:53:00 | 45.0 | 0.00 | 
| 11342 | 2020-12-31 19:53:00 | 46.0 | NaN | 
| 11343 | 2020-12-31 20:53:00 | 49.0 | NaN | 
| 11344 | 2020-12-31 21:53:00 | 50.0 | NaN | 
| 11345 | 2020-12-31 22:53:00 | 49.0 | 0.01 | 
| 11346 | 2020-12-31 23:53:00 | 49.0 | 0.03 | 
| 11347 | 2020-12-31 23:59:00 | NaN | NaN | 
11348 rows × 3 columns
# Boolean mask selecting February 2020 as a half-open range: from midnight on
# Feb 1 (inclusive, so an observation stamped exactly 2020-02-01 00:00:00 is
# kept) up to, but not including, midnight on Mar 1.
# Each comparison is parenthesized because & binds more tightly than >= and <.
mask = (bham["DATE"] >= "2020-02-01") & (bham["DATE"] < "2020-03-01")

# Keep only the rows where the mask is True.
feb = bham[mask]

# Histogram of the February temperature values.
feb["HourlyDryBulbTemperature"].plot.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7fe756c28588>
# Histogram of the February precipitation values.
feb["HourlyPrecipitation"].plot(kind="hist")
<matplotlib.axes._subplots.AxesSubplot at 0x7fe76f94cc18>
# Scatter plot of February precipitation (y axis) against temperature (x axis).
feb.plot.scatter(x="HourlyDryBulbTemperature", y="HourlyPrecipitation")
<matplotlib.axes._subplots.AxesSubplot at 0x7fe76f8dd860>