import pandas as pd


# Sample of ten 0s followed by one hundred 2s, held in a one-column DataFrame.
zero_two = [0] * 10 + [2] * 100
df = pd.DataFrame(zero_two)
# Show the column's standard deviation and mean together as a tuple.
df.std(), df.mean()

(0    0.577591
 dtype: float64,
 0    1.818182
 dtype: float64)


# List comprehension pattern: [value for loop_var in collection]
# Here: the squares of 1 through 9.
[n ** 2 for n in range(1, 10)]

[]

# Emit ten asterisks on one line with no trailing newline.
print("*" * 10, end="")

# A list of ten zeros.
[0] * 10

**********


# Same construction with equal counts: ten 0s and ten 2s.
zero_two = [0] * 10 + [2] * 10
df = pd.DataFrame(zero_two)
# Standard deviation of the single column.
df.std()


import random
import seaborn as sns


def flip_coins(N, bias=0.5):
    """Flip N coins and return a list of the results.

    Args:
        N: Number of flips to simulate.
        bias: Probability that a single flip comes up "H"; "T" gets the
            remaining 1 - bias. Must lie in [0, 1].

    Returns:
        A list of N strings, each either "H" or "T".

    Raises:
        ValueError: If bias is outside [0, 1] (including NaN).
    """
    # random.choices does not reject a negative weight, so an out-of-range
    # bias would silently yield a skewed sample instead of an error.
    if not 0 <= bias <= 1:
        raise ValueError(f"bias must be between 0 and 1, got {bias!r}")
    return random.choices(["H", "T"], weights=[bias, 1 - bias], k=N)

# Number of simulated flips (default bias of 0.5).
N = 100_000

# Histogram of the "H"/"T" outcomes.
sns.histplot(flip_coins(N))

<AxesSubplot:ylabel='Count'>


def roll_dice(N):
    """Simulate N rolls of a fair six-sided die.

    Returns a list of N integers, each drawn with equal weight from 1-6.
    """
    faces = range(1, 7)
    return random.choices(faces, k=N)

# One million rolls, drawn with one histogram bin per face.
N = 1_000_000
sns.histplot(roll_dice(N), bins=6)

<AxesSubplot:ylabel='Count'>


def n_heads(flips):
    """Return how many entries of ``flips`` are equal to "H"."""
    return sum(1 for outcome in flips if outcome == "H")

# n = flips per experiment, N = number of experiments.
n, N = 2000, 10_000
# Head count from each experiment, with "H" weighted at 0.1.
results = [n_heads(flip_coins(n, bias=0.1)) for _ in range(N)]
sns.histplot(results, bins=10)

<AxesSubplot:ylabel='Count'>


# Location of the NHANES CSV used for the body-measurement examples.
data_url = "https://fw.cs.wwu.edu/~wehrwes/courses/data311_21f/data/NHANES/NHANES.csv"
# Raw NHANES column codes mapped to readable names (units in the comments).
cols_renamed = {
    "SEQN": "SEQN",
    "RIAGENDR": "Gender",  # 1 = M, 2 = F
    "RIDAGEYR": "Age",  # years
    "BMXWT": "Weight",  # kg
    "BMXHT": "Height",  # cm
    "BMXLEG": "Leg",  # cm
    "BMXARML": "Arm",  # cm
    "BMXARMC": "Arm Cir",  # cm
    "BMXWAIST": "Waist Cir",  # cm
}

# Read the CSV, apply the readable names, and discard the SEQN column.
df = (
    pd.read_csv(data_url)
    .rename(cols_renamed, axis="columns")
    .drop("SEQN", axis="columns")
)
# Keep only rows whose Age is 21 or more.
df = df[df["Age"] >= 21]
sns.histplot(data=df, x="Height")

<AxesSubplot:xlabel='Height', ylabel='Count'>


# Arm column in 20 bins, with bar heights normalized to a density.
ax = sns.histplot(data=df, x="Arm", bins=20, stat="density")


# Leg column as a 20-bin count histogram.
sns.histplot(data=df, x="Leg", bins=20)

<AxesSubplot:xlabel='Leg', ylabel='Count'>


# Build a ten-row table in which each row has half the worth of the previous
# row and four times the count.
worth_millions, count = 190000, 1
worths, counts = [], []
for i in range(10):
    worths.append(worth_millions)
    counts.append(count)
    # Step to the next row: halve the worth, quadruple the count.
    worth_millions, count = worth_millions / 2, count * 4


df = pd.DataFrame({"Worth": worths, "Count": counts})
df


# Scatter plot of Count against Worth on linear axes.
df.plot(kind="scatter", x="Worth", y="Count")

<AxesSubplot:xlabel='Worth', ylabel='Count'>


# Same scatter plot with both axes on a log scale.
df.plot(kind="scatter", x="Worth", y="Count", loglog=True)

<AxesSubplot:xlabel='Worth', ylabel='Count'>


# Reload the NHANES data: `df` was rebound to the Worth/Count table above,
# so the survey measurements have to be read and cleaned again.
data_url = "https://fw.cs.wwu.edu/~wehrwes/courses/data311_21f/data/NHANES/NHANES.csv"
# Raw NHANES column codes mapped to readable names (units in the comments).
cols_renamed = {"SEQN": "SEQN",
                "RIAGENDR": "Gender", # 1 = M, 2 = F
                "RIDAGEYR": "Age", # years
                "BMXWT": "Weight", # kg
                "BMXHT": "Height", # cm
                "BMXLEG": "Leg", # cm
                "BMXARML": "Arm", # cm
                "BMXARMC": "Arm Cir", # cm
                "BMXWAIST": "Waist Cir"} # cm

df = pd.read_csv(data_url)
df = df.rename(cols_renamed, axis='columns')
df = df.drop("SEQN", axis='columns')
# Keep only rows whose Age is 21 or more. The explicit copy makes the
# filtered frame independent of the unfiltered one, so the "Height-z" column
# added below does not trigger pandas' SettingWithCopyWarning.
df = df[df["Age"] >= 21].copy()


# Pull out the Height column (cm) and display it.
ht_col = df["Height"]
ht_col


# Z-score: subtract the column mean, then divide by the column's standard
# deviation; store the result as a new "Height-z" column and display it.
df["Height-z"] = (ht_col - ht_col.mean()) / ht_col.std()
df["Height-z"]


# Histogram of the standardized heights.
sns.histplot(data=df, x="Height-z")

	Worth	Count
0	190000.00000	1
1	95000.00000	4
2	47500.00000	16
3	23750.00000	64
4	11875.00000	256
5	5937.50000	1024
6	2968.75000	4096
7	1484.37500	16384
8	742.18750	65536
9	371.09375	262144

Lecture 12 - Probability Distributions, Z-Scores and Normalization (Logs)

Announcements:

Goals:

Uniform Distribution

Uniform Distribution - Analytically:

Binomial Distribution

Binomial Distribution - Analytically:

Gaussian Distribution

Gaussian Distribution, Analytically

Power Law Distributions

Power Law Distribution, Analytically:

Z-Scores and Normalization