import numpy as np

# Build a 4x3 integer array holding 0..11 in row-major order.
# np.arange creates the range directly as an array instead of converting a
# Python range object element by element.
a = np.arange(12).reshape(4, 3)
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

# take the first two rows and every column:
a[0:2,:]

array([[0, 1, 2],
       [3, 4, 5]])

# 1D integer array holding 10..19 (the stop value 20 is excluded).
# np.arange builds it directly rather than converting a Python range.
a = np.arange(10, 20)
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

# get the first, third, and fifth elements:
a[np.array([0, 2, 4])]

array([10, 12, 14])

# get the fourth, second, and second elements (!):
a[np.array([3, 1, 1])]

array([13, 11, 11])

# 2x2 float array used by the boolean-indexing examples below:
# 2 in the top-left corner, 0 in the bottom-right, 1 elsewhere.
b = np.array([
    [2.0, 1.0],
    [1.0, 0.0],
])

b

array([[2., 1.],
       [1., 0.]])

# 2x2 boolean mask that is True on the main diagonal and False elsewhere.
mask = np.eye(2, dtype=bool)
mask

array([[ True, False],
       [False,  True]])

# boolean indexing: keep only the elements of b where mask is True
# (the result is a 1D array)
b[mask]

array([2., 0.])

# pull out all elements of b that are greater than zero:
b[b > 0]

array([2., 1., 1.])

# 3D integer array of shape (2, 4, 3) holding 0..23 in row-major order.
# np.arange builds the range directly rather than converting a Python range.
c = np.arange(24).reshape(2, 4, 3)
c

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17],
        [18, 19, 20],
        [21, 22, 23]]])

# take one 2D slice
c[:,:,0]

array([[ 0,  3,  6,  9],
       [12, 15, 18, 21]])

# take another 2D slice along a different axis
c[:, 1, :]

array([[ 3,  4,  5],
       [15, 16, 17]])

import imageio.v3 as imageio
import matplotlib.pyplot as plt

# read the image file into an array
beans = imageio.imread("/cluster/academic/DATA311/202620/beans_gray.jpeg")

# display the array as a grayscale image
plt.imshow(beans, cmap='gray')

<matplotlib.image.AxesImage at 0x14ee7d243260>

beans.shape

(200, 200)

beans.dtype

dtype('uint8')

# threshold the image: show a boolean array that is True wherever the pixel value exceeds 127
plt.imshow(beans>127, cmap='gray')

<matplotlib.image.AxesImage at 0x14ee7cf047a0>

# use a boolean mask to pull out the pixels with value above 127, then average them
pixels_of_interest = beans[beans > 127]
pixels_of_interest.mean()

np.float64(160.46054636482367)

np.sum(beans)

np.uint64(4149006)

# average along axis 0 (one mean per column), then get the index of the column with the largest mean
beans.mean(axis=0).argmax()

np.int64(193)

from pandas import Series, DataFrame
import pandas as pd

s = Series([9,6,8,4])
s

0    9
1    6
2    8
3    4
dtype: int64

# get the values:
s.values

array([9, 6, 8, 4])

# get the index:
s.index

RangeIndex(start=0, stop=4, step=1)

# get the third value:
s[2]

np.int64(8)

# a Series with explicit string labels instead of the default integer index
s2 = Series(data=[9, 6, 8, 4], index=["win", "spr", "sum", "fal"])
s2

win    9
spr    6
sum    8
fal    4
dtype: int64

# get the values:
s2.values

array([9, 6, 8, 4])

# get the indices:
s2.index

Index(['win', 'spr', 'sum', 'fal'], dtype='str')

# get the value at index "win":
s2["win"]

np.int64(9)

# an integer key in square brackets is treated as a label, not a position;
# s2 has string labels, so this raises KeyError (use .iloc for positions)
s2[1]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/miniforge/lib/python3.12/site-packages/pandas/core/indexes/base.py:3641, in Index.get_loc(self, key)
   3640 try:
-> 3641     return self._engine.get_loc(casted_key)
   3642 except KeyError as err:

File pandas/_libs/index.pyx:168, in pandas._libs.index.IndexEngine.get_loc()
--> 168 'Could not get source, probably due dynamically evaluated source code.'

File pandas/_libs/index.pyx:176, in pandas._libs.index.IndexEngine.get_loc()
--> 176 'Could not get source, probably due dynamically evaluated source code.'

File pandas/_libs/index.pyx:583, in pandas._libs.index.StringObjectEngine._check_type()
--> 583 'Could not get source, probably due dynamically evaluated source code.'

KeyError: 1

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[33], line 1
----> 1 s2[1]

File /opt/miniforge/lib/python3.12/site-packages/pandas/core/series.py:959, in Series.__getitem__(self, key)
    954     key = unpack_1tuple(key)
    956 elif key_is_scalar:
    957     # Note: GH#50617 in 3.0 we changed int key to always be treated as
    958     #  a label, matching DataFrame behavior.
--> 959     return self._get_value(key)
    961 # Convert generator to list before going through hashable part
    962 # (We will iterate through the generator there to check for slices)
    963 if is_iterator(key):

File /opt/miniforge/lib/python3.12/site-packages/pandas/core/series.py:1046, in Series._get_value(self, label, takeable)
   1043     return self._values[label]
   1045 # Similar to Index.get_value, but we do not fall back to positional
-> 1046 loc = self.index.get_loc(label)
   1048 if is_integer(loc):
   1049     return self._values[loc]

File /opt/miniforge/lib/python3.12/site-packages/pandas/core/indexes/base.py:3648, in Index.get_loc(self, key)
   3643     if isinstance(casted_key, slice) or (
   3644         isinstance(casted_key, abc.Iterable)
   3645         and any(isinstance(x, slice) for x in casted_key)
   3646     ):
   3647         raise InvalidIndexError(key) from err
-> 3648     raise KeyError(key) from err
   3649 except TypeError:
   3650     # If we have a listlike key, _check_indexing_error will raise
   3651     #  InvalidIndexError. Otherwise we fall through and re-raise
   3652     #  the TypeError.
   3653     self._check_indexing_error(key)

KeyError: 1

# get the second thing using iloc
s2.iloc[1]

np.int64(6)

# get the second and third things:
s2.iloc[1:3]

spr    6
sum    8
dtype: int64

# Build a Series from a dict: the keys become the index labels and the
# values become the data, in the dict's insertion order.
d = {"win": 9, "spr": 6, "sum": 8, "fal": 4}
s3 = Series(d)
s3

win    9
spr    6
sum    8
fal    4
dtype: int64

# is 'fal' a key in s3? using the in keyword
'fal' in s3

True

# is 'jan' a key in s3?
'jan' in s3

False

# is 6 a value in s3?
6 in s3.values

True

# Column-oriented data: each key becomes a DataFrame column and each list
# supplies that column's values, one per row.
data = {
    "city": ["Seattle", "Spokane", "Tacoma", "Vancouver"],
    "pop": [787, 230, 222, 189],  # units are in thousands
    "tax": [10.25, 9.0, 10.3, 8.5],
}
df = DataFrame(data=data)
df

# get the city column using square brackets:
df["city"]

0      Seattle
1      Spokane
2       Tacoma
3    Vancouver
Name: city, dtype: str

# get the city column using property accessor:
df.city

0      Seattle
1      Spokane
2       Tacoma
3    Vancouver
Name: city, dtype: str

# divide the tax column by 100:
df["tax"] / 100

0    0.1025
1    0.0900
2    0.1030
3    0.0850
Name: tax, dtype: float64

df['visits'] = [20,2,5,4]
df

# path to the Avengers CSV file
data_url = '/cluster/academic/DATA311/202620/avengers/avengers.csv'

# load the CSV into a DataFrame, decoding the file as latin-1
avengers = pd.read_csv(data_url, encoding='latin-1')
avengers

# use head:
avengers.head(2)

# use tail:
avengers.tail(3)

# list the columns
avengers.columns

Index(['URL', 'Name/Alias', 'Appearances', 'Current?', 'Gender',
       'Probationary Introl', 'Full/Reserve Avengers Intro', 'Year',
       'Years since joining', 'Honorary', 'Death1', 'Return1', 'Death2',
       'Return2', 'Death3', 'Return3', 'Death4', 'Return4', 'Death5',
       'Return5', 'Notes'],
      dtype='str')

# use drop:
avengers.drop(columns=["URL"])

# use shape
avengers.shape

(173, 21)

# extract the Name/Alias column:
avengers["Name/Alias"]

0        Henry Jonathan "Hank" Pym
1                   Janet van Dyne
2      Anthony Edward "Tony" Stark
3              Robert Bruce Banner
4                     Thor Odinson
                  ...             
168                    Eric Brooks
169                  Adam Brashear
170                 Victor Alvarez
171                      Ava Ayala
172                          Kaluu
Name: Name/Alias, Length: 173, dtype: str

# get a dataframe with only the Name/Alias and Appearances columns
na = avengers[["Name/Alias", "Appearances"]]
na

# extract a DataFrame with just Name/Alias:
avengers[["Name/Alias"]]

# use sort_values:
na = na.sort_values("Appearances", ascending=False)
na

# get the 10th through 20th most-appearing avengers
# (na is sorted by Appearances in descending order, so these are row
# positions 9 through 19; the end of an iloc slice is exclusive)
na.iloc[9:20]

# Extract the first four avengers, and the second through fourth columns:
# (na only has two columns, so slice the full avengers table; column
# positions 1:4 select the second, third and fourth columns)
avengers.iloc[:4, 1:4]

# line plot of appearances
na.plot(y="Appearances", use_index=False)

<Axes: >

# show the Gender column:
avengers["Gender"]

0        MALE
1      FEMALE
2        MALE
3        MALE
4        MALE
        ...  
168      MALE
169      MALE
170      MALE
171    FEMALE
172      MALE
Name: Gender, Length: 173, dtype: str

# use value_counts to see frequency of each categorical label:
avengers["Gender"].value_counts()

Gender
MALE      115
FEMALE     58
Name: count, dtype: int64

avengers.columns

Index(['URL', 'Name/Alias', 'Appearances', 'Current?', 'Gender',
       'Probationary Introl', 'Full/Reserve Avengers Intro', 'Year',
       'Years since joining', 'Honorary', 'Death1', 'Return1', 'Death2',
       'Return2', 'Death3', 'Return3', 'Death4', 'Return4', 'Death5',
       'Return5', 'Notes'],
      dtype='str')

# scatter plot Years since joining vs Appearances
avengers.plot.scatter(x="Years since joining", y="Appearances")

<Axes: xlabel='Years since joining', ylabel='Appearances'>

# mean number of appearances within each Gender group
avengers.groupby("Gender")["Appearances"].mean()

# boolean indexing on rows: the first three avengers whose Gender is FEMALE
avengers[avengers["Gender"] == "FEMALE"].head(3)

# rows for avengers with more than 2000 appearances
avengers[avengers["Appearances"] > 2000]

# column info
avengers.info()

# summary statistics
avengers.describe()

	URL	Name/Alias	Appearances	Current?	Gender	Probationary Introl	Full/Reserve Avengers Intro	Year	Years since joining	Honorary	...	Return1	Death2	Return2	Death3	Return3	Death4	Return4	Death5	Return5	Notes
0	http://marvel.wikia.com/Henry_Pym_(Earth-616)	Henry Jonathan "Hank" Pym	1269	YES	MALE	NaN	Sep-63	1963	52	Full	...	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Merged with Ultron in Rage of Ultron Vol. 1. A...
1	http://marvel.wikia.com/Janet_van_Dyne_(Earth-...	Janet van Dyne	1165	YES	FEMALE	NaN	Sep-63	1963	52	Full	...	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Secret Invasion V1:I8. Actually was se...
2	http://marvel.wikia.com/Anthony_Stark_(Earth-616)	Anthony Edward "Tony" Stark	3068	YES	MALE	NaN	Sep-63	1963	52	Full	...	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Death: "Later while under the influence of Imm...
3	http://marvel.wikia.com/Robert_Bruce_Banner_(E...	Robert Bruce Banner	2089	YES	MALE	NaN	Sep-63	1963	52	Full	...	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Ghosts of the Future arc. However "he ...
4	http://marvel.wikia.com/Thor_Odinson_(Earth-616)	Thor Odinson	2402	YES	MALE	NaN	Sep-63	1963	52	Full	...	YES	YES	NO	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Fear Itself brought back because that'...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
168	http://marvel.wikia.com/Eric_Brooks_(Earth-616)#	Eric Brooks	198	YES	MALE	NaN	13-Nov	2013	2	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
169	http://marvel.wikia.com/Adam_Brashear_(Earth-6...	Adam Brashear	29	YES	MALE	NaN	14-Jan	2014	1	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
170	http://marvel.wikia.com/Victor_Alvarez_(Earth-...	Victor Alvarez	45	YES	MALE	NaN	14-Jan	2014	1	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
171	http://marvel.wikia.com/Ava_Ayala_(Earth-616)#	Ava Ayala	49	YES	FEMALE	NaN	14-Jan	2014	1	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
172	http://marvel.wikia.com/Kaluu_(Earth-616)#	Kaluu	35	YES	MALE	NaN	15-Jan	2015	0	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	URL	Name/Alias	Appearances	Current?	Gender	Probationary Introl	Full/Reserve Avengers Intro	Year	Years since joining	Honorary	...	Return1	Death2	Return2	Death3	Return3	Death4	Return4	Death5	Return5	Notes
0	http://marvel.wikia.com/Henry_Pym_(Earth-616)	Henry Jonathan "Hank" Pym	1269	YES	MALE	NaN	Sep-63	1963	52	Full	...	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Merged with Ultron in Rage of Ultron Vol. 1. A...
1	http://marvel.wikia.com/Janet_van_Dyne_(Earth-...	Janet van Dyne	1165	YES	FEMALE	NaN	Sep-63	1963	52	Full	...	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Secret Invasion V1:I8. Actually was se...

	URL	Name/Alias	Appearances	Current?	Gender	Probationary Introl	Full/Reserve Avengers Intro	Year	Years since joining	Honorary	...	Return1	Death2	Return2	Death3	Return3	Death4	Return4	Death5	Return5	Notes
170	http://marvel.wikia.com/Victor_Alvarez_(Earth-...	Victor Alvarez	45	YES	MALE	NaN	14-Jan	2014	1	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
171	http://marvel.wikia.com/Ava_Ayala_(Earth-616)#	Ava Ayala	49	YES	FEMALE	NaN	14-Jan	2014	1	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
172	http://marvel.wikia.com/Kaluu_(Earth-616)#	Kaluu	35	YES	MALE	NaN	15-Jan	2015	0	Full	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Name/Alias	Appearances	Current?	Gender	Probationary Introl	Full/Reserve Avengers Intro	Year	Years since joining	Honorary	Death1	Return1	Death2	Return2	Death3	Return3	Death4	Return4	Death5	Return5	Notes
0	Henry Jonathan "Hank" Pym	1269	YES	MALE	NaN	Sep-63	1963	52	Full	YES	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Merged with Ultron in Rage of Ultron Vol. 1. A...
1	Janet van Dyne	1165	YES	FEMALE	NaN	Sep-63	1963	52	Full	YES	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Secret Invasion V1:I8. Actually was se...
2	Anthony Edward "Tony" Stark	3068	YES	MALE	NaN	Sep-63	1963	52	Full	YES	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Death: "Later while under the influence of Imm...
3	Robert Bruce Banner	2089	YES	MALE	NaN	Sep-63	1963	52	Full	YES	YES	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Ghosts of the Future arc. However "he ...
4	Thor Odinson	2402	YES	MALE	NaN	Sep-63	1963	52	Full	YES	YES	YES	NO	NaN	NaN	NaN	NaN	NaN	NaN	Dies in Fear Itself brought back because that'...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
168	Eric Brooks	198	YES	MALE	NaN	13-Nov	2013	2	Full	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
169	Adam Brashear	29	YES	MALE	NaN	14-Jan	2014	1	Full	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
170	Victor Alvarez	45	YES	MALE	NaN	14-Jan	2014	1	Full	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
171	Ava Ayala	49	YES	FEMALE	NaN	14-Jan	2014	1	Full	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
172	Kaluu	35	YES	MALE	NaN	15-Jan	2015	0	Full	NO	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Name/Alias	Appearances
73	Peter Benjamin Parker	4333
6	Steven Rogers	3458
92	James "Logan" Howlett	3130
2	Anthony Edward "Tony" Stark	3068
4	Thor Odinson	2402
...	...	...
117	Dennis Sykes	6
65	Gene Lorrene	4
68	Doug Taggert	3
39	Moira Brandon	2
125	Fiona	2

DATA 311 - Lecture 3

Announcements

Goals

Advantage of Jupyter:

Numpy, Continued

Fancy indexing

Integer indexing

Boolean Indexing

Tips for multidimensional arrays

Exercise 3 Play with my cat

Pandas: a library for working with tabular data

How to Learn Pandas (and other tools we'll use in this class):

But seriously

Pandas: basic data structures/concepts

Series - a 1D list-like thing (think of it as a column with labels)

DataFrames

More pandas, now with Avengers

Getting fancier

pandas: a whirlwind tour

	city	pop	tax
0	Seattle	787	10.25
1	Spokane	230	9.00
2	Tacoma	222	10.30
3	Vancouver	189	8.50