0% found this document useful (0 votes)
41 views

Pandas

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
41 views

Pandas

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 9

doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

INTRODUCTION TO NUMPY
# Create an array of integers from one to ten
1. Your first NumPy array
one_to_ten = np.arange(1, 11)
# edited/added # Create your scatterplot
import numpy as np plt.scatter(one_to_ten, doubling_array)
sudoku_list = np.load('sudoku_game.npy') plt.show()
# Import NumPy
4. Array dimensionality
import numpy as np
# Convert sudoku_list into an array # edited/added
sudoku_array = np.array(sudoku_list) sudoku_solution = np.load('sudoku_solution.npy')
# Print the type of sudoku_array sudoku_list = np.load('sudoku_game.npy')
print(type(sudoku_array)) sudoku_game = np.array(sudoku_list)
# Create the game_and_solution 3D array
2. Creating arrays from scratch
game_and_solution = np.array([sudoku_game, sudoku_solution])
# Create an array of zeros which has four columns and two rows # Print game_and_solution
zero_array = np.zeros((2, 4)) print(game_and_solution)
print(zero_array)
5. The fourth dimension
# Create an array of random floats which has six columns and three rows
random_array = np.random.random((3, 6)) # edited/added
print(random_array) new_sudoku_game = np.load('new_sudoku_game.npy')
new_sudoku_solution = np.load('new_sudoku_solution.npy')
3. A range array
game_and_solution = np.load('game_and_solution.npy')
doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] # Create a second 3D array of another game and its solution
plt.scatter(x_values, y_values) new_game_and_solution = np.array([new_sudoku_game, new_sudoku_solution])
plt.show()
# edited/added
from matplotlib import pyplot as plt # Create a 4D array of both game and solution 3D arrays
games_and_solutions = np.array([game_and_solution, new_game_and_solution]) # Print the data type of zero_array
print(zero_array.dtype)
# Print the shape of your 4D array # Create an array of zeros with three rows and two columns
print(games_and_solutions.shape) zero_array = np.zeros((3, 2))

6. Flattening and reshaping # Print the data type of zero_array


print(zero_array.dtype)
# edited/added
# Create a new array of int32 zeros with three rows and two columns
sudoku_game = np.load('sudoku_game_new.npy')
zero_int_array = np.zeros((3, 2), dtype=np.int32)
# Flatten sudoku_game
# Print the data type of zero_int_array
flattened_game = sudoku_game.flatten()
print(zero_int_array.dtype)
# Print the shape of flattened_game
print(flattened_game.shape) 8. Anticipating data types
# Flatten sudoku_game A string data type
flattened_game = sudoku_game.flatten()
 • np.array([78.988, "NumPy", True])
# Print the shape of flattened_game • np.array([9, 1.12, True]).astype("<U5")
print(flattened_game.shape)
An integer data type
# Reshape flattened_game back to a nine by nine array
reshaped_game = flattened_game.reshape((9, 9)) • np.array([34.62, 70.13, 9]).astype(np.int64)
 • np.array([45.67, True], dtype=np.int8)
# Print sudoku_game and reshaped_game
print(sudoku_game) A float data type
print(reshaped_game)
 • np.array([[6, 15.7], [True, False]])
 • np.random.random((4, 5))
7. NumPy data types
9. A smaller sudoku game
The dtype argument
# Print the data type of sudoku_game
# Create an array of zeros with three rows and two columns
print(sudoku_game.dtype)
zero_array = np.zeros((3, 2))
# Print the data type of sudoku_game
print(sudoku_game.dtype) hundred_diameters = tree_census[:100, 2]
# Change the data type of sudoku_game to int8 print(hundred_diameters)
small_sudoku_game = sudoku_game.astype(np.int8) # Create an array of trunk diameters with even row indices from 50 to 100 inclusive
# Print the data type of small_sudoku_game

print(small_sudoku_game.dtype) every_other_diameter = tree_census[50:101:2, 2]


print(every_other_diameter)
10. Slicing and indexing trees
12. Sorting trees
# edited/added
tree_census = np.load('tree_census.npy') # Extract trunk diameters information and sort from smallest to largest

# Select all rows of block ID data from the second column sorted_trunk_diameters = np.sort(tree_census[:, 2])

block_ids = tree_census[:, 1] print(sorted_trunk_diameters)

# Print the first five block_ids 13. Filtering with masks


print(block_ids[:5])
# Create an array which contains row data on the largest tree in tree_census
# Select all rows of block ID data from the second column
largest_tree_data = tree_census[tree_census[:, 2] == 51]
block_ids = tree_census[:, 1]
print(largest_tree_data)
# Select the tenth block ID from block_ids
# Create an array which contains row data on the largest tree in tree_census
tenth_block_id = block_ids[9]
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(tenth_block_id)
print(largest_tree_data)
# Select all rows of block ID data from the second column
# Slice largest_tree_data to get only the block id
block_ids = tree_census[:, 1]
largest_tree_block_id = largest_tree_data[:, 1]
# Select five block IDs from block_ids starting with the tenth ID
print(largest_tree_block_id)
block_id_slice = block_ids[9:14]
# Create an array which contains row data on the largest tree in tree_census
print(block_id_slice)
largest_tree_data = tree_census[tree_census[:, 2] == 51]
11. Stepping into 2D print(largest_tree_data)

# Create an array of the first 100 trunk diameters from tree_census # Slice largest_tree_data to get only the block ID
largest_tree_block_id = largest_tree_data[:, 1]
print(largest_tree_block_id) Compatible along the second axis

# Create an array which contains row data on all trees with largest_tree_block_id • (4, 2) and (4, 3)
trees_on_largest_tree_block = tree_census[tree_census[:, 1] == largest_tree_block_id] Not compatible
 (5, 2) and (7, 4)
print(trees_on_largest_tree_block)  (4, 2) and (4,)
 (4, 2) and (2,)
14. Fancy indexing vs. np.where()
17. Adding rows
# Create the block_313879 array containing trees on block 313879
block_313879 = tree_census[tree_census[:, 1] == 313879] # edited/added

print(block_313879) new_trees = np.array([[1211, 227386, 20, 0], [1212, 227386, 8, 0]])

# Create an array of row_indices for trees on block 313879 # Print the shapes of tree_census and new_trees

row_indices = np.where(tree_census[:, 1] == 313879) print(tree_census.shape, new_trees.shape)

# Create an array which only contains data for trees on block 313879 # Print the shapes of tree_census and new_trees

block_313879 = tree_census[row_indices] print(tree_census.shape, new_trees.shape)

print(block_313879) # Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees))
15. Creating arrays from conditions print(updated_tree_census)
# Create and print a 1D array of tree and stump diameters
18. Adding columns
trunk_stump_diameters = np.where(tree_census[:, 2] == 0, tree_census[:, 3], tree_census[:, 2])
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters) print(trunk_stump_diameters.shape, tree_census.shape)
# Print the shapes of tree_census and trunk_stump_diameters
16. Compatible or not?
print(trunk_stump_diameters.shape, tree_census.shape)
Compatible along the first axis
# Reshape trunk_stump_diameters
 • (4, 2) and (6, 2) reshaped_diameters = trunk_stump_diameters.reshape((1000, 1))
 • (15, 5) and (100, 5)
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape) monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
# Reshape trunk_stump_diameters print(monthly_industry_sales)
reshaped_diameters = trunk_stump_diameters.reshape((1000, 1)) # Create a 2D array of total monthly sales across industries
# Concatenate reshaped_diameters to tree_census as the last column monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
concatenated_tree_census = np.concatenate((tree_census, reshaped_diameters), axis=1) print(monthly_industry_sales)
# Add this column as the last column in monthly_sales
print(concatenated_tree_census) monthly_sales_with_total = np.concatenate((monthly_sales, monthly_industry_sales), axis=1)
19. Deleting with np.delete()
print(monthly_sales_with_total)
# Delete the stump diameter column from tree_census
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) 21. Plotting averages

# Save the indices of the trees on block 313879 # Create the 1D array avg_monthly_sales
private_block_indices = np.where(tree_census[:, 1] == 313879) avg_monthly_sales = monthly_sales.mean(axis=1)
# Delete the stump diameter column from tree_census print(avg_monthly_sales)
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) # Create the 1D array avg_monthly_sales
# Save the indices of the trees on block 313879 avg_monthly_sales = monthly_sales.mean(axis=1)
private_block_indices = np.where(tree_census[:,1] == 313879) print(avg_monthly_sales)
# Delete the rows for trees on block 313879 from tree_census_no_stumps # Plot avg_monthly_sales by month
tree_census_clean = np.delete(tree_census_no_stumps, private_block_indices, axis=0) plt.plot(np.arange(1, 13), avg_monthly_sales, label="Average sales across industries")
# Print the shape of tree_census_clean # Plot department store sales by month
print(tree_census_clean.shape) plt.plot(np.arange(1, 13), monthly_sales[:, 2], label="Department store sales")
plt.legend()
20. Sales totals
plt.show()
# edited/added
monthly_sales = np.load('monthly_sales.npy') 22. Cumulative sales

# Create a 2D array of total monthly sales across industries # Find cumulative monthly sales for each industry
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) # edited/added
print(cumulative_monthly_industry_sales) monthly_industry_multipliers = np.load('monthly_industry_multipliers.npy')
# Find cumulative monthly sales for each industry # Create an array of monthly projected sales for all industries
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(cumulative_monthly_industry_sales) print(projected_monthly_sales)
# Plot each industry's cumulative sales by month as separate lines # Create an array of monthly projected sales for all industries
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 0], label="Liquor Stores") projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(projected_monthly_sales)
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 1], label="Restaurants") # Graph current liquor store sales and projected liquor store sales by month
plt.plot(np.arange(1, 13), monthly_sales[:, 0], label="Current liquor store sales")
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 2], label="Department stores")
plt.plot(np.arange(1, 13), projected_monthly_sales[:, 0], label="Projected liquor store sales")
plt.legend()
plt.show() plt.legend()

23. Tax calculations plt.show()

# Create an array of tax collected by industry and month 25. Vectorizing .upper()
tax_collected = monthly_sales * 0.05 # edited/added
print(tax_collected) names = np.array([["Izzy", "Monica", "Marvin"],
# Create an array of tax collected by industry and month ["Weber", "Patel", "Hernandez"]])
tax_collected = monthly_sales * 0.05 # Vectorize the .upper() string method
print(tax_collected) vectorized_upper = np.vectorize(str.upper)
# Create an array of sales revenue plus tax collected by industry and month # Apply vectorized_upper to the names array
total_tax_and_revenue = tax_collected + monthly_sales uppercase_names = vectorized_upper(names)
print(total_tax_and_revenue) print(uppercase_names)

24. Projecting sales 26. Broadcastable or not?


Broadcastable # Print the shapes of mean_multipliers and monthly_sales
 • (3, 4) and (1, 4) print(mean_multipliers.shape, monthly_sales.shape)
 • (3, 4) and (4, ) # Find the mean sales projection multiplier for each industry
 • (3, 4) and (3, 1)
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
Not Broadcastable print(mean_multipliers)
# Print the shapes of mean_multipliers and monthly_sales
 • (3, 4) and (1, 2)
 • (3, 4) and (4, 1) print(mean_multipliers.shape, monthly_sales.shape)
 • (3, 4) and (3, ) # Multiply each value by the multiplier for that industry
27. Broadcasting across columns projected_sales = monthly_sales * mean_multipliers
print(projected_sales)
# edited/added
monthly_growth_rate = [1.01, 1.03, 1.03, 1.02, 1.05, 1.03, 1.06, 1.04, 1.03, 1.04, 1.02, 1.01] 29. Loading .npy files
# Convert monthly_growth_rate into a NumPy array # Load the mystery_image.npy file
with open("mystery_image.npy", "rb") as f:
monthly_growth_1D = np.array(monthly_growth_rate)
rgb_array = np.load(f)
# Reshape monthly_growth_1D
monthly_growth_2D = monthly_growth_1D.reshape((12, 1))
plt.imshow(rgb_array)
# Multiply each column in monthly_sales by monthly_growth_2D
plt.show()
print(monthly_growth_2D * monthly_sales)
30. Getting help
28. Broadcasting across rows
# Display the documentation for .astype()
# Find the mean sales projection multiplier for each industry
help(np.ndarray.astype)
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
print(mean_multipliers) 31. Update and save
# Find the mean sales projection multiplier for each industry
# edited/added
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
rgb_array = np.load('rgb_array.npy')
print(mean_multipliers)
# Reduce every value in rgb_array by 50 percent
darker_rgb_array = rgb_array * 0.5 33. Transposing your masterpiece

# Reduce every value in rgb_array by 50 percent # Transpose rgb_array


darker_rgb_array = rgb_array * 0.5 transposed_rgb = np.transpose(rgb_array, axes=(1, 0, 2))
# Convert darker_rgb_array into an array of integers plt.imshow(transposed_rgb)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) plt.show()
plt.imshow(darker_rgb_int_array)
34. 2D split and stack
plt.show()
# Reduce every value in rgb_array by 50 percent # Split monthly_sales into quarterly data
darker_rgb_array = rgb_array * 0.5 q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
# Convert darker_rgb_array into an array of integers print(q1_sales)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) # Split monthly_sales into quarterly data
plt.imshow(darker_rgb_int_array) q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
plt.show() print(q1_sales)
# Save darker_rgb_int_array to an .npy file called darker_monet.npy # Stack the four quarterly sales arrays
with open("darker_monet.npy", "wb") as f: quarterly_sales = np.stack([q1_sales, q2_sales, q3_sales, q4_sales])
np.save(f, darker_rgb_int_array) print(quarterly_sales)

32. Augmenting Monet 35. Splitting RGB data


# Flip rgb_array so that it is the mirror image of the original # Split rgb_array into red, green, and blue arrays
mirrored_monet = np.flip(rgb_array, axis=1) red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2)
plt.imshow(mirrored_monet) # Split rgb_array into red, green, and blue arrays
plt.show() red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2)
# Flip rgb_array so that it is upside down # Create emphasized_blue_array
upside_down_monet = np.flip(rgb_array, axis=(0, 1)) emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255, blue_array)
plt.imshow(upside_down_monet)
plt.show() # Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Split rgb_array into red, green, and blue arrays plt.imshow(emphasized_blue_monet)
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) plt.show()
# Create emphasized_blue_array
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255, blue_array)
# Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Remove the trailing dimension from emphasized_blue_array
emphasized_blue_array_2D = emphasized_blue_array.reshape((675, 843)) # edited/added

36. Stacking RGB data

# Print the shapes of blue_array and emphasized_blue_array_2D


print(blue_array.shape, emphasized_blue_array_2D.shape)
# Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
# Reshape red_array and green_array
red_array_2D = red_array.reshape((675, 843)) # edited/added
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
# Reshape red_array and green_array
red_array_2D = red_array.reshape((675, 843)) # edited/added
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Stack red_array_2D, green_array_2D, and emphasized_blue_array_2D
emphasized_blue_monet = np.stack([red_array_2D, green_array_2D, emphasized_blue_array_2D], axis=2)

You might also like