Pandas
Pandas
INTRODUCTION TO NUMPY
# Create an array of integers from one to ten
1. Your first NumPy array
one_to_ten = np.arange(1, 11)
# edited/added # Create your scatterplot
import numpy as np plt.scatter(one_to_ten, doubling_array)
sudoku_list = np.load('sudoku_game.npy') plt.show()
# Import NumPy
4. Array dimensionality
import numpy as np
# Convert sudoku_list into an array # edited/added
sudoku_array = np.array(sudoku_list) sudoku_solution = np.load('sudoku_solution.npy')
# Print the type of sudoku_array sudoku_list = np.load('sudoku_game.npy')
print(type(sudoku_array)) sudoku_game = np.array(sudoku_list)
# Create the game_and_solution 3D array
2. Creating arrays from scratch
game_and_solution = np.array([sudoku_game, sudoku_solution])
# Create an array of zeros which has four columns and two rows # Print game_and_solution
zero_array = np.zeros((2, 4)) print(game_and_solution)
print(zero_array)
5. The fourth dimension
# Create an array of random floats which has six columns and three rows
random_array = np.random.random((3, 6)) # edited/added
print(random_array) new_sudoku_game = np.load('new_sudoku_game.npy')
new_sudoku_solution = np.load('new_sudoku_solution.npy')
3. A range array
game_and_solution = np.load('game_and_solution.npy')
doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] # Create a second 3D array of another game and its solution
plt.scatter(x_values, y_values) new_game_and_solution = np.array([new_sudoku_game, new_sudoku_solution])
plt.show()
# edited/added
from matplotlib import pyplot as plt # Create a 4D array of both game and solution 3D arrays
games_and_solutions = np.array([game_and_solution, new_game_and_solution]) # Print the data type of zero_array
print(zero_array.dtype)
# Print the shape of your 4D array # Create an array of zeros with three rows and two columns
print(games_and_solutions.shape) zero_array = np.zeros((3, 2))
# Select all rows of block ID data from the second column sorted_trunk_diameters = np.sort(tree_census[:, 2])
# Create an array of the first 100 trunk diameters from tree_census # Slice largest_tree_data to get only the block ID
largest_tree_block_id = largest_tree_data[:, 1]
print(largest_tree_block_id) Compatible along the second axis
# Create an array which contains row data on all trees with largest_tree_block_id (4, 2) and (4, 3)
trees_on_largest_tree_block = tree_census[tree_census[:, 1] == largest_tree_block_id] Not compatible
(5, 2) and (7, 4)
print(trees_on_largest_tree_block) (4, 2) and (4,)
(4, 2) and (2,)
14. Fancy indexing vs. np.where()
17. Adding rows
# Create the block_313879 array containing trees on block 313879
block_313879 = tree_census[tree_census[:, 1] == 313879] # edited/added
# Create an array of row_indices for trees on block 313879 # Print the shapes of tree_census and new_trees
# Create an array which only contains data for trees on block 313879 # Print the shapes of tree_census and new_trees
print(block_313879) # Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees))
15. Creating arrays from conditions print(updated_tree_census)
# Create and print a 1D array of tree and stump diameters
18. Adding columns
trunk_stump_diameters = np.where(tree_census[:, 2] == 0, tree_census[:, 3], tree_census[:, 2]) # Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters) print(trunk_stump_diameters.shape, tree_census.shape)
# Print the shapes of tree_census and trunk_stump_diameters
16. Compatible or not?
print(trunk_stump_diameters.shape, tree_census.shape)
Compatible along the first axis
# Reshape trunk_stump_diameters
(4, 2) and (6, 2) reshaped_diameters = trunk_stump_diameters.reshape((1000, 1))
(15, 5) and (100, 5)
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape) monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
# Reshape trunk_stump_diameters print(monthly_industry_sales)
reshaped_diameters = trunk_stump_diameters.reshape((1000, 1)) # Create a 2D array of total monthly sales across industries
# Concatenate reshaped_diameters to tree_census as the last column monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
concatenated_tree_census = np.concatenate((tree_census, reshaped_diameters), axis=1) print(monthly_industry_sales)
# Add this column as the last column in monthly_sales
print(concatenated_tree_census) monthly_sales_with_total = np.concatenate((monthly_sales, monthly_industry_sales), axis=1)
19. Deleting with np.delete()
print(monthly_sales_with_total)
# Delete the stump diameter column from tree_census
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) 21. Plotting averages
# Save the indices of the trees on block 313879 # Create the 1D array avg_monthly_sales
private_block_indices = np.where(tree_census[:, 1] == 313879) avg_monthly_sales = monthly_sales.mean(axis=1)
# Delete the stump diameter column from tree_census print(avg_monthly_sales)
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) # Create the 1D array avg_monthly_sales
# Save the indices of the trees on block 313879 avg_monthly_sales = monthly_sales.mean(axis=1)
private_block_indices = np.where(tree_census[:,1] == 313879) print(avg_monthly_sales)
# Delete the rows for trees on block 313879 from tree_census_no_stumps # Plot avg_monthly_sales by month
tree_census_clean = np.delete(tree_census_no_stumps, private_block_indices, axis=0)
plt.plot(np.arange(1, 13), avg_monthly_sales, label="Average sales across industries")
# Print the shape of tree_census_clean # Plot department store sales by month
print(tree_census_clean.shape) plt.plot(np.arange(1, 13), monthly_sales[:, 2], label="Department store sales")
plt.legend()
20. Sales totals
plt.show()
# edited/added
monthly_sales = np.load('monthly_sales.npy') 22. Cumulative sales
# Create a 2D array of total monthly sales across industries # Find cumulative monthly sales for each industry
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) # edited/added
print(cumulative_monthly_industry_sales) monthly_industry_multipliers = np.load('monthly_industry_multipliers.npy')
# Find cumulative monthly sales for each industry # Create an array of monthly projected sales for all industries
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(cumulative_monthly_industry_sales) print(projected_monthly_sales)
# Plot each industry's cumulative sales by month as separate lines # Create an array of monthly projected sales for all industries
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 0], label="Liquor Stores") projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(projected_monthly_sales)
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 1], label="Restaurants") # Graph current liquor store sales and projected liquor store sales by month
plt.plot(np.arange(1, 13), monthly_sales[:, 0], label="Current liquor store sales")
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 2], label="Department stores")
plt.plot(np.arange(1, 13), projected_monthly_sales[:, 0], label="Projected liquor store sales")
plt.legend()
plt.show() plt.legend()
# Create an array of tax collected by industry and month 25. Vectorizing .upper()
tax_collected = monthly_sales * 0.05 # edited/added
print(tax_collected) names = np.array([["Izzy", "Monica", "Marvin"],
# Create an array of tax collected by industry and month ["Weber", "Patel", "Hernandez"]])
tax_collected = monthly_sales * 0.05 # Vectorize the .upper() string method
print(tax_collected) vectorized_upper = np.vectorize(str.upper)
# Create an array of sales revenue plus tax collected by industry and month # Apply vectorized_upper to the names array
total_tax_and_revenue = tax_collected + monthly_sales uppercase_names = vectorized_upper(names)
print(total_tax_and_revenue) print(uppercase_names)