MEE 6070 Data Science and Analytics: Importing Data Using pandas, Plotting the Data, and Checking for Linearity
MEE 6070 Data Science and Analytics: Importing Data Using pandas, Plotting the Data, and Checking for Linearity
Example: 1
import pandas as pd
# Column-oriented sample data: each key becomes a DataFrame column and the
# three lists are aligned by position, giving seven rows.
my_dist = {
    'name': ["a", "b", "c", "d", "e", "f", "g"],
    'age': [20, 27, 35, 55, 18, 21, 35],
    'designation': ["VP", "CEO", "CFO", "VP", "VP", "CEO", "MD"],
}

# Build the DataFrame from the dictionary and display it.
df = pd.DataFrame(my_dist)
print(df)
Example: 2
import pandas as pd
# Construct a 5-row DataFrame with three integer columns (X, Y, Z) directly
# from a dict literal, then display it.
df = pd.DataFrame(
    data={
        'X': [78, 85, 96, 80, 86],
        'Y': [84, 94, 89, 83, 86],
        'Z': [86, 97, 96, 72, 83],
    }
)
print(df)
# Example Python program to get the contents of a DataFrame as a CSV string
import pandas as pds

# Standard deviations in the thickness of three wooden board variants in mm
standardDeviations = {
    "Wood 1": [0.4, 0.5, 0.3],
    "Wood 2": [0.1, 0.2, 0.3],
    "Wood 3": [0.7, 0.6, 0.7],
}

# Load data into a DataFrame instance; the index supplies the row labels.
dataFrame = pds.DataFrame(
    data=standardDeviations,
    index=["Variant1", "Variant2", "Variant3"],
)

# Called without a path or buffer, to_csv() returns the CSV text as a string
# instead of writing a file.
csv = dataFrame.to_csv()
print("Contents of the DataFrame as a CSV string:")
print(csv)
# Example Python program to write the contents of a DataFrame to a buffer
import pandas as pds
from io import StringIO

# Closing price of 3 different stocks over 5 trading days
closingPrices = {
    "Stock1": [34.17, 34.25, 34.2, 34.24, 34.3],
    "Stock2": [10.01, 10.20, 10.1, 10.15, 10.2],
    "Stock3": [41.6, 42.1, 41.89, 42.4, 42.7],
}

# Load data from the Python dictionary into a DataFrame instance
dataFrame = pds.DataFrame(data=closingPrices)

# Create an in-memory text stream
textStream = StringIO()

# Write the DataFrame contents to the text stream's buffer as CSV
dataFrame.to_csv(textStream)
print("DataFrame as CSV (from the buffer):")

# Print the buffer contents
print(textStream.getvalue())
In the following example, we will use multiple linear regression to predict
the stock index price (i.e., the dependent variable) of a fictitious economy
by using 2 independent/input variables:
Interest Rate
Unemployment Rate
import pandas as pd
# Monthly observations for a fictitious economy, newest first: 24 rows
# covering December 2017 back to January 2016. Interest_Rate and
# Unemployment_Rate are the input variables; Stock_Index_Price is the value
# to be predicted.
Stock_Market = {
    'Year': [
        2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
        2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,
    ],
    'Month': [
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
    ],
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# Passing `columns` fixes the column order of the resulting DataFrame.
df = pd.DataFrame(
    Stock_Market,
    columns=['Year', 'Month', 'Interest_Rate', 'Unemployment_Rate',
             'Stock_Index_Price'],
)
print(df)
Checking for Linearity (Plotting – Scatter Diagrams)
import pandas as pd
import matplotlib.pyplot as plt
# Monthly observations for a fictitious economy, newest first: 24 rows
# covering December 2017 back to January 2016.
Stock_Market = {
    'Year': [
        2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
        2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,
    ],
    'Month': [
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
    ],
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# Passing `columns` fixes the column order of the resulting DataFrame.
df = pd.DataFrame(
    Stock_Market,
    columns=['Year', 'Month', 'Interest_Rate', 'Unemployment_Rate',
             'Stock_Index_Price'],
)

# Scatter plot of the stock index price against the interest rate, used to
# judge visually whether the relationship looks linear.
plt.scatter(df['Interest_Rate'], df['Stock_Index_Price'], color='red')
plt.title('Stock Index Price Vs Interest Rate', fontsize=14)
plt.xlabel('Interest Rate', fontsize=14)
plt.ylabel('Stock Index Price', fontsize=14)
plt.grid(True)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Monthly observations for a fictitious economy, newest first: 24 rows
# covering December 2017 back to January 2016.
Stock_Market = {
    'Year': [
        2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
        2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,
    ],
    'Month': [
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
    ],
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# Passing `columns` fixes the column order of the resulting DataFrame.
df = pd.DataFrame(
    Stock_Market,
    columns=['Year', 'Month', 'Interest_Rate', 'Unemployment_Rate',
             'Stock_Index_Price'],
)

# Scatter plot of the stock index price against the unemployment rate, used
# to judge visually whether the relationship looks linear.
plt.scatter(df['Unemployment_Rate'], df['Stock_Index_Price'], color='green')
plt.title('Stock Index Price Vs Unemployment Rate', fontsize=14)
plt.xlabel('Unemployment Rate', fontsize=14)
plt.ylabel('Stock Index Price', fontsize=14)
plt.grid(True)
plt.show()
https://www.youtube.com/watch?v=z1pBdycYCGY
Digital Assignment-1