-
Notifications
You must be signed in to change notification settings - Fork 1
/
CentralTendency
57 lines (43 loc) · 1.22 KB
/
CentralTendency
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Create a dataframe from 'state_naicssector_2010.xls' and compute the mode and
# median of some of its columns.
# Bare expressions below (e.g. list(df_1)) only display their result in an
# interactive session (notebook/REPL); wrap them in print() to see the output
# when this is run as a script.
import pandas as pd

# General form for a CSV source: df_1 = pd.read_csv('file_name')
# Here the data is an Excel sheet. skiprows=4 skips the first four rows, so the
# fifth row supplies the column names (presumably the skipped rows are
# title/notes lines - confirm against the file).
df_1 = pd.read_excel('state_naicssector_2010.xls', skiprows=4)

# List column names
list(df_1)

# Get all rows for Texas
df_2 = df_1.loc[df_1['STATE DESCRIPTION'] == 'Texas']

# Get the row whose index label is 7973 (.loc looks up by label, not position)
df_2.loc[7973]

# Get first row in dataframe (.iloc looks up by position)
df_2.iloc[0]

# Get all rows in a particular industry.
# NOTE: this rebinds df_2 and filters the full table df_1, so the Texas-only
# selection above is no longer in use from here on.
df_2 = df_1.loc[df_1['NAICS DESCRIPTION'] == 'Agriculture, forestry, fishing and hunting']

# Keep only that industry's rows whose size class is '1: Total'
# (presumably one row per state - confirm against the data)
df_3 = df_2.loc[df_2['ENTERPRISE EMPLOYMENT SIZE'] == '1: Total']

# axis=1 takes the mode across the columns of each row.
# NOTE(review): a per-column mode (the default, axis=0) may have been intended.
df_3.mode(axis=1)

# Mode and median of single columns
df_3["NUMBER OF FIRMS"].mode()
df_3["NUMBER OF FIRMS"].median()
df_3['ANNUAL PAYROLL ($1,000)'].median()
### For Wednesday
# Census data
# Take a census table, pull out one column, and look at its mean, median and
# histogram.
import pandas as pd

# 'TABLE_FILE_NAME' is a placeholder: replace it with the path of the census
# CSV file.
df_1 = pd.read_csv('TABLE_FILE_NAME')
# Read again, skipping the first line so that the second line is used as the
# header (presumably the first line holds short column codes - confirm in the
# file).
df_1 = pd.read_csv('TABLE_FILE_NAME', skiprows=1)

# Pretty-print the column names, one per line
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(list(df_1))

# Let pandas display up to 1000 rows before truncating the output
pd.set_option('display.max_rows', 1000)

# 2012 (presumably the column name used in the 2012 table)
df_1["Total!!Estimate!!Bachelor's degree"]

# 2018 (presumably the column name used in the 2018 table)
s1 = df_1["Estimate!!Percent!!Population 25 years and over!!Bachelor's degree or higher"]

# General form: "COLUMN_NAME" is a placeholder for the column to analyse.
# NOTE: this assignment replaces the s1 selected on the previous line; keep
# only the lookup that matches the table being used.
s1 = df_1["COLUMN_NAME"]

# Cells equal to '-' are replaced by 0 so the column can be converted to float.
# NOTE(review): if '-' marks a missing value, counting it as 0 lowers the mean
# and median; confirm that this is intended.
s2 = s1.replace('-', 0)
s3 = s2.astype(float)
s3.mean()

# Histogram of the column with 4 bins
import matplotlib.pyplot as plt
s3.hist(bins=4)
plt.show()

s3.median()
s3.mean()