# DATA_VISUALISATION.py
# Exploratory charts for the crime dataset read from crime/train.csv.
import numpy as np
import pandas as pd
# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import squarify
# for providing path
import os
# Load the crime training set and echo a quick overview to stdout:
# its shape first, then the leading rows, then the summary statistics.
crime_csv = 'crime/train.csv'
data = pd.read_csv(crime_csv)
for overview in (lambda: data.shape, data.head, data.describe):
    print(overview())
# To also inspect missing values per column: print(data.isnull().sum())
# Number of incidents per crime category, saved to majorcrimes.jpg.
#
# The style is applied through a context manager, and before the figure is
# created, so the dark theme covers the whole figure and is rolled back
# afterwards instead of lingering in the global rcParams for later charts.
with plt.style.context('dark_background'):
    fig, ax = plt.subplots(figsize=(10, 5))
    # Pass the column as `x=`: seaborn 0.12 made every argument after `data`
    # keyword-only, so a bare positional Series is read as `data`, not `x`.
    sns.countplot(x=data['Category'], palette='gnuplot', ax=ax)
    ax.set_title('Major crimes', fontweight=30, fontsize=20)
    plt.xticks(rotation=90)
    # Save inside the context so the style's savefig settings still apply.
    fig.savefig("majorcrimes.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Tree map of the (up to) 25 most frequent crime categories, saved to
# treemap.jpg.
#
# The style is scoped with a context manager so it neither inherits leftovers
# from an earlier plt.style.use call nor leaks into the charts that follow.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots(figsize=(10, 5))
    top_crimes = data['Category'].value_counts().head(25)
    # One colour per rectangle: sampling only 15 colours for 25 rectangles
    # makes the palette wrap around, so unrelated categories share a colour.
    color = plt.cm.magma(np.linspace(0, 1, len(top_crimes)))
    squarify.plot(sizes=top_crimes.values, label=top_crimes.index,
                  alpha=.8, color=color, ax=ax)
    ax.set_title('Tree Map for Top 25 Crimes', fontsize=20)
    ax.axis('off')
    fig.savefig("treemap.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Bar chart of the 15 addresses with the most recorded incidents, saved to
# countofcrimes.jpg.
#
# matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name, so pick whichever one is installed.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
with plt.style.context(seaborn_style):
    # (10, 20) is the size the chart ends up with via the pandas `figsize`
    # argument; create the figure at that size directly.
    fig, ax = plt.subplots(figsize=(10, 20))
    top_addresses = data['Address'].value_counts().head(15)
    # Size the palette from the data so it still matches if fewer than 15
    # distinct addresses are present.
    color = plt.cm.ocean(np.linspace(0, 1, len(top_addresses)))
    top_addresses.plot.bar(color=color, ax=ax)
    ax.set_title('Top 15 Regions in Crime', fontsize=20)
    plt.xticks(rotation=90)
    fig.savefig("countofcrimes.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Pie chart of incident counts per day of the week, saved to piechart.jpg.
#
# matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name, so pick whichever one is installed.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
with plt.style.context(seaborn_style):
    fig, ax = plt.subplots(figsize=(15, 8))
    day_counts = data['DayOfWeek'].value_counts().head(15)
    # `explode` must have exactly one entry per wedge; build it from the data
    # instead of hard-coding seven values so a column with missing or extra
    # day labels does not make the call fail.
    day_counts.plot.pie(explode=[0.1] * len(day_counts), ax=ax)
    ax.set_title('Crime count on each day', fontsize=20)
    fig.savefig("piechart.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Bar chart of how often each resolution occurs, saved to Resolutions.jpg.
#
# matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name, so pick whichever one is installed.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
with plt.style.context(seaborn_style):
    fig, ax = plt.subplots(figsize=(15, 8))
    resolution_counts = data['Resolution'].value_counts()
    # Colormaps take floats in [0, 1]; anything above 1 is mapped to the
    # colour at the top of the range. Sampling 0..10 therefore painted almost
    # every bar the same colour. Sample 0..1 with one colour per bar instead.
    color = plt.cm.winter(np.linspace(0, 1, len(resolution_counts)))
    resolution_counts.plot.bar(color=color, ax=ax)
    ax.set_title('Resolutions for Crime', fontsize=20)
    plt.xticks(rotation=90)
    fig.savefig("Resolutions.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Number of incidents per calendar month, saved to crimesineachmonths.jpg.
#
# Parse the timestamp column in place and derive a numeric 'Month' column
# from it; both columns stay on `data` after this section.
data['Dates'] = pd.to_datetime(data['Dates'])
data['Month'] = data['Dates'].dt.month
# Scope the style with a context manager so it neither inherits leftovers
# from an earlier plt.style.use call nor leaks into the charts that follow.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots(figsize=(10, 5))
    # Pass the column as `x=`: seaborn 0.12 made every argument after `data`
    # keyword-only, so a bare positional Series is read as `data`, not `x`.
    sns.countplot(x=data['Month'], palette='autumn', ax=ax)
    ax.set_title('Crimes in each Months', fontsize=20)
    fig.savefig("crimesineachmonths.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)
# Time of day at which crime occurs most often (currently disabled)
#color = plt.cm.twilight(np.linspace(0, 5, 100))
#data['Time'].value_counts().head(20).plot.bar(color = color, figsize = (15, 9))
#plt.title('Distribution of crime over the day', fontsize = 20)
#plt.show()
# Stacked bar chart showing, for every crime category, the share of incidents
# that falls in each police district; saved to districtvscategoryofcrime.jpg.
#
# Pin the style explicitly so this chart does not depend on whichever style
# an earlier chart happened to leave active.
with plt.style.context('fivethirtyeight'):
    # Create the axes up front and hand them to pandas: DataFrame.plot opens
    # a figure of its own when no `ax` is given, which would leave a
    # separately created figure empty and open.
    fig, ax = plt.subplots(figsize=(18, 12))
    df = pd.crosstab(data['Category'], data['PdDistrict'])
    # Normalise each row so the stacked segments of a category sum to 1.
    shares = df.div(df.sum(axis=1).astype(float), axis=0)
    # One grey per district column. Start above 0 because the low end of
    # 'Greys' is white, which would make the first segment nearly invisible.
    color = plt.cm.Greys(np.linspace(0.15, 1, df.shape[1]))
    shares.plot.bar(stacked=True, color=color, ax=ax)
    ax.set_title('District vs Category of Crime', fontweight=30, fontsize=20)
    plt.xticks(rotation=90)
    fig.savefig("districtvscategoryofcrime.jpg")
# The image on disk is the only output; release the figure's memory.
plt.close(fig)