diff --git a/.gitignore b/.gitignore index cf59c8e..59f2a17 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ /.idea/ /rent.db -*html -*csv \ No newline at end of file +*csv +*/.ipynb_checkpoints/ diff --git "a/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.html" "b/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.html" new file mode 100644 index 0000000..3704091 --- /dev/null +++ "b/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.html" @@ -0,0 +1,57593 @@ + + + + +倒闭企业数据分析 + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+

1. 数据集说明

这是一份来自和鲸社区的倒闭企业数据集,总计 6,272 条记录,大小约为 2.3 MB,包含 21 个字段。

+ +
+
+
+
+
+
In [2]:
+
+
+
import pandas as pd

# Read the closed-company dataset. The relative path means the CSV has to be
# in the notebook's working directory.
CSV_PATH = 'com.csv'
data = pd.read_csv(CSV_PATH)

# Preview the first rows to check the columns were parsed as expected.
data.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
bianhcom_namecom_addrcatse_catcom_desborn_datadeath_datalive_daysfinancing...death_reasoninvest_nameceo_nameceo_desceo_per_desUnnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20
01麦子金服上海金融借贷麦子金服是一家移动互联网金融服务集团,为个人和企业提供包括财富管理、股权投资、投融资咨询等综...2015-03-012019-11-251730B轮...政策监管 法律法规风险海通开元&中缔资本黄大容董事长黄大容,80后,经济学硕士,精通中英德三国语言。连续创业者,23岁创立第一家公司,25岁涉足...NaNNaNNaNNaNNaN
11拓道金服浙江金融借贷拓道金服是一家专注于汽车抵押贷款这一细分市场的P2P互联网金融公司。拓道金服通过互联网民间的...2013-11-012019-11-242214A轮...政策监管 法律法规风险蓝山中国资本&帮实资本&宏桥资本张罗军创始人杭州拓道科技有限公司执行董事兼总经理,重庆邮电学院计算机专业,创立杭州仁润科技有限公司,具有...NaNNaNNaNNaNNaN
21云柏科技广东医疗健康医疗器械及硬件云柏科技是一家智能健康检测腕表研发商,专注于智能可穿戴设备领域。主要产品包括可穿戴式多功能云...2014-09-012019-11-011887尚未获投...行业竞争NaNNaNNaNNaNNaNNaNNaNNaNNaN
31迷你生鲜福建电子商务生鲜食品迷你生鲜是一家会员制水果生鲜电商网站,平台产品由基地直供,采购深入全国及东南亚各地区,减少中...2017-11-012019-10-29727尚未获投...现金流断裂 行业竞争NaNNaNNaNNaNNaNNaNNaNNaNNaN
41一生健康北京医疗健康寻医诊疗微爱康一款基于移动互联网的癌症患者互助型垂直社区产品,通过在线社会化问答和类轻博客交流模式,...2015-01-012019-10-171750尚未获投...政策监管 法律法规风险NaN张耀斌CEO张耀斌,为一生(北京)健康科技有限公司的创始人。NaNNaNNaNNaNNaN
+

5 rows × 21 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

2. 数据分析可视化

+
+
+
+
+
+
+

2.1 死亡公司的地区分布

+
+
+
+
+
+
In [3]:
+
+
+
from pyecharts import options as opts
from pyecharts.charts import Map

# Trim stray whitespace so the same region is not split into several groups.
# The vectorised ``.str`` accessor leaves missing addresses as NaN, whereas
# calling ``x.strip()`` on a NaN float would raise AttributeError.
data['com_addr'] = data['com_addr'].str.strip()

# Number of closed companies per region (groupby skips NaN keys by default).
s = data.groupby('com_addr').size()

c = (
    Map()
    .add("死亡企业数量", [*s.items()], "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="地区分布"),
        # Upper bound of the visual-map colour scale.
        visualmap_opts=opts.VisualMapOpts(max_=200),
    )
)
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.2 行业排行TOP10

+
+
+
+
+
+
In [4]:
+
+
+
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker  # NOTE(review): not referenced in this cell

# Ten industries with the most closed companies, largest first.
# ``s`` maps industry name -> company count.
s = (
    data.groupby('cat')
    .size()
    .sort_values(ascending=False)
    .head(10)
    .to_dict()
)

c = Bar()
c.add_xaxis(list(s))
c.add_yaxis("死亡企业数量", list(s.values()))
c.set_global_opts(title_opts=opts.TitleOpts(title="行业排行TOP10"))
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[4]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.3 细分领域TOP20

+
+
+
+
+
+
In [5]:
+
+
+
# Twenty sub-sectors with the most closed companies. The selection is then
# re-sorted ascending before plotting -- presumably so that the largest bar
# ends up at the top once the axes are swapped.
top_counts = data.groupby('se_cat').size().sort_values(ascending=False).head(20)
s = top_counts.sort_values().to_dict()

# Relies on ``Bar`` and ``opts`` imported by an earlier cell.
c = Bar()
c.add_xaxis(list(s))
c.add_yaxis("死亡企业数量", list(s.values()))
c.reversal_axis()  # draw horizontal bars
c.set_series_opts(label_opts=opts.LabelOpts(position="right"))
c.set_global_opts(title_opts=opts.TitleOpts(title="细分领域TOP20"))
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[5]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.4 年份分布

+
+
+
+
+
+
In [6]:
+
+
+
# Take the four-digit year prefix of each date string. ``.str[:4]`` keeps a
# missing date as NaN, whereas slicing a NaN float with ``x[:4]`` would raise.
# NOTE(review): assumes dates look like YYYY-MM-DD, as in the preview above.
data['born_year'] = data['born_data'].str[:4]
data['death_year'] = data['death_data'].str[:4]

# Companies founded / closed per year.
s1 = data.groupby('born_year').size()
s2 = data.groupby('death_year').size()
s1 = pd.DataFrame({'year': s1.index, 'born': s1.values})
s2 = pd.DataFrame({'year': s2.index, 'death': s2.values})

# Outer join so a year that has only births or only deaths is kept with a
# zero on the other side; the default inner join would silently drop it.
s = pd.merge(s1, s2, on='year', how='outer').fillna(0)
s[['born', 'death']] = s[['born', 'death']].astype(int)

# Years are four-character strings, so a lexicographic comparison orders them
# chronologically. Keep 2009 onwards and plot in chronological order.
s = s[s['year'] > '2008'].sort_values('year')

# Relies on ``Bar`` and ``opts`` imported by an earlier cell.
c = (
    Bar()
    .add_xaxis(s['year'].to_list())
    .add_yaxis("新生企业数量", s['born'].to_list())
    .add_yaxis("死亡企业数量", s['death'].to_list())
    .set_global_opts(title_opts=opts.TitleOpts(title="年份分布"))
)
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.5 企业存活时长

+
+
+
+
+
+
In [7]:
+
+
+
def live_year(x):
    """Return the survival-bracket label for a lifespan of ``x`` days.

    A year is counted as 365 days and every upper bound is exclusive, so a
    lifespan of exactly 365 days is labelled '1-2年'. Any value that is not
    below ten years (3650 days) is labelled '10年以上'.
    """
    # (exclusive upper bound in days, label), checked in ascending order.
    brackets = (
        (365, '不到1年'),
        (365 * 2, '1-2年'),
        (365 * 3, '2-3年'),
        (365 * 4, '3-4年'),
        (365 * 5, '4-5年'),
        (365 * 10, '5-10年'),
    )
    for upper_bound, label in brackets:
        if x < upper_bound:
            return label
    return '10年以上'
+
from pyecharts import options as opts
from pyecharts.charts import Pie

# Label every company with its survival bracket, then count companies per
# bracket. ``live_year`` is the helper defined at the top of this cell.
bracket_labels = data['live_days'].apply(live_year)
s = data.groupby(bracket_labels).size()

c = Pie()
c.add("", [*s.items()])
c.set_global_opts(title_opts=opts.TitleOpts(title="企业存活时长"))
# Show each slice as "<bracket>: <count>".
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.6 投资人词云

+
+
+
+
+
+
In [8]:
+
+
+
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType

# Count how often each investor appears. One cell may list several investors
# joined by '&'; companies without a recorded investor are skipped.
invest = {}
for row in data['invest_name'].values:
    if pd.isnull(row):
        continue
    for name in row.split('&'):
        if name in invest:
            invest[name] += 1
        else:
            invest[name] = 1

c = WordCloud()
c.add("", [*invest.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)
c.set_global_opts(title_opts=opts.TitleOpts(title="投资人词云"))
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.7 倒闭原因词云

+
+
+
+
+
+
In [9]:
+
+
+
from collections import Counter

# Count how often each closure reason appears. A cell may hold several
# reasons separated by whitespace. ``split()`` with no argument collapses
# runs of whitespace and ignores leading/trailing blanks, so no empty-string
# "reason" is counted (``split(' ')`` would produce one for every extra space).
death_reason = Counter()
for row in data['death_reason'].values:
    if pd.isnull(row):
        continue  # no reason recorded for this company
    death_reason.update(row.split())

# Relies on ``WordCloud``, ``SymbolType`` and ``opts`` imported by an earlier cell.
c = (
    WordCloud()
    .add("", [*death_reason.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)
    .set_global_opts(title_opts=opts.TitleOpts(title="倒闭原因词云"))
)
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+
+

2.8 ceo描述词云

+
+
+
+
+
+
In [12]:
+
+
+
import jieba

# Word frequencies over the CEO biography texts, segmented with jieba.
ceo_per_des = {}
for row in data['ceo_per_des'].values:
    if pd.isnull(row):
        continue  # no biography recorded for this company
    for name in jieba.lcut(row):
        # Ignore single-character tokens (typically punctuation or particles)
        # but keep scanning: the previous ``break`` dropped every remaining
        # word of the biography as soon as the first such token was seen.
        if len(name) == 1:
            continue
        ceo_per_des[name] = ceo_per_des.get(name, 0) + 1

# Relies on ``WordCloud``, ``SymbolType`` and ``opts`` imported by an earlier cell.
c = (
    WordCloud()
    .add("", [*ceo_per_des.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)
    .set_global_opts(title_opts=opts.TitleOpts(title="ceo描述词云"))
)
c.render_notebook()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[12]:
+ + + +
+ + + +
+ + + +
+ +
+ +
+
+ +
+
+
+ + + + + + diff --git "a/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.ipynb" "b/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.ipynb" new file mode 100644 index 0000000..91adb7e --- /dev/null +++ "b/DeathCompany/\345\200\222\351\227\255\344\274\201\344\270\232\346\225\260\346\215\256\345\210\206\346\236\220.ipynb" @@ -0,0 +1,44510 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "worldwide-tampa", + "metadata": {}, + "source": [ + "# 1. 数据集说明\n", + "\n", + "这是一份来自[和鲸社区](https://www.kesci.com/mw/dataset/5e023cd12823a10036af49b4/file)的倒闭企业数据集,总计 6,272 条记录,大小为 2.3 M,包含 21 个字段。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "economic-orlando", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bianhcom_namecom_addrcatse_catcom_desborn_datadeath_datalive_daysfinancing...death_reasoninvest_nameceo_nameceo_desceo_per_desUnnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20
01麦子金服上海金融借贷麦子金服是一家移动互联网金融服务集团,为个人和企业提供包括财富管理、股权投资、投融资咨询等综...2015-03-012019-11-251730B轮...政策监管 法律法规风险海通开元&中缔资本黄大容董事长黄大容,80后,经济学硕士,精通中英德三国语言。连续创业者,23岁创立第一家公司,25岁涉足...NaNNaNNaNNaNNaN
11拓道金服浙江金融借贷拓道金服是一家专注于汽车抵押贷款这一细分市场的P2P互联网金融公司。拓道金服通过互联网民间的...2013-11-012019-11-242214A轮...政策监管 法律法规风险蓝山中国资本&帮实资本&宏桥资本张罗军创始人杭州拓道科技有限公司执行董事兼总经理,重庆邮电学院计算机专业,创立杭州仁润科技有限公司,具有...NaNNaNNaNNaNNaN
21云柏科技广东医疗健康医疗器械及硬件云柏科技是一家智能健康检测腕表研发商,专注于智能可穿戴设备领域。主要产品包括可穿戴式多功能云...2014-09-012019-11-011887尚未获投...行业竞争NaNNaNNaNNaNNaNNaNNaNNaNNaN
31迷你生鲜福建电子商务生鲜食品迷你生鲜是一家会员制水果生鲜电商网站,平台产品由基地直供,采购深入全国及东南亚各地区,减少中...2017-11-012019-10-29727尚未获投...现金流断裂 行业竞争NaNNaNNaNNaNNaNNaNNaNNaNNaN
41一生健康北京医疗健康寻医诊疗微爱康一款基于移动互联网的癌症患者互助型垂直社区产品,通过在线社会化问答和类轻博客交流模式,...2015-01-012019-10-171750尚未获投...政策监管 法律法规风险NaN张耀斌CEO张耀斌,为一生(北京)健康科技有限公司的创始人。NaNNaNNaNNaNNaN
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " bianh com_name com_addr cat se_cat \\\n", + "0 1 麦子金服 上海 金融 借贷 \n", + "1 1 拓道金服 浙江 金融 借贷 \n", + "2 1 云柏科技 广东 医疗健康 医疗器械及硬件 \n", + "3 1 迷你生鲜 福建 电子商务 生鲜食品 \n", + "4 1 一生健康 北京 医疗健康 寻医诊疗 \n", + "\n", + " com_des born_data death_data \\\n", + "0 麦子金服是一家移动互联网金融服务集团,为个人和企业提供包括财富管理、股权投资、投融资咨询等综... 2015-03-01 2019-11-25 \n", + "1 拓道金服是一家专注于汽车抵押贷款这一细分市场的P2P互联网金融公司。拓道金服通过互联网民间的... 2013-11-01 2019-11-24 \n", + "2 云柏科技是一家智能健康检测腕表研发商,专注于智能可穿戴设备领域。主要产品包括可穿戴式多功能云... 2014-09-01 2019-11-01 \n", + "3 迷你生鲜是一家会员制水果生鲜电商网站,平台产品由基地直供,采购深入全国及东南亚各地区,减少中... 2017-11-01 2019-10-29 \n", + "4 微爱康一款基于移动互联网的癌症患者互助型垂直社区产品,通过在线社会化问答和类轻博客交流模式,... 2015-01-01 2019-10-17 \n", + "\n", + " live_days financing ... death_reason invest_name ceo_name ceo_des \\\n", + "0 1730 B轮 ... 政策监管 法律法规风险 海通开元&中缔资本 黄大容 董事长 \n", + "1 2214 A轮 ... 政策监管 法律法规风险 蓝山中国资本&帮实资本&宏桥资本 张罗军 创始人 \n", + "2 1887 尚未获投 ... 行业竞争 NaN NaN NaN \n", + "3 727 尚未获投 ... 现金流断裂 行业竞争 NaN NaN NaN \n", + "4 1750 尚未获投 ... 政策监管 法律法规风险 NaN 张耀斌 CEO \n", + "\n", + " ceo_per_des Unnamed: 16 Unnamed: 17 \\\n", + "0 黄大容,80后,经济学硕士,精通中英德三国语言。连续创业者,23岁创立第一家公司,25岁涉足... NaN NaN \n", + "1 杭州拓道科技有限公司执行董事兼总经理,重庆邮电学院计算机专业,创立杭州仁润科技有限公司,具有... 
NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 张耀斌,为一生(北京)健康科技有限公司的创始人。 NaN NaN \n", + "\n", + " Unnamed: 18 Unnamed: 19 Unnamed: 20 \n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd \n", + "data = pd.read_csv('com.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "proud-victor", + "metadata": {}, + "source": [ + "# 2、数据分析可视化" + ] + }, + { + "cell_type": "markdown", + "id": "spectacular-shadow", + "metadata": {}, + "source": [ + "## 2.1 死亡公司的地区分布" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "described-blogger", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pyecharts import options as opts\n", + "from pyecharts.charts import Map\n", + "\n", + "data['com_addr'] = data['com_addr'].apply(lambda x: x.strip())\n", + "s = data.groupby('com_addr').size()\n", + "\n", + "c = (\n", + "Map()\n", + " .add(\"死亡企业数量\", [*s.items()], \"china\")\n", + " .set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"地区分布\"),\n", + " visualmap_opts=opts.VisualMapOpts(max_=200),\n", + " )\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "suitable-password", + "metadata": {}, + "source": [ + "## 2.2 行业排行TOP10" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "extraordinary-hometown", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pyecharts import options as opts\n", + "from pyecharts.charts import Bar\n", + "from pyecharts.faker import Faker\n", + "\n", + "s = data.groupby('cat').size().sort_values(ascending=False)[:10].to_dict()\n", + "\n", + "c = (\n", + " Bar()\n", + " .add_xaxis(list(s.keys()))\n", + " .add_yaxis(\"死亡企业数量\", list(s.values()))\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"行业排行TOP10\"))\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "continental-printing", + "metadata": {}, + "source": [ + "## 2.3 细分领域TOP20" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "genetic-sociology", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = data.groupby('se_cat').size().sort_values(ascending=False)[:20].sort_values(ascending=True).to_dict()\n", + "\n", + "c = (\n", + " Bar()\n", + " .add_xaxis(list(s.keys()))\n", + " .add_yaxis(\"死亡企业数量\", list(s.values()))\n", + " .reversal_axis()\n", + " .set_series_opts(label_opts=opts.LabelOpts(position=\"right\"))\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"细分领域TOP20\"))\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "assigned-chick", + "metadata": {}, + "source": [ + "## 2.4 年份分布" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "criminal-medline", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['born_year'] = data['born_data'].apply(lambda x: x[:4])\n", + "data['death_year'] = data['death_data'].apply(lambda x: x[:4])\n", + "s1 = data.groupby('born_year').size()\n", + "s2 = data.groupby('death_year').size()\n", + "s1 = pd.DataFrame({'year': s1.index, 'born': s1.values})\n", + "s2 = pd.DataFrame({'year': s2.index, 'death': s2.values})\n", + "s = pd.merge(s1,s2, on='year', suffixes=['born', 'death'])\n", + "s = s[s['year'] > '2008']\n", + "\n", + "c = (\n", + " Bar()\n", + " .add_xaxis( s['year'].to_list())\n", + " .add_yaxis(\"新生企业数量\", s['born'].to_list())\n", + " .add_yaxis(\"死亡企业数量\", s['death'].to_list())\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"年份分布\"))\n", + ")\n", + "c.render_notebook()\n" + ] + }, + { + "cell_type": "markdown", + "id": "manufactured-stuart", + "metadata": {}, + "source": [ + "## 2.5 企业存活时长" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "imperial-quality", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def live_year(x):\n", + " if x < 365:\n", + " return '不到1年'\n", + " if x < 365 * 2:\n", + " return '1-2年'\n", + " if x < 365 * 3:\n", + " return '2-3年'\n", + " if x < 365 * 4:\n", + " return '3-4年'\n", + " if x < 365 * 5:\n", + " return '4-5年'\n", + " if x < 365 * 10:\n", + " return '5-10年'\n", + " return '10年以上'\n", + "\n", + "s = data.groupby(data['live_days'].apply(lambda x: live_year(x))).size()\n", + "\n", + "from pyecharts import options as opts\n", + "from pyecharts.charts import Pie\n", + "\n", + "c = (\n", + " Pie()\n", + " .add(\"\", [*s.items()])\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"企业存活时长\"))\n", + " .set_series_opts(label_opts=opts.LabelOpts(formatter=\"{b}: {c}\"))\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "earned-flashing", + "metadata": {}, + "source": [ + "## 2.6 投资人词云" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "preceding-bahrain", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pyecharts import options as opts\n", + "from pyecharts.charts import WordCloud\n", + "from pyecharts.globals import SymbolType\n", + "\n", + "invest = {}\n", + "for row in data['invest_name'].values:\n", + " if not pd.isnull(row):\n", + " for name in row.split('&'):\n", + " invest[name] = invest.get(name, 0) + 1\n", + " \n", + "c = (\n", + " WordCloud()\n", + " .add(\"\", [*invest.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"投资人词云\"))\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "eleven-chance", + "metadata": {}, + "source": [ + " ## 2.7 倒闭原因词云" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "joint-driver", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "death_reason = {}\n", + "for row in data['death_reason'].values:\n", + " if not pd.isnull(row):\n", + " for name in row.split(' '):\n", + " death_reason[name] = death_reason.get(name, 0) + 1\n", + " \n", + "c = (\n", + " WordCloud()\n", + " .add(\"\", [*death_reason.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"倒闭原因词云\"))\n", + ")\n", + "c.render_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "formed-citation", + "metadata": {}, + "source": [ + "## 2.8 ceo描述词云" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "wooden-norman", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import jieba\n", + "ceo_per_des = {}\n", + "for row in data['ceo_per_des'].values:\n", + " if not pd.isnull(row):\n", + " result = jieba.lcut(row)\n", + " for name in result:\n", + " if len(name) == 1:\n", + " break\n", + " ceo_per_des[name] = ceo_per_des.get(name, 0) + 1\n", + "\n", + "c = (\n", + " WordCloud()\n", + " .add(\"\", [*ceo_per_des.items()], word_size_range=[20, 100], shape=SymbolType.DIAMOND)\n", + " .set_global_opts(title_opts=opts.TitleOpts(title=\"ceo描述词云\"))\n", + ")\n", + "c.render_notebook()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index d5ae354..e7f4c2f 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,10 @@ | [130 万条深圳通刷卡数据分析](https://github.com/TurboWay/bigdata_analyse/blob/master/SZTcard/深圳通刷卡数据分析.md) | 离线处理 | 清洗 pandas + 分析 impala + 可视化 dbeaver | [百度网盘](https://pan.baidu.com/s/1WslwKXKhVH1q_6u4SvuKkQ) 提取码:t561 | | [10 万条厦门招聘数据分析](https://github.com/TurboWay/bigdata_analyse/blob/master/AmoyJob/2021厦门招聘数据分析.md) | 离线处理 | 清洗 pandas + 分析 hive + 可视化 ( hue + pyecharts ) + 预测 sklearn | [百度网盘](https://pan.baidu.com/s/199_Rss8Y2nLBAbM1qBycgA) 提取码:ef1n | | [7000 
条租房数据分析](https://github.com/TurboWay/bigdata_analyse/blob/master/RentFromDanke/租房数据分析.md) | 离线处理 | 清洗 pandas + 分析 sqlite + 可视化 matplotlib | [百度网盘](https://pan.baidu.com/s/1l1x5qurJdkyUxAuhknj_Qw) 提取码:9en3 | +| [6000 条倒闭企业数据分析](https://github.com/TurboWay/bigdata_analyse/blob/master/DeathCompany/倒闭企业数据分析.html) | 离线处理 | 清洗 pandas + 分析 pandas + 可视化 (jupyter notebook + pyecharts) | [百度网盘](https://pan.baidu.com/s/1W1KT2XialQK7gmyXdszfKw) 提取码:y7x4 | ## refer > 1. [https://tianchi.aliyun.com/dataset/](https://tianchi.aliyun.com/dataset/) -> 2. [https://opendata.sz.gov.cn/data/api/toApiDetails/29200_00403601](https://opendata.sz.gov.cn/data/api/toApiDetails/29200_00403601) \ No newline at end of file +> 2. [https://opendata.sz.gov.cn/data/api/toApiDetails/29200_00403601](https://opendata.sz.gov.cn/data/api/toApiDetails/29200_00403601) +> 3. [https://www.kesci.com/home/dataset](https://www.kesci.com/home/dataset) \ No newline at end of file