Skip to content

Commit

Permalink
enhancement for stackoverflow survey
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] committed Sep 13, 2019
1 parent 1b1c878 commit cf0d268
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,2 +1,173 @@
# stackoverflow 用户调查问卷分析
## 课程demo
```
sudo bin/logstash -f ./logstash-stackoverflow-survey.conf
PUT final-stackoverflow-survey
{
"mappings": {
"dynamic_templates": [
{
"strings_as_keywords": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
]
}
}
数字相关
YearsCode
WorkWeekHrs
Age
Age1stCode 16
YearsCodePro
# Ingest pipeline: splits the ';'-separated multi-answer fields into arrays and parses the numeric fields as integers (0 when parsing fails). ignore_missing lets a split skip documents that lack the field instead of failing them.
PUT _ingest/pipeline/stackoverflow_pipeline
{
  "description": "Pipeline for stackoverflow survey",
  "processors": [
    {
      "split": {
        "field": "DatabaseDesireNextYear",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "DatabaseWorkedWith",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "DevEnviron",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "LanguageWorkedWith",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "MiscTechDesireNextYear",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "PlatformWorkedWith",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "PlatformDesireNextYear",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "WebFrameWorkedWith",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "WebFrameDesireNextYear",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "split": {
        "field": "Containers",
        "separator": ";",
        "ignore_missing": true
      }
    },
    {
      "script": {
        "source": """
try{
ctx.YearsCode = Integer.parseInt(ctx.YearsCode);
}catch(Exception e){
ctx.YearsCode = 0;
}
"""
      }
    },
    {
      "script": {
        "source": """
try{
ctx.WorkWeekHrs = Integer.parseInt(ctx.WorkWeekHrs);
}catch(Exception e){
ctx.WorkWeekHrs = 0;
}
"""
      }
    },
    {
      "script": {
        "source": """
try{
ctx.Age = Integer.parseInt(ctx.Age);
}catch(Exception e){
ctx.Age = 0;
}
"""
      }
    },
    {
      "script": {
        "source": """
try{
ctx.Age1stCode = Integer.parseInt(ctx.Age1stCode);
}catch(Exception e){
ctx.Age1stCode = 0;
}
"""
      }
    },
    {
      "script": {
        "source": """
try{
ctx.YearsCodePro = Integer.parseInt(ctx.YearsCodePro);
}catch(Exception e){
ctx.YearsCodePro = 0;
}
"""
      }
    }
  ]
}
POST _reindex?wait_for_completion=false
{
"source": {
"index": "stackoverflow-survey-raw"
},
"dest": {
"index": "final-stackoverflow-survey",
"pipeline": "stackoverflow_pipeline"
}
}
GET final-stackoverflow-survey/_mapping
```
## 参考链接
http://stackoverflow.com/research/

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Logstash pipeline: reads the Stack Overflow survey CSV from disk, parses each
# line into named fields and indexes the result into Elasticsearch.

input {
  file {
    # Machine-specific absolute path; adjust to where the CSV lives locally.
    path => "/Users/yiruan/geektime/logstash-7.3.2/survey_results_public.csv"
    # Read the file from its first line rather than tailing new lines only.
    start_position => "beginning"
    # Do not persist the read offset, so every run re-reads the whole file.
    sincedb_path => "/dev/null"
  }
}

filter {
  csv {
    # Values without a matching entry in `columns` are discarded instead of
    # being stored under generated names.
    autogenerate_column_names => false
    # Empty CSV values produce no field at all on the event.
    skip_empty_columns => true

    # Column names, in file order. "Trans" and "Sexuality" must be two separate
    # entries; merged into one string, every later value is stored under the
    # wrong name and the last column is lost.
    columns => [
      "Respondent","MainBranch","Hobbyist","OpenSourcer","OpenSource","Employment","Country","Student","EdLevel","UndergradMajor",
      "EduOther","OrgSize","DevType","YearsCode","Age1stCode","YearsCodePro","CareerSat","JobSat","MgrIdiot","MgrMoney",
      "MgrWant","JobSeek","LastHireDate","LastInt","FizzBuzz","JobFactors","ResumeUpdate","CurrencySymbol","CurrencyDesc","CompTotal",
      "CompFreq","ConvertedComp","WorkWeekHrs","WorkPlan","WorkChallenge","WorkRemote","WorkLoc","ImpSyn","CodeRev","CodeRevHrs",
      "UnitTests","PurchaseHow","PurchaseWhat","LanguageWorkedWith","LanguageDesireNextYear","DatabaseWorkedWith","DatabaseDesireNextYear","PlatformWorkedWith","PlatformDesireNextYear","WebFrameWorkedWith",
      "WebFrameDesireNextYear","MiscTechWorkedWith","MiscTechDesireNextYear","DevEnviron","OpSys","Containers","BlockchainOrg","BlockchainIs","BetterLife","ITperson",
      "OffOn","SocialMedia","Extraversion","ScreenName","SOVisit1st","SOVisitFreq","SOVisitTo","SOFindAnswer","SOTimeSaved","SOHowMuchTime",
      "SOAccount","SOPartFreq","SOJobs","EntTeams","SOComm","WelcomeChange","SONewContent","Age","Gender","Trans",
      "Sexuality","Ethnicity","Dependents","SurveyLength","SurveyEase"
    ]
  }

  # Drop the CSV header row: it is parsed like any other line, so its first
  # field holds the column name itself. (Assumes the file starts with a header
  # line -- confirm against the CSV.)
  if ([Respondent] == "Respondent") {
    drop {}
  }

  # Remove the raw line and Logstash bookkeeping fields; only the parsed survey
  # columns are indexed.
  mutate { remove_field => ["message", "@version", "@timestamp", "host"] }
}

output {
  # Print one dot per event as a lightweight progress indicator.
  stdout { codec => "dots" }
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "stackoverflow-survey-raw"
    # NOTE(review): document_type is deprecated in Logstash 7.x -- confirm
    # whether it can be removed for the targeted Elasticsearch version.
    document_type => "_doc"
  }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
POST _reindex?wait_for_completion=false
{
"source": {
"index": "stackoverflow-surveys-2019"
"index": "stackoverflow-survey-raw"
},
"dest": {
"index": "stackoverflow-surveys-results",
"index": "final-stackoverflow-survey",
"pipeline": "stackoverflow_pipeline"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
PUT final-stackoverflow-survey
{
"mappings": {
"dynamic_templates": [
{
"strings_as_keywords": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
]
}
}

0 comments on commit cf0d268

Please sign in to comment.