Skip to content

Commit

Permalink
hive를 이용한 데이터 전처리
Browse files Browse the repository at this point in the history
  • Loading branch information
jaehyung-99 committed Dec 9, 2022
1 parent e1bd3d7 commit ec9923c
Show file tree
Hide file tree
Showing 2 changed files with 2,950 additions and 0 deletions.
57 changes: 57 additions & 0 deletions preProcessing/hivePreProcessing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- Staging table that receives the raw comma-separated rows exactly as loaded.
-- cnt is declared as string here; it is converted to int when the rows are
-- copied into temp2.
-- "if exists" keeps the script re-runnable on a database where the table has
-- never been created (a bare drop fails there when
-- hive.exec.drop.ignorenonexistent is set to false).
drop table if exists temp;

create table if not exists temp(
    no int,
    content string,
    mor string,
    cnt string
)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

-- Intermediate table holding the filtered rows with cnt typed as int.
-- It is recreated on every run; "if exists" prevents the drop from failing
-- when the table is not there yet (relevant when
-- hive.exec.drop.ignorenonexistent is set to false).
drop table if exists temp2;

create table if not exists temp2(
    content string,
    mor string,
    cnt int
)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

-- Load the CSV result file from HDFS into the temp staging table.
-- No "overwrite" keyword is given, so the rows are added to whatever the
-- table already holds.
load data
    inpath 'hdfs:///user/maria_dev/projectData/countryPostResult.csv'
    into table temp;

-- Replace the contents of temp2 with the rows of temp whose mor value is one
-- of the four listed tags, converting cnt from string to int on the way
-- (a cnt value that cannot be parsed as an integer becomes NULL).
-- String literals use single quotes, matching the rest of this script and
-- standard SQL, where double quotes denote identifiers.
insert overwrite table temp2
select
    content,
    mor,
    cast(cnt as int) as cnt
from temp
where mor in ('Noun', 'Verb', 'Adverb', 'Adjective');

-- Rewrite temp2 in place, sorted by cnt from highest to lowest.
-- content and mor act as tie-breakers so rows that share the same cnt come
-- out in the same order on every run instead of an arbitrary one.
-- NOTE(review): Hive does not promise that a later scan of the table returns
-- rows in the order they were written; any consumer that needs this order
-- should state its own "order by" -- confirm against downstream usage.
insert overwrite table temp2
select
    content,
    mor,
    cnt
from temp2
order by
    cnt desc,
    content,
    mor;

-- Final output table: every row of temp2 plus a sequence number "no" that
-- starts at 1 for the highest cnt and increases as cnt decreases.
-- The table is recreated on every run; "if exists" keeps the drop from
-- failing when the table is absent.
drop table if exists wordCnt;

create table if not exists wordCnt(
    no int,
    content string,
    mor string,
    cnt int
)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

-- The sequence number comes from the built-in row_number() window function
-- rather than a stateful row-sequence UDF. A stateful counter numbers rows in
-- whatever order each task happens to read them and restarts per task, so it
-- is only correct when temp2 is read by a single task in stored order.
-- row_number() with an explicit "order by" numbers the rows globally and
-- needs no additional jar or temporary function.
-- content and mor break ties between equal cnt values so the numbering is
-- reproducible across runs.
insert overwrite table wordCnt
select
    row_number() over (order by cnt desc, content, mor) as no,
    content,
    mor,
    cnt
from temp2;
Loading

0 comments on commit ec9923c

Please sign in to comment.