Skip to content

Commit

Permalink
파리 연관검색어 전처리 sql문
Browse files Browse the repository at this point in the history
  • Loading branch information
jaehyung-99 committed Dec 11, 2022
1 parent 9347f70 commit 8cb82fb
Showing 1 changed file with 60 additions and 0 deletions.
60 changes: 60 additions & 0 deletions preProcessing/parisContentPreProcessing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Staging table that receives the raw comma-separated rows of
-- parisPostResult.csv. cnt is kept as a string at this stage; it is cast to
-- int when the rows are copied into p_temp2.
-- `if exists` keeps the drop from failing on a first run, even when
-- hive.exec.drop.ignorenonexistent is turned off.
drop table if exists p_temp;

create table if not exists p_temp (
    no      int,
    content string,
    mor     string,
    cnt     string
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile;

-- Intermediate table holding the filtered rows of p_temp without the `no`
-- column and with cnt stored as an int.
-- `if exists` keeps the drop from failing on a first run, even when
-- hive.exec.drop.ignorenonexistent is turned off.
drop table if exists p_temp2;

create table if not exists p_temp2 (
    content string,
    mor     string,
    cnt     int
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile;

-- Load the CSV stored on HDFS into the p_temp staging table.
-- NOTE(review): `load data inpath` without `local` moves the HDFS file into the
-- table's storage location instead of copying it, so the source path is
-- presumably gone after the first run, and the `drop table p_temp` at the top
-- of this script would then discard the only copy if p_temp is a managed
-- table. Confirm, and keep a copy of the CSV, before re-running.
load data inpath 'hdfs:///user/maria_dev/projectData/parisPostResult.csv'
into table p_temp;

-- Fill p_temp2 with the rows of p_temp whose mor is 'Noun', with cnt converted
-- to int and the rows sorted by cnt in descending order.
-- Filtering, casting and sorting are done in one statement, so p_temp2 is
-- written once and is never read and overwritten by the same query.
-- A cnt value that is not a valid integer becomes NULL after the cast.
insert overwrite table p_temp2
select
    nouns.content,
    nouns.mor,
    nouns.cnt
from (
    -- Cast inside the subquery so the outer order by compares integers,
    -- not the original strings.
    select
        content,
        mor,
        cast(cnt as int) as cnt
    from p_temp
    where mor = 'Noun'
) nouns
order by nouns.cnt desc;

-- Final word-count table: every row of p_temp2 plus a 1-based sequence
-- number `no` that follows descending cnt.
-- `if exists` keeps the drop from failing on a first run, even when
-- hive.exec.drop.ignorenonexistent is turned off.
drop table if exists p_wordCnt;

create table if not exists p_wordCnt (
    no      int,
    content string,
    mor     string,
    cnt     int
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile;

-- `no` is produced by the built-in row_number() window function rather than
-- the hive-contrib UDFRowSequence UDF. That UDF counts rows per task in the
-- order they happen to be read, so numbers can repeat or ignore cnt when the
-- query runs in more than one task; it also required a version-pinned jar at
-- a hard-coded HDFS path. row_number() numbers the whole result once, in the
-- order given in the over() clause.
-- content is a tie-breaker so rows with equal cnt are numbered the same way
-- on every run.
insert overwrite table p_wordCnt
select
    row_number() over (order by cnt desc, content) as no,
    content,
    mor,
    cnt
from p_temp2;

-- Display the final result. The columns are listed explicitly and the rows
-- are ordered by `no`, because a select without order by has no guaranteed
-- row order.
select
    no,
    content,
    mor,
    cnt
from p_wordCnt
order by no;

0 comments on commit 8cb82fb

Please sign in to comment.