Вы находитесь на странице: 1 из 1

# Upload the raw Etlhive text file from the local filesystem into HDFS.
# The local path must be a single unbroken token: a line break inside the
# quotes becomes part of the file name and the upload fails.
hadoop fs -put '/home/lokeshm/dataset/POC/Etlhive.txt' /data/poc/site/Etlhive
# Disabled alternative (appears to be a custom word-count jar, kept for reference):
# hadoop jar wordcount.jar org.myorg.WordCount /data/poc/site/Etlhive /data/poc/midout/EtlhiveCount

# Run the word-count example bundled with Hadoop 1.2.1 over the uploaded file;
# results are written under /data/poc/midout/EtlhiveCount.
hadoop jar hadoop-1.2.1/hadoop-examples-1.2.1.jar wordcount /data/poc/site/Etlhive /data/poc/midout/EtlhiveCount
-- Pick the 20 most frequent words from the word-count job output.
-- Input is loaded with the default loader, so fields arrive untyped (bytearray).
A = LOAD '/data/poc/midout/EtlhiveCount/part-r-00000';
-- Cast explicitly: the count must be numeric, otherwise ORDER BY compares raw
-- bytes and "9" would rank above "10".
B = FOREACH A GENERATE (chararray)$0 AS col1, (int)$1 AS col2;
-- Highest counts first.
C = ORDER B BY col2 DESC;
D = LIMIT C 20;
STORE D INTO '/data/poc/wordcountfilter/Etlhivetop20';
# Copy each domain category CSV from the local dataset directory into its own
# HDFS location under /data/poc/categories/<Category>.
for category in Banking Ecommerce Educational Entertainment Health Travel; do
    hadoop fs -copyFromLocal "/home/lokeshm/dataset/POC/${category}.csv" "/data/poc/categories/${category}"
done
-- Register the jar (presumably containing the word-match UDF class) with the
-- current Hive session.
ADD JAR /home/lokeshm/dataset/wordmatch.jar;
-- List the jars registered in this session to confirm the one above was added.
list jars;
-- Bind the class com.etlhive.util.WordMatchCount to the function name
-- wordcountmatch; TEMPORARY means the function lasts only for this session.
CREATE TEMPORARY FUNCTION wordcountmatch AS 'com.etlhive.util.WordMatchCount';
-- Table pairing a top-words input file with each domain file and the number of
-- matching words. Stored as comma-separated text, one record per line.
CREATE TABLE WORDCOUNTTAB (
    caseno       INT    COMMENT 'For maintaining individual casestudy combine',
    inputfile    STRING COMMENT 'top words count file',
    domainfile   STRING COMMENT 'all domain files for input file',
    domain       STRING COMMENT 'domain name',
    matchedcount INT    COMMENT 'how much word match for each domain'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
-- Load the local CSV into WORDCOUNTTAB. Without OVERWRITE the file's rows are
-- appended to whatever the table already holds.
LOAD DATA LOCAL INPATH '/home/lokeshm/dataset/POC/allsource.csv'
INTO TABLE WORDCOUNTTAB;

Вам также может понравиться