Hive Exercise

Session 5
HIVE EXERCISE
A. Create Database
----------------------
-- Creates the database that holds the tables used in this exercise.
create database retail;
B. Select Database
-----------------
-- Makes retail the current database for the statements that follow.
use retail;
C. Create table for storing transactional records

------------------------------------------------
-- Transaction records stored as comma-delimited text, one row per line.
create table txnrecords (
    txnno    INT,
    txndate  STRING,
    custno   INT,
    amount   DOUBLE,
    category STRING,
    product  STRING,
    city     STRING,
    state    STRING,
    spendby  STRING
)
row format delimited
fields terminated by ','
stored as textfile;
D. Load the data into the table

------------------------------
-- LOCAL takes the file from the client's filesystem; OVERWRITE replaces
-- whatever txnrecords already contains.
LOAD DATA LOCAL INPATH 'txns1.txt' OVERWRITE INTO TABLE txnrecords;
E. Describing metadata or schema of the table

--------------------------------------------
-- Lists the columns of txnrecords and their types.
describe txnrecords;
F. Counting no of records
------------------------
-- Total number of rows loaded into txnrecords.
select count(*) from txnrecords;
G. Counting total spending by category of products

-------------------------------------------------
-- One output row per category with the summed amount of its transactions.
select
    category,
    sum(amount)
from txnrecords
group by category;
H. 10 customers
-------------------
-- Total spend per customer, cut off after 10 rows. There is no ORDER BY,
-- so which 10 customers come back is not defined by the query.
select
    custno,
    sum(amount)
from txnrecords
group by custno
limit 10;
I. Create partitioned table

--------------------------
-- Same columns as txnrecords, except that category is the partition column
-- (declared in PARTITIONED BY, not in the column list). Rows are bucketed
-- by state into 10 buckets.
create table txnrecsByCat (
    txnno   INT,
    txndate STRING,
    custno  INT,
    amount  DOUBLE,
    product STRING,
    city    STRING,
    state   STRING,
    spendby STRING
)
partitioned by (category STRING)
clustered by (state) into 10 buckets
row format delimited
fields terminated by ','
stored as textfile;
J. Configure Hive to allow partitions

-------------------------------------
A query across all partitions can trigger an enormous MapReduce job if the
table data and number of partitions are large. A highly suggested safety
measure is putting Hive into strict mode, which prohibits queries of partitioned
tables without a WHERE clause that filters on partitions. The insert in section
K, however, supplies no static partition value, so dynamic partitioning must be
enabled and its mode set to nonstrict, as in the following session:
-- Session settings applied before the section K insert into txnrecsByCat.
-- nonstrict: presumably needed because that insert gives no static value for
-- the category partition -- confirm against the Hive version in use.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Switches dynamic partitioning on for this session.
set hive.exec.dynamic.partition=true;
-- NOTE(review): txnrecsByCat declares 10 buckets on state; this presumably
-- makes inserts honour them -- confirm the property is still recognised by
-- the Hive version in use.
set hive.enforce.bucketing=true;
K. Load data into partition table

---------------------------------
-- Copies every row of txnrecords into txnrecsByCat. category is selected
-- last so that it supplies the value for PARTITION(category); no static
-- partition value is given.
from txnrecords txn
insert overwrite table txnrecsByCat partition (category)
select
    txn.txnno,
    txn.txndate,
    txn.custno,
    txn.amount,
    txn.product,
    txn.city,
    txn.state,
    txn.spendby,
    txn.category
distribute by category;
==========================
find sales based on age group
==========================
-- Customer master data, stored as comma-delimited text.
create table customer (
    custno     string,
    firstname  string,
    lastname   string,
    age        int,
    profession string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/cloudera/hive/custs' into table customer;
-- out1: one row per transaction, enriched with the customer's details.
create table out1 (
    custno     int,
    firstname  string,
    age        int,
    profession string,
    amount     double,
    product    string
);
-- customer.custno is declared string while txnrecords.custno and out1.custno
-- are int, so the join and the insert rely on implicit type conversion.
insert overwrite table out1
select
    a.custno,
    a.firstname,
    a.age,
    a.profession,
    b.amount,
    b.product
from customer a
join txnrecords b
    on a.custno = b.custno;
select * from out1 limit 100;
-- out2: the out1 columns plus an age-group label in level.
create table out2 (
    custno     int,
    firstname  string,
    age        int,
    profession string,
    amount     double,
    product    string,
    level      string
);
-- Columns are listed in out1's declared order, followed by the label, so they
-- line up positionally with out2. A NULL age matches none of the WHEN
-- branches and is labelled 'others'.
insert overwrite table out2
select
    custno,
    firstname,
    age,
    profession,
    amount,
    product,
    case
        when age < 30 then 'low'
        when age >= 30 and age < 50 then 'middle'
        when age >= 50 then 'old'
        else 'others'
    end as level
from out1;
select * from out2 limit 100;
describe out2;
-- out3: total amount per age-group label taken from out2.
create table out3 (
    level  string,
    amount double
);
insert overwrite table out3
select
    level,
    sum(amount)
from out2
group by level;
==============
simple join
==============
****emp.txt
****swetha,250000,Chennai
****anamika,200000,Kanyakumari
****tarun,300000,Pondi
****anita,250000,Selam
****email.txt
****swetha,swetha@gmail.com
****tarun,tarun@edureka.in
****nagesh,nagesh@yahoo.com
****venkatesh,venki@gmail.com
-- emp.txt is comma-separated (name,salary,city), so the table declares ','
-- as its field delimiter.
create table employee (
    name   string,
    salary float,
    city   string
)
row format delimited
fields terminated by ',';
load data local inpath 'emp.txt' into table employee;
select * from employee where name = 'tarun';
-- email.txt is comma-separated (name,email), so the table declares ',' as
-- its field delimiter.
create table mailid (
    name  string,
    email string
)
row format delimited
fields terminated by ',';
load data local inpath 'email.txt' into table mailid;
-- Inner join: only names present in both employee and mailid.
select a.name, a.city, a.salary, b.email
from employee a
join mailid b
    on a.name = b.name;

-- Left outer join: every employee; email is NULL where mailid has no match.
select a.name, a.city, a.salary, b.email
from employee a
left outer join mailid b
    on a.name = b.name;

-- Right outer join: every mailid row; the employee columns (including
-- a.name) are NULL where employee has no match.
select a.name, a.city, a.salary, b.email
from employee a
right outer join mailid b
    on a.name = b.name;

-- Full outer join: all rows from both tables, NULL-filled on the side
-- that has no match.
select a.name, a.city, a.salary, b.email
from employee a
full outer join mailid b
    on a.name = b.name;

Hive Exercise

Загружено:

Сведения о документе

Исходное описание:

Авторское право

Доступные форматы

Поделиться этим документом

Поделиться или встроить документ

Параметры публикации

Этот документ был вам полезен?

Это неприемлемый материал?

Авторское право:

Доступные форматы

Hive Exercise

Загружено:

Авторское право:

Доступные форматы

Session 5

C. Create table for storing transactional records

D. Load the data into the table

E. Describing metadata or schema of the table

G. Counting total spending by category of products

I. Create partitioned table

J. Configure Hive to allow partitions

K. Load data into partition table

create table customer(custno string, firstname string, lastname string, age

load data local inpath '/home/cloudera/hive/custs' into table customer;

create table out1 (custno int,firstname string,age int,profession string,amount

insert overwrite table out1

select * from out1 limit 100;

create table out2 (custno int,firstname string,age int,profession string,amount

insert overwrite table out2

select * from out2 limit 100;

create table out3 (level string, amount double)

insert overwrite table out3

create table employee(name string, salary float,city string)

load data local inpath 'emp.txt' into table employee;

select * from employee where name='tarun';

create table mailid (name string, email string)

load data local inpath 'email.txt' into table mailid;

select a.name,a.city,a.salary,b.email from

select a.name,a.city,a.salary,b.email from

select a.name,a.city,a.salary,b.email from

select a.name,a.city,a.salary,b.email from

Вам также может понравиться