Data science Software Course Training in Ameerpet Hyderabad

Data science Software Course Training in Ameerpet Hyderabad

Monday, 4 July 2016

Hive Lab10 : json data processing using hive

Json Data Processing Using Hive.

[training@localhost ~]$ cat json1
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
[training@localhost ~]$ 

hive> create database jsonlab;
OK
Time taken: 1.694 seconds
hive> use jsonlab;
OK
Time taken: 0.018 seconds
hive> create table raw(line string);
OK
Time taken: 0.219 seconds
hive> load data local inpath 'json1' into table raw;
hive> select * from raw;
OK
{"name":"Sankar","age":25,"sex":"m"}
{"name":"Sankari","age":24,"sex":"f","city":"hyd"}
Time taken: 0.146 seconds
hive> 
hive> select get_json_object(line,'$.name') from raw;
OK
Sankar
Sankari
hive> select                
    >   get_json_object(line,'$.name') ,
    >   get_json_object(line,'$.age') , 
    >   get_json_object(line,'$.sex') ,
    >   get_json_object(line,'$.city')  
    > from raw;
OK
Sankar  25      m       NULL
Sankari 24      f       hyd

hive> create table info(name string, age int,
    >   sex string, city string);
OK
Time taken: 0.04 seconds
hive> insert overwrite table info
    >   select x.* from raw
    >    lateral view json_tuple

(line,'name','age','sex','city') x
    >   as n,a,s,c;

hive> select * from info;                                       

  
OK
Sankar  25      m       NULL
Sankari 24      f       hyd
Time taken: 0.063 seconds
hive> 

[training@localhost ~]$ cat json2
{"name":"ravi","age":25,"wife":

{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":

{"name":"kavitha","age":21},"city":"del"}
[training@localhost ~]$ 

hive> create table jraw(line string);
OK
Time taken: 0.048 seconds
hive> load data local inpath 'json2' into table jraw;
Copying data from file:/home/training/json2
Copying file: file:/home/training/json2
Loading data to table jsonlab.jraw
OK
Time taken: 0.127 seconds
hive> select * from jraw;
OK
{"name":"ravi","age":25,"wife":

{"name":"banu","age":24},"city":"hyd"}
{"name":"kavi","age":25,"wife":

{"name":"kavitha","age":21},"city":"del"}
Time taken: 0.06 seconds
hive> 
hive> create table raw2(name string, age int, wife string, city 

string);
OK
Time taken: 0.033 seconds
hive> insert overwrite table raw2
    >   select x.* from jraw
    >  lateral view 
    >  json_tuple(line,'name','age','wife','city') x as 

n,a,w,c;

hive> select * from raw2;
OK
ravi    25      {"age":24,"name":"banu"}        hyd
kavi    25      {"age":21,"name":"kavitha"}     del
Time taken: 0.061 seconds
hive> create table jinfo(hname string, wname string, 
    >    hage int, wage int, city string);

hive> insert overwrite table jinfo
    >   select name, get_json_object(wife,'$.name'),
    >   age , get_json_object(wife,'$.age'),
    >  city from raw2;

hive> select * from jinfo;
OK
ravi    banu    25      24      hyd
kavi    kavitha 25      21      del
Time taken: 0.079 seconds
hive> 

1 comment:

  1. Good Post! Thank you so much for sharing this pretty post, it was so good to read and useful to improve my knowledge as updated one, keep blogging.
    Big Data Hadoop Training in electronic city

    ReplyDelete