Data science Software Course Training in Ameerpet Hyderabad

Data science Software Course Training in Ameerpet Hyderabad

Monday 4 July 2016

Hbase Lab 3 : Hive and Hbase Integration

Hive and Hbase Integration:
____________________________

 adv:- i)  we can process Hbase data using hql queries.
     ii) Hbase shell commands  are used for random operations over columns.
  but not for sequential filters, transformations,  aggregations  etc.

   if Hbase table is integrated with Hive,Impala , we can perform all batch related operations.

   generally, for Hive table backend storage is hdfs, if hive table is integrated with hbase,\
 Hive's tables backend is Hbase table.

 when you submit hql statement, hive starts fetching data from Hbase table, query will be executed in MapReduce Excecution model with parallel process style.

Example of Integration:
______________________

i)  
hive> create table hbaseimg1(k int, v int) 
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,x:a")
    > ;
OK
Time taken: 3.446 seconds
hive> select * from hbaseimg1;
OK
Time taken: 0.307 seconds
hive> 

hbase(main):006:0> list 'hbaseimg1'
TABLE                                             
hbaseimg1                                         
1 row(s) in 0.0710 seconds

hbase(main):007:0> 


as per above example, both hive and hbase table names are same.

 want to integrate hive table with some existed or different hbase table.

hive> create table hbaseimg2(k int, a int, b int)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,x:a,x:b")
    > tblproperties("hbase.table.name"="hbasetabx");
OK
Time taken: 1.255 seconds
hive> 

 now hive table :  hbaseimg2
  its backend hbase table : hbasetabx


hbase(main):009:0> 
hbase(main):009:0> put 'hbasetabx',101,'x:a',10
0 row(s) in 0.0650 seconds

hbase(main):010:0> put 'hbasetabx',101,'x:b',20
0 row(s) in 0.0080 seconds

hbase(main):011:0> put 'hbasetabx',102,'x:a',100
0 row(s) in 0.0040 seconds

hbase(main):012:0> put 'hbasetabx',102,'x:b',200
0 row(s) in 0.0140 seconds

hbase(main):013:0> put 'hbasetabx',103,'x:a',1000
0 row(s) in 0.0090 seconds

hbase(main):014:0> put 'hbasetabx',103,'x:b',2000
0 row(s) in 0.0090 seconds

hbase(main):015:0> 

hbase(main):015:0> scan 'hbasetabx'
ROW           COLUMN+CELL                         
 101          column=x:a, timestamp=1467252561122,
               value=10                           
 101          column=x:b, timestamp=1467252568609,
               value=20                           
 102          column=x:a, timestamp=1467252578422,
               value=100                          
 102          column=x:b, timestamp=1467252583648,
               value=200                          
 103          column=x:a, timestamp=1467252595722,
               value=1000                         
 103          column=x:b, timestamp=1467252603469,
               value=2000                         
3 row(s) in 0.0660 seconds

hbase(main):016:0> 

hive> select * from hbaseimg1;
OK
Time taken: 0.152 seconds
hive> select * from hbaseimg2;
OK
101     10      20
102     100     200
103     1000    2000
Time taken: 0.16 seconds

_______________________________________
 mysql + sqoop + hbase + hive
____________________________

mysql> select * from emp;
+-----+-------+-------+------+------+
| id  | name  | sal   | sex  | dno  |
+-----+-------+-------+------+------+
| 101 | aaa   | 10000 | m    |   11 | 
| 102 | baaa  | 20000 | m    |   12 | 
| 103 | baaav | 30000 | f    |   12 | 
| 104 | xaaav | 50000 | f    |   11 | 
+-----+-------+-------+------+------+
4 rows in set (0.00 sec)

mysql> 
hive> create table demo1(id int, name string,
    >  sal int, sex string, dno int)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,e:name,e:sal,e:sex,e:dno")
    > tblproperties("hbase.table.name"="hbdemo1");
OK
Time taken: 1.235 seconds
hive> select * from demo1;
OK
Time taken: 0.154 seconds
hive> 


[training@localhost ~]$ sqoop import \
>  --connect jdbc:mysql://localhost/sqdb \
>  --username root \
>  --table emp \
>  --hbase-table hbdemo1 \
>  --column-family e





hbase(main):018:0> list 'hbdemo1'
TABLE                                             
hbdemo1                                           
1 row(s) in 0.0600 seconds

hbase(main):019:0> scan 'hbdemo1'
ROW           COLUMN+CELL                         
 101          column=e:dno, timestamp=146725340767
              5, value=11                         
 101          column=e:name, timestamp=14672534076
              75, value=aaa                       
 101          column=e:sal, timestamp=146725340767
              5, value=10000                      
 101          column=e:sex, timestamp=146725340767
              5, value=m                          
 102          column=e:dno, timestamp=146725340790
              5, value=12                         
 102          column=e:name, timestamp=14672534079
              05, value=baaa                      
 102          column=e:sal, timestamp=146725340790
              5, value=20000                      
 102          column=e:sex, timestamp=146725340790
              5, value=m                          
 103          column=e:dno, timestamp=146725340905
              9, value=12                         
 103          column=e:name, timestamp=14672534090
              59, value=baaav                     
 103          column=e:sal, timestamp=146725340905
              9, value=30000                      
 103          column=e:sex, timestamp=146725340905
              9, value=f                          
 104          column=e:dno, timestamp=146725340905
              9, value=11                         
 104          column=e:name, timestamp=14672534090
              59, value=xaaav                     
 104          column=e:sal, timestamp=146725340905
              9, value=50000                      
 104          column=e:sex, timestamp=146725340905
              9, value=f                          
4 row(s) in 0.0540 seconds

hbase(main):020:0> 


hive> select * from demo1;
OK
101     aaa     10000   m       11
102     baaa    20000   m       12
103     baaav   30000   f       12
104     xaaav   50000   f       11
Time taken: 0.153 seconds
hive> 

_____________________________________

mysql> select * from ed;
+-----+-------+-------+------+-------+------+
| id  | name  | sal   | sex  | dname | loc  |
+-----+-------+-------+------+-------+------+
| 101 | aaa   | 10000 | m    | mrkt  | hyd  | 
| 102 | baaa  | 20000 | m    | hr    | del  | 
| 103 | baaav | 30000 | f    | hr    | del  | 
| 104 | xaaav | 50000 | f    | mrkt  | hyd  | 
+-----+-------+-------+------+-------+------+
4 rows in set (0.00 sec)

mysql> 

hive> create table demo2(id int, name string,
    >   sal int, sex string, 
    >  dname string, loc string)
    > stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    > with serdeproperties("hbase.columns.mapping"=":key,e:name,e:sal,e:sex,d:dname,d:loc")
    > tblproperties("hbase.table.name"="hbdemo2");
OK
Time taken: 1.244 seconds
hive> 

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed --columns name,sal,sex,id  --hbase-table hbdemo2  --column-family e

hive> select * from demo2;
OK
101     aaa     10000   m       NULL    NULL
102     baaa    20000   m       NULL    NULL
103     baaav   30000   f       NULL    NULL
104     xaaav   50000   f       NULL    NULL
Time taken: 0.143 seconds
hive>

[training@localhost ~]$ sqoop import  --connect jdbc:mysql://localhost/sqdb  --username root  --table ed --columns dname,loc,id  --hbase-table hbdemo2  --column-family d


hive> select * from demo2;
OK
101     aaa     10000   m       mrkt    hyd
102     baaa    20000   m       hr      del
103     baaav   30000   f       hr      del
104     xaaav   50000   f       mrkt    hyd
Time taken: 0.13 seconds
hive> 





































2 comments: