hive sqoop increment

create database ccdm_mstr;


use ccdm_mstr;


create table clm_cvs_fact(

clm_cvs_fact_ket varchar(30),

dw_cret_aud_key int(25),

dw_updt_aud_key int(30));


insert into clm_cvs_fact (clm_cvs_fact_ket, dw_cret_aud_key, dw_updt_aud_key) values ('123456700','12347771', '12347771');

insert into clm_cvs_fact (clm_cvs_fact_ket, dw_cret_aud_key, dw_updt_aud_key) values ('123456701','12347772', '12347772');


 select * from clm_cvs_fact;

 

 output:

+------------------+-----------------+-----------------+

| clm_cvs_fact_ket | dw_cret_aud_key | dw_updt_aud_key |

+------------------+-----------------+-----------------+

| 123456700        |        12347771 | 12347771        |

| 123456701        |        12347772 | 12347772        |

+------------------+-----------------+-----------------+



[cloudera@quickstart ~]$ sqoop import --connect jdbc:mysql://localhost/ccdm_mstr --username root --password cloudera --table clm_cvs_fact -m 1 --target-dir /user/ccdm_mstr_test --incremental append --check-column dw_updt_aud_key --last-value 0


[cloudera@quickstart ~]$ hadoop fs -cat /user/ccdm_mstr_test/part-m-00000

123456700,12347771,12347771

123456701,12347772,12347772


Now creted a table in hive 

[cloudera@quickstart ~]$ hive

create if not exist database ccdm_mstr;

use ccdm_mstr;

create table clm_cvs_fact

(

clm_cvs_fact_ket String,

dw_cret_aud_key String,

dw_updt_aud_key String

)

row format delimited

fields terminated by ','

location '/user/ccdm_mstr_test';


output:

select * from ccdm_mstr.clm_cvs_fact;

123456700 12347771 12347771

123456701 12347772 12347772

Time taken: 0.654 seconds, Fetched: 2 row(s)

==================

Trying to insert third record 


insert into clm_cvs_fact (clm_cvs_fact_ket, dw_cret_aud_key, dw_updt_aud_key) values ('123456702','12347773', '12347773');


mysql> select * from clm_cvs_fact;+------------------+-----------------+-----------------+

| clm_cvs_fact_ket | dw_cret_aud_key | dw_updt_aud_key |

+------------------+-----------------+-----------------+

| 123456700        |        12347771 | 12347771        |

| 123456701        |        12347772 | 12347772        |

| 123456702        |        12347773 | 12347773        |

+------------------+-----------------+-----------------+

=============

Now we are will runing sqoop with incremental load

sqoop import --connect jdbc:mysql://localhost/ccdm_mstr --username root --password cloudera --table clm_cvs_fact -m 1 --target-dir /user/ccdm_mstr_test --incremental append --check-column dw_updt_aud_key --last-value "12347772"


hive > select * from ccdm_mstr.clm_cvs_fact;

OK

123456700 12347771 12347771

123456701 12347772 12347772

123456702 12347773 12347773


================

Trying to update,delete,insert


insert -->

insert into clm_cvs_fact (clm_cvs_fact_ket, dw_cret_aud_key, dw_updt_aud_key) values ('123456705','12347776', '12347776');


update-->12342777 -->12349999


UPDATE clm_cvs_fact  SET dw_updt_aud_key = '12349999' WHERE clm_cvs_fact_ket = 123456701;


mysql> select * from clm_cvs_fact;+------------------+-----------------+-----------------+

| clm_cvs_fact_ket | dw_cret_aud_key | dw_updt_aud_key |

+------------------+-----------------+-----------------+

| 123456700        |        12347771 | 12347771       |

| 123456701        |        12347772 | 12399999       |

| 123456702        |        12347773 | 12347773        |

| 123456705        |        12347776 | 12347776        |

+------------------+-----------------+-----------------+


==============

Again running sqoop to import all new record 


sqoop import --connect jdbc:mysql://localhost/ccdm_mstr --username root --password cloudera --table clm_cvs_fact -m 1 --target-dir /user/ccdm_mstr_test --incremental append --check-column dw_updt_aud_key --last-value "12347773"


hive> 

    > 

    > select * from ccdm_mstr.clm_cvs_fact;

OK

123456700 12347771 12347771

123456701 12347772 12347772

123456702 12347773 12347773

123456701 12347772 12349999

123456705 12347776 12346777

Time taken: 0.056 seconds, Fetched: 5 row(s)


select t1.* from (select *  from clm_cvs_fact) t1 join (select clm_cvs_fact_ket, max(dw_updt_aud_key) md from  (select  * from clm_cvs_fact) t2 group by clm_cvs_fact_ket) s ON t1.clm_cvs_fact_ket = s.clm_cvs_fact_ket AND t1.dw_updt_aud_key = s.md;


hive output:


123456700 12347771 12347771

123456701 12347772 12349999

123456702 12347773 12347773

123456705 12347776 12347776


Comments

Popular posts from this blog

Shell encrypt and decrypt

cdc

incrementally update