— 如果 BSM 字段长度低于 32,不用做 md5,直接使用 BSM 字段就好
— 1. 创建汇总表
create table data_total as
select 1 as flag,id,BSM,md5(BSM) as md5_value from table_a union all
select 2 as flag,id,BSM,md5(BSM) as md5_value from table_b union all
select 3 as flag,id,BSM,md5(BSM) as md5_value from table_c union all
select 4 as flag,id,BSM,md5(BSM) as md5_value from table_d union all
select 5 as flag,id,BSM,md5(BSM) as md5_value from table_e
;
— 2. 查出哪些值有重复
create table value_mult as
select
md5_value
,count(1) as cnt
from data_total t1
group by md5_value
having count(1) > 1
;
— 3. 为 data_total 表创建索引
CREATE INDEX ix_md5_value ON data_total(md5_value);
— 4. 查询出结果
select t1.*
from data_total t1
inner join value_mult t2
on t1.md5_value=t2.md5_value
;