Re: Вопрос на собеседовании (как надо было ответить?)
От: octo47 Россия  
Дата: 17.02.09 13:42
Оценка:
Здравствуйте, MitjaT, Вы писали:

MT>Здравствуйте!


MT>Как вам такой письменный вопрос на собеседовании?

MT>Как правильно на него ответить?

MT>

MT>Чем плох такой запрос? Как можно его оптимизировать?

MT>select T1.F1, T2.F1
MT>from Table1 T1
MT>left join Table2 T2 on T1.F2=T2.F2 or T1.F3=T2.F3


Зависит от БД, для PostgreSQL наиболее быстрый из всех 3-х предложенных.
Для Oracle чуть чуть медленнее третьего (см листинг pgsql, для оракла не привожу).


drop table if exists table1;
drop table if exists table2;
CREATE TABLE table1(
    F1 serial primary key,
    F2 integer not null,
    F3 integer not null
);
CREATE TABLE table2(
    F1 serial primary key,
    F2 integer not null,
    F3 integer not null
);
create index ix_table1_f2 on table1(f2);
create index ix_table1_f3 on table1(f3);
create index ix_table2_f2 on table2(f2);
create index ix_table2_f3 on table2(f3);

-- Создаем записи которые не будут попадать под условие запроса
insert into table2(f2, f3) select f as f2, f+100000 as f3 from generate_series(0, 90000-1, 1) f;
insert into table1(f2, f3) select f2+200000, f3+300000 from table2 order by f1 limit 2000;
-- Создаем записи которые будут попадать под условие запроса F2 = F2
insert into table1(f2, f3) select f as f2, f+100000 as f3 from generate_series(90000, 92000-1, 1) f;
insert into table2(f2, f3) select f as f2, f+200000 as f3 from generate_series(90000, 92000-1, 1) f;
-- Создаем записи которые будут попадать под условие запроса F3 = F3
insert into table1(f2, f3) select f+100000 as f2, f as f3 from generate_series(92000, 94000-1, 1) f;
insert into table2(f2, f3) select f+200000 as f2, f as f3 from generate_series(92000, 94000-1, 1) f;
-- Создаем записи которые будут попадать под условие запроса F2 = F2 = F3 = F3
insert into table1(f2, f3) select f as f2, f as f3 from generate_series(94000, 96000-1, 1) f;
insert into table2(f2, f3) select f as f2, f as f3 from generate_series(94000, 96000-1, 1) f;
analyse;
explain analyse
select     T1.F1, T2.F1
    from Table1 T1
    left join Table2 T2
    on  T1.F2 = T2.F2
        or T1.F3 = T2.F3 ;

explain analyse
select  T1F1, max(T2F1)
    from
        (select     T1.F1 as T1F1, T2.F1 as T2F1
                from Table1 T1
                inner join Table2 T2
                on  T1.F2 = T2.F2
            union all
            select     T1.F1, T2.F1
                from Table1 T1
                inner join Table2 T2
                on  T1.F3 = T2.F3
            union all
            select     T1.F1, null
                from Table1 T1
        ) as T
    group by T1F1 ;

cluster table1 using table1_pkey;
cluster table2 using table2_pkey;
explain analyse 
select  T1.F1, coalesce(T2_1.F1, T2_2.F1) as T2F1
    from Table1 T1
        left join Table2 T2_1 on  T1.F2=T2_1.F2
        left join Table2 T2_2 on (T1.F3=T2_2.F3 and not T1.F2=T2_2.F2);
select count(*) from table1;
select count(*) from table2;



                                                             QUERY PLAN                                                             
------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.75..38484.32 rows=16000 width=8) (actual time=0.079..100.528 rows=8000 loops=1)
   Join Filter: ((t1.f2 = t2.f2) OR (t1.f3 = t2.f3))
   ->  Seq Scan on table1 t1  (cost=0.00..120.00 rows=8000 width=12) (actual time=0.007..4.181 rows=8000 loops=1)
   ->  Bitmap Heap Scan on table2 t2  (cost=0.75..4.77 rows=2 width=12) (actual time=0.008..0.009 rows=1 loops=8000)
         Recheck Cond: ((t1.f2 = t2.f2) OR (t1.f3 = t2.f3))
         ->  BitmapOr  (cost=0.75..0.75 rows=2 width=0) (actual time=0.007..0.007 rows=0 loops=8000)
               ->  Bitmap Index Scan on ix_table2_f2  (cost=0.00..0.37 rows=1 width=0) (actual time=0.003..0.003 rows=0 loops=8000)
                     Index Cond: (t1.f2 = t2.f2)
               ->  Bitmap Index Scan on ix_table2_f3  (cost=0.00..0.37 rows=1 width=0) (actual time=0.003..0.003 rows=0 loops=8000)
                     Index Cond: (t1.f3 = t2.f3)
 Total runtime: 103.928 ms
(11 rows)

                                                            QUERY PLAN                                                             
-----------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=6582.00..6584.50 rows=200 width=8) (actual time=255.314..260.239 rows=8000 loops=1)
   ->  Append  (cost=220.00..6222.00 rows=24000 width=8) (actual time=98.856..242.948 rows=16000 loops=1)
         ->  Hash Join  (cost=220.00..2931.00 rows=8000 width=8) (actual time=98.855..107.491 rows=4000 loops=1)
               Hash Cond: (t2.f2 = t1.f2)
               ->  Seq Scan on table2 t2  (cost=0.00..1431.00 rows=96000 width=8) (actual time=0.006..40.053 rows=96000 loops=1)
               ->  Hash  (cost=120.00..120.00 rows=8000 width=8) (actual time=9.228..9.228 rows=8000 loops=1)
                     ->  Seq Scan on table1 t1  (cost=0.00..120.00 rows=8000 width=8) (actual time=0.007..4.450 rows=8000 loops=1)
         ->  Hash Join  (cost=220.00..2931.00 rows=8000 width=8) (actual time=105.900..112.798 rows=4000 loops=1)
               Hash Cond: (t2.f3 = t1.f3)
               ->  Seq Scan on table2 t2  (cost=0.00..1431.00 rows=96000 width=8) (actual time=0.007..42.931 rows=96000 loops=1)
               ->  Hash  (cost=120.00..120.00 rows=8000 width=8) (actual time=9.395..9.395 rows=8000 loops=1)
                     ->  Seq Scan on table1 t1  (cost=0.00..120.00 rows=8000 width=8) (actual time=0.007..4.426 rows=8000 loops=1)
         ->  Subquery Scan "*SELECT* 3"  (cost=0.00..200.00 rows=8000 width=4) (actual time=0.009..11.795 rows=8000 loops=1)
               ->  Seq Scan on table1 t1  (cost=0.00..120.00 rows=8000 width=4) (actual time=0.007..4.657 rows=8000 loops=1)
 Total runtime: 263.221 ms
(15 rows)

                                                                    QUERY PLAN                                                                    
--------------------------------------------------------------------------------------------------------------------------------------------------
 Hash Left Join  (cost=5764.22..7055.29 rows=8000 width=12) (actual time=215.136..266.488 rows=8000 loops=1)
   Hash Cond: (t1.f3 = t2_2.f3)
   Join Filter: (t1.f2 <> t2_2.f2)
   ->  Merge Left Join  (cost=2711.22..3320.29 rows=8000 width=16) (actual time=95.601..114.328 rows=8000 loops=1)
         Merge Cond: (t1.f2 = t2_1.f2)
         ->  Index Scan using ix_table1_f2 on table1 t1  (cost=0.00..346.96 rows=8000 width=12) (actual time=0.054..5.345 rows=8000 loops=1)
         ->  Index Scan using ix_table2_f2 on table2 t2_1  (cost=0.00..2766.45 rows=96000 width=8) (actual time=0.021..58.040 rows=94001 loops=1)
   ->  Hash  (cost=1431.00..1431.00 rows=96000 width=12) (actual time=119.398..119.398 rows=96000 loops=1)
         ->  Seq Scan on table2 t2_2  (cost=0.00..1431.00 rows=96000 width=12) (actual time=0.009..53.147 rows=96000 loops=1)
 Total runtime: 269.338 ms
(10 rows)

 count 
-------
  8000
(1 запись)

 count 
-------
 96000
(1 запись)


Вариант с принудительным использованием индексов во 2-м запросе, что дает что-то похожее на то, что делает mssql.
HashAggregate  (cost=100008101.30..100008103.80 rows=200 width=8) (actual time=264.442..268.939 rows=8000 loops=1)
  ->  Append  (cost=2724.91..100007741.30 rows=24000 width=8) (actual time=101.632..252.214 rows=16000 loops=1)
        ->  Merge Join  (cost=2724.91..3385.54 rows=8000 width=8) (actual time=101.630..117.700 rows=4000 loops=1)
              Merge Cond: (t1.f2 = t2.f2)
              ->  Index Scan using ix_table1_f2 on table1 t1  (cost=0.00..397.22 rows=8000 width=8) (actual time=0.020..4.918 rows=8000 loops=1)
              ->  Index Scan using ix_table2_f2 on table2 t2  (cost=0.00..2782.58 rows=96000 width=8) (actual time=0.017..59.696 rows=94001 loops=1)
        ->  Merge Join  (cost=0.28..3995.76 rows=8000 width=8) (actual time=0.025..112.698 rows=4000 loops=1)
              Merge Cond: (t1.f3 = t2.f3)
              ->  Index Scan using ix_table1_f3 on table1 t1  (cost=0.00..352.89 rows=8000 width=8) (actual time=0.011..3.831 rows=6001 loops=1)
              ->  Index Scan using ix_table2_f3 on table2 t2  (cost=0.00..3396.79 rows=96000 width=8) (actual time=0.010..57.608 rows=96000 loops=1)
        ->  Subquery Scan "*SELECT* 3"  (cost=100000000.00..100000200.00 rows=8000 width=4) (actual time=0.011..11.009 rows=8000 loops=1)
              ->  Seq Scan on table1 t1  (cost=100000000.00..100000120.00 rows=8000 width=4) (actual time=0.009..4.571 rows=8000 loops=1)
Total runtime: 271.772 ms
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.