Postgresql. Merge and split date ranges from two tables by set of keys

Question

I'm trying to combine multiple date ranges from two identically structured tables that contain the same or different data (PostgreSQL 9.*).

Tables structure:

-- Activity intervals from the first source.
-- "id" is declared serial so that its backing sequence
-- (first_activities_id_seq) is created together with the table; the
-- original default referenced that sequence without ever creating it.
CREATE TABLE "first_activities" (
    "id" serial PRIMARY KEY,        -- surrogate key, filled from the sequence
    "start_time" timestamptz,       -- start of the activity interval
    "end_time" timestamptz,         -- end of the activity interval
    "activity_type" int2,
    "user_id" int4
)
WITH (OIDS=FALSE);

-- Activity intervals from the second source; same layout as first_activities.
-- "id" is declared serial so that its backing sequence
-- (second_activities_id_seq) is created together with the table; the
-- original default referenced that sequence without ever creating it.
CREATE TABLE "second_activities" (
    "id" serial PRIMARY KEY,        -- surrogate key, filled from the sequence
    "start_time" timestamptz,       -- start of the activity interval
    "end_time" timestamptz,         -- end of the activity interval
    "activity_type" int2,
    "user_id" int4
)
WITH (OIDS=FALSE);

Data in First table:

-- Sample rows for first_activities.
-- "id" is left out of the column list so the column default generates it:
-- an explicit NULL is rejected by the NOT NULL constraint instead of being
-- replaced by the default.
INSERT INTO "first_activities" ("start_time", "end_time", "activity_type", "user_id") VALUES
('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),
('2014-10-31 03:00:00', '2014-10-31 04:00:00', 2, 1),
('2014-10-31 04:30:00', '2014-10-31 05:00:00', 3, 1),
('2014-10-31 05:30:00', '2014-11-01 06:00:00', 4, 1),
('2014-11-01 06:30:00', '2014-11-01 07:00:00', 2, 1),
('2014-11-01 07:30:00', '2014-11-01 08:00:00', 1, 1),
('2014-11-01 08:00:00', '2014-11-01 09:00:00', 3, 1),
('2014-11-01 09:00:00', '2014-11-02 10:00:00', 4, 1),
('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);

Data in Second table:

-- Sample rows for second_activities.
-- "id" is left out of the column list so the column default generates it:
-- an explicit NULL is rejected by the NOT NULL constraint instead of being
-- replaced by the default.
INSERT INTO "second_activities" ("start_time", "end_time", "activity_type", "user_id") VALUES
('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),

-- Difference from first table
('2014-10-31 03:30:00', '2014-10-31 04:00:00', 1, 1),
('2014-10-31 04:25:00', '2014-10-31 04:35:00', 3, 1),
('2014-10-31 04:45:00', '2014-10-31 05:35:00', 3, 1),
-- End of difference from first table

('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);

How can I filter the result set, starting from this query:

-- Starting point: every row of both tables, earliest start first.
-- Columns are listed in table order, so the output matches SELECT *.
SELECT
    id,
    start_time,
    end_time,
    activity_type,
    user_id
FROM first_activities
UNION ALL
SELECT
    id,
    start_time,
    end_time,
    activity_type,
    user_id
FROM second_activities
ORDER BY start_time ASC;

to get final result set.

Final Result:

-- merge rows that have the same user_id and activity_type,
-- and split rows whose ranges intersect but that do not share the same user_id and activity_type

-- start_time               end_time        type  user_id
'2014-10-31 01:00:00', '2014-10-31 02:00:00',  '3', '1');
'2014-10-31 02:00:00', '2014-10-31 03:00:00',  '4', '1');

-- these rows are not merged; they are split where the ranges intersect
'2014-10-31 03:00:00', '2014-10-31 03:30:00',  '2', '1'); -- from first table
'2014-10-31 03:30:00', '2014-10-31 04:00:00',  '1', '1'); -- from second table

-- data merged by same user_id and activity_type
'2014-10-31 04:25:00', '2014-10-31 05:35:00',  '3', '1'); 

'2014-10-31 05:30:00', '2014-11-01 06:00:00',  '4', '1');
'2014-11-01 06:30:00', '2014-11-01 07:00:00',  '2', '1');
'2014-11-01 07:30:00', '2014-11-01 08:00:00',  '1', '1');
'2014-11-01 08:00:00', '2014-11-01 09:00:00',  '3', '1');
'2014-11-01 09:00:00', '2014-11-02 10:00:00',  '4', '1');
'2014-08-27 10:00:00', '2014-08-27 11:00:00',  '2', '1');
'2014-08-27 11:00:00', '2014-08-27 12:00:00',  '1', '1');

klin · Accepted Answer · 2016-08-31 20:26:50Z

The issue can be reduced to the question of how to combine (compact) a group of adjacent or overlapping ranges into one. I had to deal with this some time ago and found it a bit complicated in plain SQL. There is a simple solution using a loop in PL/pgSQL code, but I also found a general solution that uses a custom aggregate.

The function compact_ranges(anyrange, anyrange) returns the union of the two ranges if they are adjacent or overlapping, and the second range otherwise:

-- Combines two ranges for step-by-step compaction.
--   $1: the range accumulated so far
--   $2: the next range
-- Returns the union of both when they touch (-|-) or overlap (&&);
-- otherwise returns $2, which starts a new group.
create or replace function compact_ranges(anyrange, anyrange)
returns anyrange
language sql
as $$
    select
        case
            -- A NULL $1 makes this condition NULL, so the ELSE branch
            -- returns $2 unchanged.
            when $1 -|- $2 or $1 && $2 then $1 + $2
            else $2
        end
$$;

-- Aggregate that folds ranges with compact_ranges. The state is a range of
-- the same type as the input, and no initial condition is declared.
create aggregate compact_ranges_agg (anyrange) (
    stype = anyrange,
    sfunc = compact_ranges
);

The aggregate has a narrow scope of usage: it should be called as a running (cumulative) window function, as in this example:

-- Demo: compact a list of integer ranges into their merged groups.
with test(rng) as (
    values
        ('[ 1,  2)'::int4range),
        ('[ 3,  7)'),  -- group 1
        ('[ 5, 10)'),  -- group 1
        ('[ 6,  8)'),  -- group 1
        ('[11, 17)'),  -- group 2
        ('[12, 16)'),  -- group 2
        ('[15, 16)'),  -- group 2
        ('[18, 19)')
),
-- Running compaction: each row carries the merged range built so far.
running as (
    select
        rng,
        compact_ranges_agg(rng) over (order by rng) as new_rng
    from test
)
-- Keep one row per group start: the widest running range for that start.
select distinct on (lower(new_rng))
    new_rng
from running
order by lower(new_rng), new_rng desc;

 new_rng 
---------
 [1,2)
 [3,10)
 [11,17)
 [18,19)
(4 rows)

In the same way you can use it for your tables:

-- Step 1: pool the rows of both tables as timestamp ranges.
-- UNION removes rows that are identical in both tables.
with merged as (
    select tstzrange(start_time, end_time) rng, activity_type, user_id
    from first_activities
    union
    select tstzrange(start_time, end_time) rng, activity_type, user_id
    from second_activities
),
-- Step 2: per (user_id, activity_type), merge adjacent/overlapping ranges.
-- The running aggregate yields the merged range built so far; DISTINCT ON
-- with "new_rng desc" keeps the widest range for each group start.
compacted as (
    select distinct on (user_id, activity_type, lower(new_rng))
        lower(new_rng) start_time,
        upper(new_rng) end_time,
        activity_type,
        user_id
    from (
        select
            user_id, activity_type,
            compact_ranges_agg(rng) over (partition by user_id, activity_type order by rng) new_rng
        from merged
        ) s
    order by user_id, activity_type, lower(new_rng), new_rng desc
)
-- Step 3: split intersections by cutting each interval at the start of the
-- same user's next interval.
select
    start_time,
    -- Aliased so the output column is named end_time rather than "case".
    case
        when end_time > lead(start_time) over w then lead(start_time) over w
        else end_time
    end as end_time,
    activity_type,
    user_id
from compacted
-- Partitioned by user_id so one user's intervals are never clipped by
-- another user's start times.
window w as (partition by user_id order by start_time)
order by start_time;

The result:

       start_time       |        end_time        | activity_type | user_id 
------------------------+------------------------+---------------+---------
 2014-08-27 10:00:00+02 | 2014-08-27 11:00:00+02 |             2 |       1
 2014-08-27 11:00:00+02 | 2014-08-27 12:00:00+02 |             1 |       1
 2014-10-31 01:00:00+01 | 2014-10-31 02:00:00+01 |             3 |       1
 2014-10-31 02:00:00+01 | 2014-10-31 03:00:00+01 |             4 |       1
 2014-10-31 03:00:00+01 | 2014-10-31 03:30:00+01 |             2 |       1
 2014-10-31 03:30:00+01 | 2014-10-31 04:00:00+01 |             1 |       1
 2014-10-31 04:25:00+01 | 2014-10-31 05:30:00+01 |             3 |       1
 2014-10-31 05:30:00+01 | 2014-11-01 06:00:00+01 |             4 |       1
 2014-11-01 06:30:00+01 | 2014-11-01 07:00:00+01 |             2 |       1
 2014-11-01 07:30:00+01 | 2014-11-01 08:00:00+01 |             1 |       1
 2014-11-01 08:00:00+01 | 2014-11-01 09:00:00+01 |             3 |       1
 2014-11-01 09:00:00+01 | 2014-11-02 10:00:00+01 |             4 |       1
(12 rows)

Thank you! How can I split the intersection, i.e. turn 2014-10-31 03:00:00+01 | 2014-10-31 04:00:00+01 into 2014-10-31 03:30:00+01 | 2014-10-31 04:00:00+01?

Collectives™ on Stack Overflow

Postgresql. Merge and split date ranges from two tables by set of keys

1 Answer 1

2 Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

2 Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related