I have a table structured as below (apologies for quite long example of data, but couldn't reproduce the issue with smaller no. of rows):
CREATE TABLE example_tbl (
id CHAR(1),
pid INT,
sid INT,
tid VARCHAR(10),
other_columns INT
)
INSERT INTO example_tbl (id, pid, sid, tid, other_columns)
VALUES
('c', 157, 85, 'A32', 1),
('c', 157, 85, 'A32', 2),
('c', 157, 85, 'A32', 3),
('c', 157, 85, 'A32', 4),
('c', 157, 85, 'A32', 5),
('c', 157, 85, 'A32', 6),
('c', 157, 85, 'A32', 7),
('c', 157, 85, 'A32', 8),
('c', 157, 85, 'A32', 9),
('c', 157, 85, 'A32', 10),
('c', 157, 85, 'A32', 11),
('Q', 157, 81, 'A62', 1),
('Q', 157, 81, 'A62', 2),
('Q', 157, 81, 'A62', 3),
('Q', 157, 81, 'A62', 4),
('Q', 157, 81, 'A62', 5),
('Q', 157, 81, 'A62', 6),
('Q', 157, 81, 'A62', 7),
('Q', 157, 81, 'A62', 8),
('Q', 157, 81, 'A62', 9),
('Q', 157, 81, 'A62', 10),
('Q', 157, 81, 'A62', 11),
('f', 598, 51, 'A62', NULL),
('w', 598, 49, 'A32', 9),
('Z', 598, 44, 'A62', NULL),
('r', 598, 16, 'A32', 10),
('O', 118, 93, 'A32', 1),
('G', 118, 38, 'A32', 4),
('U', 118, 90, 'A32', 1),
('U', 118, 90, 'A32', 2),
('U', 118, 90, 'A32', 3),
('U', 118, 90, 'A32', 4),
('U', 118, 90, 'A32', 5),
('U', 118, 90, 'A32', 6),
('U', 118, 90, 'A32', 7),
('U', 118, 90, 'A32', 8),
('U', 118, 90, 'A32', 9),
('U', 118, 90, 'A32', 10),
('U', 118, 90, 'A32', 11),
('m', 118, 37, 'A62', 1),
('J', 118, 54, 'A32', 20),
('a', 118, 59, 'A32', 11),
('s', 118, 18, 'A62', 8),
('y', 118, 33, 'A62', NULL),
('N', 118, 79, 'A62', NULL),
('l', 118, 35, 'A32', 9),
('n', 118, 63, 'A32', 5),
('R', 118, 86, 'A62', 1),
('R', 118, 86, 'A62', 2),
('R', 118, 86, 'A62', 3),
('R', 118, 86, 'A62', 4),
('R', 118, 86, 'A62', 5),
('R', 118, 86, 'A62', 6),
('R', 118, 86, 'A62', 7),
('R', 118, 86, 'A62', 8),
('R', 118, 86, 'A62', 9),
('R', 118, 86, 'A62', 10),
('R', 118, 86, 'A62', 11),
('H', 118, 23, 'A32', 1),
('H', 118, 23, 'A32', 2),
('H', 118, 23, 'A32', 3),
('H', 118, 23, 'A32', 4),
('H', 118, 23, 'A32', 5),
('H', 118, 23, 'A32', 6),
('H', 118, 23, 'A32', 7),
('H', 118, 23, 'A32', 8),
('H', 118, 23, 'A32', 9),
('H', 118, 23, 'A32', 10),
('H', 118, 23, 'A32', 11),
('B', 118, 43, 'A62', 39),
('h', 118, 60, 'A62', NULL),
('p', 118, 72, 'A32', 1),
('v', 118, 22, 'A32', 5),
('I', 118, 89, 'A62', 9),
('T', 118, 17, 'A62', 1),
('F', 118, 41, 'A32', 10),
('z', 118, 55, 'A32', 6),
('Y', 118, 75, 'A32', NULL),
('u', 118, 48, 'A62', 9),
('x', 783, 27, 'A32', 10),
('V', 783, 11, 'A62', 8),
('i', 783, 61, 'A62', 1),
('i', 783, 61, 'A62', 2),
('i', 783, 61, 'A62', 3),
('i', 783, 61, 'A62', 4),
('i', 783, 61, 'A62', 5),
('i', 783, 61, 'A62', 6),
('i', 783, 61, 'A62', 7),
('i', 783, 61, 'A62', 8),
('i', 783, 61, 'A62', 9),
('i', 783, 61, 'A62', 10),
('i', 783, 61, 'A62', 11);
The id
column defines the event. I'd like add a column with id
of previous event within a group defined by tid
. The data should be ordered according to pid
(custom order where 157 < 598 < 118 < 783) and sid
. The tricky part is that some events have 11 rows and some have only 1 row.
The other_columns
represents all the other columns that are there and should be preserved.
I tried to use a LAG()
window function with precalculated offset as below:
WITH example_tbl_with_offset AS (
SELECT * FROM example_tbl
LEFT JOIN (SELECT id, COUNT(id)::int AS lag_offset
FROM example_tbl
GROUP BY id) AS offset_tbl
USING (id)
)
SELECT
*,
LAG(id, lag_offset) OVER (PARTITION BY tid
ORDER BY (CASE
WHEN pid = 157 THEN 1
WHEN pid = 598 THEN 2
WHEN pid = 118 THEN 3
WHEN pid = 783 THEN 4
END, sid
)) AS prev_id
FROM example_tbl_with_offset;
However, although the prev_id
column seems valid for the first couple of events, it eventually loses track.
Expected output is attached below:
CREATE TABLE expected_output (
id CHAR(1),
pid INT,
sid INT,
tid VARCHAR(10),
prev_id CHAR(1)
)
INSERT INTO expected_output (id, pid, sid, tid, prev_id)
VALUES
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('Q', 157, 81, 'A62', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('c', 157, 85, 'A32', NULL),
('r', 598, 16, 'A32', 'c'),
('Z', 598, 44, 'A62', 'Q'),
('w', 598, 49, 'A32', 'r'),
('f', 598, 51, 'A62', 'Z'),
('T', 118, 17, 'A62', 'f'),
('s', 118, 18, 'A62', 'T'),
('v', 118, 22, 'A32', 'w'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('H', 118, 23, 'A32', 'v'),
('y', 118, 33, 'A62', 's'),
('l', 118, 35, 'A32', 'H'),
('m', 118, 37, 'A62', 'y'),
('G', 118, 38, 'A32', 'I'),
('F', 118, 41, 'A32', 'G'),
('B', 118, 43, 'A62', 'm'),
('u', 118, 48, 'A62', 'B'),
('J', 118, 54, 'A32', 'F'),
('z', 118, 55, 'A32', 'J'),
('a', 118, 59, 'A32', 'z'),
('h', 118, 60, 'A62', 'u'),
('n', 118, 63, 'A32', 'a'),
('p', 118, 72, 'A32', 'n'),
('Y', 118, 75, 'A32', 'p'),
('N', 118, 79, 'A62', 'h'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('R', 118, 86, 'A62', 'N'),
('I', 118, 89, 'A62', 'R'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('U', 118, 90, 'A32', 'Y'),
('O', 118, 93, 'A32', 'U'),
('V', 783, 11, 'A62', 'I'),
('x', 783, 27, 'A32', 'O'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V'),
('i', 783, 61, 'A62', 'V');
The actual logic is probably easier to grasp with the following ordering:
SELECT * FROM expected_output
ORDER BY tid, CASE
WHEN pid = 157 THEN 1
WHEN pid = 598 THEN 2
WHEN pid = 118 THEN 3
WHEN pid = 783 THEN 4
END, sid;
@Edit: My PostgreSQL version is 9.5.