Home ยป How to find repeated, timestamped data?

How to find repeated, timestamped data?

create table issue_attributes (
  issue_id number,
  attr_timestamp timestamp,
  attribute_name varchar2(500),
  attribute_value varchar2(500),
  CONSTRAINT ia-pk PRIMARY KEY (issue_id, attr_timestamp, attribute_name)
)

select issue_id, attr_timestamp, attribute_name, attribute_value
from issue_attributes where issue_id = 1 and attribute_name = ‘OWNER’;

1, 01-JAN-2011 12:00, ‘OWNER’, ‘[email protected]
1, 01-FEB-2011 12:00, ‘OWNER’, ‘[email protected]
1, 01-MAR-2011 12:00, ‘OWNER’, ‘[email protected]
1, 01-APR-2011 12:00, ‘OWNER’, ‘[email protected]

1, 01-JAN-2011 12:00, ‘OWNER’, ‘[email protected]

2, 01-JAN-2011 12:00, ‘OWNER’, ‘[email protected]
2, 01-FEB-2011 12:00, ‘OWNER’, ‘[email protected]
2, 01-MAR-2011 12:00, ‘OWNER’, ‘[email protected]
2, 01-APR-2011 12:00, ‘OWNER’, ‘[email protected]

2, 01-JAN-2011 12:00, ‘OWNER’, ‘[email protected]
2, 01-FEB-2011 12:00, ‘OWNER’, ‘[email protected]
2, 01-MAR-2011 12:00, ‘OWNER’, ‘[email protected]

with issue_attributes as (
  select 1 as issue_id, date ‘2011-01-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 1 as issue_id, date ‘2011-02-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 1 as issue_id, date ‘2011-03-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 1 as issue_id, date ‘2011-04-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 2 as issue_id, date ‘2011-01-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 2 as issue_id, date ‘2011-02-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 2 as issue_id, date ‘2011-03-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual union all
  select 2 as issue_id, date ‘2011-04-01’ as attr_timestamp,
    ‘OWNER’ as attribute_name, ‘[email protected]’ as attribute_value from dual
)
select
  issue_id,
  attr_timestamp,
  attribute_name,
  attribute_value,
  case when lag(attribute_value) over (partition by issue_id, attribute_name order by attr_timestamp) = attribute_value then null else ‘Y’end as keep_value
from
  issue_attributes

ISSUE_ID ATTR_TIMESTAMP ATTRIBUTE_NAME ATTRIBUTE_VALUE        KEEP_VALUE
1        01/01/2011     OWNER          [email protected]   Y
1        01/02/2011     OWNER          [email protected]
1        01/03/2011     OWNER          [email protected]
1        01/04/2011     OWNER          [email protected]
2        01/01/2011     OWNER          [email protected]   Y
2        01/02/2011     OWNER          [email protected] Y
2        01/03/2011     OWNER          [email protected]   Y
2        01/04/2011     OWNER          [email protected]

SQL> desc issue_attributes
 Name                                                              Null?    Type
 —————————————————————– ——– ——————————————–
 ISSUE_ID                                                                   NUMBER
 ATTR_TIMESTAMP                                                             TIMESTAMP(6)
 ATTRIBUTE_NAME                                                             VARCHAR2(500)
 ATTRIBUTE_VALUE                                                            VARCHAR2(500)

SQL> select * from issue_attributes;

  ISSUE_ID ATTR_TIMESTAMP                      ATTRIBUTE_ ATTRIBUTE_VALUE
———- ———————————– ———- ——————————
         1 01-JAN-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-FEB-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-MAR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-APR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-JAN-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-JAN-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-FEB-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-MAR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-APR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-JAN-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-FEB-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-MAR-20 11.12.00.000000 AM        OWNER      [email protected]

12 rows selected.

SQL> delete from issue_attributes
        where rowid in(select rid
                         from (select rowid rid,
                                      row_number() over (partition by ISSUE_ID,
                                                                      ATTR_TIMESTAMP,
                                                                      ATTRIBUTE_NAME,
                                                                      ATTRIBUTE_VALUE
                                                             order by rowid) rn
                                from issue_attributes)
                        where rn<> 1);
7 rows deleted.

SQL> select * from issue_attributes;

  ISSUE_ID ATTR_TIMESTAMP              ATTRIBUTE_ ATTRIBUTE_VALUE
———- ———————————– ———- ——————————
         1 01-JAN-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-FEB-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-MAR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-APR-20 11.12.00.000000 AM        OWNER      [email protected]
         1 01-FEB-20 11.12.00.000000 AM        OWNER      [email protected]

5 rows selected.

SELECT MAX(attr_timestamp), issue_id, attribute_name, attribute_value
FROM issue_attributes
GROUP BY issue_id, attribute_name, attribute_value

SET search_path=’tmp’;

— The rows you want to delete.
SELECT * FROM issue_attributes to_del
WHERE EXISTS (
    SELECT * FROM issue_attributes xx
    WHERE xx.issue_id = to_del.issue_id
    AND xx.attribute_name = to_del.attribute_name
    AND xx.attribute_value = to_del.attribute_value
    AND xx.attr_timestamp > to_del.attr_timestamp
    AND NOT EXISTS ( SELECT * FROM issue_attributes nx
        WHERE nx.issue_id = to_del.issue_id
        AND nx.attribute_name = to_del.attribute_name
        AND nx.attribute_value <> to_del.attribute_value
        AND nx.attr_timestamp > to_del.attr_timestamp
        AND nx.attr_timestamp < xx.attr_timestamp
        ) 
    ) ;

— For completeness: the rows you want to keep.
SELECT * FROM issue_attributes must_stay
WHERE NOT EXISTS (
    SELECT * FROM issue_attributes xx
    WHERE xx.issue_id = must_stay.issue_id
    AND xx.attribute_name = must_stay.attribute_name
    AND xx.attribute_value = must_stay.attribute_value
    AND xx.attr_timestamp > must_stay.attr_timestamp
    AND NOT EXISTS ( SELECT * FROM issue_attributes nx
        WHERE nx.issue_id = must_stay.issue_id
        AND nx.attribute_name = must_stay.attribute_name
        AND nx.attribute_value <> must_stay.attribute_value
        AND nx.attr_timestamp > must_stay.attr_timestamp
        AND nx.attr_timestamp < xx.attr_timestamp
        )
    ) ;

issue_id |   attr_timestamp    | attribute_name |   attribute_value   
———-+———————+—————-+———————-
        1 | 2011-03-01 12:00:00 | OWNER          | [email protected]
        1 | 2011-01-01 12:00:00 | OWNER          | [email protected]
        1 | 2011-02-01 12:00:00 | OWNER          | [email protected]
        2 | 2011-03-01 12:00:00 | OWNER          | [email protected]
(4 rows)

 issue_id |   attr_timestamp    | attribute_name |    attribute_value   
———-+———————+—————-+————————
        1 | 2011-04-01 12:00:00 | OWNER          | [email protected]
        2 | 2011-02-01 12:00:00 | OWNER          | [email protected]
        2 | 2011-04-01 12:00:00 | OWNER          | [email protected]
        2 | 2011-01-01 12:00:00 | OWNER          | [email protected]
(4 rows)

Add Comment

Click here to post a comment

Your email address will not be published. Required fields are marked *