CALL `justfunctions.eu.dedup_table`("your_project.your_dataset.your_table","created_at","user_id","_dedup")
/*--Output--
your_project.your_dataset.your_table.your_table_dedup
*/
CREATE OR REPLACE PROCEDURE `your_project_id.your_dataset_id.dedup_table`(`table_name` string, `timestamp_column` string, `unique_column` string, `output_table_suffix` string)
options(
description = '''Creates a deduplicated version of the table <table_name>, retaining the latest row based on the <timestamp_column>, and outputs it with the suffix <output_table_suffix>. Arguments <timestamp_column>, <unique_column>, <output_table_suffix> are optional.'''
)
BEGIN
DECLARE sql STRING;
DECLARE final_table_name STRING;
SET final_table_name = CONCAT(table_name, IF(output_table_suffix != '', output_table_suffix, '_dedup'));
IF unique_column = '' AND timestamp_column = '' THEN
SET sql = FORMAT("""
CREATE OR REPLACE TABLE %s AS
SELECT DISTINCT *
FROM %s;
""", final_table_name, table_name);
ELSEIF unique_column != '' AND timestamp_column = '' THEN
SET sql = FORMAT("""
CREATE OR REPLACE TABLE %s AS
SELECT * EXCEPT(row_num)
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY %s) AS row_num
FROM
%s
)
WHERE row_num = 1;
""", final_table_name, unique_column, table_name);
ELSE
SET sql = FORMAT("""
CREATE OR REPLACE TABLE %s AS
SELECT * EXCEPT(row_num)
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY %s ORDER BY %s DESC) AS row_num
FROM
%s
)
WHERE row_num = 1;
""", final_table_name, unique_column, timestamp_column, table_name);
END IF;
EXECUTE IMMEDIATE sql;
END;
User Defined SQL Procedure
See something wrong? Contact us or report an issue on Github.