Trying to pivot data: need to derive missing rows and show latest values

The process:

  • Create a mapping table with all products and year variations
  • Create a table that retrieves the first value, partitioned by year and product
  • Use the COALESCE function inside the dynamic sql query to replace NULLs with 0s
  • Pivot the table using dynamic sql

Make sure to change your date variable to a DATE datatype if not done so already

DROP TABLE IF EXISTS #yourtable

DROP TABLE IF EXISTS #yourtable_allyearsandproducts DROP TABLE IF EXISTS #yourtable_allyearspriceadjusted

CREATE TABLE #yourtable ( PRODUCT VARCHAR(100) ,DATE DATE ,PRICE FLOAT )

INSERT INTO #yourtable ( PRODUCT ,DATE ,PRICE ) VALUES ('Apples','07/01/2023',91830.3), ('Apples','07/01/2022',85788.78), ('Apples','07/01/2019',80712.72), ('Oranges','07/01/2023',51.5), ('Oranges','07/01/2021',17.8), ('Bananas','07/01/2023',1021.69), ('Bananas','07/01/2022',841.23), ('Bananas','07/01/2019',273.88), ('Lemons','07/01/2023',8577.97), ('Lemons','07/01/2022',8456.89), ('Lemons','07/01/2019',7224.16), ('Strawberries','07/01/2022',274.32), ('Strawberries','07/01/2019',82.81), ('Coconuts','07/01/2023',1327.26), ('Coconuts','07/01/2022',1305.5), ('Coconuts','07/01/2021',2020.71)

CREATE TABLE #yourtable_allyearsandproducts ( Year VARCHAR(100) ,Product VARCHAR(100) ,Price FLOAT )

-- Determine each year between the max and min years of your initial table

DECLARE @MinYear INT DECLARE @iteration_Year INT

SET @MinYear = ( SELECT MIN(YEAR(DATE)) FROM #yourtable )

SET @iteration_Year = ( SELECT MAX(YEAR(DATE)) FROM #yourtable )

-- Insert the template with all years and products

WHILE ( @iteration_Year >= @MinYear) BEGIN INSERT INTO #yourtable_allyearsandproducts (Year, Product)
SELECT DISTINCT @iteration_Year ,PRODUCT FROM #yourtable

    UPDATE #yourtable_allyearsandproducts
    SET Price = p.Price
    FROM (
        SELECT YEAR(DATE) [Year]
            ,PRODUCT
            ,PRICE
        FROM #yourtable
        WHERE YEAR(Date) = @iteration_Year
        ) p
    WHERE p.PRODUCT = #yourtable_allyearsandproducts.Product
    AND p.Year = #yourtable_allyearsandproducts.Year
    AND p.Year = @iteration_Year

SET @iteration_Year  = @iteration_Year  - 1

END

-- Adjust the values if price is NULL

SELECT Year ,Product ,FIRST_VALUE(Price) OVER (PARTITION BY Product, value_partition ORDER BY Year) [PRICE] INTO #yourtable_allyearspriceadjusted FROM ( SELECT Year ,Product ,Price ,SUM(CASE WHEN price IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY Product ORDER BY Year) [value_partition] FROM #yourtable_allyearsandproducts ) q

DECLARE @cols VARCHAR(MAX) DECLARE @addzeros VARCHAR(MAX) DECLARE @query VARCHAR(MAX)

SELECT @cols = STUFF( (SELECT DISTINCT '],[' + t.Year FROM #yourtable_allyearspriceadjusted t ORDER BY '],[' + t.Year FOR XML PATH('') ), 1, 2, '') + ']'

-- Need to coalesce the NULLs into zeros, so we cannot pass the @cols variable into the dynamic SQL query SELECT @addzeros = ( SELECT DISTINCT 'COALESCE([' + t.Year + '],0) [' + t.Year + '],' FROM #yourtable_allyearspriceadjusted t ORDER BY 'COALESCE([' + t.Year + '],0) [' + t.Year + '],' FOR XML PATH('') )

SET @query = N'SELECT PRODUCT,' + LEFT(@addzeros,len(@addzeros)-1) + ' FROM ( SELECT Product ,Year ,Price FROM #yourtable_allyearspriceadjusted ) x PIVOT(SUM(Price) FOR Year IN ( '+ @cols +' )) piv ' EXEC(@query)

/r/SQL Thread