sql—当累计列小于或等于目标列时设置组号

k4ymrczo  于 2021-07-24  发布在  Java
关注(0)|答案(1)|浏览(254)

我有一个列,其中包含多少子链接一个网站有。
我的表模式是id,sub\u link,link

id  sub_link link group
1   5   link1.com   1
2   2   link2.com   2
3   4   link3.com   3
4   1   link4.com   3
4   1   link4.com   3
5   2   link5.com   4
6   4   link6.com   5
7   3   link7.com   6
7   3   link7.com   6

我想添加一个列,当子链接的总数等于或小于5时使用组号,同时避免重复。
我不知道从哪里开始,因为我只能编写select语句并进行一些连接。也许有一个方法使用窗口功能或cte,我更喜欢进一步我的经验。
这是一个小提琴链接
https://www.db-fiddle.com/f/6rmtcazwawvlulz5qgpmsb/1
谢谢你的帮助。

jjjwad0x

jjjwad0x1#

基于你在下面的评论,我不知道除了在一个单独的temp表中跟踪滚动子链接和外,如何做到这一点。下面是一个例子。如果您只需要跟踪当前记录和上一个记录的子链接,则可以使用lag函数通过单个查询来实现。

USE [master]
GO
CREATE DATABASE [Test]
GO

USE [Test]
GO

CREATE TABLE [Test] (
    ID INT
  , Sub_Links TINYINT
  , Link VARCHAR(100)
);

INSERT INTO [Test]
VALUES (1, 5, 'link1.com')
    , (2, 2, 'link2.com') 
    , (3, 4, 'link3.com')
    , (4, 1, 'link4.com')
    , (4, 1, 'link4.com')
    , (5, 2, 'link5.com')
    , (6, 4, 'link6.com')
    , (7, 3, 'link7.com')
    , (7, 3, 'link7.com');

SET NOCOUNT ON
GO
CREATE TABLE #Staging
(
      ID INT
    , Link VARCHAR(100)
    , Sub_Links INT
    , GroupNum INT
    , SublinkRollingSum TINYINT
)
GO

CREATE CLUSTERED INDEX [StagingOrder] ON #Staging(ID, Link) --Since need to guarantee order, doing this upfront should be more efficient
GO

INSERT INTO #Staging(ID, Link, Sub_Links)
SELECT DISTINCT --Don't include duplicate records
      ID
    , Link
    , Sub_Links
FROM Test
ORDER BY ID, Link
GO
--CREATE INDEX [GroupIndex] ON #Staging(GroupNum, SublinkRollingSum)--Intended to improve performance of below while loop
--GO

WITH FirstRecord AS
(
    SELECT TOP(1) *
    FROM #Staging
    ORDER BY ID, Link
)
UPDATE FirstRecord
SET GroupNum = 1, SublinkRollingSum = Sub_Links --This is the starting point

DECLARE 
      @CurrentID INT
    , @CurrentLink VARCHAR(100)
    , @CurrentGroup INT
    , @SublinkRollingSum TINYINT

SELECT TOP(1) 
      @CurrentID = ID
    , @CurrentLink = Link
    , @CurrentGroup = GroupNum
    , @SublinkRollingSum = SublinkRollingSum
FROM #Staging
ORDER BY ID, Link

WHILE (@@ROWCOUNT > 0)
BEGIN
    UPDATE #Staging
    SET   SublinkRollingSum = @SublinkRollingSum
        , GroupNum = @CurrentGroup
    WHERE ID = @CurrentID
        AND Link = @CurrentLink

    SELECT TOP(1)
          @CurrentID = ID
        , @CurrentLink = Link
        , @CurrentGroup = 
            CASE 
                WHEN (@SublinkRollingSum + Sub_Links <= 5) 
                    THEN @CurrentGroup
                ELSE @CurrentGroup + 1
            END
        , @SublinkRollingSum = 
            CASE 
                WHEN (@SublinkRollingSum + Sub_Links <= 5) 
                    THEN @SublinkRollingSum + Sub_Links
                ELSE Sub_Links
            END
    FROM #Staging
    WHERE ID > @CurrentID 
        OR (ID = @CurrentID AND Link <> @CurrentLink)
    ORDER BY ID, Link
END

SELECT 
      t.ID
    , t.Sub_Links
    , t.Link
    , s.GroupNum
FROM #Staging S
    JOIN Test t ON s.ID = t.ID
        AND s.Link = t.Link
ORDER BY t.ID, t.Link

DROP TABLE #Staging

--DROP DATABASE [Test]

相关问题