SQL Server Having issues with removing preceding zeroes and characters after special characters from a varchar column

ff29svar  于 2023-06-04  发布在  其他
关注(0)|答案(6)|浏览(298)

I am trying to remove alphabets, space from left, right and in between, preceding zeros, and all the characters after special characters(comma,semicolon,backslash, forward slash)

I have a column which contains value like below :

051 765 999
MK00564123
KJ786453425/9
432563542/3
096745632
53452BMG441,8

I am trying to remove alphabets and space from left and right from the value for those value containing alphabets in the value and also preceeding 0.

Expected Output:

51765999
564123
96745632

Also I am trying to remove everything after special character in the column(comma,semicolon,backslash, forward slash):

Expected Output:

786453425
432563542
53452441

Final Output:

51765999
564123
96745632
786453425
432563542
53452441

I have created the fiddle but not getting expected output as shown below:

Fiddle: http://sqlfiddle.com/#!18/0b383/1123

Can someone please help me to identify what is the issue and how can I get expected output?

nkcskrwz

nkcskrwz1#

One way of doing this is

CREATE FUNCTION dbo.udf_GetCleanedAlphaNumeric (@alphanumeric VARCHAR(100))
RETURNS TABLE
AS
    RETURN
      (SELECT SUBSTRING(cleaned, PATINDEX('%[^0]%', cleaned + 'x'), 8000) AS Final
       FROM   (VALUES (SUBSTRING(@alphanumeric, 1, PATINDEX('%[,;\/]%', @alphanumeric + ';') - 1))) ca1(prefix)
              CROSS APPLY (VALUES ( REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
                                    REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
                                    REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
                                    REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
                                    REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
                                    REPLACE(REPLACE(prefix, ' ', ''), 'a', '')
                                    , 'b', ''), 'c', ''), 'd', ''), 'e', ''), 'f', '')
                                    , 'g', ''), 'h', ''), 'i', ''), 'j', ''), 'k', '')
                                    , 'l', ''), 'm', ''), 'n', ''), 'o', ''), 'p', '')
                                    , 'q', ''), 'r', ''), 's', ''), 't', ''), 'u', '')
                                    , 'v', ''), 'w', ''), 'x', ''), 'y', ''), 'z', '') )) ca2(cleaned))

and then

SELECT alphanumeric,
       Final
FROM   temp
CROSS APPLY dbo.udf_GetCleanedAlphaNumeric(alphanumeric)
  • First extract the substring before any ,;\/ (if present)
  • Then strip out any letters or spaces (can be made much more concise on later versions with TRANSLATE function as in edit history)
  • Then remove leading zeroes

( DB Fiddle )

ilmyapht

ilmyapht2#

To trim leading zeroes from a numeric string:

REPLACE(LTRIM(REPLACE('00123', '0', ' ')), ' ', '0') -- returns '123'

Change your return to:

RETURN REPLACE(LTRIM(REPLACE(ISNULL(@strAlphaNumeric,0), '0', ' ')), ' ', '0')

See live demo.

This works by changing all 0 to a space, left trimming, then changing all space back to 0 .

Of course if SQL Server ever gets around to supporting REGEXP_REPLACE() it would be trivial:

REGEXP_REPLACE('00123', '^0+', '') -- would return '123'
nwlls2ji

nwlls2ji3#

This function can be easily inlined.

CREATE FUNCTION dbo.udf_GetNumeric
(@strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
declare @i bigint
set @strAlphaNumeric = replace(TRANSLATE(@strAlphaNumeric, 'abcdefghijklmnopqrstuvwxyz /\,;', replicate('a', 27)+replicate(';',4)),'a','')
set @i=charindex(';',@strAlphaNumeric)
if @i>0
   set @strAlphaNumeric = left(@strAlphaNumeric,@i-1)
RETURN replace(LTRIM(replace(ISNULL(@strAlphaNumeric,0), '0', ' ')), ' ', '0')
END
42fyovps

42fyovps4#

Here You can use this function which uses PATINDEX .

CREATE FUNCTION dbo.RemoveAlphabets
(
    @input          VARCHAR(MAX)
)
RETURNS VARCHAR(MAX)
BEGIN

    declare
    @match_pattern  VARCHAR(MAX) = '%[a-z ]%',
    @stop_pattern  VARCHAR(MAX) = '%[.,/]%',
    @replace_value  VARCHAR(MAX) = '',
    @match_ix   INT,
    @input_copy          VARCHAR(MAX)

    SET @input_copy = @input

    SET @match_ix = PATINDEX(@stop_pattern, @input_copy);
    IF @match_ix > 0
    BEGIN
        SET @input_copy = SUBSTRING(@input_copy, 1, @match_ix - 1)
    END

    SET @match_ix = PATINDEX(@match_pattern, @input_copy);
    
    WHILE @match_ix > 0
    BEGIN
        SET @input_copy = REPLACE(@input_copy, SUBSTRING(@input_copy, @match_ix, 1), @replace_value);
        SET @match_ix = PATINDEX(@match_pattern, @input_copy);
    END

    RETURN @input_copy;

END

And this is a test.

select dbo.RemoveAlphabets(col1)
from (
    VALUES  (N'051 765 999')
    ,   (N'MK00564123')
    ,   (N'KJ786453425/9')
    ,   (N'432563542/3')
    ,   (N'096745632')
    ,   (N'53452BMG441,8')
) t (col1)
acruukt9

acruukt95#

One method would be to use PATINDEX to find the position of the first non-alphanumeric or space character, and get the LEFT most characters up to that point.

Then I use a helper function I wrote which replaces characters that fit a certain pattern with another character. As you are on SQL Server 2016, then you'll need to use the XML version of it, as STRING_AGG was added in 2017. (note that the helper function also uses a tally helper function, as GENERATE_SERIES was added in 2022).

Finally I convert the value to a bigint to trim leading zeroes.

CREATE FUNCTION [fn].[Tally] (@LastNumber bigint, @Zero bit) 
RETURNS table
AS RETURN
    --Copyright Thom A (https://larnu.uk) under CC-by-SA
    WITH N AS(
        SELECT N
        FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
    Tally AS(
        SELECT 0 AS I
        WHERE @Zero = 0
          AND @LastNumber IS NOT NULL
        UNION ALL
        SELECT TOP (ISNULL(@LastNumber,0))
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
        FROM N N1, N N2, N N3, N N4, N N5, N N6, N N7) --Up to 10,000,000 rows
    SELECT I
    FROM Tally T;
GO

CREATE FUNCTION [fn].[PatternCharacterReplace_XML] (@String varchar(8000), @Pattern varchar(100), @ReplacementCharacter varchar(1)) 
RETURNS table
AS RETURN
    --Copyright Thom A (https://larnu.uk) under CC-by-SA
    SELECT (SELECT CASE WHEN V.C LIKE @Pattern THEN @ReplacementCharacter ELSE V.C END
            FROM fn.Tally(CONVERT(int,LEN(@String)),1) T
                 CROSS APPLY (VALUES(SUBSTRING(@String,T.I,1)))V(C)
            ORDER BY T.I
            FOR XML PATH(''),TYPE).value('(./text())[1]','varchar(8000)') AS ReplacedString;
GO
SELECT V.YourString,
       PI.I,
       LEFT(V.YourString,PI.I),
       TRY_CONVERT(bigint,PCRX.ReplacedString)
FROM (VALUES('051 765 999'),
            ('MK00564123'),
            ('KJ786453425/9'),
            ('432563542/3'),
            ('096745632'),
            ('53452BMG441,8'))V(YourString)
     CROSS APPLY(VALUES(ISNULL(NULLIF(PATINDEX('%[^0-9 A-Z]%',V.YourString),0)-1,LEN(V.YourString))))PI(I)
     CROSS APPLY fn.PatternCharacterReplace_XML(LEFT(V.YourString,PI.I),'[^0-9]','') PCRX;

This'll certainly work on versions as old as SQL Server 2012 for strings where the numbers to return are 18 numbers or less.

If the numerical value would be greater than 9,223,372,036,854,775,807 then you could use PATINDEX and STUFF to remove the leading 0 characters.

5ssjco0h

5ssjco0h6#

SQL Fiddle

MS SQL Server 2017 Schema Setup:

CREATE FUNCTION dbo.udf_GetNumeric
(@strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
DECLARE @intAlpha INT
DECLARE @intSpecial INT
SET @intSpecial = PATINDEX('%[.,+*?^$(){}\\/]%', @strAlphanumeric)
IF @intSpecial > 0
BEGIN
   SET @strAlphaNumeric = SUBSTRING(@strAlphaNumeric,1,@intSpecial)
END

SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
BEGIN
WHILE @intAlpha > 0
BEGIN
SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '' )
SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric )
END
END
RETURN ISNULL(REPLACE(LTRIM(REPLACE(
@strAlphaNumeric
  , '0', ' ')),' ', '0'),0)
END
;

Query 1:

create table temp
(
    alphanumeric varchar(100)
)

Results:

Query 2:

insert into temp
(alphanumeric)
values
('051 765 999'),
('MK00564123'),
('KJ786453425/9'),
('432563542/3'),
('096745632'),
('53452BMG441,8')

Results:

Query 3:

SELECT alphanumeric,dbo.udf_GetNumeric(alphanumeric) 
from temp

Results:

|  alphanumeric |           |
|---------------|-----------|
|   051 765 999 |  51765999 |
|    MK00564123 |    564123 |
| KJ786453425/9 | 786453425 |
|   432563542/3 | 432563542 |
|     096745632 |  96745632 |
| 53452BMG441,8 |  53452441 |

相关问题