仅从字符串中获取数字的查询

1tu0hz3e  于 2021-07-26  发布在  Java
关注(0)|答案(17)|浏览(318)

我有这样的数据:

string 1: 003Preliminary Examination Plan   
string 2: Coordination005  
string 3: Balance1000sheet

我期望的结果是

string 1: 003
string 2: 005
string 3: 1000

我想用sql实现它。

kqlmhetl

kqlmhetl1#

如果您使用的是postgres,并且您有类似“2000-一些示例文本”的数据,请尝试子字符串和位置组合,否则如果在您的场景中没有分隔符,则需要编写regex:

SUBSTRING(Column_name from 0 for POSITION('-' in column_name) - 1) as 
number_column_name
wb1gzix0

wb1gzix02#

我发现这个方法比最热门的答案快3倍。创建以下函数dbo.getnumbers:

CREATE FUNCTION dbo.GetNumbers(@String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN;
    WITH
        Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the @String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(@String)
        )
        ,Characters
    AS (
        SELECT Character
        FROM Numbers
        CROSS APPLY (
                --Step 2.
                --Use the column of numbers generated above
                --to tell substring which character to extract.
                SELECT SUBSTRING(@String, Number, 1) AS Character
            ) AS c
        )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    SELECT @String = STRING_AGG(Character,'')
    FROM Characters
    WHERE Character LIKE '[0-9]'

    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000) 

    RETURN @String
END
GO

测试
测试目的:

SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
         ('003Preliminary Examination Plan') --output: 003
        ,('Coordination005')                 --output: 005
        ,('Balance1000sheet')                --output: 1000
        ,('(111) 222-3333')                  --output: 1112223333
        ,('1.38hello@f00.b4r#\-6')           --output: 1380046
    ) testData(InputString)

性能测试:开始设置测试数据。。。

--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000)) 

--Make an 8000 character string with mix of numbers and letters
DECLARE @Num VARCHAR(8000) = REPLICATE('12tf56se',800)

--Add this to the test table 500 times
DECLARE @n INT = 0
WHILE @n < 500
BEGIN
    INSERT INTO dbo.NumTest VALUES (@Num)
    SET @n = @n +1
END

现在测试dbo.getnumbers函数:

SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s

然后在相同的数据上测试排名靠前的答案的自定义项。

SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s

dbo.getnumbers的灵感
小数
如果您需要它来处理小数,您可以使用以下任何一种方法,我发现它们之间没有明显的性能差异。
改变 '[0-9]''[0-9.]' 改变 Character LIKE '[0-9]'ISNUMERIC(Character) = 1 (sql将单个十进制数视为数字)
奖金
您可以很容易地通过交换来适应不同的需求 WHERE Character LIKE '[0-9]' 使用以下选项: WHERE Letter LIKE '[a-zA-Z]' --Get only letters WHERE Letter LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric WHERE Letter LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric

zlwx9yxi

zlwx9yxi3#

t-sql函数,从文本中读取所有整数并返回指定索引处的整数,从左或右开始,同时使用起始搜索项(可选):

create or alter function dbo.udf_number_from_text(
    @text nvarchar(max),
    @search_term nvarchar(1000) = N'',
    @number_position tinyint = 1,
    @rtl bit = 0
) returns int
as
    begin
        declare @result int = 0;
        declare @search_term_index int = 0;

        if @text is null or len(@text) = 0 goto exit_label;
        set @text = trim(@text);
        if len(@text) = len(@search_term) goto exit_label;

        if len(@search_term) > 0
            begin
                set @search_term_index = charindex(@search_term, @text);
                if @search_term_index = 0 goto exit_label;
            end;

        if @search_term_index > 0
            if @rtl = 0
                set @text = trim(right(@text, len(@text) - @search_term_index - len(@search_term) + 1));
            else
                set @text = trim(left(@text, @search_term_index - 1));
        if len(@text) = 0 goto exit_label;

        declare @patt_number nvarchar(10) = '%[0-9]%';
        declare @patt_not_number nvarchar(10) = '%[^0-9]%';
        declare @number_start int = 1;
        declare @number_end int;
        declare @found_numbers table (id int identity(1,1), val int);

        while @number_start > 0
        begin
            set @number_start = patindex(@patt_number, @text);
            if @number_start > 0
                begin
                    if @number_start = len(@text)
                        begin
                            insert into @found_numbers(val)
                            select cast(substring(@text, @number_start, 1) as int);

                            break;
                        end;
                    else
                        begin
                            set @text = right(@text, len(@text) - @number_start + 1);
                            set @number_end = patindex(@patt_not_number, @text);

                            if @number_end = 0
                                begin
                                    insert into @found_numbers(val)
                                    select cast(@text as int);

                                    break;
                                end;
                            else
                                begin
                                    insert into @found_numbers(val)
                                    select cast(left(@text, @number_end - 1) as int);

                                    if @number_end = len(@text)
                                        break;
                                    else
                                        begin
                                            set @text = trim(right(@text, len(@text) - @number_end));
                                            if len(@text) = 0 break;
                                        end;
                                end;
                        end;
                end;
        end;

        if @rtl = 0
            select @result = coalesce(a.val, 0)
            from (select row_number() over (order by m.id asc) as c_row, m.val
                    from @found_numbers as m) as a
            where a.c_row = @number_position;
        else
            select @result = coalesce(a.val, 0)
            from (select row_number() over (order by m.id desc) as c_row, m.val
                    from @found_numbers as m) as a
            where a.c_row = @number_position;

        exit_label:
            return @result;
    end;

例子:

select dbo.udf_number_from text(N'Text text 10 text, 25 term', N'term',2,1);

返回10;

gywdnpxw

gywdnpxw4#

在甲骨文中
您可以通过以下方式获得所需:

SUBSTR('ABCD1234EFGH',REGEXP_INSTR ('ABCD1234EFGH', '[[:digit:]]'),REGEXP_COUNT ('ABCD1234EFGH', '[[:digit:]]'))

示例查询:

SELECT SUBSTR('003Preliminary Examination Plan  ',REGEXP_INSTR ('003Preliminary Examination Plan  ', '[[:digit:]]'),REGEXP_COUNT ('003Preliminary Examination Plan  ', '[[:digit:]]')) SAMPLE1,
SUBSTR('Coordination005',REGEXP_INSTR ('Coordination005', '[[:digit:]]'),REGEXP_COUNT ('Coordination005', '[[:digit:]]')) SAMPLE2,
SUBSTR('Balance1000sheet',REGEXP_INSTR ('Balance1000sheet', '[[:digit:]]'),REGEXP_COUNT ('Balance1000sheet', '[[:digit:]]')) SAMPLE3 FROM DUAL
hivapdat

hivapdat5#

他妈的。。。
此解决方案与所有早期解决方案不同,即:
不需要创建函数
没有必要使用模式匹配
不需要临时table
此解决方案使用递归公共表表达式(cte)
但首先要注意的是,问题并没有指明这些字符串存储在哪里。在下面的解决方案中,我创建了一个cte,作为一种快速而肮脏的方法,将这些字符串放入某种“源表”。
另请注意-此解决方案使用递归公共表表达式(cte)-因此不要被两个cte的用法所迷惑。第一个简单的方法是使数据对解决方案可用—但解决这个问题只需要第二个cte。您可以修改代码,使第二个cte查询成为您现有的表、视图等。
最后—我的代码很冗长,试图使用列名和cte名称来解释正在发生的事情,您可能能够稍微简化这个解决方案。为了好玩,我添加了一些带有格式的伪电话号码(可能是预期的,也可能是非典型的)。

with SOURCE_TABLE as (
    select '003Preliminary Examination Plan' as numberString
    union all select 'Coordination005' as numberString
    union all select 'Balance1000sheet' as numberString
    union all select '1300 456 678' as numberString
    union all select '(012) 995 8322  ' as numberString
    union all select '073263 6122,' as numberString
),
FIRST_CHAR_PROCESSED as (
    select
        len(numberString) as currentStringLength,
        isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
        cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
        cast(substring(numberString,2,len(numberString)) as nvarchar) as remainingString
    from SOURCE_TABLE
    union all
    select
        len(remainingString) as currentStringLength,
        cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
        cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
        substring(remainingString,2,len(remainingString)) as remainingString
    from FIRST_CHAR_PROCESSED fcp2
    where fcp2.currentStringLength > 1
)
select 
    newString
    ,* -- comment this out when required
from FIRST_CHAR_PROCESSED 
where currentStringLength = 1

这是怎么回事?
基本上在我们的cte中,我们选择第一个字符并使用 try_cast (见文件)把它扔到 tinyint (对于一位数字来说,这是一种足够大的数据类型)。请注意,SQLServer中的类型转换规则表示空字符串(或空格)将解析为零,因此 nullif 添加到强制空格和空字符串以解析为null(请参阅讨论)(否则,每当在源数据中遇到空格时,我们的结果将包含零字符)。
cte还返回第一个字符之后的所有内容—这将成为我们对cte的递归调用的输入;换句话说:现在让我们处理下一个角色。
最后,实地调查 newString 在cte中生成(在第二个 SELECT )通过串联。对于递归CTE,数据类型必须在两者之间匹配 SELECT 任何给定列的语句-包括列大小。因为我们知道我们正在添加(最多)一个字符,所以我们将该字符强制转换为nvarchar(1),并强制转换 newString (到目前为止)作为nvarchar(3999)。连接起来,结果将是nvarchar(4000)-这与我们在第一个示例中执行的类型转换相匹配 SELECT .
如果运行此查询并排除 WHERE 子句,您将了解发生了什么-但是行的顺序可能很奇怪(您不一定会看到与单个输入值相关的所有行都被分组在一起,但您应该仍然能够遵循这些规则)。
希望这是一个有趣的选择,可以帮助一些人想要一个严格基于表达式的解决方案。

jm2pwxwz

jm2pwxwz6#

首先创建这个 UDF ```
CREATE FUNCTION dbo.udf_GetNumeric
(
@strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
DECLARE @intAlpha INT
SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
BEGIN
WHILE @intAlpha > 0
BEGIN
SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '' )
SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric )
END
END
RETURN ISNULL(@strAlphaNumeric,0)
END
GO

现在使用 `function` 作为

SELECT dbo.udf_GetNumeric(column_name)
from table_name

sql小提琴
我希望这能解决你的问题。
参考
k75qkfdt

k75qkfdt7#

试试这个-
查询:

DECLARE @temp TABLE
(
      string NVARCHAR(50)
)

INSERT INTO @temp (string)
VALUES 
    ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet')

SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1) 
FROM (
    SELECT subsrt = SUBSTRING(string, pos, LEN(string))
    FROM (
        SELECT string, pos = PATINDEX('%[0-9]%', string)
        FROM @temp
    ) d
) t

输出:

----------
003
005
1000
vd2z7a6w

vd2z7a6w8#

查询:

DECLARE @temp TABLE
(
    string NVARCHAR(50)
)

INSERT INTO @temp (string)
VALUES 
    ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet')

SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%', 
                    string) + 1) AS Number
FROM @temp
v8wbuo2f

v8wbuo2f9#

请尝试:

declare @var nvarchar(max)='Balance1000sheet'

SELECT LEFT(Val,PATINDEX('%[^0-9]%', Val+'a')-1) from(
    SELECT SUBSTRING(@var, PATINDEX('%[0-9]%', @var), LEN(@var)) Val
)x
uklbhaso

uklbhaso10#

只从字符串中获取数字可以在一行中完成。试试这个:

SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here'))

注意:只适用于字符串中的第一个int,例如:123vfg34返回123。

qrjkbowd

qrjkbowd11#

通过前面的查询,我得到以下结果:
'aaaa1234bbbb3333'>>>>输出:1234
'-çã+0!\一º1234'>>>输出:0
下面的代码返回所有数字字符:
第一输出:12343333
第二输出:01234

declare @StringAlphaNum varchar(255)
declare @Character varchar
declare @SizeStringAlfaNumerica int
declare @CountCharacter int

set @StringAlphaNum = 'AAAA1234BBBB3333'
set @SizeStringAlfaNumerica = len(@StringAlphaNum)
set @CountCharacter = 1

while isnumeric(@StringAlphaNum) = 0
begin
    while @CountCharacter < @SizeStringAlfaNumerica
        begin
            if substring(@StringAlphaNum,@CountCharacter,1) not like '[0-9]%'
            begin
                set @Character = substring(@StringAlphaNum,@CountCharacter,1)
                set @StringAlphaNum = replace(@StringAlphaNum, @Character, '')
            end
    set @CountCharacter = @CountCharacter + 1
    end
    set @CountCharacter = 0
end
select @StringAlphaNum
dgjrabp2

dgjrabp212#

declare @puvodni nvarchar(20)
set @puvodni = N'abc1d8e8ttr987avc'

WHILE PATINDEX('%[^0-9]%', @puvodni) > 0 SET @puvodni = REPLACE(@puvodni, SUBSTRING(@puvodni, PATINDEX('%[^0-9]%', @puvodni), 1), '' ) 

SELECT @puvodni
but5z9lq

but5z9lq13#

我没有创建函数的权限,但有类似于

["blahblah012345679"]

需要从中间提取数字
注意:这假设数字分组在一起,而不是在字符串的开头和结尾。

select substring(column_name,patindex('%[0-9]%', column_name),patindex('%[0-9][^0-9]%', column_name)-patindex('%[0-9]%', column_name)+1)
from table name
sirbozc5

sirbozc514#

只是对@epsicron的答案稍加修改

SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%', 
                    string) + 1) AS Number
FROM (values ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet')) as a(string)

不需要临时变量

gwbalxhn

gwbalxhn15#

虽然这是一个旧的线程-这是谷歌搜索的第一个,我想出了一个不同的答案比以前。这将允许您传递字符串中要保留的内容的标准,不管该标准是什么。你可以把它放在一个函数中反复调用。

declare @String VARCHAR(MAX) = '-123.  a    456-78(90)'
declare @MatchExpression VARCHAR(255) = '%[0-9]%'
declare @return varchar(max)

WHILE PatIndex(@MatchExpression, @String) > 0
    begin
    set @return = CONCAT(@return, SUBSTRING(@string,patindex(@matchexpression, @string),1))
    SET @String = Stuff(@String, PatIndex(@MatchExpression, @String), 1, '')
    end
select (@return)

相关问题