Swift:优化从字符串中提取链接的代码

jvlzgdj9  于 2023-09-30  发布在  Swift
关注(0)|答案(8)|浏览(148)

我从网站获取数据(HTML 字符串),想提取其中的所有链接。我写了一个函数(可以正常工作),但它非常慢。
你能帮我优化一下吗?有哪些标准库函数可以使用?函数逻辑:在文本中找到 "http://" 字符串,然后逐个字符(by char)读取后面的内容,直到遇到双引号(")为止。

// Integer-based subscripts for String (Swift 1.x dialect: `advance`, `substringWithRange`).
// NOTE(review): each access calls `advance` from `startIndex`; as discussed later in this
// thread, that makes every subscript linear in the offset, so using these inside a loop
// easily produces quadratic behaviour.
extension String {

// Returns the Character reached by advancing `i` positions from `startIndex`.
subscript (i: Int) -> Character {
    return self[advance(self.startIndex, i)]
}

// Same lookup as the Character subscript, with the result wrapped in a String.
subscript (i: Int) -> String {
    return String(self[i] as Character)
}

// Returns the substring between the integer offsets `r.startIndex` and `r.endIndex`,
// both measured from `startIndex`.
subscript (r: Range<Int>) -> String {
    return substringWithRange(Range(start: advance(startIndex, r.startIndex), end: advance(startIndex, r.endIndex)))
}}


// Collects every run of `text` that begins with "http://" and extends up to (but not
// including) the next double-quote character. Swift 1.x dialect; relies on the Int
// subscripts on String declared elsewhere in this file.
//
// NOTE(review): `countElements(find)` and `countElements(text)` are re-evaluated on every
// loop iteration and every subscript; the answers in this thread identify that as the
// main cause of the slowness.
func extractAllLinks(text:String) -> Array<String>{
var stringArray = Array<String>()
var find = "http://" as String

// `i` runs `countElements(find)` positions ahead of the character being inspected, so the
// character under examination is always `text[i - countElements(find)]`.
for (var i = countElements(find); i<countElements(text); i++)
{
    var ch:Character = text[i - Int(countElements(find))]
    if (ch == find[0])
    {
        // Compare successive characters of `text` against `find`, advancing `i` and `j`
        // together until a mismatch or until the last character of `find` is reached.
        var j = 0
        while (ch == find[j])
        {
            // `ch2` is assigned but never read.
            var ch2:Character = find[j]
            if(countElements(find)-1 == j)
            {
                break
            }
            j++
            i++
            ch = text[i - Int(countElements(find))]
        }

        // Rewind `i` to the position where the candidate prefix started.
        i -= j
        // NOTE(review): `j` equals `countElements(find)-1` both when the final character
        // matched (the `break` above) and when only the final character mismatched, so
        // text such as "http:/x" is also accepted as a link.
        if (j == (countElements(find)-1))
        {
            var str = ""
            // Copy characters until the closing double quote.
            // NOTE(review): there is no end-of-text check here; if no quote follows, the
            // index runs past the end of `text`.
            for (; text[i - Int(countElements(find))] != "\""; i++)
            {
                str += text[i - Int(countElements(find))]
            }
            stringArray.append(str)
        }

    }
}
return stringArray}
relj7zay

relj7zay1#

就像AdamPro13上面说的使用NSDataDetector就可以很容易的得到所有的URL,看下面的代码吧:

// Swift 1.2 example: print every link NSDataDetector finds in `text`.
let text = "http://www.google.com. http://www.bla.com"
let types: NSTextCheckingType = .Link
var error : NSError?

// Detector restricted to link results; a creation failure is reported through `error`.
let detector = NSDataDetector(types: types.rawValue, error: &error)
// NSRange is measured in UTF-16 code units, so the length must come from NSString.
// `count(text)` counts Characters and would cut the range short for text containing
// emoji or other characters that need more than one UTF-16 unit.
var matches = detector!.matchesInString(text, options: nil, range: NSMakeRange(0, (text as NSString).length))

for match in matches {
   println(match.URL!)
}

它输出:

http://www.google.com
http://www.bla.com

更新到Swift 2.0

// Swift 2.0 example: print every link NSDataDetector finds in `text`.
let text = "http://www.google.com. http://www.bla.com"
let types: NSTextCheckingType = .Link

// `try?` turns a detector-creation failure into nil.
let detector = try? NSDataDetector(types: types.rawValue)

// The `return` below means this snippet has to live inside a function body.
guard let detect = detector else {
   return
}

// NSRange is measured in UTF-16 code units; `text.characters.count` counts Characters
// and would truncate the scanned range for text containing emoji and similar characters.
let matches = detect.matchesInString(text, options: .ReportCompletion, range: NSMakeRange(0, text.utf16.count))

for match in matches {
    // `URL` is optional on NSTextCheckingResult; bind it instead of force-unwrapping.
    if let url = match.URL {
        print(url)
    }
}

请注意:上面的代码在 guard 语句中使用了 return,因此这段代码必须放在函数内部(如果放在循环中,则应把 return 换成 break 或 continue)。
我希望这对你有帮助。

fdx2calv

fdx2calv2#

这就是Swift 5.0的答案

// Sample input containing two links separated by a sentence-ending period.
let text = "http://www.google.com. http://www.bla.com"

/// Returns the URLs that `NSDataDetector` recognises as links in `text`.
///
/// - Parameter text: The string to scan.
/// - Returns: The detected URLs, or an empty array when none are found or the
///   detector cannot be created.
func checkForUrls(text: String) -> [URL] {
    let types: NSTextCheckingResult.CheckingType = .link

    do {
        let detector = try NSDataDetector(types: types.rawValue)

        // NSRange is measured in UTF-16 code units. Building it from the string's own
        // indices covers the whole text even when it contains emoji or other characters
        // that occupy more than one UTF-16 unit (`text.count` would fall short there).
        let fullRange = NSRange(text.startIndex..., in: text)
        let matches = detector.matches(in: text, options: [], range: fullRange)

        return matches.compactMap { $0.url }
    } catch {
        // Detector creation failed; log it and fall through to the empty result.
        debugPrint(error.localizedDescription)
    }

    return []
}

// Example call; the returned array is not used here.
checkForUrls(text: text)
rm5edbpk

rm5edbpk3#

详情

  • Swift 5.2、Xcode 11.4(11E146)

解决方案

// MARK: DataDetector

/// Thin wrapper around `NSDataDetector` that returns matched substrings.
class DataDetector {

    /// Runs a detector for `type` over `string` and feeds each matched substring to
    /// `iterationClosure`, stopping as soon as the closure returns `false`.
    ///
    /// Does nothing when the detector cannot be created.
    private class func _find(all type: NSTextCheckingResult.CheckingType,
                             in string: String, iterationClosure: (String) -> Bool) {
        guard let detector = try? NSDataDetector(types: type.rawValue) else { return }
        let fullRange = NSRange(string.startIndex ..< string.endIndex, in: string)
        let matches = detector.matches(in: string, options: [], range: fullRange)
        loop: for match in matches {
            for i in 0 ..< match.numberOfRanges {
                // Match ranges are expressed in UTF-16 code units, so they must be
                // converted with `Range(_:in:)`. Using the offsets as Character offsets
                // picks the wrong substring (or traps) when the text contains emoji.
                guard let range = Range(match.range(at: i), in: string) else { continue }
                guard iterationClosure(String(string[range])) else { break loop }
            }
        }
    }

    /// Returns every substring of `string` that the detector matches for `type`.
    class func find(all type: NSTextCheckingResult.CheckingType, in string: String) -> [String] {
        var results = [String]()
        _find(all: type, in: string) {
            results.append($0)
            return true
        }
        return results
    }

    /// Returns the first substring of `string` matched for `type`, or `nil` if there is none.
    class func first(type: NSTextCheckingResult.CheckingType, in string: String) -> String? {
        var result: String?
        _find(all: type, in: string) {
            result = $0
            return false
        }
        return result
    }
}

// MARK: String extension

extension String {
    /// Substrings of this string that `DataDetector` reports as links.
    var detectedLinks: [String] {
        return DataDetector.find(all: .link, in: self)
    }

    /// The first substring reported as a link, if any.
    var detectedFirstLink: String? {
        return DataDetector.first(type: .link, in: self)
    }

    /// `detectedLinks` converted to `URL`, dropping any entry `URL(string:)` rejects.
    var detectedURLs: [URL] {
        return detectedLinks.compactMap { link in URL(string: link) }
    }

    /// `detectedFirstLink` converted to `URL`; `nil` when there is no link or it cannot be parsed.
    var detectedFirstURL: URL? {
        if let firstLink = detectedFirstLink {
            return URL(string: firstLink)
        }
        return nil
    }
}

用法

let text = """
Lorm Ipsum is simply dummy text of the printing and typesetting industry. apple.com/ Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. http://gooogle.com. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. yahoo.com It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
"""

// Exercise each computed property on the sample `text`; the two Optional-returning
// properties are printed as-is, which is why the output below shows `Optional(...)`.
print(text.detectedLinks)
print(text.detectedFirstLink)
print(text.detectedURLs)
print(text.detectedFirstURL)

控制台输出

["apple.com/", "http://gooogle.com", "yahoo.com"]
Optional("apple.com/")
[apple.com/, http://gooogle.com, yahoo.com]
Optional(apple.com/)
vnzz0bqm

vnzz0bqm4#

非常有用的帖子!下面是一个基于 Victor Sigler 的答案、在 Swift 1.2 中可用的示例。(注意:示例中使用的 do/try/catch 语法实际上需要 Swift 2.0 或更高版本。)

// Extract the first link (if available) and open it.
    let text = "How technology is changing our relationships to each other: http://t.ted.com/mzRtRfX"
    let types: NSTextCheckingType = .Link

    do {
        let detector = try NSDataDetector(types: types.rawValue)
        // NSRange is measured in UTF-16 code units; `text.characters.count` counts
        // Characters and would truncate the range for text containing emoji.
        let matches = detector.matchesInString(text, options: .ReportCompletion, range: NSMakeRange(0, text.utf16.count))
        // Bind the first match's URL instead of indexing and force-unwrapping;
        // nothing happens when no link was found.
        if let url = matches.first?.URL {
            print("Opening URL: \(url)")
            UIApplication.sharedApplication().openURL(url)
        }

    } catch {
        // Only reached when the detector itself could not be created.
        print ("error in findAndOpenURL detector")
    }
dpiehjr4

dpiehjr45#

实际上有一个名为NSDataDetector的类可以为您检测链接。
你可以在NSHipster上找到一个例子:http://nshipster.com/nsdatadetector/

vbopmzt1

vbopmzt16#

不知道你是否意识到:每次调用 countElements 都会执行一个相当复杂的函数——它必须扫描字符串中的所有 Unicode 字符,从中提取扩展字形簇(extended grapheme cluster)并对其计数。即使你不清楚什么是扩展字形簇,也应该能想到这种操作开销不小,而且在这里完全是大材小用。
只需把字符串转换为 NSString*,调用 rangeOfString 即可。
显然,你现在的做法完全不可靠,因为出现 http:// 并不代表那里就有一个链接。你不能只是在 HTML 里查找字符串就指望它能正常工作——它不能。此外还有 https、Http、hTtp、htTp 等各种写法。不过这些都还算容易处理;想见识真正的恐怖,请看 Uttam Sinha 评论中的链接。

idv4meu8

idv4meu87#

正如其他人所指出的,最好使用正则表达式、数据检测器或解析库。然而,作为对字符串处理的具体反馈:
Swift字符串的关键是接受它们的只向前性质。通常,整数索引和随机访问是不必要的。正如@gnasher729所指出的,每次调用count时,都是在迭代字符串。类似地,整数索引扩展是线性的,因此如果在循环中使用它们,很容易意外地创建二次或三次复杂度算法。
但在本例中,并不需要把字符串索引转换为可随机访问的整数。下面是一个我认为执行了类似逻辑的版本(先查找前缀,再从该位置查找 " 字符——暂且忽略它无法处理 https、大小写变体等问题),只使用原生的字符串索引:

// Swift 1.2 version that walks `text` with native String indices only.
// For every position whose remaining text starts with "http://" and contains a double
// quote, the text up to (not including) that quote is recorded as a link.
func extractAllLinks(text: String) -> [String] {
    var links: [String] = []
    let prefix = "http://"
    // NOTE(review): `prefixLen` is computed but not used below.
    let prefixLen = count(prefix)

    for var idx = text.startIndex; idx != text.endIndex; ++idx {
        // Everything from the current position to the end of the text.
        let candidate = text[idx..<text.endIndex]
        if candidate.hasPrefix(prefix),
           let closingQuote = find(candidate, "\"") {
            let link = candidate[candidate.startIndex..<closingQuote]
            links.append(link)
            // Jump to the closing quote; the loop's `++idx` then steps past it.
            idx = advance(idx, count(link))
        }
    }
    return links
}

// Sample input with two quoted links.
let text = "This contains the link \"http://www.whatever.com/\" and"
         + " the link \"http://google.com\""

extractAllLinks(text)

如果有其他辅助函数(例如 findFromIndex 之类),还可以进一步优化(advance(idx, count(link)) 有点低效);或者也可以不使用字符串切片,而是手动逐字符查找结束引号。

mxg2im7a

mxg2im7a8#

Swift 5.8.1

import Foundation

extension String {
    /// Substrings of this string that `NSDataDetector` reports as links.
    ///
    /// If the detector cannot be created, the error description is printed and an
    /// empty array is returned.
    var urls: [String] {
        var found: [String] = []
        // NSString view of the receiver, so match ranges (UTF-16 based) can be sliced directly.
        let bridged = self as NSString
        do {
            let linkDetector = try NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue)
            let wholeText = NSMakeRange(0, utf16.count)
            linkDetector.enumerateMatches(in: self, options: [], range: wholeText) { match, _, _ in
                guard let matchRange = match?.range else { return }
                found.append(bridged.substring(with: matchRange))
            }
        } catch let error {
            print(error.localizedDescription)
        }
        return found
    }
}

用法:

// Sample input mixing emoji with three links; the trailing comment shows the printed result.
let str = "🤩 Hello world. https://www.google.com , blabla ❤️ apple.com, www.test.com"
print(str.urls) //["https://www.google.com", "apple.com", "www.test.com"]

相关问题