在swift中解析html的最佳实践是什么？

kmynzznz 于 2022-12-26 发布在 Swift

关注(0)|答案(3)|浏览(159)

我是一个Swift新手。我需要一些类似Python的BeautifulSoup in Swift iOS项目的东西。准确地说，我需要得到以".txt"结尾的<a>的所有href。我应该采取哪些步骤？

swift

来源：https://stackoverflow.com/questions/31080818/what-is-the-best-practice-to-parse-html-in-swift

3条答案

按热度按时间

euoag5mw1#

有几个不错的HTML解析库使用Swift和Objective-C，如下所示：

看看上面发布的四个库中的以下示例，主要使用XPath 2.0进行解析：

客户：

let data = NSData(contentsOfFile: path)
let doc = TFHpple(htmlData: data)

if let elements = doc.searchWithXPathQuery("//a/@href[ends-with(.,'.txt')]") as? [TFHppleElement] {
   for element in elements {
       println(element.content)
   }
}

NDH客户：

let data = NSData(contentsOfFile: path)!
let html = NSString(data: data, encoding: NSUTF8StringEncoding)!
let doc = NDHpple(HTMLData: html)
if let elements = doc.searchWithXPathQuery("//a/@href[ends-with(.,'.txt')]") {
   for element in elements {
     println(element.children?.first?.content)
   }
}

假名（Xpath和CSS选择器）：

let html = "<html><head></head><body><ul><li><input type='image' name='input1' value='string1value' class='abc' /></li><li><input type='image' name='input2' value='string2value' class='def' /></li></ul><span class='spantext'><b>Hello World 1</b></span><span class='spantext'><b>Hello World 2</b></span><a href='example.com'>example(English)</a><a href='example.co.jp'>example(JP)</a></body>"

if let doc = Kanna.HTML(html: html, encoding: NSUTF8StringEncoding) {
   var bodyNode   = doc.body

   if let inputNodes = bodyNode?.xpath("//a/@href[ends-with(.,'.txt')]") {
      for node in inputNodes {
         println(node.contents)
      }
   }
}

Fuzi（Xpath和CSS选择器）：

let html = "<html><head></head><body><ul><li><input type='image' name='input1' value='string1value' class='abc' /></li><li><input type='image' name='input2' value='string2value' class='def' /></li></ul><span class='spantext'><b>Hello World 1</b></span><span class='spantext'><b>Hello World 2</b></span><a href='example.com'>example(English)</a><a href='example.co.jp'>example(JP)</a></body>"

do {
  // if encoding is omitted, it defaults to NSUTF8StringEncoding
  let doc = try HTMLDocument(string: html, encoding: NSUTF8StringEncoding)

  // XPath queries
  for anchor in doc.xpath("//a/@href[ends-with(.,'.txt')]") {
    print(anchor.stringValue)
  }

} catch let error {
    print(error)
}

ends-with函数是Xpath 2.0的一部分。

快速汤（CSS选择器）：

do{
    let doc: Document = try SwiftSoup.parse("...")
    let links: Elements = try doc.select("a[href]") // a with href
    let pngs: Elements = try doc.select("img[src$=.png]")
   
    // img with src ending .png
    let masthead: Element? = try doc.select("div.masthead").first()

    // div with class=masthead
    let resultLinks: Elements? = try doc.select("h3.r > a") // direct a after h3
} catch Exception.Error(let type, let message){
    print(message)
} catch {
   print("error")
}

季（XPath）：

let jiDoc = Ji(htmlURL: URL(string: "http://www.apple.com/support")!)
let titleNode = jiDoc?.xPath("//head/title")?.first
print("title: \(titleNode?.content)") // title: Optional("Official Apple Support")

赞(0）回复(0）举报 2022-12-26

6uxekuva2#

试试SwiftSoup，它是jsoup到Swift的一个移植。

let html: String = "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>";
    let els: Elements = try SwiftSoup.parse(html).select("a");
    for element: Element in els.array(){
        print(try element.attr("href"))
    }

赞(0）回复(0）举报 2022-12-26

xesrikrc3#

您可以尝试使用以下swift-html解析器：
https://github.com/tid-kijyun/Swift-HTML-Parser
它帮助很大。
从txt文件中获取html文件可以：

let file = "file.txt"

if let dirs : [String] = NSSearchPathForDirectoriesInDomains(NSSearchPathDirectory.DocumentDirectory, NSSearchPathDomainMask.AllDomainsMask, true) as? [String] {
    let dir = dirs[0] //documents directory
    let path = dir.stringByAppendingPathComponent(file);
    let html = String(contentsOfFile: path, encoding: NSUTF8StringEncoding, error: nil)

编辑：
为了得到你所需要的，你可以使用的例子：

import Foundation

let html = "theHtmlYouWannaParse"

var err : NSError?
var parser     = HTMLParser(html: html, error: &err)
if err != nil {
    println(err)
    exit(1)
}

var bodyNode   = parser.body

if let inputNodes = bodyNode?.findChildTags("b") {
    for node in inputNodes {
        println(node.contents)
    }
}

if let inputNodes = bodyNode?.findChildTags("a") {
    for node in inputNodes {
        println(node.getAttributeNamed("href")) //<- Here you would get your files link
    }
}

赞(0）回复(0）举报 2022-12-26

我来回答

在swift中解析html的最佳实践是什么？

3条答案

相关问题

热门标签

最新问答