org.jsoup.nodes.TextNode类的使用及代码示例

x33g5p2x  于2022-01-29 转载在 其他  
字(12.8k)|赞(0)|评价(0)|浏览(295)

本文整理了Java中org.jsoup.nodes.TextNode类的一些代码示例,展示了TextNode类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。TextNode类的具体详情如下:
包路径:org.jsoup.nodes.TextNode
类名称:TextNode

TextNode介绍

[英]A text node.
[中]文本节点。

代码示例

代码示例来源:origin: code4craft/webmagic

/**
 * Concatenates the text of the {@link TextNode}s that are direct children of the given element.
 * Children of any other node type are ignored, so text nested inside child elements is not included.
 *
 * @param element element whose direct child nodes are scanned
 * @return the joined text of the direct text-node children, or an empty string if there are none
 */
protected String getText(Element element) {
  StringBuilder directText = new StringBuilder();
  for (Node child : element.childNodes()) {
    if (!(child instanceof TextNode)) {
      continue;
    }
    directText.append(((TextNode) child).text());
  }
  return directText.toString();
}

代码示例来源:origin: k9mail/k-9

/**
 * Copies {@code source} into the tree being built under {@code destination}.
 * <ul>
 *   <li>While {@code elementToSkip} is set, nothing is copied.</li>
 *   <li>An element accepted by {@code isSafeTag} is re-created (same tag, base URI and a clone of its
 *       attributes), appended to {@code destination}, and becomes the new {@code destination}.</li>
 *   <li>A rejected element other than {@code root} is recorded in {@code elementToSkip}.</li>
 *   <li>Text nodes are always copied; data nodes only when their parent passes {@code isSafeTag}.</li>
 * </ul>
 * NOTE(review): presumably a traversal "enter node" callback whose matching "leave node" callback
 * resets {@code elementToSkip} and {@code destination} - confirm against the enclosing class.
 *
 * @param source node being visited
 * @param depth  depth of the node in the traversal; not used here
 */
public void head(Node source, int depth) {
  if (elementToSkip != null) {
    return;
  }

  if (source instanceof Element) {
    Element original = (Element) source;
    if (!isSafeTag(original)) {
      if (source != root) {
        elementToSkip = original;
      }
      return;
    }
    String tagName = original.tagName();
    Attributes copiedAttributes = original.attributes().clone();
    Element copy = new Element(Tag.valueOf(tagName), original.baseUri(), copiedAttributes);
    destination.appendChild(copy);
    destination = copy;
    return;
  }

  if (source instanceof TextNode) {
    String wholeText = ((TextNode) source).getWholeText();
    destination.appendChild(new TextNode(wholeText, source.baseUri()));
    return;
  }

  if (source instanceof DataNode && isSafeTag(source.parent())) {
    String wholeData = ((DataNode) source).getWholeData();
    destination.appendChild(new DataNode(wholeData, source.baseUri()));
  }
}

代码示例来源:origin: org.jsoup/jsoup

/**
 * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
 * original text up to the offset, and will have a new text node sibling containing the text after the offset.
 * <p>
 * The offset must satisfy {@code 0 <= offset < length}; an offset equal to the text length is rejected, so the
 * returned tail node is never empty. Invalid offsets are reported through {@code Validate.isTrue}.
 * <p>
 * If this node has no parent, the tail node is still created and returned but is not attached to any tree.
 *
 * @param offset string offset point to split node at.
 * @return the newly created text node containing the text after the offset.
 */
public TextNode splitText(int offset) {
  final String text = coreValue();
  Validate.isTrue(offset >= 0, "Split offset must not be negative");
  // The check is strict (<), so the message says "less than" rather than "not greater than".
  Validate.isTrue(offset < text.length(), "Split offset must be less than current text length");

  String head = text.substring(0, offset);
  String tail = text.substring(offset);
  text(head);
  TextNode tailNode = new TextNode(tail);
  if (parent() != null) {
    // Insert the tail directly after this node among the parent's children.
    parent().addChildren(siblingIndex() + 1, tailNode);
  }

  return tailNode;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 * Appends the whole text of {@code textNode} to {@code accum}.
 * The text is appended verbatim when {@code preserveWhitespace} accepts the node's parent or the node is a
 * {@link CDataNode}; otherwise it goes through {@code StringUtil.appendNormalisedWhitespace}, which is told
 * whether {@code accum} currently ends in whitespace.
 *
 * @param accum    buffer receiving the text
 * @param textNode node whose text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
  final String wholeText = textNode.getWholeText();
  final boolean keepVerbatim = preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode;
  if (!keepVerbatim) {
    StringUtil.appendNormalisedWhitespace(accum, wholeText, TextNode.lastCharIsWhitespace(accum));
    return;
  }
  accum.append(wholeText);
}

代码示例来源:origin: org.jsoup/jsoup

/**
 * Writes this node's text to {@code accum}, optionally preceded by indentation.
 * <p>
 * Indentation is emitted only when pretty-printing is on, this node is not blank, and either
 * (a) it is the first sibling ({@code siblingIndex() == 0}) of an {@link Element} parent whose tag reports
 * {@code formatAsBlock()}, or (b) outline mode is on and {@code siblingNodes()} is non-empty.
 *
 * @param accum destination for the output
 * @param depth depth passed through to {@code indent}
 * @param out   output settings consulted for {@code prettyPrint()} and {@code outline()}
 * @throws IOException declared because the output goes to an {@link Appendable}
 */
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
  // The instanceof test must stay ahead of the (Element) cast; short-circuiting keeps the cast safe.
  if (out.prettyPrint() && ((siblingIndex() == 0 && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock() && !isBlank()) || (out.outline() && siblingNodes().size()>0 && !isBlank()) ))
    indent(accum, depth, out);
  // Whitespace is normalised only when pretty-printing inside an Element parent for which
  // Element.preserveWhitespace(...) returns false.
  boolean normaliseWhite = out.prettyPrint() && parent() instanceof Element
      && !Element.preserveWhitespace(parent());
  // NOTE(review): the two literal 'false' arguments are positional flags of Entities.escape; their meaning
  // is not visible from this block - confirm against the Entities.escape signature.
  Entities.escape(accum, coreValue(), out, false, normaliseWhite, false);
}

代码示例来源:origin: perfectsense/brightspot-cms

if (next != null && BR_TAG.equals(next.tag())) {
  next.remove();
     (previousNode = previousNode.previousSibling()) != null;) {
      && !((TextNode) previousNode).isBlank()) {
    break;
     (previous = previous.previousSibling()) != null;) {
  child.remove();
  paragraph.prependChild(child.clone());
      && ((TextNode) next).isBlank())) {
    break;
while ((next = next.nextSibling()) != null) {
  if (!(next instanceof TextNode
      && ((TextNode) next).isBlank())) {
    break;
for (Node child : brDiv.childNodes()) {
  if (child instanceof TextNode) {
    if (!((TextNode) child).isBlank()) {
      continue DIV;

代码示例来源:origin: USPTO/PatentPublicData

for (int i = 1; i <= figRefEls.size(); i++) {
  Element element = figRefEls.get(i - 1);
  element.attr("id", "FR-" + Strings.padStart(String.valueOf(i), 4, '0'));
  element.attr("idref", ReferenceTagger.createFigId(element.select("PDAT").text()));
  element.tagName("a");
  element.addClass("figref");
  element.replaceWith(new TextNode("Table-Reference"));
  newEl.addClass("math");
  newEl.attr("format", "mathml");
  newEl.appendChild(new TextNode(mathml));
  element.replaceWith(newEl);         
  try {
    String unicode = UnicodeUtil.toSubscript(el.html());
    el.replaceWith(new TextNode(unicode));
  } catch (ParseException e) {
    el.tagName("sub");
  try {
    String unicode = UnicodeUtil.toSuperscript(el.html());
    el.replaceWith(new TextNode(unicode));
  } catch (ParseException e) {
    el.tagName("sup");

代码示例来源:origin: USPTO/PatentPublicData

/**
 * Turns a bold figure number that follows a figure reference in a list (e.g. {@code ", "}, {@code " and "}
 * or {@code ", and "} followed by {@code <b>3A</b>}) into its own figure-reference link, then repeats for
 * the link just created so that a whole run of list items is converted.
 * <p>
 * The method does nothing when:
 * <ul>
 *   <li>{@code element} has no next sibling, or that sibling is neither a text node nor an element;</li>
 *   <li>the sibling's text is not exactly one of the list separators above;</li>
 *   <li>there is no node after the separator, it is not a {@code b} node, it has no children, or its
 *       first child is not a text node;</li>
 *   <li>the bold text is not one or two digits optionally followed by a single ASCII letter.</li>
 * </ul>
 *
 * @param element an existing figure-reference element; it is cloned as the template for the new link
 */
public void fixFigrefListItem(Element element) {
  Node separator = element.nextSibling();
  String trailingTxt;
  if (separator instanceof TextNode) {
    trailingTxt = ((TextNode) separator).getWholeText();
  } else if (separator instanceof Element) {
    trailingTxt = ((Element) separator).text();
  } else {
    // Covers both "no sibling" (instanceof is false for null) and unsupported node types.
    return;
  }

  if (!trailingTxt.matches("^(, |,? and )")) {
    return;
  }

  // The separator may be the last node in its parent, so the candidate can be null.
  Node candidate = separator.nextSibling();
  if (candidate == null || !"b".equalsIgnoreCase(candidate.nodeName())) {
    return;
  }

  // Guard against an empty <b></b> or a <b> whose first child is another element.
  if (candidate.childNodes().isEmpty() || !(candidate.childNode(0) instanceof TextNode)) {
    return;
  }

  String containedTxt = ((TextNode) candidate.childNode(0)).getWholeText();
  // [A-Za-z], not [A-z]: the latter also matches the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
  if (!containedTxt.matches("[0-9]{1,2}[A-Za-z]?")) {
    return;
  }

  Element newEl = element.clone();
  newEl.attr("id", "FR-" + Strings.padStart(containedTxt, 4, '0'));
  newEl.attr("idref", ReferenceTagger.createFigId(containedTxt));
  newEl.tagName("a");
  newEl.addClass("figref");
  newEl.text(containedTxt);
  candidate.replaceWith(newEl);

  // Continue with the next list item, if any, using the new link as the anchor.
  fixFigrefListItem(newEl);
}

代码示例来源:origin: stackoverflow.com

/**
 * Builds a string from the first entry of {@code elem.textNodes()} and every sibling node that follows it.
 * Text nodes contribute {@code text()}, elements contribute their {@code text()}, and any other node
 * type contributes nothing.
 *
 * @param elem element whose text nodes determine the starting point
 * @return the concatenated text, or an empty string when {@code elem.textNodes()} is empty
 */
public static String textPlus(Element elem) {
  List<TextNode> textNodes = elem.textNodes();
  if (textNodes.isEmpty()) {
    return "";
  }

  StringBuilder result = new StringBuilder();
  // Walk the sibling chain starting at the first text node.
  for (Node cursor = textNodes.get(0); cursor != null; cursor = cursor.nextSibling()) {
    if (cursor instanceof TextNode) {
      result.append(((TextNode) cursor).text());
    } else if (cursor instanceof Element) {
      result.append(((Element) cursor).text());
    }
  }
  return result.toString();
}

代码示例来源:origin: org.jbehave/jbehave-rest

/**
 * Replaces every element returned by {@code body.getElementsByTag(tag)} with a text node holding the
 * element's text followed by the literal string {@code "<br/>"}.
 * Matching elements found under an element's children are processed recursively first. Elements that
 * are null or have no parent are skipped.
 *
 * @param body element searched for {@code tag}
 * @param tag  tag name to look up
 */
protected void cleanNodes(Element body, String tag) {
  for (Element match : body.getElementsByTag(tag)) {
    if (match != null && match.parent() != null) {
      for (Element nested : match.children().select(tag)) {
        cleanNodes(nested, tag);
      }
      String flattened = match.text() + "<br/>";
      match.replaceWith(new TextNode(flattened, ""));
    }
  }
}
}

代码示例来源:origin: com.vaadin/flow-server

return new TextNode(element.getText(), document.baseUri());
    .createElement(element.getTag());
if (element.hasProperty("innerHTML")) {
  target.html((String) element.getPropertyRaw("innerHTML"));
  String attributeValue = element.getAttribute(name);
  if ("".equals(attributeValue)) {
    target.attr(name, true);
  } else {
    target.attr(name, attributeValue);

代码示例来源:origin: schaal/ocreader

final String possibleEmoji = img.attr("alt");
  img.replaceWith(new TextNode(possibleEmoji));
if(iframe.hasAttr("src")) {
  String href = iframe.attr("src");
  String html = String.format(Locale.US, videoLink, href, href);

代码示例来源:origin: org.eclipse.mylyn.docs/org.eclipse.mylyn.wikitext

for (Element element : body.getAllElements()) {
  if (Html.isSpanElement(element)) {
    List<Node> childNodes = element.childNodes();
    if (childNodes.isEmpty() && !isHyperlinkWithTarget(element)) {
      element.remove();
      modifiedOne = true;
    } else {
        if (node instanceof TextNode) {
          TextNode textNode = (TextNode) node;
          String text = textNode.text();
          if (text.trim().length() == 0) {
            textNode.remove();
            element.before(textNode);
            element.remove();
          normalizeTextNodes((Element) textNode.parent());

代码示例来源:origin: dhanji/sitebricks

Element element = ((Element) node);
  StringBuilder accum = new StringBuilder();
  accum.append("<").append(element.tagName());
  for (Attribute attribute: element.attributes()) {
    if (!(attribute.getKey().startsWith("_"))) {
      accum.append(" ");
  if (element.childNodes().isEmpty() && element.tag().isEmpty()) {
    accum.append(" />");
  } else {
  return ((TextNode) node).getWholeText();
} else if (node instanceof XmlDeclaration) {
 if (node.childNodes().isEmpty()) {
  return "";
  return node.outerHtml();
} else if (node instanceof Comment) {
} else if (node instanceof DataNode && node.childNodes().isEmpty()) {

代码示例来源:origin: 94fzb/zrlog

/**
 * Builds an HTML digest of {@code str}.
 * <p>
 * The input is parsed as a body fragment and {@code getAllTextNode} fills a list from the document's child
 * nodes. For each {@link TextNode} in that list the outer HTML of its parent is appended and the text
 * length is added to a running total; once the total exceeds {@code size}, {@code " ..."} is appended and
 * the loop stops. If a second parse of the input finds a {@code video} element, the first one is
 * prepended, followed by {@code "<br/>"}.
 *
 * @param str  HTML to summarise
 * @param size text-length threshold after which the digest is cut off
 * @return the trimmed digest
 */
public static String autoDigest(String str, int size) {
  StringBuilder html = new StringBuilder();
  List<Node> collected = new ArrayList<>();
  getAllTextNode(Jsoup.parseBodyFragment(str).childNodes(), collected);

  int seenChars = 0;
  for (Node candidate : collected) {
    if (!(candidate instanceof TextNode)) {
      continue;
    }
    html.append(candidate.parent().outerHtml());
    seenChars += ((TextNode) candidate).text().length();
    if (seenChars > size) {
      html.append(" ...");
      break;
    }
  }

  String digest = html.toString();
  Elements videos = Jsoup.parse(str).body().select("video");
  if (videos != null && !videos.isEmpty()) {
    digest = videos.get(0).toString() + "<br/>" + digest;
  }
  return digest.trim();
}

代码示例来源:origin: YeDaxia/Android-YRichEditor

List<Node> childNodeList = doc.body().childNodes();
if (childNodeList == null || childNodeList.isEmpty()) {
  return null;
for (int pos = 0; pos != size; pos++) {
  Node childNode = childNodeList.get(pos);
  String tagName = childNode.nodeName();
  if (tagName.equalsIgnoreCase("h")) {
    elList.add(new PElement(Html.fromHtml(((Element) childNode).html())));
  } else if(tagName.equalsIgnoreCase("h1")){
    elList.add(new HElement(((Element) childNode).html()));
  }else if (tagName.equalsIgnoreCase("img")) {
    String src = childNode.attr("src");
    String width = childNode.attr("width");
    String height = childNode.attr("height");
    elList.add(new ImgElement(src, YUtils.parseInt(width, 0), YUtils.parseInt(height, 0)));
      elList.add(new PElement(Html.fromHtml(((Element) childNode).html())));
    } else if(childNode instanceof TextNode){
      elList.add(new PElement(((TextNode) childNode).text()));
    }else {
      elList.add(new PElement(childNode.outerHtml()));

代码示例来源:origin: org.jsoup/jsoup

/**
 * Copies {@code source} into the tree under {@code destination} according to {@code whitelist},
 * counting what is dropped in {@code numDiscarded}.
 * <ul>
 *   <li>Element with a safe tag: the element from {@code createSafeElement} is appended and becomes the
 *       new {@code destination}; its discarded-attribute count is added to {@code numDiscarded}.</li>
 *   <li>Element with an unsafe tag: counted as discarded unless it is {@code root}.</li>
 *   <li>Text node: always copied.</li>
 *   <li>Data node: copied when the parent's node name is a safe tag.</li>
 *   <li>Anything else, including a data node under an unsafe parent: counted as discarded.</li>
 * </ul>
 *
 * @param source node being visited
 * @param depth  depth of the node in the traversal; not used here
 */
public void head(Node source, int depth) {
  if (source instanceof Element) {
    Element sourceEl = (Element) source;
    if (whitelist.isSafeTag(sourceEl.tagName())) {
      // Safe tag: clone it with only its safe attributes and descend into the clone.
      ElementMeta meta = createSafeElement(sourceEl);
      destination.appendChild(meta.el);
      numDiscarded += meta.numAttribsDiscarded;
      destination = meta.el;
    } else if (source != root) {
      // Unsafe tag: not added. The root is never counted against the discarded total.
      numDiscarded++;
    }
    return;
  }

  if (source instanceof TextNode) {
    String wholeText = ((TextNode) source).getWholeText();
    destination.appendChild(new TextNode(wholeText));
    return;
  }

  if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
    String wholeData = ((DataNode) source).getWholeData();
    destination.appendChild(new DataNode(wholeData));
    return;
  }

  // Comments, XML processing instructions, etc. are dropped.
  numDiscarded++;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 * Wraps the data of a character token in a node and appends it to the current element.
 * A CDATA token becomes a {@link CDataNode}; otherwise the data becomes a {@link DataNode} when the
 * current element's tag name is {@code script} or {@code style}, and a {@link TextNode} in all other cases.
 *
 * @param characterToken token supplying the character data
 */
void insert(Token.Character characterToken) {
  final String tagName = currentElement().tagName();
  final String data = characterToken.getData();

  final Node node;
  if (characterToken.isCData()) {
    node = new CDataNode(data);
  } else {
    // Characters inside script and style are stored as data nodes, not text nodes.
    boolean rawDataParent = tagName.equals("script") || tagName.equals("style");
    node = rawDataParent ? new DataNode(data) : new TextNode(data);
  }

  // Appended directly rather than via insertNode: these nodes are not fostered, and a stack always exists.
  currentElement().appendChild(node);
}

代码示例来源:origin: org.kantega.openaksess/openaksess-core

/**
 * Pushes a wrapper for {@code source} onto {@code elements} and, depending on the node type, copies the
 * node under {@code destination}.
 * <ul>
 *   <li>A {@code body} element is pushed but never copied.</li>
 *   <li>Any other element is copied via {@code createSafeElement} only if {@code shouldKeepChild}
 *       accepts it given the wrapper previously on top of the stack (or {@code null} if the stack was
 *       empty); the copy then becomes the new {@code destination}.</li>
 *   <li>Text and data nodes are always copied with the source's base URI.</li>
 * </ul>
 *
 * @param source node being visited
 * @param depth  depth of the node in the traversal; not used here
 */
public void head(Node source, int depth) {
  NodeWrapper node = new NodeWrapper(source);
  // Read the parent before pushing, otherwise peek() would return the node itself.
  NodeWrapper parentNode = elements.isEmpty() ? null : elements.peek();
  elements.push(node);

  if (source instanceof Element) {
    Element sourceEl = (Element) source;
    if (sourceEl.tagName().equals("body")) {
      return;
    }
    if (!shouldKeepChild(node, parentNode)) {
      return;
    }
    Element destChild = createSafeElement(sourceEl);
    destination.appendChild(destChild);
    destination = destChild;
    return;
  }

  if (source instanceof TextNode) {
    String wholeText = ((TextNode) source).getWholeText();
    destination.appendChild(new TextNode(wholeText, source.baseUri()));
    return;
  }

  if (source instanceof DataNode) {
    String wholeData = ((DataNode) source).getWholeData();
    destination.appendChild(new DataNode(wholeData, source.baseUri()));
  }
}

代码示例来源:origin: stackoverflow.com

/**
 * Parses HTML from a stream and returns the text that {@code buildStringFromNode} produces for the
 * document body.
 * The stream is handed to {@code Jsoup.parse} with a {@code null} charset name and an empty base URI;
 * this method does not close it.
 *
 * @param html stream containing the HTML document
 * @return the text built from the document's body
 * @throws IOException if parsing the stream fails with an I/O error
 */
public static String htmlToText(InputStream html) throws IOException {
  Element body = Jsoup.parse(html, null, "").body();
  StringBuffer text = buildStringFromNode(body);
  return text.toString();
}

/**
 * Recursively flattens a node into text.
 * A text node contributes its trimmed text, then every child node is processed in order, and finally a
 * newline is appended if the node is a {@code p} or {@code br} element.
 *
 * @param node root of the subtree to flatten
 * @return a new buffer holding the text of the subtree
 */
private static StringBuffer buildStringFromNode(Node node) {
  StringBuffer out = new StringBuffer();

  if (node instanceof TextNode) {
    out.append(((TextNode) node).text().trim());
  }

  for (Node child : node.childNodes()) {
    out.append(buildStringFromNode(child));
  }

  if (!(node instanceof Element)) {
    return out;
  }

  // Paragraphs and line breaks end the current line.
  String tag = ((Element) node).tagName();
  boolean endsLine = "p".equals(tag) || "br".equals(tag);
  if (endsLine) {
    out.append("\n");
  }
  return out;
}

相关文章