org.ccil.cowan.tagsoup.Parser.<init>()方法的使用及代码示例

x33g5p2x  于2022-01-26 转载在 其他  
字(10.4k)|赞(0)|评价(0)|浏览(171)

本文整理了Java中org.ccil.cowan.tagsoup.Parser.<init>()方法的一些代码示例,展示了Parser.<init>()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Parser.<init>()方法的具体详情如下:
包路径:org.ccil.cowan.tagsoup.Parser
类名称:Parser
方法名:<init>

Parser.<init>介绍

暂无

代码示例

代码示例来源:origin: seven332/EhViewer

/**
 * Converts an HTML string into styled text suitable for display.
 *
 * <p>Parsing is delegated to a TagSoup {@link Parser} configured with
 * {@code HtmlParser.schema}, so malformed real-world HTML is tolerated.
 *
 * @param source the HTML markup to convert
 * @param imageGetter supplies a representation for each &lt;img&gt; tag;
 *     pass {@code null} to opt out
 * @param tagHandler invoked for tags that are not otherwise understood;
 *     pass {@code null} to opt out
 * @return the text produced by {@code HtmlToSpannedConverter#convert()}
 */
public static SpannableStringBuilder fromHtml(String source, ImageGetter imageGetter,
    TagHandler tagHandler) {
  final Parser tagSoup = new Parser();
  try {
    tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
  } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
    // Should not happen.
    throw new RuntimeException(e);
  }
  return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}

代码示例来源:origin: rest-assured/rest-assured

slurper = new XmlSlurper(config.isValidating(), config.isNamespaceAware(), config.isAllowDocTypeDeclaration());
} else {
  XMLReader p = new org.ccil.cowan.tagsoup.Parser();
  slurper = new XmlSlurper(p);

代码示例来源:origin: rest-assured/rest-assured

slurper = new XmlSlurper(config.isValidating(), config.isNamespaceAware(), config.isAllowDocTypeDeclaration());
} else {
  XMLReader p = new org.ccil.cowan.tagsoup.Parser();
  slurper = new XmlSlurper(p);

代码示例来源:origin: apache/tika

// Create a TagSoup parser, select the schema it should use through the
// schema property, and route its SAX events to the supplied handler.
org.ccil.cowan.tagsoup.Parser parser = new org.ccil.cowan.tagsoup.Parser();
parser.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, schema);
parser.setContentHandler(handler);

代码示例来源:origin: org.ccil.cowan.tagsoup/tagsoup

/**
 * Creates an instance backed by a newly constructed TagSoup parser.
 * Used by the factory, for prototypes.
 */
protected SAXParserImpl() {
  super();
  parser = new org.ccil.cowan.tagsoup.Parser();
}

代码示例来源:origin: net.ontopia/ontopia-classify

/**
 * Supplies the XML reader for this component: a new TagSoup parser.
 *
 * @return a freshly constructed TagSoup {@code Parser}
 * @throws SAXException declared by the signature; nothing in this body throws it
 */
protected XMLReader createXMLReader() throws SAXException {
 final XMLReader tagSoupReader = new org.ccil.cowan.tagsoup.Parser();
 return tagSoupReader;
}

代码示例来源:origin: ontopia/ontopia

/**
 * Supplies the XML reader for this component: a new TagSoup parser.
 *
 * @return a freshly constructed TagSoup {@code Parser}
 * @throws SAXException declared by the signature; nothing in this body throws it
 */
@Override
protected XMLReader createXMLReader() throws SAXException {
 final XMLReader tagSoupReader = new org.ccil.cowan.tagsoup.Parser();
 return tagSoupReader;
}

代码示例来源:origin: apache/tika

// Instantiates a TagSoup parser through its no-argument constructor.
new org.ccil.cowan.tagsoup.Parser();

代码示例来源:origin: org.xml-cml/cmlxom

/**
 * Creates a {@code Builder} that reads its input through a TagSoup parser.
 *
 * @return a new {@code Builder} wrapping a freshly constructed TagSoup {@code Parser}
 */
public static Builder getTagsoupBuilder() {
    // The parser is instantiated directly instead of being looked up by class
    // name through XMLReaderFactory, so there is no SAXException to handle.
    final XMLReader tagsoupReader = new org.ccil.cowan.tagsoup.Parser();
    return new Builder(tagsoupReader);
}

代码示例来源:origin: net.sf.ofx4j/ofx4j

/**
 * Fetches the document at the given location and parses it with TagSoup,
 * collecting the result through an {@code InstitutionContentHandler}.
 *
 * @param href the URL of the document to load
 * @return the {@code data} gathered by the content handler during the parse
 * @throws IOException if the URL is malformed or the document cannot be opened or read
 * @throws SAXException if a parser feature is rejected or parsing fails
 */
private BaseFinancialInstitutionData loadInstitutionData(String href) throws IOException, SAXException {
 if (LOG.isInfoEnabled()) {
  LOG.info("Loading institution data from: " + href);
 }

 URL url = new URL(href);
 XMLReader xmlReader = new Parser();
 xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
 xmlReader.setFeature("http://xml.org/sax/features/validation", false);
 InstitutionContentHandler institutionHandler = new InstitutionContentHandler();
 xmlReader.setContentHandler(institutionHandler);
 // Close the remote stream even when parsing fails; it used to be left open.
 try (java.io.InputStream in = url.openStream()) {
  xmlReader.parse(new InputSource(in));
 }
 return institutionHandler.data;
}

代码示例来源:origin: stoicflame/ofx4j

/**
 * Fetches the document at the given location and parses it with TagSoup,
 * collecting the result through an {@code InstitutionContentHandler}.
 *
 * @param href the URL of the document to load
 * @return the {@code data} gathered by the content handler during the parse
 * @throws IOException if the URL is malformed or the document cannot be opened or read
 * @throws SAXException if a parser feature is rejected or parsing fails
 */
private BaseFinancialInstitutionData loadInstitutionData(String href) throws IOException, SAXException {
 if (LOG.isInfoEnabled()) {
  LOG.info("Loading institution data from: " + href);
 }

 URL url = new URL(href);
 XMLReader xmlReader = new Parser();
 xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
 xmlReader.setFeature("http://xml.org/sax/features/validation", false);
 InstitutionContentHandler institutionHandler = new InstitutionContentHandler();
 xmlReader.setContentHandler(institutionHandler);
 // Close the remote stream even when parsing fails; it used to be left open.
 try (java.io.InputStream in = url.openStream()) {
  xmlReader.parse(new InputSource(in));
 }
 return institutionHandler.data;
}

代码示例来源:origin: net.sf.ofx4j/ofx4j

/**
 * Downloads the document at {@code getUrl()} and parses it with TagSoup,
 * feeding the SAX events to a new {@code DirectoryContentHandler}.
 *
 * @throws IOException if the URL is malformed or the document cannot be opened or read
 * @throws SAXException if a parser feature is rejected or parsing fails
 */
private void initializeFIData() throws IOException, SAXException {
 URL url = new URL(getUrl());
 XMLReader xmlReader = new Parser();
 xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
 xmlReader.setFeature("http://xml.org/sax/features/validation", false);
 xmlReader.setContentHandler(new DirectoryContentHandler());
 // Close the remote stream even when parsing fails; it used to be left open.
 try (java.io.InputStream in = url.openStream()) {
  xmlReader.parse(new InputSource(in));
 }
}

代码示例来源:origin: stoicflame/ofx4j

/**
 * Downloads the document at {@code getUrl()} and parses it with TagSoup,
 * feeding the SAX events to a new {@code DirectoryContentHandler}.
 *
 * @throws IOException if the URL is malformed or the document cannot be opened or read
 * @throws SAXException if a parser feature is rejected or parsing fails
 */
private void initializeFIData() throws IOException, SAXException {
 URL url = new URL(getUrl());
 XMLReader xmlReader = new Parser();
 xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
 xmlReader.setFeature("http://xml.org/sax/features/validation", false);
 xmlReader.setContentHandler(new DirectoryContentHandler());
 // Close the remote stream even when parsing fails; it used to be left open.
 try (java.io.InputStream in = url.openStream()) {
  xmlReader.parse(new InputSource(in));
 }
}

代码示例来源:origin: org.finra.jtaf/jtaf-extwebdriver

/**
 * Evaluates an XPath expression against the current HTML source and returns
 * the result as a string.
 *
 * <p>The HTML is parsed leniently with TagSoup (namespaces disabled), turned
 * into a DOM through an identity transform, and then queried.
 *
 * @param xpath the XPath expression to evaluate
 * @return the expression's value converted to a string
 * @throws Exception if parsing, transformation or evaluation fails
 */
@Override
public String evaluateXpath(String xpath) throws Exception {
  XPath evaluator = XPathFactory.newInstance().newXPath();

  // Drop whitespace-only gaps between adjacent tags before parsing.
  String markup = getHtmlSource().replaceAll(">\\s+<", "><");
  InputSource htmlInput = new InputSource(
      new ByteArrayInputStream(markup.getBytes(Charset.forName("UTF-8"))));

  XMLReader tagSoup = new Parser();
  tagSoup.setFeature(Parser.namespacesFeature, false);

  // An identity transform from the SAX stream into a DOM tree.
  DOMResult dom = new DOMResult();
  TransformerFactory.newInstance()
      .newTransformer()
      .transform(new SAXSource(tagSoup, htmlInput), dom);

  Object value = evaluator.evaluate(xpath, dom.getNode(), XPathConstants.STRING);
  return (String) value;
}

代码示例来源:origin: fourlastor/dante

/**
 * Parses the given HTML text with TagSoup, using this object as the SAX
 * content handler, and then calls {@code emptyBuffer()}.
 *
 * @param string the HTML text to parse
 * @throws HtmlParsingException wrapping any I/O or SAX failure raised while parsing
 */
@Override public void parse(String string) {
  org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
  tagSoup.setContentHandler(this);
  try {
    InputSource input = new InputSource(new StringReader(string));
    tagSoup.parse(input);
  } catch (IOException | SAXException cause) {
    throw new HtmlParsingException(cause);
  }
  emptyBuffer();
}

代码示例来源:origin: com.xmlcalabash/xmlcalabash

/**
 * Parses the given text as HTML with TagSoup and builds a document node from it.
 *
 * @param text the HTML text to parse
 * @return the document node built from the parsed input
 * @throws XProcException wrapping any exception raised while building the document
 */
private XdmNode tagSoup(String text) {
  InputSource htmlInput = new InputSource(new StringReader(text));

  Parser htmlParser = new Parser();
  htmlParser.setEntityResolver(runtime.getResolver());

  DocumentBuilder docBuilder = runtime.getProcessor().newDocumentBuilder();
  try {
    return docBuilder.build(new SAXSource(htmlParser, htmlInput));
  } catch (Exception e) {
    throw new XProcException(e);
  }
}

代码示例来源:origin: org.daisy.libs/com.xmlcalabash

/**
 * Parses the given text as HTML with TagSoup and builds a document node from it.
 *
 * @param text the HTML text to parse
 * @return the document node built from the parsed input
 * @throws XProcException wrapping any exception raised while building the document
 */
private XdmNode tagSoup(String text) {
  InputSource htmlInput = new InputSource(new StringReader(text));

  Parser htmlParser = new Parser();
  htmlParser.setEntityResolver(runtime.getResolver());

  DocumentBuilder docBuilder = runtime.getProcessor().newDocumentBuilder();
  try {
    return docBuilder.build(new SAXSource(htmlParser, htmlInput));
  } catch (Exception e) {
    throw new XProcException(e);
  }
}

代码示例来源:origin: com.cloudera.cdk/cdk-morphlines-saxon

/**
 * Builds the command and configures a TagSoup {@link Parser} from the given config.
 *
 * <p>Reads the "charset" and "omitXMLDeclaration" options, creates the parser,
 * sets its schema property to {@code htmlSchema}, maps each boolean option
 * below onto a parser feature, and finally calls {@code validateArguments()}.
 *
 * @param builder forwarded to the superclass constructor
 * @param config the configuration the options are read from
 * @param parent forwarded to the superclass constructor
 * @param child forwarded to the superclass constructor
 * @param context forwarded to the superclass constructor
 * @throws SAXNotRecognizedException declared for the setProperty/setFeature calls
 * @throws SAXNotSupportedException declared for the setProperty/setFeature calls
 */
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
 super(builder, config, parent, child, context);
 this.charset = getConfigs().getCharset(config, "charset", null);
 this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);      
 this.xmlReader = new Parser(); // no reuse?
 xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
 // NOTE(review): several keys below start with "no" but their value is passed
 // to the feature without negation (only "noNamespaces" and
 // "suppressIgnorableWhitespace" are inverted) - confirm this is intended.
 xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
 // "noNamespaces" defaults to true and is negated, so the namespaces feature is off by default.
 xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
 xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
 xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
 xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
 xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
 xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
 xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
 // "suppressIgnorableWhitespace" defaults to true and is negated, so this feature is off by default.
 xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
 validateArguments();
}

代码示例来源:origin: kite-sdk/kite

/**
 * Builds the command and configures a TagSoup {@link Parser} from the given config.
 *
 * <p>Reads the "charset" and "omitXMLDeclaration" options, creates the parser,
 * sets its schema property to {@code htmlSchema}, maps each boolean option
 * below onto a parser feature, and finally calls {@code validateArguments()}.
 *
 * @param builder forwarded to the superclass constructor
 * @param config the configuration the options are read from
 * @param parent forwarded to the superclass constructor
 * @param child forwarded to the superclass constructor
 * @param context forwarded to the superclass constructor
 * @throws SAXNotRecognizedException declared for the setProperty/setFeature calls
 * @throws SAXNotSupportedException declared for the setProperty/setFeature calls
 */
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
 super(builder, config, parent, child, context);
 this.charset = getConfigs().getCharset(config, "charset", null);
 this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);      
 this.xmlReader = new Parser(); // no reuse?
 xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
 // NOTE(review): several keys below start with "no" but their value is passed
 // to the feature without negation (only "noNamespaces" and
 // "suppressIgnorableWhitespace" are inverted) - confirm this is intended.
 xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
 // "noNamespaces" defaults to true and is negated, so the namespaces feature is off by default.
 xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
 xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
 xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
 xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
 xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
 xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
 xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
 // "suppressIgnorableWhitespace" defaults to true and is negated, so this feature is off by default.
 xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
 validateArguments();
}

代码示例来源:origin: net.sf.ofx4j/ofx4j

/**
 * Parses OFX content from the given reader using a TagSoup parser whose
 * restart-elements feature is switched off, forwarding events to a
 * {@code TagSoupHandler} that wraps {@code getContentHandler()}.
 *
 * @param reader the source of the content to parse
 * @throws IOException if reading from the source fails
 * @throws OFXParseException if the parser cannot be configured or parsing fails;
 *     an {@code OFXParseException} that is the cause of a SAX failure is rethrown as-is
 */
public void parseV1FromFirstElement(Reader reader) throws IOException, OFXParseException {
 final Parser tagSoup = new Parser();
 try {
  tagSoup.setFeature(Parser.restartElementsFeature, false);
 } catch (Exception e) {
  throw new OFXParseException(e);
 }

 tagSoup.setContentHandler(new TagSoupHandler(getContentHandler()));
 try {
  tagSoup.parse(new InputSource(reader));
 } catch (SAXException e) {
  // Unwrap a parse error that the handler raised inside the SAX machinery.
  final Throwable cause = e.getCause();
  if (cause instanceof OFXParseException) {
   throw (OFXParseException) cause;
  }
  throw new OFXParseException("Error parsing OFX document.", e);
 }
}

相关文章