本文整理了Java中org.apache.uima.cas.CAS.setDocumentText()
方法的一些代码示例,展示了CAS.setDocumentText()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。CAS.setDocumentText()
方法的具体详情如下:
包路径:org.apache.uima.cas.CAS
类名称:CAS
方法名:setDocumentText
[英]Set the document text. Once set, Sofa data is immutable, and cannot be set again until the CAS has been reset.
[中]设置文档文本。一旦设置,Sofa数据是不可变的,并且在重置CAS之前无法再次设置。
代码示例来源:origin: nlpie/biomedicus
/** Flushes the accumulated Sofa text into the destination view and indexes every completed annotation. */
@Override
public void finishDestination() {
    // Sofa data may be set only once per view, so the builder is flushed exactly here.
    final String documentText = sofaBuilder.toString();
    destinationView.setDocumentText(documentText);
    completedAnnotations.forEach(annotation -> destinationView.addFsToIndexes(annotation));
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.pdf-asl
@Override
protected void endDocument(final PDDocument aPdf)
    throws IOException
{
    // The fully extracted text becomes the CAS document text (Sofa data),
    // which can only be assigned once per CAS.
    final String extractedText = text.toString();
    cas.setDocumentText(extractedText);
    if (log.isTraceEnabled()) {
        log.trace("</document>");
    }
}
代码示例来源:origin: org.apache.uima/uimaj-tools
/**
 * Initializes the CAS from the UI state: the document text comes from the text
 * area and the language from the current selection.
 */
private final void initCas() {
    // The two setters are independent of each other; order is not significant.
    this.cas.setDocumentText(this.textArea.getText());
    this.cas.setDocumentLanguage(this.language);
}
代码示例来源:origin: CLLKazan/UIMA-Ext
/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    // Each call consumes exactly one input line and makes it the document text.
    cas.setDocumentText(consumeLine());
}
代码示例来源:origin: nlpie/biomedicus
/** Creates a CAS view named after the document, fills it with the text, and wraps it. */
@Nonnull
@Override
public Document addDocument(@Nonnull String name, @Nonnull String text) {
    final CAS documentView = cas.createView(name);
    documentView.setDocumentText(text);
    return new CASDocument(documentView, labelAdapters);
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.bigdata/de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop
@Override
public void convertValue(Text keyFrom, Text valueFrom, CASWritable valueTo)
{
    // Reuse the CAS carried by the writable, discarding any previous document.
    CAS cas = valueTo.getCAS();
    cas.reset();
    // Either run the configured extractor, or fall back to the raw value as text.
    Text documentText = (textExtractor == null)
            ? valueFrom
            : textExtractor.extractDocumentText(keyFrom, valueFrom);
    cas.setDocumentText(documentText.toString());
}
}
代码示例来源:origin: org.nd4j/canova-data-nlp
/**
 * Use the given analysis engine and process the given text.
 * You must release the returned cas yourself.
 * @param text the text to process
 * @return the processed cas
 */
public CAS process(String text) {
    // Delegate to a bounded-retry variant. The original retried unconditionally on
    // failure, which recursed forever (eventually overflowing the stack) whenever
    // the engine failed deterministically on the same non-empty text.
    return process(text, 3);
}

/**
 * Processes {@code text}, retrying on {@link AnalysisEngineProcessException} until
 * {@code attemptsLeft} attempts are exhausted, then wrapping the failure.
 */
private CAS process(String text, int attemptsLeft) {
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
        analysisEngine.process(cas);
    } catch (AnalysisEngineProcessException e) {
        // NOTE(review): the failed CAS is not released here, mirroring the original
        // code — confirm whether it should be returned to the pool on failure.
        if (text != null && !text.isEmpty() && attemptsLeft > 1)
            return process(text, attemptsLeft - 1);
        throw new RuntimeException(e);
    }
    return cas;
}
代码示例来源:origin: org.datavec/datavec-data-nlp
/**
 * Use the given analysis engine and process the given text.
 * You must release the returned cas yourself.
 * @param text the text to process
 * @return the processed cas
 */
public CAS process(String text) {
    // Delegate to a bounded-retry variant. The original retried unconditionally on
    // failure, which recursed forever (eventually overflowing the stack) whenever
    // the engine failed deterministically on the same non-empty text.
    return process(text, 3);
}

/**
 * Processes {@code text}, retrying on {@link AnalysisEngineProcessException} until
 * {@code attemptsLeft} attempts are exhausted, then wrapping the failure.
 */
private CAS process(String text, int attemptsLeft) {
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
        analysisEngine.process(cas);
    } catch (AnalysisEngineProcessException e) {
        // NOTE(review): the failed CAS is not released here, mirroring the original
        // code — confirm whether it should be returned to the pool on failure.
        if (text != null && !text.isEmpty() && attemptsLeft > 1)
            return process(text, attemptsLeft - 1);
        throw new RuntimeException(e);
    }
    return cas;
}
代码示例来源:origin: org.apache.uima/uimaj-ep-cas-editor-ide
/**
 * Serializes {@code text} (stripped of non-XML characters) into an in-memory CAS
 * stream in the requested serial format.
 */
private InputStream getDocument(String fileName, String text, String language,
        SerialFormat format) {
    String failedToImportLine = "Failed to import: " + fileName + "\n\n";
    CAS cas = createEmtpyCAS();
    // Language and text setters are independent; text must be XML-safe before saving.
    cas.setDocumentLanguage(language);
    cas.setDocumentText(removeNonXmlChars(text));
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(40000);
    try {
        CasIOUtils.save(cas, buffer, format);
    } catch (IOException e) {
        throw new TaeError(failedToImportLine + e.getMessage(), e);
    }
    return new ByteArrayInputStream(buffer.toByteArray());
}
代码示例来源:origin: jpatanooga/Canova
/**
 * Use the given analysis engine and process the given text.
 * You must release the returned cas yourself.
 * @param text the text to process
 * @return the processed cas
 */
public CAS process(String text) {
    // Delegate to a bounded-retry variant. The original retried unconditionally on
    // failure, which recursed forever (eventually overflowing the stack) whenever
    // the engine failed deterministically on the same non-empty text.
    return process(text, 3);
}

/**
 * Processes {@code text}, retrying on {@link AnalysisEngineProcessException} until
 * {@code attemptsLeft} attempts are exhausted, then wrapping the failure.
 */
private CAS process(String text, int attemptsLeft) {
    CAS cas = retrieve();
    cas.setDocumentText(text);
    try {
        analysisEngine.process(cas);
    } catch (AnalysisEngineProcessException e) {
        // NOTE(review): the failed CAS is not released here, mirroring the original
        // code — confirm whether it should be returned to the pool on failure.
        if (text != null && !text.isEmpty() && attemptsLeft > 1)
            return process(text, attemptsLeft - 1);
        throw new RuntimeException(e);
    }
    return cas;
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.xml-asl
@Override
public void getNext(CAS cas)
    throws IOException
{
    // Initialize CAS with document meta data
    initCas(cas, currentFileResource, null);
    if (!StringUtils.isWhitespace(language)) {
        cas.setDocumentLanguage(language);
    }

    // Collect the document text from the next queued node, if any.
    StringBuilder documentText = new StringBuilder();
    Node node = nodes.poll();
    if (node != null) {
        processNode(cas, node, documentText);
    }

    // Fall back to a parse-error marker when nothing was extracted.
    String extracted = documentText.toString();
    cas.setDocumentText(
            StringUtils.isWhitespace(extracted) ? "[Parse error]" : extracted);
}
代码示例来源:origin: DigitalPebble/behemoth
/** Runs the analysis engine over the document's text and copies results back, skipping text-less documents. */
protected void doProcess(BehemothDocument behemoth, Reporter reporter) throws AnalysisEngineProcessException {
    // Guard clause: documents without text are skipped entirely.
    if (behemoth.getText() == null) {
        LOG.debug(behemoth.getUrl().toString() + " has null text");
        return;
    }
    // detect language if specified by user
    String lang = this.config.get("uima.language", "en");
    cas.setDocumentLanguage(lang);
    cas.setDocumentText(behemoth.getText());
    // process it
    tae.process(cas);
    convertCASToBehemoth(cas, behemoth, reporter);
}
代码示例来源:origin: CLLKazan/UIMA-Ext
/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    if (!dbIterator.hasNext()) {
        throw new NoSuchElementException();
    }
    DbTuple row = dbIterator.next();
    consumedCount++;
    cas.setDocumentText(row.text);
    // Attach source-URI metadata to the CAS and index it.
    try {
        DocumentMetadata metadata = new DocumentMetadata(cas.getJCas());
        metadata.setSourceUri(row.url);
        metadata.addToIndexes();
    } catch (CASException e) {
        throw new CollectionException(e);
    }
}
代码示例来源:origin: CLLKazan/UIMA-Ext
/**
 * {@inheritDoc}
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    if (!hasNext()) {
        throw new CollectionException(new NoSuchElementException());
    }
    // Advance the read cursor and fetch the corresponding file in one step.
    File file = files.get(++lastReadFileIdx);
    aCAS.setDocumentText(FileUtils.readFileToString(file, encoding));
    // Record the file's URI as document metadata.
    try {
        DocumentMetadata metadata = new DocumentMetadata(aCAS.getJCas());
        metadata.setSourceUri(getURIForMetadata(file).toString());
        metadata.addToIndexes();
    } catch (CASException e) {
        throw new IllegalStateException(e);
    }
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl
@Override
public void getNext(CAS aJCas)
    throws IOException, CollectionException
{
    Resource res = nextFile();
    initCas(aJCas, res);
    // Decompress (when needed) and decode the file, auto-detecting the charset
    // when ENCODING_AUTO is configured.
    try (InputStream is = new BufferedInputStream(
            CompressionUtils.getInputStream(res.getLocation(), res.getInputStream()))) {
        final String text = ENCODING_AUTO.equals(sourceEncoding)
                ? IOUtils.toString(new CharsetDetector().getReader(is, null))
                : IOUtils.toString(is, sourceEncoding);
        aJCas.setDocumentText(text);
    }
}
}
代码示例来源:origin: org.apache.uima/textmarker-core
/**
 * Demo entry point: runs the PlainTextAnnotator over a text file and prints every
 * resulting annotation.
 *
 * @param args optional; {@code args[0]} is the input file path. Generalized from the
 *             original hard-coded path, which is kept as the fallback so existing
 *             no-argument invocations behave identically.
 */
public static void main(String[] args) throws Exception {
    // Locate the PlainTextAnnotator descriptor, first on this class' class path,
    // then at its well-known package location.
    URL url = TextMarkerEngine.class.getClassLoader().getResource("PlainTextAnnotator.xml");
    if (url == null) {
        url = PlainTextAnnotator.class.getClassLoader().getResource(
                "org/apache/uima/textmarker/engine/PlainTextAnnotator.xml");
    }
    XMLInputSource in = new XMLInputSource(url);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
    CAS cas = ae.newCAS();
    String inputPath = args.length > 0 ? args[0]
            : "D:/work/workspace-textmarker/Test/input/list1.txt";
    cas.setDocumentText(FileUtils.file2String(new File(inputPath), "UTF-8"));
    ae.process(cas);
    // Dump each annotation's type name together with the text it covers.
    AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex();
    for (AnnotationFS annotationFS : annotationIndex) {
        System.out.println(annotationFS.getType().getShortName() + " : "
                + annotationFS.getCoveredText());
    }
}
代码示例来源:origin: org.apache.uima/ConceptMapper
/**
 * Runs the analysis engine over {@code text}, hands the CAS to processCAS, and
 * resets the CAS afterwards so it can be reused.
 */
public void runCPM(String text) {
    // Language and text setters are independent; set both before processing.
    cas.setDocumentLanguage(langID);
    cas.setDocumentText(text);
    try {
        ae.process(cas);
    } catch (AnalysisEngineProcessException e) {
        // NOTE(review): failures are only printed and processCAS still runs on the
        // (possibly partial) CAS — this mirrors the original behavior exactly.
        e.printStackTrace();
    }
    processCAS(cas);
    cas.reset();
}
代码示例来源:origin: org.apache.uima/uimaj-tools
/**
 * Detags the XML document held in the "xmlDocument" view and stores the resulting
 * plain text as the Sofa of a newly created "plainTextDocument" view, copying the
 * language from the initial view.
 *
 * @param aCAS CAS containing an "xmlDocument" view whose Sofa is the raw XML
 * @throws AnalysisEngineProcessException if SAX parsing of the XML Sofa fails
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
    // get handle to CAS view containing XML document
    CAS xmlCas = aCAS.getView("xmlDocument");
    InputStream xmlStream = xmlCas.getSofa().getSofaDataStream();
    // parse with detag handler; the handler accumulates text content only
    DetagHandler handler = new DetagHandler();
    try {
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(xmlStream, handler);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // create the plain text view and set its document text
    // (Sofa data may be set only once; createView must precede setDocumentText)
    CAS plainTextView = aCAS.createView("plainTextDocument");
    plainTextView.setDocumentText(handler.getDetaggedText());
    plainTextView.setDocumentLanguage(aCAS.getView("_InitialView").getDocumentLanguage());
    // Index the SourceDocumentInformation object, if there is one, in the new sofa.
    // This is needed by the SemanticSearchCasIndexer
    Iterator iter = xmlCas.getAnnotationIndex(sourceDocInfoType).iterator();
    if (iter.hasNext()) {
        FeatureStructure sourceDocInfoFs = (FeatureStructure) iter.next();
        plainTextView.getIndexRepository().addFS(sourceDocInfoFs);
    }
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-uima
/**
 * analyzes the tokenizer input using the given analysis engine
 * <p>
 * {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
 *
 * @throws IOException If there is a low-level I/O error.
 */
protected void analyzeInput() throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
    // Lazily create the analysis engine on first use.
    if (ae == null) {
        ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
    }
    // Reuse an existing CAS when possible; otherwise allocate a fresh one.
    if (cas != null) {
        cas.reset();
    } else {
        cas = ae.newCAS();
    }
    cas.setDocumentText(toString(input));
    ae.process(cas);
}
代码示例来源:origin: nlpie/biomedicus
/**
 * Wraps an existing CAS as an artifact, creating a dedicated "metadata" view that
 * carries the artifact ID plus key/value metadata feature structures.
 *
 * @param labelAdapters adapters used when exposing labels; may be null
 * @param cas the CAS to wrap; its type system must declare "ArtifactMetadata"
 *        (features "key"/"value") and "ArtifactID" (feature "artifactID")
 * @param artifactID identifier stored in the metadata view
 */
CASArtifact(
    @Nullable LabelAdapters labelAdapters,
    CAS cas,
    String artifactID
) {
    this.labelAdapters = labelAdapters;
    this.cas = cas;
    TypeSystem typeSystem = cas.getTypeSystem();
    // Resolve the metadata type and its key/value features from the type system.
    metadataType = typeSystem.getType("ArtifactMetadata");
    keyFeature = metadataType.getFeatureByBaseName("key");
    valueFeature = metadataType.getFeatureByBaseName("value");
    // The metadata view needs Sofa data before feature structures can be indexed;
    // an empty string suffices, and Sofa data can be set only once per view.
    metadataCas = cas.createView("metadata");
    metadataCas.setDocumentText("");
    // Store the artifact ID as an indexed feature structure in the metadata view.
    Type idType = typeSystem.getType("ArtifactID");
    Feature idFeat = idType.getFeatureByBaseName("artifactID");
    this.artifactID = artifactID;
    FeatureStructure documentIdFs = metadataCas.createFS(idType);
    documentIdFs.setStringValue(idFeat, artifactID);
    metadataCas.addFsToIndexes(documentIdFs);
    // Keep a handle to the named "metadata" index for later key/value lookups.
    metadataIndex = metadataCas.getIndexRepository().getIndex("metadata", metadataType);
    casMetadata = new CASMetadata();
}
内容来源于网络,如有侵权,请联系作者删除!