org.apache.lucene.util.fst.Util.toUTF16()方法的使用及代码示例

x33g5p2x  于2022-02-01 转载在 其他  
字(6.4k)|赞(0)|评价(0)|浏览(147)

本文整理了Java中org.apache.lucene.util.fst.Util.toUTF16()方法的一些代码示例,展示了Util.toUTF16()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Util.toUTF16()方法的具体详情如下:
包路径:org.apache.lucene.util.fst.Util
类名称:Util
方法名:toUTF16

Util.toUTF16介绍

[英]Just maps each UTF16 unit (char) to the ints in an IntsRef.
[中]将每个UTF-16编码单元(char)直接映射为IntsRef中的一个int。

代码示例

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three fields; the second field is
 * used as the key and the third as the value (the first field is not inspected here).
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num number of lines to consume
 * @return the FST built from the collected mappings
 * @throws IOException if reading from {@code reader} or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not have exactly three whitespace-separated fields
 * @throws IllegalStateException if the same key appears on more than one line
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
 // TreeMap iterates in sorted key order, which is the order the entries are added to the builder below.
 Map<String,String> mappings = new TreeMap<>();
 
 for (int i = 0; i < num; i++) {
  String line = reader.readLine();
  if (line == null) {
   // readLine() returns null at end of stream; report truncated input as a parse
   // error instead of letting line.split() fail with a NullPointerException.
   throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i, reader.getLineNumber());
  }
  String[] parts = line.split("\\s+");
  if (parts.length != 3) {
   throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
  }
  if (mappings.put(parts[1], parts[2]) != null) {
   throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
  }
 }
 
 Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
 Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
 // One scratch buffer is reused for every key.
 IntsRefBuilder scratchInts = new IntsRefBuilder();
 for (Map.Entry<String,String> entry : mappings.entrySet()) {
  Util.toUTF16(entry.getKey(), scratchInts);
  builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
 }
 
 return builder.finish();
}

代码示例来源:origin: org.apache.lucene/lucene-analyzers-common

/**
  * Compiles all pending mappings into an FST and wraps it in a
  * NormalizeCharMap; call this once you are done calling {@link #add}.
  * The pending mappings are cleared after the FST has been built.
  */
 public NormalizeCharMap build() {
  final FST<CharsRef> compiled;
  try {
   final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
     new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, CharSequenceOutputs.getSingleton());
   // A single scratch buffer is reused for every key.
   final IntsRefBuilder keyInts = new IntsRefBuilder();
   for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
    Util.toUTF16(pair.getKey(), keyInts);
    fstBuilder.add(keyInts.get(), new CharsRef(pair.getValue()));
   }
   compiled = fstBuilder.finish();
   pendingPairs.clear();
  } catch (IOException ioe) {
   // The FST API declares IOException; rethrow unchecked so build() needs no throws clause.
   throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(compiled);
 }
}

代码示例来源:origin: harbby/presto-connectors

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three fields; the second field is
 * used as the key and the third as the value (the first field is not inspected here).
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num number of lines to consume
 * @return the FST built from the collected mappings
 * @throws IOException if reading from {@code reader} or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not have exactly three whitespace-separated fields
 * @throws IllegalStateException if the same key appears on more than one line
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
 // TreeMap iterates in sorted key order, which is the order the entries are added to the builder below.
 Map<String,String> mappings = new TreeMap<>();
 
 for (int i = 0; i < num; i++) {
  String line = reader.readLine();
  if (line == null) {
   // readLine() returns null at end of stream; report truncated input as a parse
   // error instead of letting line.split() fail with a NullPointerException.
   throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i, reader.getLineNumber());
  }
  String[] parts = line.split("\\s+");
  if (parts.length != 3) {
   throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
  }
  if (mappings.put(parts[1], parts[2]) != null) {
   throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
  }
 }
 
 Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
 Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
 // One scratch buffer is reused for every key.
 IntsRefBuilder scratchInts = new IntsRefBuilder();
 for (Map.Entry<String,String> entry : mappings.entrySet()) {
  Util.toUTF16(entry.getKey(), scratchInts);
  builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
 }
 
 return builder.finish();
}

代码示例来源:origin: org.infinispan/infinispan-embedded-query

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three fields; the second field is
 * used as the key and the third as the value (the first field is not inspected here).
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num number of lines to consume
 * @return the FST built from the collected mappings
 * @throws IOException if reading from {@code reader} or building the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *         does not have exactly three whitespace-separated fields
 * @throws IllegalStateException if the same key appears on more than one line
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
 // TreeMap iterates in sorted key order, which is the order the entries are added to the builder below.
 Map<String,String> mappings = new TreeMap<>();
 
 for (int i = 0; i < num; i++) {
  String line = reader.readLine();
  if (line == null) {
   // readLine() returns null at end of stream; report truncated input as a parse
   // error instead of letting line.split() fail with a NullPointerException.
   throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i, reader.getLineNumber());
  }
  String[] parts = line.split("\\s+");
  if (parts.length != 3) {
   throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
  }
  if (mappings.put(parts[1], parts[2]) != null) {
   throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
  }
 }
 
 Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
 Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
 // One scratch buffer is reused for every key.
 IntsRefBuilder scratchInts = new IntsRefBuilder();
 for (Map.Entry<String,String> entry : mappings.entrySet()) {
  Util.toUTF16(entry.getKey(), scratchInts);
  builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
 }
 
 return builder.finish();
}

代码示例来源:origin: NationalSecurityAgency/datawave

Util.toUTF16(value, irBuilder);
final IntsRef ints = irBuilder.get();
synchronized (this.fst) {

代码示例来源:origin: org.infinispan/infinispan-embedded-query

/**
  * Turns the accumulated mappings into an FST-backed NormalizeCharMap;
  * call this once you are done calling {@link #add}.
  * After the FST is finished the accumulated mappings are emptied.
  */
 public NormalizeCharMap build() {
  final FST<CharsRef> fst;
  try {
   final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
   final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
     new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
   // Scratch buffer shared across all keys.
   final IntsRefBuilder inputInts = new IntsRefBuilder();
   for (Map.Entry<String,String> mapping : pendingPairs.entrySet()) {
    Util.toUTF16(mapping.getKey(), inputInts);
    fstBuilder.add(inputInts.get(), new CharsRef(mapping.getValue()));
   }
   fst = fstBuilder.finish();
   pendingPairs.clear();
  } catch (IOException ioe) {
   // Checked IOException from the FST API is surfaced as an unchecked exception.
   throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(fst);
 }
}

代码示例来源:origin: harbby/presto-connectors

/**
  * Produces the NormalizeCharMap from the mappings collected so far;
  * call this once you are done calling {@link #add}.
  * The collected mappings are cleared once the FST is complete.
  */
 public NormalizeCharMap build() {
  final FST<CharsRef> result;
  try {
   final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
     new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, CharSequenceOutputs.getSingleton());
   // Reused for each key to avoid allocating a new buffer per entry.
   final IntsRefBuilder utf16Key = new IntsRefBuilder();
   for (Map.Entry<String,String> e : pendingPairs.entrySet()) {
    Util.toUTF16(e.getKey(), utf16Key);
    fstBuilder.add(utf16Key.get(), new CharsRef(e.getValue()));
   }
   result = fstBuilder.finish();
   pendingPairs.clear();
  } catch (IOException ioe) {
   // Rethrown unchecked because build() declares no checked exceptions.
   throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(result);
 }
}

代码示例来源:origin: NationalSecurityAgency/datawave

/**
 * Builds an FST that contains every string in {@code values} as an input, with no
 * output attached to any of them.
 *
 * <p>NOTE(review): keys are converted with {@code Util.toUTF16} while the builder is
 * configured with {@code INPUT_TYPE.BYTE1}; confirm that callers only pass values whose
 * UTF-16 code units fit that input type.
 *
 * @param values the strings to add, in the set's iteration order
 * @return the finished FST
 * @throws IOException if adding an entry or finishing the FST fails
 */
public static FST<?> getFST(SortedSet<String> values) throws IOException {
  final Outputs<Object> noOutputs = NoOutputs.getSingleton();
  
  // The builder options with defaults, spelled out positionally.
  final org.apache.lucene.util.fst.Builder<Object> builder = new org.apache.lucene.util.fst.Builder<>(
          FST.INPUT_TYPE.BYTE1, // inputType
          0, // minSuffixCount1
          0, // minSuffixCount2
          true, // doShareSuffix
          true, // doShareNonSingletonNodes
          Integer.MAX_VALUE, // shareMaxTailLength
          noOutputs,
          true, // allowArrayArcs
          15); // bytesPageBits
  
  // One scratch buffer is reused for every value.
  final IntsRefBuilder scratch = new IntsRefBuilder();
  for (String value : values) {
    Util.toUTF16(value, scratch);
    builder.add(scratch.get(), noOutputs.getNoOutput());
  }
  return builder.finish();
}

相关文章