zemberek.core.collections.Histogram.add()方法的使用及代码示例

x33g5p2x 于2022-01-20 转载在其他

字(7.7k)|赞(0)|评价(0)|浏览(155)

本文整理了Java中zemberek.core.collections.Histogram.add()方法的一些代码示例，展示了Histogram.add()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台，是从一些精选项目中提取出来的代码，具有较强的参考意义，能在一定程度上帮助到你。Histogram.add()方法的具体详情如下：
包路径：zemberek.core.collections.Histogram
类名称：Histogram
方法名：add

Histogram.add介绍

[英]Adds an element and increments its count.
[中]添加一个元素，并将其计数加一。

代码示例

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Adds a single element and increments its count by one.
 *
 * <p>Delegates to {@code add(t, 1)}.
 *
 * @param t element to add.
 * @return the count of the added element, as returned by {@code add(t, 1)}.
 * @throws NullPointerException if element is null.
 */
public int add(T t) {
 return add(t, 1);
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Forwards the given value to {@code histogram.add(d)}.
 *
 * @param d value to add.
 */
void add(double d) {
 histogram.add(d);
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Adds {@code key} to the counter stored at index {@code order} of {@code gramCounts}.
 *
 * @param order index into {@code gramCounts}; no bounds check is performed here.
 * @param key item to add.
 */
public void add(int order, String key) {
 gramCounts[order].add(key);
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Merges the gram counts of another model into this one.
 *
 * <p>For every index from 1 up to {@code gramCounts.length - 1}, the other model's counts at
 * that index are added to this model's counts at the same index. Index 0 is not touched.
 *
 * @param otherModel model whose counts are merged into this model.
 * @throws IllegalArgumentException if the two models have different orders.
 */
public void merge(CharNgramCountModel otherModel) {
 if (otherModel.order != order) {
  // A space follows "is" so the order value is not glued to the preceding word.
  throw new IllegalArgumentException(
    "Model orders does not match. Order of this model is " + order +
      " but merged model order is " + otherModel.order);
 }
 for (int i = 1; i < gramCounts.length; i++) {
  gramCounts[i].add(otherModel.gramCounts[i]);
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

public Info(NerDataSet set) {
 this.types = set.types;
 this.numberOfSentences = set.sentences.size();
 for (NerSentence sentence : set.sentences) {
  numberOfTokens += sentence.tokens.size();
  for (NerToken token : sentence.tokens) {
   tokenHistogram.add(token.type);
   if (token.position == NePosition.OUTSIDE ||
     token.position == NePosition.BEGIN ||
     token.position == NePosition.UNIT) {
    typeHistogram.add(token.type);
   }
  }
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Adds every element of the given array, one at a time and in array order.
 *
 * @param array the elements to add.
 * @throws NullPointerException if {@code array} itself is null.
 */
@SafeVarargs
public final void add(T... array) {
 if (array == null) {
  throw new NullPointerException("array cannot be null");
 }
 for (int i = 0; i < array.length; i++) {
  add(array[i]);
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Counts the grams of a sequence for every gram size from 1 up to {@code order}.
 *
 * <p>For each size, the grams returned by {@code getGram(seq, size)} are added to the
 * counter stored at the same index of {@code gramCounts}.
 *
 * @param seq sequence whose grams are counted.
 */
public void addGrams(String seq) {
 int size = 1;
 while (size <= order) {
  for (String gram : this.getGram(seq, size)) {
   gramCounts[size].add(gram);
  }
  size++;
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Adds the contents of another Histogram to this one.
 *
 * <p>Each element of the other Histogram is added here with the count it has there.
 *
 * @param otherSet the Histogram to merge in.
 * @throws NullPointerException if {@code otherSet} is null.
 */
public void add(Histogram<T> otherSet) {
 if (otherSet == null) {
  throw new NullPointerException("Histogram cannot be null");
 }
 for (T element : otherSet) {
  add(
    element,
    otherSet.getCount(element));
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Builds a Histogram from text lines of the form {@code key<delimiter>count} or
 * {@code count<delimiter>key}.
 *
 * <p>Each line is split at the first occurrence of the delimiter. The count part is parsed
 * with {@link Integer#parseInt(String)} as-is (no trimming).
 *
 * @param lines lines to parse.
 * @param delimiter character separating key and count.
 * @param keyComesFirst true if the key precedes the delimiter, false if the count does.
 * @return a Histogram holding every parsed key with its count.
 * @throws IllegalStateException if a line has no delimiter or starts with it.
 * @throws NumberFormatException if the count part of a line is not a valid integer.
 */
public static Histogram<String> loadFromLines(
  List<String> lines,
  char delimiter,
  boolean keyComesFirst) {
 Histogram<String> histogram = new Histogram<>(lines.size());
 for (String line : lines) {
  int split = line.indexOf(delimiter);
  if (split <= 0) {
   throw new IllegalStateException("Bad histogram line = " + line);
  }
  String left = line.substring(0, split);
  String right = line.substring(split + 1);
  String key;
  String countText;
  if (keyComesFirst) {
   key = left;
   countText = right;
  } else {
   key = right;
   countText = left;
  }
  histogram.add(key, Integer.parseInt(countText));
 }
 return histogram;
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Reads a CharNgramCountModel from its custom binary form.
 *
 * <p>Layout as read here: an int order, a UTF model id, then for each level from 1 to order an
 * int entry count followed by that many (UTF key, int count) pairs. The stream is wrapped in a
 * buffered {@link DataInputStream} and closed when reading finishes.
 *
 * @param is InputStream to read the model from.
 * @return the model built from the stream contents.
 * @throws IOException if reading from the stream fails.
 */
public static CharNgramCountModel load(InputStream is) throws IOException {
 try (DataInputStream in = new DataInputStream(new BufferedInputStream(is))) {
  final int modelOrder = in.readInt();
  final String id = in.readUTF();
  // Slot 0 stays unused; levels are stored at indices 1..modelOrder.
  Histogram<String>[] counts = new Histogram[modelOrder + 1];
  int level = 1;
  while (level <= modelOrder) {
   final int entryCount = in.readInt();
   Histogram<String> levelCounts = new Histogram<>(entryCount * 2);
   for (int read = 0; read < entryCount; read++) {
    String gram = in.readUTF();
    int gramCount = in.readInt();
    levelCounts.add(gram, gramCount);
   }
   counts[level] = levelCounts;
   level++;
  }
  return new CharNgramCountModel(id, modelOrder, counts);
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Prints a count of suspicious characters found in the files directly under {@code root}.
 *
 * <p>For every regular file at depth 1, each file path is printed, its sentences are loaded
 * with {@code getSentences}, and every character is counted if it lies in the range
 * U+0300..U+036F (the Unicode Combining Diacritical Marks block) or is contained in
 * {@code Scripts.undesiredChars}. A character matching both conditions is counted twice,
 * as in the original logic. Finally each counted character is printed as its hex code
 * point followed by its count.
 *
 * @param root directory whose immediate files are scanned.
 * @throws IOException if walking the directory or reading a file fails.
 */
private static void checkWeirdChars(Path root) throws IOException {
 List<Path> files;
 // Files.walk keeps directory handles open until the stream is closed, so close it eagerly.
 try (java.util.stream.Stream<Path> walk = Files.walk(root, 1)) {
  files = walk.filter(s -> s.toFile().isFile())
    .collect(Collectors.toList());
 }
 Histogram<String> chars = new Histogram<>();
 for (Path file : files) {
  System.out.println(file);
  LinkedHashSet<String> sentences = getSentences(file);
  for (String sentence : sentences) {
   for (int i = 0; i < sentence.length(); i++) {
    char c = sentence.charAt(i);
    if (c >= 0x300 && c <= 0x036f) {
     chars.add(String.valueOf(c));
    }
    if (Scripts.undesiredChars.contains(c)) {
     chars.add(String.valueOf(c));
    }
   }
  }
 }
 for (String s : chars.getSortedList()) {
  System.out.println(String.format("%x %d", (int) s.charAt(0), chars.getCount(s)));
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Collects and prints question-suffix endings found in a file of {@code key=value} lines.
 *
 * <p>Each line is split on "="; the trimmed second part is split on a single space and must
 * yield exactly two pieces. If the second piece starts with "mi", "mu", "mı" or "mü" it is
 * counted. Lines that lack an "=" separated second part, or whose second part does not have
 * exactly two pieces, are reported with "Problem in ..." and skipped. Afterwards the endings
 * are printed twice: first with their counts, then alone.
 *
 * @param in file to read, decoded as UTF-8.
 * @param out not used by this method.
 * @throws IOException if the input file cannot be read.
 */
static void getQuestionSuffixes(Path in, Path out) throws IOException {
 List<String> splitLines = Files.readAllLines(in, Charsets.UTF_8);
 Histogram<String> endings = new Histogram<>();
 for (String splitLine : splitLines) {
  String[] tokens = splitLine.split("=");
  // A line without "=" (or with nothing after it) has no second part; report it instead of
  // failing with ArrayIndexOutOfBoundsException.
  if (tokens.length < 2) {
   System.out.println("Problem in " + splitLine);
   continue;
  }
  String s = tokens[1].trim();
  String[] t2 = s.split("[ ]");
  if (t2.length != 2) {
   System.out.println("Problem in " + splitLine);
   continue;
  }
  String suf = t2[1];
  if (suf.startsWith("mi") ||
    suf.startsWith("mu") ||
    suf.startsWith("mı") ||
    suf.startsWith("mü")
  ) {
   endings.add(suf);
  }
 }
 for (String ending : endings.getSortedList()) {
  System.out.println(ending + " " + endings.getCount(ending));
 }
 for (String ending : endings.getSortedList()) {
  System.out.println(ending);
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Logs line, word and label statistics for each given file.
 *
 * <p>Every line is split on runs of whitespace. Pieces containing "__label__" go to the label
 * histogram (the whole line is logged as a warning if such a piece also contains "-"); all
 * other pieces go to the word histogram. One summary line is logged per file using the line
 * count and the two histograms' {@code size()} values.
 *
 * @param paths files to inspect.
 * @throws IOException if a file cannot be loaded.
 */
static void countTokens(Path... paths) throws IOException {
 for (Path path : paths) {
  List<String> lines = TextIO.loadLines(path);
  Histogram<String> words = new Histogram<>();
  Histogram<String> labels = new Histogram<>();
  for (String line : lines) {
   for (String piece : line.split("[\\s]+")) {
    if (!piece.contains("__label__")) {
     words.add(piece);
     continue;
    }
    if (piece.contains("-")) {
     Log.warn(line);
    }
    labels.add(piece);
   }
  }
  Log.info("There are %d lines, %d words, %d labels in %s",
    lines.size(), words.size(), labels.size(), path);
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Collects the non-empty endings produced by analyzing each word of a vocabulary.
 *
 * <p>Every word is analyzed with {@code morphology}; analyses flagged as unknown are ignored.
 * The ending of each remaining analysis is counted when its length is greater than zero.
 *
 * @param words vocabulary to analyze.
 * @return the collected endings, as returned by
 *     {@code getSortedList(Turkish.STRING_COMPARATOR_ASC)}.
 */
List<String> getEndingsFromVocabulary(List<String> words) {
 Histogram<String> endingCounts = new Histogram<>(words.size() / 10);
 for (String word : words) {
  for (SingleAnalysis candidate : morphology.analyze(word)) {
   if (!candidate.isUnknown()) {
    StemAndEnding parts = candidate.getStemAndEnding();
    if (parts.ending.length() > 0) {
     endingCounts.add(parts.ending);
    }
   }
  }
 }
 return endingCounts.getSortedList(Turkish.STRING_COMPARATOR_ASC);
}

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Collects all word n-grams of the given order from a token list into a histogram.
 *
 * <p>A window of {@code order} consecutive tokens is slid over the list. A window is rejected
 * if any of its tokens fails {@code tokenTypeAccpetable} or if its normalized text is in
 * {@code TurkishStopWords.DEFAULT}. Each accepted window becomes a {@code Term} that is added
 * to {@code grams}; when the returned count is 1, the term's first occurrence index is set
 * to {@code offset + i}, where {@code i} is the window's start position.
 *
 * @param tokens tokens to scan.
 * @param grams histogram receiving the accepted terms.
 * @param order number of tokens per gram.
 * @param offset value added to the window start when recording the first occurrence index.
 */
void collectGrams(List<Token> tokens, Histogram<Term> grams, int order, int offset) {
 // The last valid window starts at size - order, so the bound is inclusive; a strict
 // comparison would leave the final token out of every gram.
 for (int i = 0; i <= tokens.size() - order; i++) {
  String[] words = new String[order];
  boolean fail = false;
  for (int j = 0; j < order; j++) {
   Token t = tokens.get(i + j);
   if (!tokenTypeAccpetable(t)) {
    fail = true;
    break;
   }
   String word = normalize(t.getText());
   if (TurkishStopWords.DEFAULT.contains(word)) {
    fail = true;
    break;
   }
   words[j] = word;
  }
  if (!fail) {
   Term t = new Term(words);
   int count = grams.add(t);
   if (count == 1) { // if this is the first time, set the first occurrence index.
    t.setFirstOccurrenceIndex(offset + i);
   }
  }
 }
}

代码示例来源：origin: ahmetaa/zemberek-nlp

// Excerpt from a larger method that is not shown here.
// Add the values held by the i-th gramLogProbs entry to the histogram
// (NOTE(review): presumably log-probability values, judging by the field name; confirm).
histogram.add(model.gramLogProbs[i].values.values());
// Allocate one lookup slot per histogram.size().
double[] lookup = new double[histogram.size()];
// Index counter; its use lies outside this excerpt.
int j = 0;

代码示例来源：origin: ahmetaa/zemberek-nlp

// Excerpt: record s in uniques (presumably a Histogram, given this page's topic; confirm).
uniques.add(s);

代码示例来源：origin: ahmetaa/zemberek-nlp

// Excerpt: record s in uniques (presumably a Histogram, given this page's topic; confirm).
uniques.add(s);

代码示例来源：origin: ahmetaa/zemberek-nlp

// Excerpt from a larger method that is not shown here.
// Record this item's coarse POS tag, POS tag and dependency relation in their collectors.
coarsePos.add(item.coarsePosTag);
pos.add(item.posTag);
depRelations.add(item.depRelation);
// Split the feature string on "|", trimming each piece and dropping empty ones,
// then add the resulting list as a single entry.
morphItems.add(Lists
  .newArrayList(Splitter.on("|").trimResults().omitEmptyStrings().split(item.feats)));

代码示例来源：origin: ahmetaa/zemberek-nlp

/**
 * Demonstrates basic Histogram usage by logging counts for a small array of fruit names.
 *
 * <p>Adds the whole array to a new Histogram, logs every entry with its count in iteration
 * order, logs them again using {@code getSortedList()}, then calls {@code removeSmaller(2)}
 * and logs the remaining entries from {@code getSortedList()}.
 */
public static void counts() {
 String[] fruits = new String[] {
   "apple", "pear", "grape", "apple", "apple", "apricot", "grape"
 };
 Log.info("Adding elements to histogram:" + Arrays.toString(fruits));
 Histogram<String> histogram = new Histogram<>();
 histogram.add(fruits);

 Log.info("\nPrint with no order");
 for (String fruit : histogram) {
  String line = fruit + " count: " + histogram.getCount(fruit);
  Log.info(line);
 }

 Log.info("\nPrint with count order");
 for (String fruit : histogram.getSortedList()) {
  String line = fruit + " count: " + histogram.getCount(fruit);
  Log.info(line);
 }

 histogram.removeSmaller(2);
 Log.info("\nAfter removing elements with counts less than 2");
 for (String fruit : histogram.getSortedList()) {
  String line = fruit + " count: " + histogram.getCount(fruit);
  Log.info(line);
 }
}

内容来源于网络，如有侵权，请联系作者删除！

相关文章

热门标签

Java query python Node 开发语言 request Util 数据库 Table 后端算法 Logger Message Element Parser

最新文章

高级程序员和新手小白程序员区别你是那个等级看解决bug速度
浏览(1001) 发布于 5个月前
还在用双层for循环吗？太慢了
浏览(925) 发布于 5个月前
我用EasyExcel优化了公司的导出（附踩坑记录）
浏览(967) 发布于 5个月前
记录因Sharding Jdbc批量操作引发的一次fullGC
浏览(802) 发布于 5个月前
进大厂必须要会的单元测试
浏览(801) 发布于 5个月前

Histogram类方法