本文整理了Java中zemberek.core.collections.Histogram.add()
方法的一些代码示例,展示了Histogram.add()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Histogram.add()
方法的具体详情如下:
包路径:zemberek.core.collections.Histogram
类名称:Histogram
方法名:add
[英]adds an element and increments its count.
[中]添加一个元素,并增加它的计数。
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Adds an element and increments its count by one.
 *
 * @param t element to add.
 * @return the count of the added element.
 * @throws NullPointerException if element is null.
 */
public int add(T t) {
// Delegates to the (element, count) overload with a count of 1.
return add(t, 1);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/** Records the value {@code d} by delegating to the wrapped histogram's single-argument add. */
void add(double d) {
histogram.add(d);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Increments the count of {@code key} in the histogram for the given n-gram order.
 *
 * @param order n-gram order, used directly as an index into {@code gramCounts}.
 * @param key   gram to count.
 */
public void add(int order, String key) {
gramCounts[order].add(key);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Merges the gram counts of {@code otherModel} into this model.
 * Both models must have the same n-gram order.
 *
 * @param otherModel model whose counts are added to this one.
 * @throws IllegalArgumentException if the orders of the two models differ.
 */
public void merge(CharNgramCountModel otherModel) {
  if (otherModel.order != order) {
    // Fixed message: grammar ("orders do not match") and the missing space
    // before the order value, which previously printed e.g. "model is3".
    throw new IllegalArgumentException(
        "Model orders do not match. Order of this model is " + order +
            " but merged model order is " + otherModel.order);
  }
  // Index 0 is unused; gram orders start at 1.
  for (int i = 1; i < gramCounts.length; i++) {
    gramCounts[i].add(otherModel.gramCounts[i]);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
public Info(NerDataSet set) {
this.types = set.types;
this.numberOfSentences = set.sentences.size();
for (NerSentence sentence : set.sentences) {
numberOfTokens += sentence.tokens.size();
for (NerToken token : sentence.tokens) {
tokenHistogram.add(token.type);
if (token.position == NePosition.OUTSIDE ||
token.position == NePosition.BEGIN ||
token.position == NePosition.UNIT) {
typeHistogram.add(token.type);
}
}
}
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Adds every element of the given array, incrementing each element's count by one.
 *
 * @param array elements to add; must not be null.
 * @throws NullPointerException if {@code array} is null.
 */
@SafeVarargs
public final void add(T... array) {
  if (array == null) {
    throw new NullPointerException("array cannot be null");
  }
  for (int i = 0; i < array.length; i++) {
    add(array[i]);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Extracts all grams of orders 1..{@code order} from {@code seq} and counts
 * each one in the histogram for its order.
 */
public void addGrams(String seq) {
  for (int n = 1; n <= order; ++n) {
    for (String gram : this.getGram(seq, n)) {
      gramCounts[n].add(gram);
    }
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Merges another Histogram into this one, adding each of its elements with
 * that element's count.
 *
 * @param otherSet another Histogram; must not be null.
 * @throws NullPointerException if {@code otherSet} is null.
 */
public void add(Histogram<T> otherSet) {
  if (otherSet == null) {
    throw new NullPointerException("Histogram cannot be null");
  }
  for (T element : otherSet) {
    add(element, otherSet.getCount(element));
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Builds a Histogram from lines of the form "key&lt;delimiter&gt;count"
 * (or "count&lt;delimiter&gt;key" when {@code keyComesFirst} is false).
 *
 * @param lines         input lines, one entry per line.
 * @param delimiter     character separating key and count.
 * @param keyComesFirst true if the key precedes the count on each line.
 * @return histogram populated from the lines.
 * @throws IllegalStateException if the delimiter is absent or at position 0.
 * @throws NumberFormatException if the count part is not a valid integer.
 */
public static Histogram<String> loadFromLines(
    List<String> lines,
    char delimiter,
    boolean keyComesFirst) {
  Histogram<String> result = new Histogram<>(lines.size());
  for (String line : lines) {
    int sep = line.indexOf(delimiter);
    if (sep <= 0) {
      throw new IllegalStateException("Bad histogram line = " + line);
    }
    String left = line.substring(0, sep);
    String right = line.substring(sep + 1);
    String item = keyComesFirst ? left : right;
    String countStr = keyComesFirst ? right : left;
    result.add(item, Integer.parseInt(countStr));
  }
  return result;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Loads data from the custom serialized file and generates a CharNgramCountModel from it.
 *
 * Expected binary layout: [int order][UTF modelId], then for each order n = 1..order:
 * [int size] followed by {@code size} pairs of [UTF key][int count].
 *
 * @param is InputStream to load data.
 * @return a CharNgramCountModel generated from file.
 * @throws IOException if the stream cannot be read or ends prematurely.
 */
public static CharNgramCountModel load(InputStream is) throws IOException {
try (DataInputStream dis = new DataInputStream(new BufferedInputStream(is))) {
int order = dis.readInt();
String modelId = dis.readUTF();
// Index 0 is left unused so that gramCounts[n] holds the n-gram counts.
Histogram<String>[] gramCounts = new Histogram[order + 1];
for (int j = 1; j <= order; j++) {
int size = dis.readInt();
// size * 2 is presumably a capacity hint to reduce resizing while filling — TODO confirm.
Histogram<String> countSet = new Histogram<>(size * 2);
for (int i = 0; i < size; i++) {
String key = dis.readUTF();
countSet.add(key, dis.readInt());
}
gramCounts[j] = countSet;
}
return new CharNgramCountModel(modelId, order, gramCounts);
}
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Scans the files directly under {@code root} and prints the hex code and
 * occurrence count of characters that are either Unicode combining diacritics
 * (U+0300-U+036F) or listed in {@code Scripts.undesiredChars}.
 *
 * @param root directory whose immediate files are checked.
 * @throws IOException if the directory or a file cannot be read.
 */
private static void checkWeirdChars(Path root) throws IOException {
  List<Path> files;
  // Files.walk returns a lazily populated Stream backed by open directory
  // handles; it must be closed. The original leaked it.
  try (java.util.stream.Stream<Path> paths = Files.walk(root, 1)) {
    files = paths.filter(s -> s.toFile().isFile()).collect(Collectors.toList());
  }
  Histogram<String> chars = new Histogram<>();
  for (Path file : files) {
    System.out.println(file);
    LinkedHashSet<String> sentences = getSentences(file);
    for (String sentence : sentences) {
      for (int i = 0; i < sentence.length(); i++) {
        char c = sentence.charAt(i);
        // Combining Diacritical Marks block.
        if (c >= 0x300 && c <= 0x036f) {
          chars.add(String.valueOf(c));
        }
        // NOTE: a combining char that is also undesired is counted twice,
        // matching the original behavior.
        if (Scripts.undesiredChars.contains(c)) {
          chars.add(String.valueOf(c));
        }
      }
    }
  }
  for (String s : chars.getSortedList()) {
    System.out.println(String.format("%x %d", (int) s.charAt(0), chars.getCount(s)));
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Reads "key = value" style lines from {@code in}, takes the second token of
 * each value, keeps the ones starting with a Turkish question particle
 * (mi/mu/mı/mü), then prints them first with counts and then alone.
 */
static void getQuestionSuffixes(Path in, Path out) throws IOException {
  Histogram<String> endings = new Histogram<>();
  for (String line : Files.readAllLines(in, Charsets.UTF_8)) {
    String[] parts = line.split("=");
    String value = parts[1].trim();
    String[] valueTokens = value.split("[ ]");
    if (valueTokens.length != 2) {
      System.out.println("Problem in " + line);
      continue;
    }
    String suffix = valueTokens[1];
    boolean isQuestionSuffix = suffix.startsWith("mi")
        || suffix.startsWith("mu")
        || suffix.startsWith("mı")
        || suffix.startsWith("mü");
    if (isQuestionSuffix) {
      endings.add(suffix);
    }
  }
  for (String ending : endings.getSortedList()) {
    System.out.println(ending + " " + endings.getCount(ending));
  }
  for (String ending : endings.getSortedList()) {
    System.out.println(ending);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * For each file, counts whitespace-separated tokens, keeping "__label__"
 * tokens and ordinary words in separate histograms, then logs the line, word
 * and label totals. Lines whose label contains '-' are warned about.
 */
static void countTokens(Path... paths) throws IOException {
  for (Path path : paths) {
    List<String> lines = TextIO.loadLines(path);
    Histogram<String> words = new Histogram<>();
    Histogram<String> labels = new Histogram<>();
    for (String line : lines) {
      for (String token : line.split("[\\s]+")) {
        if (!token.contains("__label__")) {
          words.add(token);
          continue;
        }
        if (token.contains("-")) {
          Log.warn(line);
        }
        labels.add(token);
      }
    }
    Log.info("There are %d lines, %d words, %d labels in %s",
        lines.size(),
        words.size(),
        labels.size(),
        path);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Morphologically analyzes every word and collects the non-empty endings of
 * all known analyses, returned sorted with the Turkish string comparator.
 */
List<String> getEndingsFromVocabulary(List<String> words) {
  Histogram<String> endings = new Histogram<>(words.size() / 10);
  for (String word : words) {
    for (SingleAnalysis analysis : morphology.analyze(word)) {
      if (analysis.isUnknown()) {
        continue;
      }
      String ending = analysis.getStemAndEnding().ending;
      if (ending.length() > 0) {
        endings.add(ending);
      }
    }
  }
  return endings.getSortedList(Turkish.STRING_COMPARATOR_ASC);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Slides a window of {@code order} tokens over {@code tokens}; each window whose
 * tokens are all acceptable and not stop words becomes a Term counted in
 * {@code grams}. A term seen for the first time gets its first-occurrence index
 * recorded as {@code offset} plus the window start.
 *
 * NOTE(review): the loop bound {@code i < tokens.size() - order} skips the last
 * possible window (starting at {@code tokens.size() - order}); confirm whether
 * {@code <=} was intended. Also, "tokenTypeAccpetable" is a misspelled sibling
 * method name and cannot be corrected here without changing that method too.
 */
void collectGrams(List<Token> tokens, Histogram<Term> grams, int order, int offset) {
for (int i = 0; i < tokens.size() - order; i++) {
String[] words = new String[order];
boolean fail = false;
for (int j = 0; j < order; j++) {
Token t = tokens.get(i + j);
// Reject the whole window if any token has an unacceptable type.
if (!tokenTypeAccpetable(t)) {
fail = true;
break;
}
String word = normalize(t.getText());
// Stop words also invalidate the whole window.
if (TurkishStopWords.DEFAULT.contains(word)) {
fail = true;
break;
}
words[j] = word;
}
if (!fail) {
Term t = new Term(words);
int count = grams.add(t);
if (count == 1) { // if this is the first time, set the first occurrence index.
t.setFirstOccurrenceIndex(offset + i);
}
}
}
}
代码示例来源:origin: ahmetaa/zemberek-nlp
histogram.add(model.gramLogProbs[i].values.values());
double[] lookup = new double[histogram.size()];
int j = 0;
代码示例来源:origin: ahmetaa/zemberek-nlp
uniques.add(s);
代码示例来源:origin: ahmetaa/zemberek-nlp
uniques.add(s);
代码示例来源:origin: ahmetaa/zemberek-nlp
coarsePos.add(item.coarsePosTag);
pos.add(item.posTag);
depRelations.add(item.depRelation);
morphItems.add(Lists
.newArrayList(Splitter.on("|").trimResults().omitEmptyStrings().split(item.feats)));
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Demonstrates basic Histogram usage: bulk adding, iteration in insertion and
 * count order, and pruning entries whose count is below a threshold.
 */
public static void counts() {
  String[] fruits = {"apple", "pear", "grape", "apple", "apple", "apricot", "grape"};
  Log.info("Adding elements to histogram:" + Arrays.toString(fruits));
  Histogram<String> histogram = new Histogram<>();
  histogram.add(fruits);
  Log.info("\nPrint with no order");
  for (String fruit : histogram) {
    Log.info(fruit + " count: " + histogram.getCount(fruit));
  }
  Log.info("\nPrint with count order");
  for (String fruit : histogram.getSortedList()) {
    Log.info(fruit + " count: " + histogram.getCount(fruit));
  }
  histogram.removeSmaller(2);
  Log.info("\nAfter removing elements with counts less than 2");
  for (String fruit : histogram.getSortedList()) {
    Log.info(fruit + " count: " + histogram.getCount(fruit));
  }
}
内容来源于网络,如有侵权,请联系作者删除!