projects-jenz/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java

1124 lines
70 KiB
Java
Raw Normal View History

2019-03-02 15:10:46 +01:00
package FunctionLayer.StanfordParser;
import FunctionLayer.LevenshteinDistance;
import FunctionLayer.Datahandler;
import FunctionLayer.SimilarityMatrix;
import FunctionLayer.StopwordAnnotator;
import com.google.common.collect.MapMaker;
import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IMWEDesc;
import edu.mit.jmwe.data.IToken;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.JMWEAnnotation;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
2019-03-24 23:04:19 +01:00
import edu.stanford.nlp.process.CoreLabelTokenFactory;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.process.DocumentPreprocessor;
2019-03-24 23:04:19 +01:00
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
2019-03-02 15:10:46 +01:00
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
2019-03-02 15:10:46 +01:00
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.OptionalDouble;
2019-03-02 15:10:46 +01:00
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import org.apache.lucene.analysis.core.StopAnalyzer;
2019-03-02 15:10:46 +01:00
import org.ejml.simple.SimpleMatrix;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
*
* @author install1
*/
public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
2019-03-02 15:10:46 +01:00
// Similarity matrix handed in by the caller through the constructor.
private SimilarityMatrix smxParam;
// First input string; call() tokenizes and tags it, and also passes it to classifier.classifyRaw.
private String str;
// Second input string; processed by call() the same way as str so the two can be compared.
private String str1;
// Part-of-speech tagger, taken from Datahandler.getTagger(); call() uses it via tagSentence.
private MaxentTagger tagger;
// Factory from Datahandler.getGsf(); call() uses it to build a GrammaticalStructure from each parse tree.
private GrammaticalStructureFactory gsf;
// Pipeline taken from Datahandler.getPipeline().
private StanfordCoreNLP pipeline;
// Pipeline taken from Datahandler.getPipelineSentiment().
private StanfordCoreNLP pipelineSentiment;
// Classifier from Datahandler.getClassifier(); call() uses classifyRaw on both input strings.
private AbstractSequenceClassifier classifier;
// Annotation whose sentences are read for JMWEAnnotation (multi-word expression) tokens in call().
private Annotation jmweStrAnnotation1;
// Counterpart of jmweStrAnnotation1 for the other string -- presumably read the same way; confirm in the rest of call().
private Annotation jmweStrAnnotation2;
// Annotation whose sentences supply TreeCoreAnnotations.TreeAnnotation parse trees in call().
private Annotation pipelineAnnotation1;
// Second parse-tree annotation; its trees are compared against those of pipelineAnnotation1 in call().
private Annotation pipelineAnnotation2;
// Annotation whose sentences supply SentimentCoreAnnotations.SentimentAnnotatedTree trees in call().
private Annotation pipelineAnnotation1Sentiment;
// Second sentiment annotation; its predictions and node vectors are compared against the first in call().
private Annotation pipelineAnnotation2Sentiment;
// CoreDocument supplied by the caller (identifier spelling "Dcoument" is as declared in the code).
private CoreDocument pipelineCoreDcoument1;
// Second CoreDocument supplied by the caller.
private CoreDocument pipelineCoreDcoument2;
2019-03-02 15:10:46 +01:00
/**
 * Creates a comparison task for two strings and their precomputed annotations.
 *
 * <p>Every argument is stored as-is; no validation or copying is performed. The
 * shared NLP components (tagger, pipelines, grammatical structure factory and
 * classifier) are not passed in but fetched from {@code Datahandler}.
 *
 * @param str first input string
 * @param str1 second input string
 * @param smxParam similarity matrix supplied by the caller
 * @param str1Annotation annotation stored as {@code jmweStrAnnotation1}
 * @param str2Annotation annotation stored as {@code jmweStrAnnotation2}
 * @param strPipeline1 annotation stored as {@code pipelineAnnotation1}
 * @param strPipeline2 annotation stored as {@code pipelineAnnotation2}
 * @param strPipeSentiment1 annotation stored as {@code pipelineAnnotation1Sentiment}
 * @param strPipeSentiment2 annotation stored as {@code pipelineAnnotation2Sentiment}
 * @param pipelineCoreDcoument1 first core document
 * @param pipelineCoreDcoument2 second core document
 */
public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam, Annotation str1Annotation, Annotation str2Annotation,
Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2,
CoreDocument pipelineCoreDcoument1, CoreDocument pipelineCoreDcoument2) {
    // Caller-supplied inputs: the matrix and the two raw strings.
    this.smxParam = smxParam;
    this.str = str;
    this.str1 = str1;

    // Caller-supplied, precomputed annotations and documents, stored pairwise.
    this.jmweStrAnnotation1 = str1Annotation;
    this.jmweStrAnnotation2 = str2Annotation;
    this.pipelineAnnotation1 = strPipeline1;
    this.pipelineAnnotation2 = strPipeline2;
    this.pipelineAnnotation1Sentiment = strPipeSentiment1;
    this.pipelineAnnotation2Sentiment = strPipeSentiment2;
    this.pipelineCoreDcoument1 = pipelineCoreDcoument1;
    this.pipelineCoreDcoument2 = pipelineCoreDcoument2;

    // Shared components looked up from Datahandler; lookup order is kept as in the original.
    this.tagger = Datahandler.getTagger();
    this.pipeline = Datahandler.getPipeline();
    this.pipelineSentiment = Datahandler.getPipelineSentiment();
    this.gsf = Datahandler.getGsf();
    this.classifier = Datahandler.getClassifier();
}
@Override
public SimilarityMatrix call() {
Double score = -100.0;
try {
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
//noneDelete
TokenizerFactory<CoreLabel> ptbTokenizerFactory
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist1.add(tagger.tagSentence(sentence));
//taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
tokenizer = new DocumentPreprocessor(new StringReader(str));
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist2.add(tagger.tagSentence(sentence));
//taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
int counter = 0;
int counter1 = 0;
counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum);
counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum);
int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
overValue *= 32;
score -= overValue;
ConcurrentMap<Integer, String> tgwlistIndex = new MapMaker().concurrencyLevel(2).makeMap();
taggedwordlist1.forEach((TGWList) -> {
TGWList.forEach((TaggedWord) -> {
if (!tgwlistIndex.values().contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) {
tgwlistIndex.put(tgwlistIndex.size() + 1, TaggedWord.tag());
}
});
});
AtomicInteger runCount = new AtomicInteger(0);
taggedwordlist2.forEach((TGWList) -> {
TGWList.forEach((TaggedWord) -> {
if (tgwlistIndex.values().contains(TaggedWord.tag())) {
tgwlistIndex.values().remove(TaggedWord.tag());
runCount.getAndIncrement();
}
});
});
score += runCount.get() * 64;
////System.out.println("score post runCountGet: " + score + "\n");
ConcurrentMap<Integer, Tree> sentenceConstituencyParseList = new MapMaker().concurrencyLevel(2).makeMap();
try {
for (CoreMap sentence : pipelineAnnotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
sentenceConstituencyParseList.put(sentenceConstituencyParseList.size(), sentenceConstituencyParse);
}
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, Integer> alltypeDepsSizeMap = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Integer> summationMap = new MapMaker().concurrencyLevel(2).makeMap();
for (CoreMap sentence : pipelineAnnotation2.get(CoreAnnotations.SentencesAnnotation.class)) {
int constiRelationsize = 0;
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
ConcurrentMap<Integer, String> filerTreeContent = new MapMaker().concurrencyLevel(2).makeMap();
for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList.values()) {
Set<Constituent> constinuent1 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
Set<Constituent> constinuent2 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
ConcurrentMap<Integer, String> constiLabels = new MapMaker().concurrencyLevel(2).makeMap();
for (Constituent consti : constinuent1) {
for (Constituent consti1 : constinuent2) {
if (consti.value().equals(consti1.value()) && !constiLabels.values().contains(consti.value())) {
constiLabels.put(constiLabels.size(), consti.value());
constiRelationsize++;
}
}
}
int constituents1 = constinuent1.size() - constiRelationsize;
int constituents2 = constinuent2.size() - constiRelationsize;
if (constituents1 * 5 < constituents2 || constituents2 * 5 < constituents1) {
score -= (constituents1 + constituents2) * 200;
} else if (constituents1 == 0 || constituents2 == 0) {
score -= constiRelationsize * 200;
} else {
score += constiRelationsize * 160;
//System.out.println("score post constiRelationsize: " + score + "\nconstituents1: " + constituents1
// + "\nconstituents2: " + constituents2 + "\nconstiRelationsize: " + constiRelationsize + "\n");
}
GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
2019-04-02 00:59:23 +02:00
int relationApplicable1 = 0;
int relationApplicable2 = 0;
2019-04-05 13:29:20 +02:00
int grammaticalRelation1 = 0;
int grammaticalRelation2 = 0;
for (TypedDependency TDY1 : allTypedDependencies1) {
IndexedWord dep = TDY1.dep();
IndexedWord gov = TDY1.gov();
GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 700;
//System.out.println("grammaticalRelation applicable score: " + score + "\n");
2019-04-05 13:29:20 +02:00
grammaticalRelation1++;
}
GrammaticalRelation reln = TDY1.reln();
if (reln.isApplicable(sentenceConstituencyParse)) {
score += 525;
2019-04-02 00:59:23 +02:00
relationApplicable1++;
}
}
for (TypedDependency TDY : allTypedDependencies) {
IndexedWord dep = TDY.dep();
IndexedWord gov = TDY.gov();
GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 900;
//System.out.println("grammaticalRelation appliceable score: " + score + "\n");
2019-04-05 13:29:20 +02:00
grammaticalRelation2++;
}
GrammaticalRelation reln = TDY.reln();
if (reln.isApplicable(sentenceConstituencyParse1)) {
score += 525;
//System.out.println("reln appliceable score: " + score + "\n");
2019-04-02 00:59:23 +02:00
relationApplicable2++;
}
}
if ((grammaticalRelation1 == 0 && grammaticalRelation2 > 4) || (grammaticalRelation2 == 0 && grammaticalRelation1 > 4)) {
2019-04-05 13:29:20 +02:00
score -= 3450;
//System.out.println("grammaticalRelation1 score trim: " + score + "\ngrammaticalRelation1: " + grammaticalRelation1
// + "\ngrammaticalRelation2: " + grammaticalRelation2 + "\n");
2019-04-05 13:29:20 +02:00
}
2019-04-02 00:59:23 +02:00
if (!allTypedDependencies.isEmpty() || !allTypedDependencies1.isEmpty()) {
2019-04-05 13:29:20 +02:00
int allTypeDep1 = allTypedDependencies.size();
int allTypeDep2 = allTypedDependencies1.size();
if (allTypeDep1 <= allTypeDep2 * 5 && allTypeDep2 <= allTypeDep1 * 5) {
if (allTypeDep1 > 0 && allTypeDep2 > 0) {
if (allTypeDep1 * 2 <= allTypeDep2 || allTypeDep2 * 2 <= allTypeDep1) {
score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 160 : (allTypeDep2 - allTypeDep1) * 160;
//System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: "
// + allTypeDep2 + "\n");
} else {
score += allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 600 : (allTypeDep2 - allTypeDep1) * 600;
//System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: "
// + allTypeDep2 + "\n");
}
2019-04-05 13:29:20 +02:00
alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep1);
alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep2);
}
}
if (allTypeDep1 >= 5 && allTypeDep2 >= 5) {
int largerTypeDep = allTypeDep1 > allTypeDep2 ? allTypeDep1 : allTypeDep2;
int smallerTypeDep = allTypeDep1 < allTypeDep2 ? allTypeDep1 : allTypeDep2;
int summation = (largerTypeDep * largerTypeDep) - (smallerTypeDep * smallerTypeDep);
if (summation / largerTypeDep < 15.0 && summation / largerTypeDep > 10.0 && smallerTypeDep * 2 > largerTypeDep
&& !summationMap.values().contains(summation)) {
2019-04-05 13:29:20 +02:00
score += summation * 80;
summationMap.put(summationMap.size() + 1, summation);
//System.out.println("score post summation: " + score + "\nsummation: " + summation + "\n");
} else if (largerTypeDep == smallerTypeDep) {
score += 2500;
//System.out.println("score largerTypeDep equals smallerTypeDep: " + score + "\nlargerTypeDep: " + largerTypeDep + "\n");
2019-04-05 13:29:20 +02:00
}
}
if (relationApplicable1 > 0 && relationApplicable2 > 0 && relationApplicable1 == relationApplicable2
&& grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 == grammaticalRelation2) {
2019-04-02 00:59:23 +02:00
score += 3500;
//System.out.println("score relationApplicable equal: " + score + "\n");
} else if (allTypeDep1 * 5 < allTypeDep2 || allTypeDep2 * 5 < allTypeDep1) {
score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * (allTypeDep2 * 450)
: (allTypeDep2 - allTypeDep1) * (allTypeDep1 * 450);
//System.out.println("score minus grammaticalRelation equal: " + score + "\n");
}
if (relationApplicable1 > 1 && relationApplicable2 > 1 && relationApplicable1 * 3 > relationApplicable2
&& relationApplicable2 * 3 > relationApplicable1) {
score += relationApplicable1 > relationApplicable2 ? (relationApplicable1 - relationApplicable2) * 1500
: (relationApplicable2 - relationApplicable1) * 1500;
//System.out.println("score relationApplicable plus: " + score + "\n");
} else if (relationApplicable1 * 5 < relationApplicable2 || relationApplicable2 * 5 < relationApplicable1) {
score -= relationApplicable1 > relationApplicable2 ? (relationApplicable1 - relationApplicable2) * 500
: (relationApplicable2 - relationApplicable1) * 500;
//System.out.println("score relationApplicable minus: " + score + "\n");
}
if (grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 * 3 > grammaticalRelation2
&& grammaticalRelation2 * 3 > grammaticalRelation1) {
score += grammaticalRelation1 > grammaticalRelation2 ? (grammaticalRelation1 - grammaticalRelation2) * 1500
: (grammaticalRelation2 - grammaticalRelation1) * 1500;
//System.out.println("score grammaticalRelation plus: " + score + "\n");
} else if (grammaticalRelation1 * 5 < grammaticalRelation2 || grammaticalRelation2 * 5 < grammaticalRelation1) {
score -= grammaticalRelation1 > grammaticalRelation2 ? (grammaticalRelation1 - grammaticalRelation2) * 500
: (grammaticalRelation2 - grammaticalRelation1) * 500;
//System.out.println("score grammaticalRelation minus: " + score + "\n");
}
//System.out.println("score post relationApplicable1 veri: " + score + "\nrelationApplicable1: " + relationApplicable1
// + "\nrelationApplicable2: " + relationApplicable2 + "\ngrammaticalRelation1: " + grammaticalRelation1 + "\n"
// + "grammaticalRelation2: " + grammaticalRelation2 + "\n");
}
AtomicInteger runCount1 = new AtomicInteger(0);
sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> {
sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma())
&& !filerTreeContent.values().contains(LBW.lemma()))).map((_item) -> {
filerTreeContent.put(filerTreeContent.size() + 1, LBW.lemma());
return _item;
}).forEachOrdered((_item) -> {
runCount1.getAndIncrement();
});
});
score += runCount1.get() * 250;
}
}
//System.out.println("score pre typeSizeSmallest: " + score + "\n");
2019-04-05 13:29:20 +02:00
int typeSizeSmallest = 100;
int typeSizeLargest = 0;
for (Integer i : alltypeDepsSizeMap.values()) {
if (i > typeSizeLargest) {
typeSizeLargest = i;
}
if (i < typeSizeSmallest) {
typeSizeSmallest = i;
}
}
if (typeSizeLargest >= typeSizeSmallest * 3) {
score -= typeSizeLargest * 160;
2019-04-05 13:29:20 +02:00
}
typeSizeLargest = 0;
typeSizeSmallest = 100;
for (int i : summationMap.values()) {
if (i > typeSizeLargest) {
typeSizeLargest = i;
}
if (i < typeSizeSmallest) {
typeSizeSmallest = i;
}
}
if (typeSizeLargest >= typeSizeSmallest * 3) {
score -= typeSizeLargest * 160;
2019-04-05 13:29:20 +02:00
}
} catch (Exception ex) {
//System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n");
}
sentenceConstituencyParseList.clear();
ConcurrentMap<Integer, SimpleMatrix> simpleSMXlist = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, SimpleMatrix> simpleSMXlistVector = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Integer> sentiment1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Integer> sentiment2 = new MapMaker().concurrencyLevel(2).makeMap();
for (CoreMap sentence : pipelineAnnotation1Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
sentiment1.put(sentiment1.size(), RNNCoreAnnotations.getPredictedClass(tree));
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
simpleSMXlist.put(simpleSMXlist.size(), predictions);
simpleSMXlistVector.put(simpleSMXlistVector.size() + 1, nodeVector);
}
ConcurrentMap<Integer, Double> elementSumCounter = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Double> dotMap = new MapMaker().concurrencyLevel(2).makeMap();
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, Double> elementSumMap = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Double> dotSumMap = new MapMaker().concurrencyLevel(2).makeMap();
//System.out.println("score pre pipelineAnnotation2Sentiment: " + score + "\n");
for (CoreMap sentence : pipelineAnnotation2Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
sentiment2.put(sentiment2.size() + 1, RNNCoreAnnotations.getPredictedClass(tree));
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
ConcurrentMap<Integer, Double> AccumulateDotMap = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Double> subtractorMap = new MapMaker().concurrencyLevel(2).makeMap();
2019-04-02 00:59:23 +02:00
ConcurrentMap<Integer, Double> dotPredictions = new MapMaker().concurrencyLevel(2).makeMap();
Double largest = 10.0;
Double shortest = 100.0;
for (SimpleMatrix simpleSMX : simpleSMXlist.values()) {
double dotPrediction = predictions.dot(simpleSMX) * 100;
AccumulateDotMap.put(AccumulateDotMap.size() + 1, dotPrediction);
double subtracter = dotPrediction > 50 ? dotPrediction - 100 : dotPrediction > 0 ? 100 - dotPrediction : 0;
subtractorMap.put(subtractorMap.size() + 1, subtracter);
2019-04-02 00:59:23 +02:00
if (!dotPredictions.values().contains(dotPrediction)) {
if (dotPrediction > largest) {
largest = dotPrediction;
}
if (dotPrediction < shortest) {
shortest = dotPrediction;
2019-03-02 15:10:46 +01:00
}
2019-04-02 00:59:23 +02:00
Double dotPredictionIntervalDifference = largest - shortest;
subtracter *= 25;
//System.out.println("subtracter: " + subtracter + "\n");
2019-04-02 00:59:23 +02:00
if (dotPredictionIntervalDifference < 5.0) {
if (dotPredictions.values().size() > 0) {
if (subtracter > 0) {
score -= subtracter;
} else {
score += subtracter;
//System.out.println("score + subtracter: " + score + "\nsubtracter: " + subtracter + "\n");
}
2019-04-02 00:59:23 +02:00
}
} else {
score -= subtracter / 10;
2019-04-02 00:59:23 +02:00
}
} else {
subtracter -= 100;
subtracter *= 25;
score += subtracter * dotPrediction;
//System.out.println("score + subtracter * dotPrediction: " + score + "\nsubtracter: " + subtracter + "\ndotPrediction: "
//+ dotPrediction + "\n");
}
2019-04-02 00:59:23 +02:00
dotPredictions.put(dotPredictions.size() + 1, dotPrediction);
}
//System.out.println("score post subtracter1: " + score + "\n");
Double subTracPre = 0.0;
for (Double subtractors : subtractorMap.values()) {
if (Objects.equals(subTracPre, subtractors)) {
score -= 1500;
//System.out.println("score minus subTracPre equals: " + score + "\nsubTracPre: " + subTracPre + "\n");
}
subTracPre = subtractors;
}
2019-04-02 00:59:23 +02:00
ConcurrentMap<Integer, Double> DotOverTransfer = dotPredictions;
dotPredictions = new MapMaker().concurrencyLevel(2).makeMap();
Double totalSubtraction = 0.0;
2019-04-02 00:59:23 +02:00
for (SimpleMatrix simpleSMX : simpleSMXlist.values()) {
double dotPrediction = simpleSMX.dot(predictions) * 100;
AccumulateDotMap.put(AccumulateDotMap.size() + 1, dotPrediction);
double subtracter = dotPrediction > 50 ? dotPrediction - 100 : dotPrediction > 0 ? 100 - dotPrediction : 0;
//System.out.println("dotPrediction: " + dotPrediction + "\nsubtracter: " + subtracter + "\n");
2019-04-02 00:59:23 +02:00
subtractorMap.put(subtractorMap.size() + 1, subtracter);
if (!dotPredictions.values().contains(dotPrediction)) {
for (Double transferDots : DotOverTransfer.values()) {
if (transferDots == dotPrediction) {
totalSubtraction += transferDots;
} else {
score -= subtracter * 25;
//System.out.println("score minus subtracter: " + score + "\nsubtracter: " + subtracter + "\n");
2019-04-02 00:59:23 +02:00
}
//System.out.println("transferDots: " + transferDots + "\n");
2019-04-02 00:59:23 +02:00
}
} else {
subtracter -= 100;
subtracter *= 25;
score -= subtracter * dotPrediction;
//System.out.println("score minus subtracter * dotPrediction 2: " + score + "\ndotPrediction: "
// + dotPrediction + "\n");
}
2019-04-02 00:59:23 +02:00
dotPredictions.put(dotPredictions.size() + 1, dotPrediction);
}
if (totalSubtraction > 45.0) {
score -= totalSubtraction * 25;
} else {
score += totalSubtraction * 25;
}
//System.out.println("score post totalSubtraction: " + score + "\ntotalSubtraction: " + totalSubtraction + "\n");
Double preAccumulatorDot = 0.0;
Double postAccumulatorDot = 0.0;
for (Double accumulators : AccumulateDotMap.values()) {
2019-04-02 00:59:23 +02:00
if (Objects.equals(preAccumulatorDot, accumulators)) {
if (Objects.equals(postAccumulatorDot, accumulators)) {
score -= 1400;
}
postAccumulatorDot = accumulators;
2019-03-02 15:10:46 +01:00
}
preAccumulatorDot = accumulators;
2019-03-02 15:10:46 +01:00
}
subTracPre = 0.0;
for (Double subtractors : subtractorMap.values()) {
if (Objects.equals(subTracPre, subtractors)) {
score -= 500;
}
subTracPre = subtractors;
2019-03-02 15:10:46 +01:00
}
Double preDot = 0.0;
Double postDot = 0.0;
for (SimpleMatrix simpleSMX : simpleSMXlistVector.values()) {
double dot = nodeVector.dot(simpleSMX);
double elementSum = nodeVector.kron(simpleSMX).elementSum();
if (preDot == dot) {
if (postDot == dot) {
score -= 500;
}
postDot = dot;
}
preDot = dot;
elementSum = Math.round(elementSum * 100.0) / 100.0;
elementSumCounter.put(elementSumCounter.size() + 1, elementSum);
dotMap.put(dotMap.size() + 1, dot);
2019-04-05 13:29:20 +02:00
if (!dotSumMap.values().contains(dot)) {
if (dot < 0.000) {
score += dot * 500;
//System.out.println("score + dot * 500: " + score + "\ndot: " + dot + "\n");
2019-04-05 13:29:20 +02:00
} else if (dot < 0.1) {
score += 256;
//System.out.println("score + 256: " + score + "\ndot: " + dot + "<n");
2019-04-05 13:29:20 +02:00
}
if (dot > 0.50) {
score -= 1200;
2019-04-05 13:29:20 +02:00
}
dotSumMap.put(dotSumMap.size() + 1, dot);
} else {
score -= 250;
}
2019-04-05 13:29:20 +02:00
if (!elementSumMap.values().contains(elementSum)) {
if (elementSum < 0.01 && elementSum > 0.00) {
score += 3300;
//System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: "
// + elementSum + "\n");
2019-04-05 13:29:20 +02:00
} else if (elementSum > 0.1 && elementSum < 0.2) {
score += 1100;
//System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: "
// + elementSum + "\n");
2019-04-05 13:29:20 +02:00
} else {
score -= elementSum * 1024;
2019-04-05 13:29:20 +02:00
}
elementSumMap.put(elementSumMap.size() + 1, elementSum);
} else {
score -= 250;
}
}
for (SimpleMatrix simpleSMX : simpleSMXlistVector.values()) {
double dot = simpleSMX.dot(nodeVector);
double elementSum = simpleSMX.kron(nodeVector).elementSum();
if (preDot == dot) {
if (postDot == dot) {
score -= 500;
}
postDot = dot;
}
preDot = dot;
elementSum = Math.round(elementSum * 100.0) / 100.0;
elementSumCounter.put(elementSumCounter.size() + 1, elementSum);
dotMap.put(dotMap.size() + 1, dot);
2019-04-05 13:29:20 +02:00
if (!dotSumMap.values().contains(dot)) {
if (dot < 0.1) {
score += 256;
//System.out.println("score dot < 0.1: " + score + "\ndot: "
// + dot + "\n");
2019-04-05 13:29:20 +02:00
}
if (dot > 0.50) {
score -= 1400;
2019-04-05 13:29:20 +02:00
}
dotSumMap.put(dotSumMap.size() + 1, dot);
} else {
score -= 250;
}
2019-04-05 13:29:20 +02:00
if (!elementSumMap.values().contains(elementSum)) {
if (elementSum < 0.01 && elementSum > 0.00) {
score += 1300;
//System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: "
// + elementSum + "\n");
2019-04-05 13:29:20 +02:00
} else if (elementSum > 0.1 && elementSum < 1.0) {
score += 1100;
//System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: "
// + elementSum + "\n");
2019-04-05 13:29:20 +02:00
} else {
score -= elementSum * 1024;
2019-04-05 13:29:20 +02:00
}
elementSumMap.put(elementSumMap.size() + 1, elementSum);
} else {
score -= 250;
}
}
}
//System.out.println("score post sentiment analyzer2: " + score + "\n");
OptionalDouble minvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).min();
OptionalDouble maxvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).max();
double total = minvalueDots.getAsDouble() + maxvalueDots.getAsDouble();
boolean permitted = false;
if (minvalueDots.getAsDouble() != maxvalueDots.getAsDouble()) {
permitted = true;
}
if (permitted) {
Double dotsVariance = maxvalueDots.getAsDouble() - minvalueDots.getAsDouble();
//System.out.println("maxvalueDots.getAsDouble():" + maxvalueDots.getAsDouble() + "\nminvalueDots.getAsDouble():"
// + minvalueDots.getAsDouble() + "\ndotsVariance: " + dotsVariance + "\n");
if (maxvalueDots.getAsDouble() > minvalueDots.getAsDouble() * 10) {
score -= 5500;
} else if (minvalueDots.getAsDouble() < -0.10) {
score -= 3500;
} else if (dotsVariance < 0.5 && dotsVariance > 0.1) {
score -= 3500;
} else if (dotsVariance > minvalueDots.getAsDouble() * 2) {
score += 3500;
//System.out.println("varians 4 score. " + score + "\n");
} else if (minvalueDots.getAsDouble() * 3 > maxvalueDots.getAsDouble() && maxvalueDots.getAsDouble() < 0.1001) {
score += dotsVariance * 200000;
}
}
//System.out.println("score post dotsVariance: " + score + "\n");
OptionalDouble minvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).min();
OptionalDouble maxvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).max();
Double elementsVariance = maxvalueElements.getAsDouble() - minvalueElements.getAsDouble();
//System.out.println("elementsVariance: " + elementsVariance + "\nmaxvalueElements.getAsDouble(): "
// + maxvalueElements.getAsDouble() + "\nminvalueElements.getAsDouble(): " + minvalueElements.getAsDouble() + "\n");
2019-04-05 13:29:20 +02:00
if (elementsVariance == 0.0) {
score -= 550;
} else if (elementsVariance < 0.02 && elementsVariance > -0.01) {
score += 3500;
} else if (minvalueElements.getAsDouble() < 0.0 && minvalueElements.getAsDouble() - maxvalueElements.getAsDouble() < 0.50) {
score -= 2500;
} else if (elementsVariance * 2 >= maxvalueElements.getAsDouble() && elementsVariance < 0.1) {
score -= elementsVariance * 86000;
}
//System.out.println("score post elementsVariance: " + score + "\n");
score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500;
DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter);
List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter);
score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200;
//System.out.println("score post classifyRaw: " + score + "\n");
int mainSentiment1 = 0;
int longest1 = 0;
int mainSentiment2 = 0;
int longest2 = 0;
for (CoreMap sentence : pipelineAnnotation1Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
String partText = sentence.toString();
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
if (partText.length() > longest1) {
mainSentiment1 = sentiment;
longest1 = partText.length();
}
}
for (CoreMap sentence : pipelineAnnotation2Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
String partText = sentence.toString();
if (partText.length() > longest2) {
mainSentiment2 = sentiment;
longest2 = partText.length();
}
}
//System.out.println("score post pipelineAnnotation2Sentiment: " + score + "\n");
if (longest1 != longest2) {
long deffLongest = longest1 > longest2 ? longest1 : longest2;
long deffshorter = longest1 < longest2 ? longest1 : longest2;
if (deffLongest > deffshorter * 5) {
score -= 5500;
} else if (deffLongest < (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) {
score += (deffLongest - deffshorter) * 20;
2019-04-05 13:29:20 +02:00
} else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) {
score += (deffLongest - deffshorter) * 20;
} else if (deffLongest - deffshorter < 2) {
score += (deffLongest - deffshorter) * 20;
2019-04-02 00:59:23 +02:00
} else if (deffshorter * 2 >= deffLongest && deffshorter * 2 < deffLongest + 5) {
score += (deffLongest - deffshorter) * 20;
} else {
score -= (deffLongest - deffshorter) * 50;
}
if (deffLongest - deffshorter <= 5) {
score += 250;
}
}
int tokensCounter1 = 0;
int tokensCounter2 = 0;
int anotatorcounter1 = 0;
int anotatorcounter2 = 0;
int inflectedCounterPositive1 = 0;
int inflectedCounterPositive2 = 0;
int inflectedCounterNegative = 0;
int MarkedContinuousCounter1 = 0;
int MarkedContinuousCounter2 = 0;
2019-04-02 00:59:23 +02:00
Integer MarkedContiniousCounter1Entries = 0;
Integer MarkedContiniousCounter2Entries = 0;
int UnmarkedPatternCounter1 = 0;
int UnmarkedPatternCounter2 = 0;
ConcurrentMap<Integer, String> ITokenMapTag1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> ITokenMapTag2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenStems1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenStems2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenForm1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenForm2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenGetEntry1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenGetEntry2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenGetiPart1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenGetiPart2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenEntryPOS1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> strTokenEntryPOS2 = new MapMaker().concurrencyLevel(2).makeMap();
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, Integer> entryCounts1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, Integer> entryCounts2 = new MapMaker().concurrencyLevel(2).makeMap();
try {
List<CoreMap> sentences = jmweStrAnnotation1.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) {
if (token.isInflected()) {
inflectedCounterPositive1++;
} else {
inflectedCounterNegative++;
}
strTokenForm1.put(strTokenForm1.size() + 1, token.getForm());
strTokenGetEntry1.put(strTokenGetEntry1.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1));
Collection<IMWEDesc.IPart> values = token.getPartMap().values();
IMWEDesc entry = token.getEntry();
MarkedContinuousCounter1 += entry.getMarkedContinuous();
UnmarkedPatternCounter1 += entry.getUnmarkedPattern();
for (IMWEDesc.IPart iPart : values) {
strTokenGetiPart1.put(strTokenGetiPart1.size() + 1, iPart.getForm());
}
for (String strPostPrefix : entry.getPOS().getPrefixes()) {
strTokenEntryPOS1.put(strTokenEntryPOS1.size() + 1, strPostPrefix);
}
2019-04-05 13:29:20 +02:00
for (int counts : entry.getCounts()) {
entryCounts1.put(entryCounts1.size() + 1, counts);
}
for (IToken tokens : token.getTokens()) {
ITokenMapTag1.put(ITokenMapTag1.size() + 1, tokens.getTag());
for (String strtoken : tokens.getStems()) {
strTokenStems1.put(strTokenStems1.size() + 1, strtoken);
2019-04-02 00:59:23 +02:00
MarkedContiniousCounter1Entries++;
}
}
tokensCounter1++;
}
anotatorcounter1++;
}
sentences = jmweStrAnnotation2.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences) {
for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) {
if (token.isInflected()) {
inflectedCounterPositive2++;
} else {
inflectedCounterNegative--;
}
strTokenForm2.put(strTokenForm2.size() + 1, token.getForm());
strTokenGetEntry2.put(strTokenGetEntry2.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1));
Collection<IMWEDesc.IPart> values = token.getPartMap().values();
IMWEDesc entry = token.getEntry();
MarkedContinuousCounter2 += entry.getMarkedContinuous();
UnmarkedPatternCounter2 += entry.getUnmarkedPattern();
for (IMWEDesc.IPart iPart : values) {
strTokenGetiPart2.put(strTokenGetiPart2.size() + 1, iPart.getForm());
}
for (String strPostPrefix : entry.getPOS().getPrefixes()) {
strTokenEntryPOS2.put(strTokenEntryPOS2.size() + 1, strPostPrefix);
}
2019-04-05 13:29:20 +02:00
for (int counts : entry.getCounts()) {
entryCounts2.put(entryCounts2.size() + 1, counts);
}
for (IToken tokens : token.getTokens()) {
ITokenMapTag2.put(ITokenMapTag2.size() + 1, tokens.getTag());
for (String strtoken : tokens.getStems()) {
strTokenStems2.put(strTokenStems2.size() + 1, strtoken);
2019-04-02 00:59:23 +02:00
MarkedContiniousCounter2Entries++;
}
}
tokensCounter2++;
}
anotatorcounter2++;
}
} catch (Exception ex) {
//System.out.println("SENTIMENT stacktrace: " + ex.getMessage() + "\n");
}
2019-04-05 13:29:20 +02:00
int entry1 = entryCounts1.values().size();
int entry2 = entryCounts2.values().size();
//System.out.println("score post JMWEAnnotation: " + score + "\nentry1: " + entry1 + "\nentry2: " + entry2 + "\n");
if (entry1 > 0 && entry2 > 0) {
if ((entry1 >= entry2 * 5) || (entry2 >= entry1 * 5)) {
score -= entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450;
//System.out.println("1");
} else if ((entry1 >= entry2 * 50 || entry2 >= entry1 * 50)) {
score -= entry1 > entry2 ? entry1 * 180 : entry2 * 180;
//System.out.println("2");
} else if (entry1 >= entry2 * 2 || entry2 >= entry1 * 2) {
score += entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450;
//System.out.println("3");
} else if (entry1 > 10 && entry2 > 10 && entry1 * 2 > entry2 && entry2 * 2 > entry1) {
score += entry1 > entry2 ? entry2 * 600 : entry1 * 600;
//System.out.println("6");
}
2019-04-05 13:29:20 +02:00
}
ConcurrentMap<Integer, Integer> countsMap = new MapMaker().concurrencyLevel(2).makeMap();
for (int counts : entryCounts1.values()) {
for (int counts1 : entryCounts2.values()) {
if (counts == counts1 && counts > 0 && !countsMap.values().contains(counts)) {
score += counts * 250;
//System.out.println("score post counts: " + score + "\nCounts: " + counts + "\n");
2019-04-05 13:29:20 +02:00
countsMap.put(countsMap.size() + 1, counts);
}
}
}
if (strTokenEntryPOS1.values().size() > 1 && strTokenEntryPOS2.values().size() > 1) {
for (String strTokenPos1 : strTokenEntryPOS1.values()) {
for (String strTokenPos2 : strTokenEntryPOS2.values()) {
if (strTokenPos1.equals(strTokenPos2)) {
score += 500;
} else {
score -= 650;
//System.out.println("strTokenEntryPOS score: " + score + "\n");
2019-04-05 13:29:20 +02:00
}
}
}
}
//System.out.println("score pre UnmarkedPatternCounter: " + score + "\nUnmarkedPatternCounter1: " + UnmarkedPatternCounter1
// + "\nUnmarkedPatternCounter2: " + UnmarkedPatternCounter2 + "\n");
if (UnmarkedPatternCounter1 > 0 && UnmarkedPatternCounter2 > 0) {
if (UnmarkedPatternCounter1 * 2 > UnmarkedPatternCounter2 && UnmarkedPatternCounter2 * 2 > UnmarkedPatternCounter1) {
score += 2500;
} else if (UnmarkedPatternCounter1 * 5 < UnmarkedPatternCounter2 || UnmarkedPatternCounter2 * 5 < UnmarkedPatternCounter1) {
score -= 4000;
}
}
//System.out.println("score post UnmarkedPatternCounter: " + score + "\n");
if (MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0) {
2019-04-05 13:29:20 +02:00
if (MarkedContinuousCounter1 > MarkedContinuousCounter2 * 50 || MarkedContinuousCounter2 > MarkedContinuousCounter1 * 50) {
score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? MarkedContinuousCounter1 * 120 : MarkedContinuousCounter2 * 120;
//System.out.println("score post MarkedContinuousCounter too big: " + score + "\n");
2019-04-05 13:29:20 +02:00
} else if (!Objects.equals(MarkedContiniousCounter1Entries, MarkedContiniousCounter2Entries)
2019-04-02 00:59:23 +02:00
&& (MarkedContinuousCounter1 * 2 >= MarkedContinuousCounter2 * MarkedContinuousCounter1)
|| (MarkedContinuousCounter2 * 2 >= MarkedContinuousCounter1 * MarkedContinuousCounter2)) {
2019-04-05 13:29:20 +02:00
score += 4500;
} else if (MarkedContiniousCounter1Entries == 0 || MarkedContiniousCounter2Entries == 0) {
2019-04-02 00:59:23 +02:00
score += MarkedContinuousCounter1 > MarkedContinuousCounter2 ? (MarkedContinuousCounter2 - MarkedContinuousCounter1) * 500
: (MarkedContinuousCounter1 - MarkedContinuousCounter2) * 500;
}
2019-04-05 13:29:20 +02:00
if (MarkedContiniousCounter1Entries > 0 && MarkedContiniousCounter2Entries > 0 && MarkedContinuousCounter1 > 0
&& MarkedContinuousCounter2 > 0 && MarkedContinuousCounter1 < MarkedContinuousCounter2 * 10
&& MarkedContinuousCounter2 < MarkedContinuousCounter1 * 10) {
if (MarkedContiniousCounter1Entries > MarkedContiniousCounter2Entries * 5
|| MarkedContiniousCounter2Entries > MarkedContiniousCounter1Entries * 5
|| MarkedContiniousCounter1Entries * 5 < MarkedContinuousCounter1
|| MarkedContiniousCounter1Entries * 5 < MarkedContinuousCounter2
|| MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter1
|| MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter2) {
score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? MarkedContinuousCounter1 * 400 : MarkedContinuousCounter2 * 400;
//System.out.println("score post MarkedContinuousCounter: " + score + "\n");
2019-04-05 13:29:20 +02:00
}
}
}
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, String> strtokensMap = new MapMaker().concurrencyLevel(2).makeMap();
for (String strTokeniPart1 : strTokenGetiPart1.values()) {
for (String strTokeniPart2 : strTokenGetiPart2.values()) {
2019-04-05 13:29:20 +02:00
if (strTokeniPart1.equals(strTokeniPart2) && !strtokensMap.values().contains(strTokeniPart2)) {
strtokensMap.put(strtokensMap.size() + 1, strTokeniPart2);
score += 400;
2019-04-05 13:29:20 +02:00
} else {
score -= 200;
//System.out.println("score minus strTokenGetiPart: " + score + "\n");
}
}
}
2019-04-05 13:29:20 +02:00
int tokenEntry1 = strTokenGetEntry1.values().size();
int tokenEntry2 = strTokenGetEntry2.values().size();
boolean boundariyLeacks = false;
int remnantCounter = 0;
if (tokenEntry1 * 2 != tokenEntry2 && tokenEntry2 * 2 != tokenEntry1) {
boundariyLeacks = true;
}
ConcurrentMap<Integer, String> entryTokenMap = new MapMaker().concurrencyLevel(2).makeMap();
for (String strTokenEntry1 : strTokenGetEntry1.values()) {
for (String strTokenEntry2 : strTokenGetEntry2.values()) {
2019-04-05 13:29:20 +02:00
if (!entryTokenMap.values().contains(strTokenEntry2)) {
if (strTokenEntry1.equals(strTokenEntry2)) {
score += boundariyLeacks ? 2500 : 2500 / 2;
} else if (!boundariyLeacks) {
score -= 450;
//System.out.println("boundariyLeacks score: " + score + "\n");
2019-04-05 13:29:20 +02:00
} else {
remnantCounter++;
}
}
2019-04-05 13:29:20 +02:00
entryTokenMap.put(entryTokenMap.size() + 1, strTokenEntry2);
}
}
//System.out.println("score pre remnantCounter: " + score + "\n");
score += remnantCounter * 250;
//System.out.println("score post remnantCounter: " + score + "\n");
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, String> iTokenMapTagsMap = new MapMaker().concurrencyLevel(2).makeMap();
for (String strmapTag : ITokenMapTag1.values()) {
for (String strmapTag1 : ITokenMapTag2.values()) {
if (strmapTag.equals(strmapTag1)) {
2019-04-05 13:29:20 +02:00
score -= 1450;
} else if (!iTokenMapTagsMap.values().contains(strmapTag)) {
score += 725;
iTokenMapTagsMap.put(iTokenMapTagsMap.size() + 1, strmapTag);
}
}
}
//System.out.println("score post strmapTag: " + score + "\n");
2019-04-05 13:29:20 +02:00
int tokenform1size = strTokenForm1.values().size();
int tokenform2size = strTokenForm2.values().size();
if (tokenform1size > 0 || tokenform2size > 0) {
if (tokenform1size < tokenform2size * 5 && tokenform2size < tokenform1size * 5) {
for (String strTokenForm1itr1 : strTokenForm1.values()) {
for (String strTokenForm1itr2 : strTokenForm2.values()) {
if (strTokenForm1itr1.equals(strTokenForm1itr2)) {
score -= 1600;
} else {
score += 500;
//System.out.println("tokenform1size score500: " + score + "\n");
2019-04-05 13:29:20 +02:00
}
}
}
2019-04-05 13:29:20 +02:00
} else if (tokenform1size > 0 && tokenform2size > 0) {
if (tokenform1size * 2 >= tokenform2size && tokenform2size * 2 >= tokenform1size) {
score += tokenform1size > tokenform2size ? tokenform1size * 600 : tokenform2size * 600;
} else if (tokenform1size * 4 <= tokenform2size || tokenform2size * 4 <= tokenform1size) {
score -= tokenform1size > tokenform2size ? (tokenform1size - tokenform2size) * 600 : (tokenform2size - tokenform1size) * 600;
}
//System.out.println("tokenform1size score: " + score + "\ntokenform1size: " + tokenform1size + "\ntokenform2size: "
// + tokenform2size + "\n");
}
}
//System.out.println("Score pre tokenStemmingMap: " + score + "\n");
2019-04-05 13:29:20 +02:00
ConcurrentMap<Integer, String> tokenStemmingMap = new MapMaker().concurrencyLevel(2).makeMap();
for (String strTokenStem : strTokenStems1.values()) {
for (String strTokenStem1 : strTokenStems2.values()) {
if (strTokenStem.equals(strTokenStem1) && !tokenStemmingMap.values().contains(strTokenStem)) {
score += 1500;
2019-04-05 13:29:20 +02:00
tokenStemmingMap.put(tokenStemmingMap.size() + 1, strTokenStem);
}
//System.out.println("score strTokenStem: " + score + "\n");
}
}
//System.out.println("Score pre inflected: " + score + "\n");
//System.out.println("inflectedCounterPositive1: " + inflectedCounterPositive1 + "\ninflectedCounterPositive2: "
// + inflectedCounterPositive2 + "\ninflectedCounterNegative: " + inflectedCounterNegative + "\n");
if (inflectedCounterPositive1 + inflectedCounterPositive2 > inflectedCounterNegative && inflectedCounterNegative > 0) {
score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 650;
//System.out.println("score inflectedCounterPositive plus: " + score + "\n");
}
if (inflectedCounterPositive1 > 0 && inflectedCounterPositive2 > 0) {
if (inflectedCounterPositive1 * 2 > inflectedCounterPositive2 && inflectedCounterPositive2 * 2 > inflectedCounterPositive1) {
score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550;
//System.out.println("score plus inflectedCounterPositive * 2: " + score + "\n");
} else if (inflectedCounterPositive1 * 5 < inflectedCounterPositive2 || inflectedCounterPositive2 * 5 < inflectedCounterPositive1) {
score -= inflectedCounterPositive1 > inflectedCounterPositive2 ? (inflectedCounterPositive1 - inflectedCounterPositive2) * 400
: (inflectedCounterPositive2 - inflectedCounterPositive1) * 400;
//System.out.println("score minus inflectedCounterPositive * 2: " + score + "\n");
}
}
//System.out.println("anotatorcounter1: " + anotatorcounter1 + "\nanotatorcounter2: " + anotatorcounter2 + "\n");
if (anotatorcounter1 > 1 && anotatorcounter2 > 1) {
if (anotatorcounter1 * 2 > anotatorcounter2 && anotatorcounter2 * 2 > anotatorcounter1) {
score += anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 700
: (anotatorcounter2 - anotatorcounter1) * 700;
//System.out.println("score plus anotatorcounter: " + score + "\n");
} else if (anotatorcounter1 * 5 < anotatorcounter2 || anotatorcounter2 * 5 < anotatorcounter1) {
score -= anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 400 : (anotatorcounter2 - anotatorcounter1) * 400;
//System.out.println("score minus anotatorcounter: " + score + "\n");
}
}
//System.out.println("tokensCounter1: " + tokensCounter1 + "\ntokensCounter2: " + tokensCounter2 + "\n");
if ((tokensCounter1 > 1 && tokensCounter2 > 1) && tokensCounter1 < tokensCounter2 * 5 && tokensCounter2 < tokensCounter1 * 5) {
if (tokensCounter1 > tokensCounter2 / 2 && tokensCounter2 > tokensCounter1 / 2) {
score += (tokensCounter1 + tokensCounter2) * 1400;
//System.out.println("score plus tokensCounter: " + score + "\n");
} else {
score -= 3500;
//System.out.println("score minus tokensCounter: " + score + "\n");
}
} else {
int elseint = tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
//System.out.println("elseint: " + elseint + "\n");
2019-04-05 13:29:20 +02:00
if ((tokensCounter1 > tokensCounter2 * 5 || tokensCounter2 > tokensCounter1 * 5)
&& tokensCounter1 > 0 && tokensCounter2 > 0) {
score -= tokensCounter1 > tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
//System.out.println("score post tokensCounter: " + score + "\n");
2019-04-05 13:29:20 +02:00
} else if (elseint > 0 && tokensCounter1 > 0 && tokensCounter2 > 0) {
score -= elseint * 2;
//System.out.println("score post elseint: " + elseint + "\n");
}
}
//System.out.println("Score Pre levenhstein: " + score + "\n");
LevenshteinDistance leven = new LevenshteinDistance(str, str1);
double SentenceScoreDiff = leven.computeLevenshteinDistance();
SentenceScoreDiff *= 15;
score -= SentenceScoreDiff;
ConcurrentMap<Integer, String> nerEntities1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities3 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities4 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntityTokenTags1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntityTokenTags2 = new MapMaker().concurrencyLevel(2).makeMap();
for (CoreEntityMention em : pipelineCoreDcoument1.entityMentions()) {
Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
String entityType = em.entityType();
Double EntityConfidences = 0.0;
for (Map.Entry<String, Double> entries : entrySet) {
EntityConfidences = entries.getValue();
}
List<CoreLabel> tokens = em.tokens();
for (CoreLabel token : tokens) {
if (!nerEntityTokenTags1.values().contains(token.tag())) {
if (entityType.equals("PERSON") && EntityConfidences > 0.80) {
nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag());
}
}
}
if (!nerEntities1.values().contains(em.text())) {
nerEntities1.put(nerEntities1.size() + 1, em.text());
nerEntities3.put(nerEntities3.size() + 1, em.entityType());
}
}
for (CoreEntityMention em : pipelineCoreDcoument2.entityMentions()) {
Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
String entityType = em.entityType();
Double EntityConfidences = 0.0;
for (Map.Entry<String, Double> entries : entrySet) {
EntityConfidences = entries.getValue();
}
List<CoreLabel> tokens = em.tokens();
for (CoreLabel token : tokens) {
if (!nerEntityTokenTags2.values().contains(token.tag())) {
if (entityType.equals("PERSON") && EntityConfidences > 0.80) {
nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag());
}
}
}
if (!nerEntities2.values().contains(em.text())) {
nerEntities2.put(nerEntities2.size() + 1, em.text());
nerEntities4.put(nerEntities4.size() + 1, em.entityType());
}
}
//System.out.println("score post PERSON trim: " + score + "\n");
for (String strEnts1 : nerEntities1.values()) {
Collection<String> values = nerEntities2.values();
for (String strEnts2 : values) {
if (strEnts1.equalsIgnoreCase(strEnts2)) {
score += 2500;
//System.out.println("score strEnts1 plus: " + score + "\n");
} else {
score -= 150;
}
}
}
for (String strEnts1 : nerEntities3.values()) {
if (nerEntities4.values().contains(strEnts1)) {
score -= 1500;
//System.out.println("score nerEntities4 minus: " + score + "\n");
} else {
score -= 150;
}
}
for (String strToken : nerEntityTokenTags1.values()) {
if (nerEntityTokenTags2.values().contains(strToken)) {
score += 2000;
//System.out.println("score nerEntityTokenTags plus: " + score + "\n");
} else {
score -= 150;
}
}
//System.out.println("score pre stopwordTokens: " + score + "\n");
ConcurrentMap<Integer, String> stopwordTokens = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> stopwordTokens1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> stopWordLemma = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> stopWordLemma1 = new MapMaker().concurrencyLevel(2).makeMap();
Integer pairCounter1 = 0;
Integer pairCounter2 = 0;
String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with";
List<CoreLabel> tokensSentiment1 = pipelineAnnotation1Sentiment.get(CoreAnnotations.TokensAnnotation.class);
List<CoreLabel> tokensSentiment2 = pipelineAnnotation2Sentiment.get(CoreAnnotations.TokensAnnotation.class);
Set<?> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
Set<?> stopWordsCustom = StopwordAnnotator.getStopWordList(customStopWordList, true);
for (CoreLabel token : tokensSentiment1) {
Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
String word = token.word().toLowerCase();
if (stopWords.contains(word) || stopWordsCustom.contains(word)) {
stopwordTokens.put(stopwordTokens.size(), word);
}
String lemma = token.lemma().toLowerCase();
if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) {
stopWordLemma.put(stopWordLemma.size(), lemma);
}
if (stopword.first() && stopword.second()) {
pairCounter1++;
}
//System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: "
// + word + "\nlemma: " + lemma + "\n");
}
for (CoreLabel token : tokensSentiment2) {
Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
String word = token.word().toLowerCase();
if (stopWords.contains(word) || stopWordsCustom.contains(word)) {
stopwordTokens1.put(stopwordTokens1.size(), word);
}
String lemma = token.lemma().toLowerCase();
if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) {
stopWordLemma1.put(stopWordLemma1.size(), lemma);
}
if (stopword.first() && stopword.second()) {
pairCounter2++;
}
//System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: "
// + word + "\nlemma: " + lemma + "\n");
}
for (String stopwords1 : stopwordTokens.values()) {
for (String stopwords2 : stopwordTokens1.values()) {
if (stopwords1.equals(stopwords2)) {
score -= 500;
//System.out.println("score stopwordsToken: " + score + "\n");
}
}
}
for (String stopwords1 : stopWordLemma.values()) {
for (String stopwords2 : stopWordLemma1.values()) {
if (stopwords1.equals(stopwords2)) {
score -= 500;
//System.out.println("score stopwords Lemma: " + score + "\n");
}
}
}
if (!stopwordTokens.values().isEmpty() && !stopwordTokens1.values().isEmpty()) {
int stopwordsize1 = stopwordTokens.values().size();
int stopwordsize2 = stopwordTokens1.values().size();
if (stopwordsize1 * 5 < stopwordsize2 || stopwordsize2 * 5 < stopwordsize1) {
score -= stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850;
} else {
score += stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850;;
}
//System.out.println("score post stopwordsize: " + score + "\nstopwordsize1: " + stopwordsize1 + "\nstopwordsize2: "
// + stopwordsize2 + "\n");
}
if (pairCounter1 > 0 && pairCounter2 > 0) {
if (pairCounter1 * 3 <= pairCounter2 || pairCounter2 * 3 <= pairCounter1) {
score -= pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 1500 : (pairCounter2 - pairCounter1) * 1500;
} else {
score += pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 700 : (pairCounter2 - pairCounter1) * 700;
}
//System.out.println("score post pairCounter: " + score + "\npairCounter1: " + pairCounter1 + "\npairCounter2: " + pairCounter2 + "\n");
}
} catch (Exception ex) {
//System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n");
2019-03-02 15:10:46 +01:00
}
//System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
smxParam.setDistance(score);
return smxParam;
2019-03-02 15:10:46 +01:00
}
}