package FunctionLayer.StanfordParser; import FunctionLayer.LevenshteinDistance; import FunctionLayer.Datahandler; import FunctionLayer.SimilarityMatrix; import FunctionLayer.StopwordAnnotator; import com.google.common.collect.MapMaker; import edu.mit.jmwe.data.IMWE; import edu.mit.jmwe.data.IMWEDesc; import edu.mit.jmwe.data.IToken; import edu.stanford.nlp.ie.AbstractSequenceClassifier; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.ling.JMWEAnnotation; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.CoreDocument; import edu.stanford.nlp.pipeline.CoreEntityMention; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.process.CoreLabelTokenFactory; import edu.stanford.nlp.process.DocumentPreprocessor; import edu.stanford.nlp.process.PTBTokenizer; import edu.stanford.nlp.process.TokenizerFactory; import edu.stanford.nlp.sentiment.SentimentCoreAnnotations; import edu.stanford.nlp.sequences.DocumentReaderAndWriter; import edu.stanford.nlp.tagger.maxent.MaxentTagger; import edu.stanford.nlp.trees.Constituent; import edu.stanford.nlp.trees.GrammaticalRelation; import edu.stanford.nlp.trees.GrammaticalStructure; import edu.stanford.nlp.trees.GrammaticalStructureFactory; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.trees.TypedDependency; import edu.stanford.nlp.trees.tregex.gui.Tdiff; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Pair; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.OptionalDouble; import 
java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BinaryOperator; import java.util.function.Function; import org.apache.lucene.analysis.core.StopAnalyzer; import org.ejml.simple.SimpleMatrix; /* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */
/**
 * Callable task that compares two texts and derives a numeric score from
 * Stanford CoreNLP and jMWE analysis results.
 * <p>
 * The two texts and their pre-computed annotations are handed in through the
 * constructor; the tagger, pipelines, grammatical-structure factory and
 * classifier are obtained from {@link Datahandler}. {@code call()} starts from
 * a score of -100.0 and raises or lowers it using POS-tag counts, constituency
 * parse differences, typed dependencies, sentiment predictions, classifier
 * output and multi-word-expression annotations.
 * <p>
 * NOTE(review): how the final score ends up in the returned
 * {@code SimilarityMatrix} is not visible in this part of the file - confirm.
 *
 * @author install1
 */
public class SentimentAnalyzerTest implements Callable { private SimilarityMatrix smxParam; private String str; private String str1; private MaxentTagger tagger; private GrammaticalStructureFactory gsf; private StanfordCoreNLP pipeline; private StanfordCoreNLP pipelineSentiment; private AbstractSequenceClassifier classifier; private Annotation jmweStrAnnotation1; private Annotation jmweStrAnnotation2; private Annotation pipelineAnnotation1; private Annotation pipelineAnnotation2; private Annotation pipelineAnnotation1Sentiment; private Annotation pipelineAnnotation2Sentiment; private CoreDocument pipelineCoreDcoument1; private CoreDocument pipelineCoreDcoument2;
    /**
     * Stores the two texts with their pre-computed annotations and fetches the
     * NLP components (tagger, both pipelines, grammatical-structure factory,
     * classifier) from the static getters of {@link Datahandler}.
     * <p>
     * NOTE(review): the "1"/"2" suffixes of the annotation parameters are not
     * tied to {@code str}/{@code str1} by anything in this constructor; which
     * text each annotation was built from must be confirmed against the caller.
     *
     * @param str text stored in {@code str}; call() tokenizes and POS-tags it
     *        and passes it to the classifier
     * @param str1 text stored in {@code str1}; call() tokenizes and POS-tags it
     *        and passes it to the classifier
     * @param smxParam stored in {@code smxParam}; presumably the result holder
     *        that call() fills and returns - TODO confirm, not visible here
     * @param str1Annotation stored as {@code jmweStrAnnotation1}; call() reads
     *        the JMWEAnnotation of each of its sentences
     * @param str2Annotation stored as {@code jmweStrAnnotation2}; call() reads
     *        the JMWEAnnotation of each of its sentences
     * @param strPipeline1 stored as {@code pipelineAnnotation1}; call() reads
     *        the constituency parse tree of each of its sentences
     * @param strPipeline2 stored as {@code pipelineAnnotation2}; call() reads
     *        the constituency parse tree of each of its sentences and builds
     *        grammatical structures from them
     * @param strPipeSentiment1 stored as {@code pipelineAnnotation1Sentiment};
     *        call() reads the sentiment-annotated tree of each sentence
     * @param strPipeSentiment2 stored as {@code pipelineAnnotation2Sentiment};
     *        call() reads the sentiment-annotated tree of each sentence
     * @param pipelineCoreDcoument1 stored as {@code pipelineCoreDcoument1}; its
     *        use is not visible in this part of the file
     * @param pipelineCoreDcoument2 stored as {@code pipelineCoreDcoument2}; its
     *        use is not visible in this part of the file
     */
    public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam, Annotation str1Annotation, Annotation str2Annotation, Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2, CoreDocument pipelineCoreDcoument1, CoreDocument pipelineCoreDcoument2) { this.str = str; this.str1 = str1; this.smxParam = smxParam; this.tagger = Datahandler.getTagger(); this.pipeline = Datahandler.getPipeline(); this.pipelineSentiment = Datahandler.getPipelineSentiment(); this.gsf = Datahandler.getGsf(); this.classifier = Datahandler.getClassifier(); this.jmweStrAnnotation1 = str1Annotation; this.jmweStrAnnotation2 = str2Annotation; this.pipelineAnnotation1 = strPipeline1; this.pipelineAnnotation2 = strPipeline2; this.pipelineAnnotation1Sentiment = 
strPipeSentiment1; this.pipelineAnnotation2Sentiment = strPipeSentiment2; this.pipelineCoreDcoument1 = pipelineCoreDcoument1; this.pipelineCoreDcoument2 = pipelineCoreDcoument2; } @Override public SimilarityMatrix call() { Double score = -100.0; try { List> taggedwordlist1 = new ArrayList(); List> taggedwordlist2 = new ArrayList(); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); //noneDelete TokenizerFactory ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete"); tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { taggedwordlist1.add(tagger.tagSentence(sentence)); //taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } tokenizer = new DocumentPreprocessor(new StringReader(str)); tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { taggedwordlist2.add(tagger.tagSentence(sentence)); //taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } int counter = 0; int counter1 = 0; counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum); counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum); int overValue = counter >= counter1 ? 
counter - counter1 : counter1 - counter; overValue *= 32; score -= overValue; ConcurrentMap tgwlistIndex = new MapMaker().concurrencyLevel(2).makeMap(); taggedwordlist1.forEach((TGWList) -> { TGWList.forEach((TaggedWord) -> { if (!tgwlistIndex.values().contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) { tgwlistIndex.put(tgwlistIndex.size() + 1, TaggedWord.tag()); } }); }); AtomicInteger runCount = new AtomicInteger(0); taggedwordlist2.forEach((TGWList) -> { TGWList.forEach((TaggedWord) -> { if (tgwlistIndex.values().contains(TaggedWord.tag())) { tgwlistIndex.values().remove(TaggedWord.tag()); runCount.getAndIncrement(); } }); }); score += runCount.get() * 64; ////System.out.println("score post runCountGet: " + score + "\n"); ConcurrentMap sentenceConstituencyParseList = new MapMaker().concurrencyLevel(2).makeMap(); try { for (CoreMap sentence : pipelineAnnotation1.get(CoreAnnotations.SentencesAnnotation.class)) { Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); sentenceConstituencyParseList.put(sentenceConstituencyParseList.size(), sentenceConstituencyParse); } ConcurrentMap alltypeDepsSizeMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap summationMap = new MapMaker().concurrencyLevel(2).makeMap(); for (CoreMap sentence : pipelineAnnotation2.get(CoreAnnotations.SentencesAnnotation.class)) { int constiRelationsize = 0; Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse); Collection allTypedDependencies = gs.allTypedDependencies(); ConcurrentMap filerTreeContent = new MapMaker().concurrencyLevel(2).makeMap(); for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList.values()) { Set constinuent1 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1); Set constinuent2 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse); ConcurrentMap 
constiLabels = new MapMaker().concurrencyLevel(2).makeMap(); for (Constituent consti : constinuent1) { for (Constituent consti1 : constinuent2) { if (consti.value().equals(consti1.value()) && !constiLabels.values().contains(consti.value())) { constiLabels.put(constiLabels.size(), consti.value()); constiRelationsize++; } } } int constituents1 = constinuent1.size() - constiRelationsize; int constituents2 = constinuent2.size() - constiRelationsize; if (constituents1 * 5 < constituents2 || constituents2 * 5 < constituents1) { score -= (constituents1 + constituents2) * 200; } else if (constituents1 == 0 || constituents2 == 0) { score -= constiRelationsize * 200; } else { score += constiRelationsize * 160; //System.out.println("score post constiRelationsize: " + score + "\nconstituents1: " + constituents1 // + "\nconstituents2: " + constituents2 + "\nconstiRelationsize: " + constiRelationsize + "\n"); } GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1); Collection allTypedDependencies1 = gs1.allTypedDependencies(); int relationApplicable1 = 0; int relationApplicable2 = 0; int grammaticalRelation1 = 0; int grammaticalRelation2 = 0; for (TypedDependency TDY1 : allTypedDependencies1) { IndexedWord dep = TDY1.dep(); IndexedWord gov = TDY1.gov(); GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { score += 700; //System.out.println("grammaticalRelation applicable score: " + score + "\n"); grammaticalRelation1++; } GrammaticalRelation reln = TDY1.reln(); if (reln.isApplicable(sentenceConstituencyParse)) { score += 525; relationApplicable1++; } } for (TypedDependency TDY : allTypedDependencies) { IndexedWord dep = TDY.dep(); IndexedWord gov = TDY.gov(); GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { score += 900; //System.out.println("grammaticalRelation 
appliceable score: " + score + "\n"); grammaticalRelation2++; } GrammaticalRelation reln = TDY.reln(); if (reln.isApplicable(sentenceConstituencyParse1)) { score += 525; //System.out.println("reln appliceable score: " + score + "\n"); relationApplicable2++; } } if ((grammaticalRelation1 == 0 && grammaticalRelation2 > 4) || (grammaticalRelation2 == 0 && grammaticalRelation1 > 4)) { score -= 3450; //System.out.println("grammaticalRelation1 score trim: " + score + "\ngrammaticalRelation1: " + grammaticalRelation1 // + "\ngrammaticalRelation2: " + grammaticalRelation2 + "\n"); } if (!allTypedDependencies.isEmpty() || !allTypedDependencies1.isEmpty()) { int allTypeDep1 = allTypedDependencies.size(); int allTypeDep2 = allTypedDependencies1.size(); if (allTypeDep1 <= allTypeDep2 * 5 && allTypeDep2 <= allTypeDep1 * 5) { if (allTypeDep1 > 0 && allTypeDep2 > 0) { if (allTypeDep1 * 2 <= allTypeDep2 || allTypeDep2 * 2 <= allTypeDep1) { score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 160 : (allTypeDep2 - allTypeDep1) * 160; //System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: " // + allTypeDep2 + "\n"); } else { score += allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 600 : (allTypeDep2 - allTypeDep1) * 600; //System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: " // + allTypeDep2 + "\n"); } alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep1); alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep2); } } if (allTypeDep1 >= 5 && allTypeDep2 >= 5) { int largerTypeDep = allTypeDep1 > allTypeDep2 ? allTypeDep1 : allTypeDep2; int smallerTypeDep = allTypeDep1 < allTypeDep2 ? 
allTypeDep1 : allTypeDep2; int summation = (largerTypeDep * largerTypeDep) - (smallerTypeDep * smallerTypeDep); if (summation / largerTypeDep < 15.0 && summation / largerTypeDep > 10.0 && smallerTypeDep * 2 > largerTypeDep && !summationMap.values().contains(summation)) { score += summation * 80; summationMap.put(summationMap.size() + 1, summation); //System.out.println("score post summation: " + score + "\nsummation: " + summation + "\n"); } else if (largerTypeDep == smallerTypeDep) { score += 2500; //System.out.println("score largerTypeDep equals smallerTypeDep: " + score + "\nlargerTypeDep: " + largerTypeDep + "\n"); } } if (relationApplicable1 > 0 && relationApplicable2 > 0 && relationApplicable1 == relationApplicable2 && grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 == grammaticalRelation2) { score += 3500; //System.out.println("score relationApplicable equal: " + score + "\n"); } else if (allTypeDep1 * 5 < allTypeDep2 || allTypeDep2 * 5 < allTypeDep1) { score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * (allTypeDep2 * 450) : (allTypeDep2 - allTypeDep1) * (allTypeDep1 * 450); //System.out.println("score minus grammaticalRelation equal: " + score + "\n"); } if (relationApplicable1 > 1 && relationApplicable2 > 1 && relationApplicable1 * 3 > relationApplicable2 && relationApplicable2 * 3 > relationApplicable1) { score += relationApplicable1 > relationApplicable2 ? (relationApplicable1 - relationApplicable2) * 1500 : (relationApplicable2 - relationApplicable1) * 1500; //System.out.println("score relationApplicable plus: " + score + "\n"); } else if (relationApplicable1 * 5 < relationApplicable2 || relationApplicable2 * 5 < relationApplicable1) { score -= relationApplicable1 > relationApplicable2 ? 
(relationApplicable1 - relationApplicable2) * 500 : (relationApplicable2 - relationApplicable1) * 500; //System.out.println("score relationApplicable minus: " + score + "\n"); } if (grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 * 3 > grammaticalRelation2 && grammaticalRelation2 * 3 > grammaticalRelation1) { score += grammaticalRelation1 > grammaticalRelation2 ? (grammaticalRelation1 - grammaticalRelation2) * 1500 : (grammaticalRelation2 - grammaticalRelation1) * 1500; //System.out.println("score grammaticalRelation plus: " + score + "\n"); } else if (grammaticalRelation1 * 5 < grammaticalRelation2 || grammaticalRelation2 * 5 < grammaticalRelation1) { score -= grammaticalRelation1 > grammaticalRelation2 ? (grammaticalRelation1 - grammaticalRelation2) * 500 : (grammaticalRelation2 - grammaticalRelation1) * 500; //System.out.println("score grammaticalRelation minus: " + score + "\n"); } //System.out.println("score post relationApplicable1 veri: " + score + "\nrelationApplicable1: " + relationApplicable1 // + "\nrelationApplicable2: " + relationApplicable2 + "\ngrammaticalRelation1: " + grammaticalRelation1 + "\n" // + "grammaticalRelation2: " + grammaticalRelation2 + "\n"); } AtomicInteger runCount1 = new AtomicInteger(0); sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> { sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.values().contains(LBW.lemma()))).map((_item) -> { filerTreeContent.put(filerTreeContent.size() + 1, LBW.lemma()); return _item; }).forEachOrdered((_item) -> { runCount1.getAndIncrement(); }); }); score += runCount1.get() * 250; } } //System.out.println("score pre typeSizeSmallest: " + score + "\n"); int typeSizeSmallest = 100; int typeSizeLargest = 0; for (Integer i : alltypeDepsSizeMap.values()) { if (i > typeSizeLargest) { typeSizeLargest = i; } if (i < typeSizeSmallest) { typeSizeSmallest = i; } } if (typeSizeLargest >= 
typeSizeSmallest * 3) { score -= typeSizeLargest * 160; } typeSizeLargest = 0; typeSizeSmallest = 100; for (int i : summationMap.values()) { if (i > typeSizeLargest) { typeSizeLargest = i; } if (i < typeSizeSmallest) { typeSizeSmallest = i; } } if (typeSizeLargest >= typeSizeSmallest * 3) { score -= typeSizeLargest * 160; } } catch (Exception ex) { //System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n"); } sentenceConstituencyParseList.clear(); ConcurrentMap simpleSMXlist = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap simpleSMXlistVector = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap sentiment1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap sentiment2 = new MapMaker().concurrencyLevel(2).makeMap(); for (CoreMap sentence : pipelineAnnotation1Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); sentiment1.put(sentiment1.size(), RNNCoreAnnotations.getPredictedClass(tree)); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); simpleSMXlist.put(simpleSMXlist.size(), predictions); simpleSMXlistVector.put(simpleSMXlistVector.size() + 1, nodeVector); } ConcurrentMap elementSumCounter = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap dotMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap elementSumMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap dotSumMap = new MapMaker().concurrencyLevel(2).makeMap(); //System.out.println("score pre pipelineAnnotation2Sentiment: " + score + "\n"); for (CoreMap sentence : pipelineAnnotation2Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); sentiment2.put(sentiment2.size() + 1, RNNCoreAnnotations.getPredictedClass(tree)); SimpleMatrix predictions = 
RNNCoreAnnotations.getPredictions(tree); SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); ConcurrentMap AccumulateDotMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap subtractorMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap dotPredictions = new MapMaker().concurrencyLevel(2).makeMap(); Double largest = 10.0; Double shortest = 100.0; for (SimpleMatrix simpleSMX : simpleSMXlist.values()) { double dotPrediction = predictions.dot(simpleSMX) * 100; AccumulateDotMap.put(AccumulateDotMap.size() + 1, dotPrediction); double subtracter = dotPrediction > 50 ? dotPrediction - 100 : dotPrediction > 0 ? 100 - dotPrediction : 0; subtractorMap.put(subtractorMap.size() + 1, subtracter); if (!dotPredictions.values().contains(dotPrediction)) { if (dotPrediction > largest) { largest = dotPrediction; } if (dotPrediction < shortest) { shortest = dotPrediction; } Double dotPredictionIntervalDifference = largest - shortest; subtracter *= 25; //System.out.println("subtracter: " + subtracter + "\n"); if (dotPredictionIntervalDifference < 5.0) { if (dotPredictions.values().size() > 0) { if (subtracter > 0) { score -= subtracter; } else { score += subtracter; //System.out.println("score + subtracter: " + score + "\nsubtracter: " + subtracter + "\n"); } } } else { score -= subtracter / 10; } } else { subtracter -= 100; subtracter *= 25; score += subtracter * dotPrediction; //System.out.println("score + subtracter * dotPrediction: " + score + "\nsubtracter: " + subtracter + "\ndotPrediction: " //+ dotPrediction + "\n"); } dotPredictions.put(dotPredictions.size() + 1, dotPrediction); } //System.out.println("score post subtracter1: " + score + "\n"); Double subTracPre = 0.0; for (Double subtractors : subtractorMap.values()) { if (Objects.equals(subTracPre, subtractors)) { score -= 1500; //System.out.println("score minus subTracPre equals: " + score + "\nsubTracPre: " + subTracPre + "\n"); } subTracPre = subtractors; } ConcurrentMap 
DotOverTransfer = dotPredictions; dotPredictions = new MapMaker().concurrencyLevel(2).makeMap(); Double totalSubtraction = 0.0; for (SimpleMatrix simpleSMX : simpleSMXlist.values()) { double dotPrediction = simpleSMX.dot(predictions) * 100; AccumulateDotMap.put(AccumulateDotMap.size() + 1, dotPrediction); double subtracter = dotPrediction > 50 ? dotPrediction - 100 : dotPrediction > 0 ? 100 - dotPrediction : 0; //System.out.println("dotPrediction: " + dotPrediction + "\nsubtracter: " + subtracter + "\n"); subtractorMap.put(subtractorMap.size() + 1, subtracter); if (!dotPredictions.values().contains(dotPrediction)) { for (Double transferDots : DotOverTransfer.values()) { if (transferDots == dotPrediction) { totalSubtraction += transferDots; } else { score -= subtracter * 25; //System.out.println("score minus subtracter: " + score + "\nsubtracter: " + subtracter + "\n"); } //System.out.println("transferDots: " + transferDots + "\n"); } } else { subtracter -= 100; subtracter *= 25; score -= subtracter * dotPrediction; //System.out.println("score minus subtracter * dotPrediction 2: " + score + "\ndotPrediction: " // + dotPrediction + "\n"); } dotPredictions.put(dotPredictions.size() + 1, dotPrediction); } if (totalSubtraction > 45.0) { score -= totalSubtraction * 25; } else { score += totalSubtraction * 25; } //System.out.println("score post totalSubtraction: " + score + "\ntotalSubtraction: " + totalSubtraction + "\n"); Double preAccumulatorDot = 0.0; Double postAccumulatorDot = 0.0; for (Double accumulators : AccumulateDotMap.values()) { if (Objects.equals(preAccumulatorDot, accumulators)) { if (Objects.equals(postAccumulatorDot, accumulators)) { score -= 1400; } postAccumulatorDot = accumulators; } preAccumulatorDot = accumulators; } subTracPre = 0.0; for (Double subtractors : subtractorMap.values()) { if (Objects.equals(subTracPre, subtractors)) { score -= 500; } subTracPre = subtractors; } Double preDot = 0.0; Double postDot = 0.0; for (SimpleMatrix simpleSMX : 
simpleSMXlistVector.values()) { double dot = nodeVector.dot(simpleSMX); double elementSum = nodeVector.kron(simpleSMX).elementSum(); if (preDot == dot) { if (postDot == dot) { score -= 500; } postDot = dot; } preDot = dot; elementSum = Math.round(elementSum * 100.0) / 100.0; elementSumCounter.put(elementSumCounter.size() + 1, elementSum); dotMap.put(dotMap.size() + 1, dot); if (!dotSumMap.values().contains(dot)) { if (dot < 0.000) { score += dot * 500; //System.out.println("score + dot * 500: " + score + "\ndot: " + dot + "\n"); } else if (dot < 0.1) { score += 256; //System.out.println("score + 256: " + score + "\ndot: " + dot + " 0.50) { score -= 1200; } dotSumMap.put(dotSumMap.size() + 1, dot); } else { score -= 250; } if (!elementSumMap.values().contains(elementSum)) { if (elementSum < 0.01 && elementSum > 0.00) { score += 3300; //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " // + elementSum + "\n"); } else if (elementSum > 0.1 && elementSum < 0.2) { score += 1100; //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " // + elementSum + "\n"); } else { score -= elementSum * 1024; } elementSumMap.put(elementSumMap.size() + 1, elementSum); } else { score -= 250; } } for (SimpleMatrix simpleSMX : simpleSMXlistVector.values()) { double dot = simpleSMX.dot(nodeVector); double elementSum = simpleSMX.kron(nodeVector).elementSum(); if (preDot == dot) { if (postDot == dot) { score -= 500; } postDot = dot; } preDot = dot; elementSum = Math.round(elementSum * 100.0) / 100.0; elementSumCounter.put(elementSumCounter.size() + 1, elementSum); dotMap.put(dotMap.size() + 1, dot); if (!dotSumMap.values().contains(dot)) { if (dot < 0.1) { score += 256; //System.out.println("score dot < 0.1: " + score + "\ndot: " // + dot + "\n"); } if (dot > 0.50) { score -= 1400; } dotSumMap.put(dotSumMap.size() + 1, dot); } else { score -= 250; } if (!elementSumMap.values().contains(elementSum)) { if 
(elementSum < 0.01 && elementSum > 0.00) { score += 1300; //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " // + elementSum + "\n"); } else if (elementSum > 0.1 && elementSum < 1.0) { score += 1100; //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " // + elementSum + "\n"); } else { score -= elementSum * 1024; } elementSumMap.put(elementSumMap.size() + 1, elementSum); } else { score -= 250; } } } //System.out.println("score post sentiment analyzer2: " + score + "\n"); OptionalDouble minvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).min(); OptionalDouble maxvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).max(); double total = minvalueDots.getAsDouble() + maxvalueDots.getAsDouble(); boolean permitted = false; if (minvalueDots.getAsDouble() != maxvalueDots.getAsDouble()) { permitted = true; } if (permitted) { Double dotsVariance = maxvalueDots.getAsDouble() - minvalueDots.getAsDouble(); //System.out.println("maxvalueDots.getAsDouble():" + maxvalueDots.getAsDouble() + "\nminvalueDots.getAsDouble():" // + minvalueDots.getAsDouble() + "\ndotsVariance: " + dotsVariance + "\n"); if (maxvalueDots.getAsDouble() > minvalueDots.getAsDouble() * 10) { score -= 5500; } else if (minvalueDots.getAsDouble() < -0.10) { score -= 3500; } else if (dotsVariance < 0.5 && dotsVariance > 0.1) { score -= 3500; } else if (dotsVariance > minvalueDots.getAsDouble() * 2) { score += 3500; //System.out.println("varians 4 score. 
" + score + "\n"); } else if (minvalueDots.getAsDouble() * 3 > maxvalueDots.getAsDouble() && maxvalueDots.getAsDouble() < 0.1001) { score += dotsVariance * 200000; } } //System.out.println("score post dotsVariance: " + score + "\n"); OptionalDouble minvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).min(); OptionalDouble maxvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).max(); Double elementsVariance = maxvalueElements.getAsDouble() - minvalueElements.getAsDouble(); //System.out.println("elementsVariance: " + elementsVariance + "\nmaxvalueElements.getAsDouble(): " // + maxvalueElements.getAsDouble() + "\nminvalueElements.getAsDouble(): " + minvalueElements.getAsDouble() + "\n"); if (elementsVariance == 0.0) { score -= 550; } else if (elementsVariance < 0.02 && elementsVariance > -0.01) { score += 3500; } else if (minvalueElements.getAsDouble() < 0.0 && minvalueElements.getAsDouble() - maxvalueElements.getAsDouble() < 0.50) { score -= 2500; } else if (elementsVariance * 2 >= maxvalueElements.getAsDouble() && elementsVariance < 0.1) { score -= elementsVariance * 86000; } //System.out.println("score post elementsVariance: " + score + "\n"); score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500; DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter(); List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter); List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter); score -= (classifyRaw1.size() > classifyRaw2.size() ? 
classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200; //System.out.println("score post classifyRaw: " + score + "\n"); int mainSentiment1 = 0; int longest1 = 0; int mainSentiment2 = 0; int longest2 = 0; for (CoreMap sentence : pipelineAnnotation1Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); String partText = sentence.toString(); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); if (partText.length() > longest1) { mainSentiment1 = sentiment; longest1 = partText.length(); } } for (CoreMap sentence : pipelineAnnotation2Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); String partText = sentence.toString(); if (partText.length() > longest2) { mainSentiment2 = sentiment; longest2 = partText.length(); } } //System.out.println("score post pipelineAnnotation2Sentiment: " + score + "\n"); if (longest1 != longest2) { long deffLongest = longest1 > longest2 ? longest1 : longest2; long deffshorter = longest1 < longest2 ? 
longest1 : longest2; if (deffLongest > deffshorter * 5) { score -= 5500; } else if (deffLongest < (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) { score += (deffLongest - deffshorter) * 20; } else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) { score += (deffLongest - deffshorter) * 20; } else if (deffLongest - deffshorter < 2) { score += (deffLongest - deffshorter) * 20; } else if (deffshorter * 2 >= deffLongest && deffshorter * 2 < deffLongest + 5) { score += (deffLongest - deffshorter) * 20; } else { score -= (deffLongest - deffshorter) * 50; } if (deffLongest - deffshorter <= 5) { score += 250; } } int tokensCounter1 = 0; int tokensCounter2 = 0; int anotatorcounter1 = 0; int anotatorcounter2 = 0; int inflectedCounterPositive1 = 0; int inflectedCounterPositive2 = 0; int inflectedCounterNegative = 0; int MarkedContinuousCounter1 = 0; int MarkedContinuousCounter2 = 0; Integer MarkedContiniousCounter1Entries = 0; Integer MarkedContiniousCounter2Entries = 0; int UnmarkedPatternCounter1 = 0; int UnmarkedPatternCounter2 = 0; ConcurrentMap ITokenMapTag1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap ITokenMapTag2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenStems1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenStems2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenForm1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenForm2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenGetEntry1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenGetEntry2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenGetiPart1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenGetiPart2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenEntryPOS1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenEntryPOS2 
= new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap entryCounts1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap entryCounts2 = new MapMaker().concurrencyLevel(2).makeMap(); try { List sentences = jmweStrAnnotation1.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (IMWE token : sentence.get(JMWEAnnotation.class)) { if (token.isInflected()) { inflectedCounterPositive1++; } else { inflectedCounterNegative++; } strTokenForm1.put(strTokenForm1.size() + 1, token.getForm()); strTokenGetEntry1.put(strTokenGetEntry1.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1)); Collection values = token.getPartMap().values(); IMWEDesc entry = token.getEntry(); MarkedContinuousCounter1 += entry.getMarkedContinuous(); UnmarkedPatternCounter1 += entry.getUnmarkedPattern(); for (IMWEDesc.IPart iPart : values) { strTokenGetiPart1.put(strTokenGetiPart1.size() + 1, iPart.getForm()); } for (String strPostPrefix : entry.getPOS().getPrefixes()) { strTokenEntryPOS1.put(strTokenEntryPOS1.size() + 1, strPostPrefix); } for (int counts : entry.getCounts()) { entryCounts1.put(entryCounts1.size() + 1, counts); } for (IToken tokens : token.getTokens()) { ITokenMapTag1.put(ITokenMapTag1.size() + 1, tokens.getTag()); for (String strtoken : tokens.getStems()) { strTokenStems1.put(strTokenStems1.size() + 1, strtoken); MarkedContiniousCounter1Entries++; } } tokensCounter1++; } anotatorcounter1++; } sentences = jmweStrAnnotation2.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (IMWE token : sentence.get(JMWEAnnotation.class)) { if (token.isInflected()) { inflectedCounterPositive2++; } else { inflectedCounterNegative--; } strTokenForm2.put(strTokenForm2.size() + 1, token.getForm()); strTokenGetEntry2.put(strTokenGetEntry2.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1)); Collection values = token.getPartMap().values(); 
IMWEDesc entry = token.getEntry(); MarkedContinuousCounter2 += entry.getMarkedContinuous(); UnmarkedPatternCounter2 += entry.getUnmarkedPattern(); for (IMWEDesc.IPart iPart : values) { strTokenGetiPart2.put(strTokenGetiPart2.size() + 1, iPart.getForm()); } for (String strPostPrefix : entry.getPOS().getPrefixes()) { strTokenEntryPOS2.put(strTokenEntryPOS2.size() + 1, strPostPrefix); } for (int counts : entry.getCounts()) { entryCounts2.put(entryCounts2.size() + 1, counts); } for (IToken tokens : token.getTokens()) { ITokenMapTag2.put(ITokenMapTag2.size() + 1, tokens.getTag()); for (String strtoken : tokens.getStems()) { strTokenStems2.put(strTokenStems2.size() + 1, strtoken); MarkedContiniousCounter2Entries++; } } tokensCounter2++; } anotatorcounter2++; } } catch (Exception ex) { //System.out.println("SENTIMENT stacktrace: " + ex.getMessage() + "\n"); } int entry1 = entryCounts1.values().size(); int entry2 = entryCounts2.values().size(); //System.out.println("score post JMWEAnnotation: " + score + "\nentry1: " + entry1 + "\nentry2: " + entry2 + "\n"); if (entry1 > 0 && entry2 > 0) { if ((entry1 >= entry2 * 5) || (entry2 >= entry1 * 5)) { score -= entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450; //System.out.println("1"); } else if ((entry1 >= entry2 * 50 || entry2 >= entry1 * 50)) { score -= entry1 > entry2 ? entry1 * 180 : entry2 * 180; //System.out.println("2"); } else if (entry1 >= entry2 * 2 || entry2 >= entry1 * 2) { score += entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450; //System.out.println("3"); } else if (entry1 > 10 && entry2 > 10 && entry1 * 2 > entry2 && entry2 * 2 > entry1) { score += entry1 > entry2 ? 
entry2 * 600 : entry1 * 600; //System.out.println("6"); } } ConcurrentMap countsMap = new MapMaker().concurrencyLevel(2).makeMap(); for (int counts : entryCounts1.values()) { for (int counts1 : entryCounts2.values()) { if (counts == counts1 && counts > 0 && !countsMap.values().contains(counts)) { score += counts * 250; //System.out.println("score post counts: " + score + "\nCounts: " + counts + "\n"); countsMap.put(countsMap.size() + 1, counts); } } } if (strTokenEntryPOS1.values().size() > 1 && strTokenEntryPOS2.values().size() > 1) { for (String strTokenPos1 : strTokenEntryPOS1.values()) { for (String strTokenPos2 : strTokenEntryPOS2.values()) { if (strTokenPos1.equals(strTokenPos2)) { score += 500; } else { score -= 650; //System.out.println("strTokenEntryPOS score: " + score + "\n"); } } } } //System.out.println("score pre UnmarkedPatternCounter: " + score + "\nUnmarkedPatternCounter1: " + UnmarkedPatternCounter1 // + "\nUnmarkedPatternCounter2: " + UnmarkedPatternCounter2 + "\n"); if (UnmarkedPatternCounter1 > 0 && UnmarkedPatternCounter2 > 0) { if (UnmarkedPatternCounter1 * 2 > UnmarkedPatternCounter2 && UnmarkedPatternCounter2 * 2 > UnmarkedPatternCounter1) { score += 2500; } else if (UnmarkedPatternCounter1 * 5 < UnmarkedPatternCounter2 || UnmarkedPatternCounter2 * 5 < UnmarkedPatternCounter1) { score -= 4000; } } //System.out.println("score post UnmarkedPatternCounter: " + score + "\n"); if (MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0) { if (MarkedContinuousCounter1 > MarkedContinuousCounter2 * 50 || MarkedContinuousCounter2 > MarkedContinuousCounter1 * 50) { score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? 
MarkedContinuousCounter1 * 120 : MarkedContinuousCounter2 * 120; //System.out.println("score post MarkedContinuousCounter too big: " + score + "\n"); } else if (!Objects.equals(MarkedContiniousCounter1Entries, MarkedContiniousCounter2Entries) && (MarkedContinuousCounter1 * 2 >= MarkedContinuousCounter2 * MarkedContinuousCounter1) || (MarkedContinuousCounter2 * 2 >= MarkedContinuousCounter1 * MarkedContinuousCounter2)) { score += 4500; } else if (MarkedContiniousCounter1Entries == 0 || MarkedContiniousCounter2Entries == 0) { score += MarkedContinuousCounter1 > MarkedContinuousCounter2 ? (MarkedContinuousCounter2 - MarkedContinuousCounter1) * 500 : (MarkedContinuousCounter1 - MarkedContinuousCounter2) * 500; } if (MarkedContiniousCounter1Entries > 0 && MarkedContiniousCounter2Entries > 0 && MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0 && MarkedContinuousCounter1 < MarkedContinuousCounter2 * 10 && MarkedContinuousCounter2 < MarkedContinuousCounter1 * 10) { if (MarkedContiniousCounter1Entries > MarkedContiniousCounter2Entries * 5 || MarkedContiniousCounter2Entries > MarkedContiniousCounter1Entries * 5 || MarkedContiniousCounter1Entries * 5 < MarkedContinuousCounter1 || MarkedContiniousCounter1Entries * 5 < MarkedContinuousCounter2 || MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter1 || MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter2) { score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? 
MarkedContinuousCounter1 * 400 : MarkedContinuousCounter2 * 400; //System.out.println("score post MarkedContinuousCounter: " + score + "\n"); } } } ConcurrentMap strtokensMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String strTokeniPart1 : strTokenGetiPart1.values()) { for (String strTokeniPart2 : strTokenGetiPart2.values()) { if (strTokeniPart1.equals(strTokeniPart2) && !strtokensMap.values().contains(strTokeniPart2)) { strtokensMap.put(strtokensMap.size() + 1, strTokeniPart2); score += 400; } else { score -= 200; //System.out.println("score minus strTokenGetiPart: " + score + "\n"); } } } int tokenEntry1 = strTokenGetEntry1.values().size(); int tokenEntry2 = strTokenGetEntry2.values().size(); boolean boundariyLeacks = false; int remnantCounter = 0; if (tokenEntry1 * 2 != tokenEntry2 && tokenEntry2 * 2 != tokenEntry1) { boundariyLeacks = true; } ConcurrentMap entryTokenMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String strTokenEntry1 : strTokenGetEntry1.values()) { for (String strTokenEntry2 : strTokenGetEntry2.values()) { if (!entryTokenMap.values().contains(strTokenEntry2)) { if (strTokenEntry1.equals(strTokenEntry2)) { score += boundariyLeacks ? 
2500 : 2500 / 2; } else if (!boundariyLeacks) { score -= 450; //System.out.println("boundariyLeacks score: " + score + "\n"); } else { remnantCounter++; } } entryTokenMap.put(entryTokenMap.size() + 1, strTokenEntry2); } } //System.out.println("score pre remnantCounter: " + score + "\n"); score += remnantCounter * 250; //System.out.println("score post remnantCounter: " + score + "\n"); ConcurrentMap iTokenMapTagsMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String strmapTag : ITokenMapTag1.values()) { for (String strmapTag1 : ITokenMapTag2.values()) { if (strmapTag.equals(strmapTag1)) { score -= 1450; } else if (!iTokenMapTagsMap.values().contains(strmapTag)) { score += 725; iTokenMapTagsMap.put(iTokenMapTagsMap.size() + 1, strmapTag); } } } //System.out.println("score post strmapTag: " + score + "\n"); int tokenform1size = strTokenForm1.values().size(); int tokenform2size = strTokenForm2.values().size(); if (tokenform1size > 0 || tokenform2size > 0) { if (tokenform1size < tokenform2size * 5 && tokenform2size < tokenform1size * 5) { for (String strTokenForm1itr1 : strTokenForm1.values()) { for (String strTokenForm1itr2 : strTokenForm2.values()) { if (strTokenForm1itr1.equals(strTokenForm1itr2)) { score -= 1600; } else { score += 500; //System.out.println("tokenform1size score500: " + score + "\n"); } } } } else if (tokenform1size > 0 && tokenform2size > 0) { if (tokenform1size * 2 >= tokenform2size && tokenform2size * 2 >= tokenform1size) { score += tokenform1size > tokenform2size ? tokenform1size * 600 : tokenform2size * 600; } else if (tokenform1size * 4 <= tokenform2size || tokenform2size * 4 <= tokenform1size) { score -= tokenform1size > tokenform2size ? 
(tokenform1size - tokenform2size) * 600 : (tokenform2size - tokenform1size) * 600; } //System.out.println("tokenform1size score: " + score + "\ntokenform1size: " + tokenform1size + "\ntokenform2size: " // + tokenform2size + "\n"); } } //System.out.println("Score pre tokenStemmingMap: " + score + "\n"); ConcurrentMap tokenStemmingMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String strTokenStem : strTokenStems1.values()) { for (String strTokenStem1 : strTokenStems2.values()) { if (strTokenStem.equals(strTokenStem1) && !tokenStemmingMap.values().contains(strTokenStem)) { score += 1500; tokenStemmingMap.put(tokenStemmingMap.size() + 1, strTokenStem); } //System.out.println("score strTokenStem: " + score + "\n"); } } //System.out.println("Score pre inflected: " + score + "\n"); //System.out.println("inflectedCounterPositive1: " + inflectedCounterPositive1 + "\ninflectedCounterPositive2: " // + inflectedCounterPositive2 + "\ninflectedCounterNegative: " + inflectedCounterNegative + "\n"); if (inflectedCounterPositive1 + inflectedCounterPositive2 > inflectedCounterNegative && inflectedCounterNegative > 0) { score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 650; //System.out.println("score inflectedCounterPositive plus: " + score + "\n"); } if (inflectedCounterPositive1 > 0 && inflectedCounterPositive2 > 0) { if (inflectedCounterPositive1 * 2 > inflectedCounterPositive2 && inflectedCounterPositive2 * 2 > inflectedCounterPositive1) { score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550; //System.out.println("score plus inflectedCounterPositive * 2: " + score + "\n"); } else if (inflectedCounterPositive1 * 5 < inflectedCounterPositive2 || inflectedCounterPositive2 * 5 < inflectedCounterPositive1) { score -= inflectedCounterPositive1 > inflectedCounterPositive2 ? 
(inflectedCounterPositive1 - inflectedCounterPositive2) * 400 : (inflectedCounterPositive2 - inflectedCounterPositive1) * 400; //System.out.println("score minus inflectedCounterPositive * 2: " + score + "\n"); } } //System.out.println("anotatorcounter1: " + anotatorcounter1 + "\nanotatorcounter2: " + anotatorcounter2 + "\n"); if (anotatorcounter1 > 1 && anotatorcounter2 > 1) { if (anotatorcounter1 * 2 > anotatorcounter2 && anotatorcounter2 * 2 > anotatorcounter1) { score += anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 700 : (anotatorcounter2 - anotatorcounter1) * 700; //System.out.println("score plus anotatorcounter: " + score + "\n"); } else if (anotatorcounter1 * 5 < anotatorcounter2 || anotatorcounter2 * 5 < anotatorcounter1) { score -= anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 400 : (anotatorcounter2 - anotatorcounter1) * 400; //System.out.println("score minus anotatorcounter: " + score + "\n"); } } //System.out.println("tokensCounter1: " + tokensCounter1 + "\ntokensCounter2: " + tokensCounter2 + "\n"); if ((tokensCounter1 > 1 && tokensCounter2 > 1) && tokensCounter1 < tokensCounter2 * 5 && tokensCounter2 < tokensCounter1 * 5) { if (tokensCounter1 > tokensCounter2 / 2 && tokensCounter2 > tokensCounter1 / 2) { score += (tokensCounter1 + tokensCounter2) * 1400; //System.out.println("score plus tokensCounter: " + score + "\n"); } else { score -= 3500; //System.out.println("score minus tokensCounter: " + score + "\n"); } } else { int elseint = tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500; //System.out.println("elseint: " + elseint + " tokensCounter2 * 5 || tokensCounter2 > tokensCounter1 * 5) && tokensCounter1 > 0 && tokensCounter2 > 0) { score -= tokensCounter1 > tokensCounter2 ? 
(tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500; //System.out.println("score post tokensCounter: " + score + "\n"); } else if (elseint > 0 && tokensCounter1 > 0 && tokensCounter2 > 0) { score -= elseint * 2; //System.out.println("score post elseint: " + elseint + "\n"); } } //System.out.println("Score Pre levenhstein: " + score + "\n"); LevenshteinDistance leven = new LevenshteinDistance(str, str1); double SentenceScoreDiff = leven.computeLevenshteinDistance(); SentenceScoreDiff *= 15; score -= SentenceScoreDiff; ConcurrentMap nerEntities1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap nerEntities2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap nerEntities3 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap nerEntities4 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap nerEntityTokenTags1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap nerEntityTokenTags2 = new MapMaker().concurrencyLevel(2).makeMap(); for (CoreEntityMention em : pipelineCoreDcoument1.entityMentions()) { Set> entrySet = em.entityTypeConfidences().entrySet(); String entityType = em.entityType(); Double EntityConfidences = 0.0; for (Map.Entry entries : entrySet) { EntityConfidences = entries.getValue(); } List tokens = em.tokens(); for (CoreLabel token : tokens) { if (!nerEntityTokenTags1.values().contains(token.tag())) { if (entityType.equals("PERSON") && EntityConfidences > 0.80) { nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag()); } } } if (!nerEntities1.values().contains(em.text())) { nerEntities1.put(nerEntities1.size() + 1, em.text()); nerEntities3.put(nerEntities3.size() + 1, em.entityType()); } } for (CoreEntityMention em : pipelineCoreDcoument2.entityMentions()) { Set> entrySet = em.entityTypeConfidences().entrySet(); String entityType = em.entityType(); Double EntityConfidences = 0.0; for (Map.Entry entries : entrySet) { EntityConfidences = entries.getValue(); } List 
tokens = em.tokens(); for (CoreLabel token : tokens) { if (!nerEntityTokenTags2.values().contains(token.tag())) { if (entityType.equals("PERSON") && EntityConfidences > 0.80) { nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag()); } } } if (!nerEntities2.values().contains(em.text())) { nerEntities2.put(nerEntities2.size() + 1, em.text()); nerEntities4.put(nerEntities4.size() + 1, em.entityType()); } } //System.out.println("score post PERSON trim: " + score + "\n"); for (String strEnts1 : nerEntities1.values()) { Collection values = nerEntities2.values(); for (String strEnts2 : values) { if (strEnts1.equalsIgnoreCase(strEnts2)) { score += 2500; //System.out.println("score strEnts1 plus: " + score + "\n"); } else { score -= 150; } } } for (String strEnts1 : nerEntities3.values()) { if (nerEntities4.values().contains(strEnts1)) { score -= 1500; //System.out.println("score nerEntities4 minus: " + score + "\n"); } else { score -= 150; } } for (String strToken : nerEntityTokenTags1.values()) { if (nerEntityTokenTags2.values().contains(strToken)) { score += 2000; //System.out.println("score nerEntities4 plus: " + score + "\n"); } else { score -= 150; } } //System.out.println("score pre stopwordTokens: " + score + "\n"); ConcurrentMap stopwordTokens = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap stopwordTokens1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap stopWordLemma = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap stopWordLemma1 = new MapMaker().concurrencyLevel(2).makeMap(); Integer pairCounter1 = 0; Integer pairCounter2 = 0; String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with"; List tokensSentiment1 = pipelineAnnotation1Sentiment.get(CoreAnnotations.TokensAnnotation.class); List tokensSentiment2 = pipelineAnnotation2Sentiment.get(CoreAnnotations.TokensAnnotation.class); Set 
stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; Set stopWordsCustom = StopwordAnnotator.getStopWordList(customStopWordList, true); for (CoreLabel token : tokensSentiment1) { Pair stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word) || stopWordsCustom.contains(word)) { stopwordTokens.put(stopwordTokens.size(), word); } String lemma = token.lemma().toLowerCase(); if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) { stopWordLemma.put(stopWordLemma.size(), lemma); } if (stopword.first() && stopword.second()) { pairCounter1++; } //System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: " // + word + "\nlemma: " + lemma + "\n"); } for (CoreLabel token : tokensSentiment2) { Pair stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word) || stopWordsCustom.contains(word)) { stopwordTokens1.put(stopwordTokens1.size(), word); } String lemma = token.lemma().toLowerCase(); if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) { stopWordLemma1.put(stopWordLemma1.size(), lemma); } if (stopword.first() && stopword.second()) { pairCounter2++; } //System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: " // + word + "\nlemma: " + lemma + "\n"); } for (String stopwords1 : stopwordTokens.values()) { for (String stopwords2 : stopwordTokens1.values()) { if (stopwords1.equals(stopwords2)) { score -= 500; //System.out.println("score stopwordsToken: " + score + "\n"); } } } for (String stopwords1 : stopWordLemma.values()) { for (String stopwords2 : stopWordLemma1.values()) { if (stopwords1.equals(stopwords2)) { score -= 500; //System.out.println("score stopwords Lemma: " + score + "\n"); } } } if (!stopwordTokens.values().isEmpty() && !stopwordTokens1.values().isEmpty()) { int stopwordsize1 = stopwordTokens.values().size(); int stopwordsize2 = 
stopwordTokens1.values().size(); if (stopwordsize1 * 5 < stopwordsize2 || stopwordsize2 * 5 < stopwordsize1) { score -= stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850; } else { score += stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850;; } //System.out.println("score post stopwordsize: " + score + "\nstopwordsize1: " + stopwordsize1 + "\nstopwordsize2: " // + stopwordsize2 + "\n"); } if (pairCounter1 > 0 && pairCounter2 > 0) { if (pairCounter1 * 3 <= pairCounter2 || pairCounter2 * 3 <= pairCounter1) { score -= pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 1500 : (pairCounter2 - pairCounter1) * 1500; } else { score += pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 700 : (pairCounter2 - pairCounter1) * 700; } //System.out.println("score post pairCounter: " + score + "\npairCounter1: " + pairCounter1 + "\npairCounter2: " + pairCounter2 + "\n"); } } catch (Exception ex) { //System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n"); } System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n"); smxParam.setDistance(score); return smxParam; } }