package FunctionLayer.StanfordParser; import FunctionLayer.LevenshteinDistance; import FunctionLayer.MYSQLDatahandler; import FunctionLayer.SimilarityMatrix; import edu.stanford.nlp.ie.AbstractSequenceClassifier; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.process.DocumentPreprocessor; import edu.stanford.nlp.sentiment.SentimentCoreAnnotations; import edu.stanford.nlp.sequences.DocumentReaderAndWriter; import edu.stanford.nlp.tagger.maxent.MaxentTagger; import edu.stanford.nlp.trees.Constituent; import edu.stanford.nlp.trees.GrammaticalRelation; import edu.stanford.nlp.trees.GrammaticalStructure; import edu.stanford.nlp.trees.GrammaticalStructureFactory; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.trees.TypedDependency; import edu.stanford.nlp.trees.tregex.gui.Tdiff; import edu.stanford.nlp.util.CoreMap; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; import org.ejml.simple.SimpleMatrix; /* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ /** * * @author install1 */ public class SentimentAnalyzerTest implements Callable { private String str; private String str1; private SimilarityMatrix smxParam; private ShiftReduceParser model; private MaxentTagger tagger; private GrammaticalStructureFactory gsf; private StanfordCoreNLP pipeline; private StanfordCoreNLP pipelineSentiment; private AbstractSequenceClassifier classifier; public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) { this.str = str; this.str1 = str1; this.smxParam = smxParam; model = MYSQLDatahandler.getModel(); tagger = MYSQLDatahandler.getTagger(); pipeline = MYSQLDatahandler.getPipeline(); pipelineSentiment = MYSQLDatahandler.getPipelineSentiment(); gsf = MYSQLDatahandler.getGsf(); classifier = MYSQLDatahandler.getClassifier(); } @Override public SimilarityMatrix call() { try { Double score = -100.0; List> taggedwordlist1 = new ArrayList(); List> taggedwordlist2 = new ArrayList(); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); for (List sentence : tokenizer) { taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } tokenizer = new DocumentPreprocessor(new StringReader(str)); for (List sentence : tokenizer) { taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } int counter = 0; int counter1 = 0; counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum); counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum); int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter; overValue *= 16; score -= overValue; List tgwlistIndex = new ArrayList(); taggedwordlist1.forEach((TGWList) -> { TGWList.forEach((TaggedWord) -> { if (!tgwlistIndex.contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) { tgwlistIndex.add(TaggedWord.tag()); } }); }); AtomicInteger runCount = new AtomicInteger(0); taggedwordlist2.forEach((TGWList) -> { TGWList.forEach((TaggedWord) -> { if (tgwlistIndex.contains(TaggedWord.tag())) { tgwlistIndex.remove(TaggedWord.tag()); runCount.getAndIncrement(); } }); }); score += runCount.get() * 64; Annotation annotation = new Annotation(str1); pipeline.annotate(annotation); List sentenceConstituencyParseList = new ArrayList(); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); sentenceConstituencyParseList.add(sentenceConstituencyParse); } Annotation annotation1 = new Annotation(str); pipeline.annotate(annotation1); List nerList = new ArrayList(); for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) { Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse); Collection allTypedDependencies = gs.allTypedDependencies(); List filerTreeContent = new ArrayList(); for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) { Set inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1); Set inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse); List constiLabels = new ArrayList(); for (Constituent consti : inT1notT2) { for (Constituent consti1 : inT2notT1) { if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) { score += 64; //256 constiLabels.add(consti.value()); } } } GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1); Collection allTypedDependencies1 = gs1.allTypedDependencies(); for (TypedDependency TDY1 : allTypedDependencies1) { IndexedWord dep = TDY1.dep(); IndexedWord gov = TDY1.gov(); GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { score += 900; } GrammaticalRelation reln = TDY1.reln(); if (reln.isApplicable(sentenceConstituencyParse)) { score += 256; } } for (TypedDependency TDY : allTypedDependencies) { IndexedWord dep = TDY.dep(); IndexedWord gov = TDY.gov(); GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { score += 900; } GrammaticalRelation reln = TDY.reln(); if (reln.isApplicable(sentenceConstituencyParse1)) { score += 256; } } AtomicInteger runCount1 = new AtomicInteger(0); sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> { sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma()))).map((_item) -> { filerTreeContent.add(LBW.lemma()); return _item; }).forEachOrdered((_item) -> { runCount1.getAndIncrement(); }); }); score += runCount1.get() * 1500; } } Annotation annotationSentiment1 = pipelineSentiment.process(str); List simpleSMXlist = new ArrayList(); List simpleSMXlistVector = new ArrayList(); List sentiment1 = new ArrayList(); List sentiment2 = new ArrayList(); for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); sentiment1.add(RNNCoreAnnotations.getPredictedClass(tree)); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); simpleSMXlist.add(predictions); simpleSMXlistVector.add(nodeVector); } annotationSentiment1 = pipelineSentiment.process(str1); for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); sentiment2.add(RNNCoreAnnotations.getPredictedClass(tree)); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); score = simpleSMXlist.stream().map((simpleSMX) -> predictions.dot(simpleSMX) * 100).map((dot) -> dot > 50 ? dot - 50 : 50 - dot).map((subtracter) -> { subtracter *= 25; return subtracter; }).map((subtracter) -> subtracter).reduce(score, (accumulator, _item) -> accumulator - _item); for (SimpleMatrix simpleSMX : simpleSMXlistVector) { double dot = nodeVector.dot(simpleSMX); double elementSum = nodeVector.kron(simpleSMX).elementSum(); elementSum = Math.round(elementSum * 100.0) / 100.0; if (dot < 0.1) { score += 256; } if (elementSum < 0.1 && elementSum > 0.0) { score += 1300; } else if (elementSum > 0.1 && elementSum < 1.0) { score -= 1100; } else { score -= 1424; } } } score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500; DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter(); List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter); List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter); score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200; Annotation annotationSentiment = pipelineSentiment.process(str); int mainSentiment1 = 0; int longest1 = 0; int mainSentiment2 = 0; int longest2 = 0; for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); String partText = sentence.toString(); if (partText.length() > longest1) { mainSentiment1 = sentiment; longest1 = partText.length(); } } annotationSentiment = pipelineSentiment.process(str1); for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); String partText = sentence.toString(); if (partText.length() > longest2) { mainSentiment2 = sentiment; longest2 = partText.length(); } } if (longest1 != longest2) { long deffLongest = longest1 > longest2 ? longest1 : longest2; long deffshorter = longest1 < longest2 ? longest1 : longest2; if (deffLongest >= (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) { score += (deffLongest - deffshorter) * 200; } else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) { score += (deffLongest - deffshorter) * 200; } else { score -= (deffLongest - deffshorter) * 50; } } LevenshteinDistance leven = new LevenshteinDistance(str, str1); int SentenceScoreDiff = leven.computeLevenshteinDistance(); SentenceScoreDiff *= 15; score -= SentenceScoreDiff; System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n"); smxParam.setDistance(score); } catch (Exception ex) { System.out.println("ex: " + ex.getMessage() + "\n"); smxParam.setDistance(-1000); return smxParam; } return smxParam; } }