2019-03-02 15:10:46 +01:00
|
|
|
package FunctionLayer.StanfordParser;
|
|
|
|
|
|
|
|
import FunctionLayer.LevenshteinDistance;
|
2019-03-03 13:17:07 +01:00
|
|
|
import FunctionLayer.MYSQLDatahandler;
|
|
|
|
import FunctionLayer.SimilarityMatrix;
|
|
|
|
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
|
2019-03-02 15:10:46 +01:00
|
|
|
import edu.stanford.nlp.ling.CoreAnnotations;
|
|
|
|
import edu.stanford.nlp.ling.CoreLabel;
|
|
|
|
import edu.stanford.nlp.ling.HasWord;
|
|
|
|
import edu.stanford.nlp.ling.IndexedWord;
|
|
|
|
import edu.stanford.nlp.ling.TaggedWord;
|
|
|
|
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
|
|
|
|
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
|
|
|
|
import edu.stanford.nlp.pipeline.Annotation;
|
|
|
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
|
|
|
import edu.stanford.nlp.process.DocumentPreprocessor;
|
|
|
|
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
|
2019-03-03 13:17:07 +01:00
|
|
|
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
|
2019-03-02 15:10:46 +01:00
|
|
|
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
|
|
|
import edu.stanford.nlp.trees.Constituent;
|
|
|
|
import edu.stanford.nlp.trees.GrammaticalRelation;
|
|
|
|
import edu.stanford.nlp.trees.GrammaticalStructure;
|
|
|
|
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
|
|
|
|
import edu.stanford.nlp.trees.Tree;
|
|
|
|
import edu.stanford.nlp.trees.TreeCoreAnnotations;
|
|
|
|
import edu.stanford.nlp.trees.TypedDependency;
|
|
|
|
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
|
|
|
|
import edu.stanford.nlp.util.CoreMap;
|
|
|
|
import java.io.StringReader;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Set;
|
2019-03-03 13:17:07 +01:00
|
|
|
import java.util.concurrent.Callable;
|
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
2019-03-02 15:10:46 +01:00
|
|
|
import org.ejml.simple.SimpleMatrix;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To change this license header, choose License Headers in Project Properties.
|
|
|
|
* To change this template file, choose Tools | Templates
|
|
|
|
* and open the template in the editor.
|
|
|
|
*/
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @author install1
|
|
|
|
*/
|
2019-03-03 13:17:07 +01:00
|
|
|
public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
|
2019-03-02 15:10:46 +01:00
|
|
|
|
2019-03-03 13:17:07 +01:00
|
|
|
private String str;
|
|
|
|
private String str1;
|
|
|
|
private SimilarityMatrix smxParam;
|
|
|
|
private ShiftReduceParser model;
|
|
|
|
private MaxentTagger tagger;
|
|
|
|
private GrammaticalStructureFactory gsf;
|
|
|
|
private StanfordCoreNLP pipeline;
|
|
|
|
private StanfordCoreNLP pipelineSentiment;
|
|
|
|
private AbstractSequenceClassifier classifier;
|
2019-03-02 15:10:46 +01:00
|
|
|
|
2019-03-03 13:17:07 +01:00
|
|
|
public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) {
|
|
|
|
this.str = str;
|
|
|
|
this.str1 = str1;
|
|
|
|
this.smxParam = smxParam;
|
|
|
|
model = MYSQLDatahandler.getModel();
|
|
|
|
tagger = MYSQLDatahandler.getTagger();
|
|
|
|
pipeline = MYSQLDatahandler.getPipeline();
|
|
|
|
pipelineSentiment = MYSQLDatahandler.getPipelineSentiment();
|
|
|
|
gsf = MYSQLDatahandler.getGsf();
|
|
|
|
classifier = MYSQLDatahandler.getClassifier();
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
|
|
|
|
2019-03-03 13:17:07 +01:00
|
|
|
@Override
|
|
|
|
public SimilarityMatrix call() {
|
|
|
|
try {
|
|
|
|
Double score = -100.0;
|
|
|
|
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
|
|
|
|
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
|
|
|
|
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
|
|
|
|
for (List<HasWord> sentence : tokenizer) {
|
|
|
|
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
|
|
|
|
}
|
|
|
|
tokenizer = new DocumentPreprocessor(new StringReader(str));
|
|
|
|
for (List<HasWord> sentence : tokenizer) {
|
|
|
|
taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
|
|
|
|
}
|
|
|
|
int counter = 0;
|
|
|
|
int counter1 = 0;
|
|
|
|
counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum);
|
|
|
|
counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum);
|
|
|
|
int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
|
|
|
|
overValue *= 16;
|
|
|
|
score -= overValue;
|
|
|
|
List<String> tgwlistIndex = new ArrayList();
|
|
|
|
taggedwordlist1.forEach((TGWList) -> {
|
|
|
|
TGWList.forEach((TaggedWord) -> {
|
|
|
|
if (!tgwlistIndex.contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) {
|
|
|
|
tgwlistIndex.add(TaggedWord.tag());
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
});
|
|
|
|
});
|
|
|
|
AtomicInteger runCount = new AtomicInteger(0);
|
|
|
|
taggedwordlist2.forEach((TGWList) -> {
|
|
|
|
TGWList.forEach((TaggedWord) -> {
|
|
|
|
if (tgwlistIndex.contains(TaggedWord.tag())) {
|
|
|
|
tgwlistIndex.remove(TaggedWord.tag());
|
|
|
|
runCount.getAndIncrement();
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
score += runCount.get() * 64;
|
|
|
|
Annotation annotation = new Annotation(str1);
|
|
|
|
pipeline.annotate(annotation);
|
|
|
|
List<Tree> sentenceConstituencyParseList = new ArrayList();
|
|
|
|
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
|
|
|
|
sentenceConstituencyParseList.add(sentenceConstituencyParse);
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
Annotation annotation1 = new Annotation(str);
|
|
|
|
pipeline.annotate(annotation1);
|
|
|
|
List<String> nerList = new ArrayList();
|
|
|
|
for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
|
|
|
|
GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
|
|
|
|
Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
|
|
|
|
List<String> filerTreeContent = new ArrayList();
|
|
|
|
for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
|
|
|
|
Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
|
|
|
|
Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
|
|
|
|
List<String> constiLabels = new ArrayList();
|
|
|
|
for (Constituent consti : inT1notT2) {
|
|
|
|
for (Constituent consti1 : inT2notT1) {
|
|
|
|
if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
|
|
|
|
score += 64; //256
|
|
|
|
constiLabels.add(consti.value());
|
|
|
|
}
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
|
|
|
|
Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
|
|
|
|
for (TypedDependency TDY1 : allTypedDependencies1) {
|
|
|
|
IndexedWord dep = TDY1.dep();
|
|
|
|
IndexedWord gov = TDY1.gov();
|
|
|
|
GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
|
|
|
|
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
|
|
|
|
score += 900;
|
|
|
|
}
|
|
|
|
GrammaticalRelation reln = TDY1.reln();
|
|
|
|
if (reln.isApplicable(sentenceConstituencyParse)) {
|
|
|
|
score += 256;
|
|
|
|
}
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
for (TypedDependency TDY : allTypedDependencies) {
|
|
|
|
IndexedWord dep = TDY.dep();
|
|
|
|
IndexedWord gov = TDY.gov();
|
|
|
|
GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
|
|
|
|
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
|
|
|
|
score += 900;
|
|
|
|
}
|
|
|
|
GrammaticalRelation reln = TDY.reln();
|
|
|
|
if (reln.isApplicable(sentenceConstituencyParse1)) {
|
|
|
|
score += 256;
|
|
|
|
}
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
AtomicInteger runCount1 = new AtomicInteger(0);
|
|
|
|
sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> {
|
|
|
|
sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma())
|
|
|
|
&& !filerTreeContent.contains(LBW.lemma()))).map((_item) -> {
|
|
|
|
filerTreeContent.add(LBW.lemma());
|
|
|
|
return _item;
|
|
|
|
}).forEachOrdered((_item) -> {
|
|
|
|
runCount1.getAndIncrement();
|
|
|
|
});
|
|
|
|
});
|
|
|
|
score += runCount1.get() * 1500;
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
}
|
|
|
|
Annotation annotationSentiment1 = pipelineSentiment.process(str);
|
|
|
|
List<SimpleMatrix> simpleSMXlist = new ArrayList();
|
|
|
|
List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
|
|
|
|
List<Integer> sentiment1 = new ArrayList();
|
|
|
|
List<Integer> sentiment2 = new ArrayList();
|
|
|
|
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
|
|
|
sentiment1.add(RNNCoreAnnotations.getPredictedClass(tree));
|
|
|
|
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
|
|
|
|
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
|
|
|
|
simpleSMXlist.add(predictions);
|
|
|
|
simpleSMXlistVector.add(nodeVector);
|
|
|
|
}
|
|
|
|
annotationSentiment1 = pipelineSentiment.process(str1);
|
|
|
|
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
|
|
|
sentiment2.add(RNNCoreAnnotations.getPredictedClass(tree));
|
|
|
|
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
|
|
|
|
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
|
|
|
|
score = simpleSMXlist.stream().map((simpleSMX) -> predictions.dot(simpleSMX) * 100).map((dot) -> dot > 50 ? dot - 50 : 50 - dot).map((subtracter) -> {
|
|
|
|
subtracter *= 25;
|
|
|
|
return subtracter;
|
|
|
|
}).map((subtracter) -> subtracter).reduce(score, (accumulator, _item) -> accumulator - _item);
|
|
|
|
for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
|
|
|
|
double dot = nodeVector.dot(simpleSMX);
|
|
|
|
double elementSum = nodeVector.kron(simpleSMX).elementSum();
|
|
|
|
elementSum = Math.round(elementSum * 100.0) / 100.0;
|
|
|
|
if (dot < 0.1) {
|
2019-03-02 15:10:46 +01:00
|
|
|
score += 256;
|
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
if (elementSum < 0.1 && elementSum > 0.0) {
|
|
|
|
score += 1300;
|
|
|
|
} else if (elementSum > 0.1 && elementSum < 1.0) {
|
|
|
|
score -= 1100;
|
|
|
|
} else {
|
|
|
|
score -= 1424;
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500;
|
|
|
|
DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
|
|
|
|
List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter);
|
|
|
|
List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter);
|
|
|
|
score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200;
|
|
|
|
Annotation annotationSentiment = pipelineSentiment.process(str);
|
|
|
|
int mainSentiment1 = 0;
|
|
|
|
int longest1 = 0;
|
|
|
|
int mainSentiment2 = 0;
|
|
|
|
int longest2 = 0;
|
|
|
|
for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
|
|
|
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
|
|
|
|
String partText = sentence.toString();
|
|
|
|
if (partText.length() > longest1) {
|
|
|
|
mainSentiment1 = sentiment;
|
|
|
|
longest1 = partText.length();
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
annotationSentiment = pipelineSentiment.process(str1);
|
|
|
|
for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
|
|
|
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
|
|
|
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
|
|
|
|
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
|
|
|
|
String partText = sentence.toString();
|
|
|
|
if (partText.length() > longest2) {
|
|
|
|
mainSentiment2 = sentiment;
|
|
|
|
longest2 = partText.length();
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
}
|
|
|
|
if (longest1 != longest2) {
|
|
|
|
long deffLongest = longest1 > longest2 ? longest1 : longest2;
|
|
|
|
long deffshorter = longest1 < longest2 ? longest1 : longest2;
|
|
|
|
if (deffLongest >= (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) {
|
|
|
|
score += (deffLongest - deffshorter) * 200;
|
|
|
|
} else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) {
|
|
|
|
score += (deffLongest - deffshorter) * 200;
|
2019-03-02 15:10:46 +01:00
|
|
|
} else {
|
2019-03-03 13:17:07 +01:00
|
|
|
score -= (deffLongest - deffshorter) * 50;
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
LevenshteinDistance leven = new LevenshteinDistance(str, str1);
|
|
|
|
int SentenceScoreDiff = leven.computeLevenshteinDistance();
|
|
|
|
SentenceScoreDiff *= 15;
|
|
|
|
score -= SentenceScoreDiff;
|
|
|
|
System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
|
|
|
|
smxParam.setDistance(score);
|
|
|
|
} catch (Exception ex) {
|
|
|
|
System.out.println("ex: " + ex.getMessage() + "\n");
|
|
|
|
smxParam.setDistance(-1000);
|
|
|
|
return smxParam;
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
return smxParam;
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|
2019-03-03 13:17:07 +01:00
|
|
|
|
2019-03-02 15:10:46 +01:00
|
|
|
}
|