260 lines
13 KiB
Java
260 lines
13 KiB
Java
|
package FunctionLayer.StanfordParser;
|
||
|
|
||
|
import FunctionLayer.LevenshteinDistance;
|
||
|
import edu.stanford.nlp.ling.CoreAnnotations;
|
||
|
import edu.stanford.nlp.ling.CoreLabel;
|
||
|
import edu.stanford.nlp.ling.HasWord;
|
||
|
import edu.stanford.nlp.ling.IndexedWord;
|
||
|
import edu.stanford.nlp.ling.Label;
|
||
|
import edu.stanford.nlp.ling.TaggedWord;
|
||
|
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
|
||
|
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
|
||
|
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
|
||
|
import edu.stanford.nlp.pipeline.Annotation;
|
||
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
||
|
import edu.stanford.nlp.process.DocumentPreprocessor;
|
||
|
import edu.stanford.nlp.process.Tokenizer;
|
||
|
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
|
||
|
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
||
|
import edu.stanford.nlp.trees.Constituent;
|
||
|
import edu.stanford.nlp.trees.GrammaticalRelation;
|
||
|
import edu.stanford.nlp.trees.GrammaticalStructure;
|
||
|
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
|
||
|
import edu.stanford.nlp.trees.Tree;
|
||
|
import edu.stanford.nlp.trees.TreeCoreAnnotations;
|
||
|
import edu.stanford.nlp.trees.TreebankLanguagePack;
|
||
|
import edu.stanford.nlp.trees.TypedDependency;
|
||
|
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
|
||
|
import edu.stanford.nlp.util.CoreMap;
|
||
|
import java.io.StringReader;
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.Collection;
|
||
|
import java.util.List;
|
||
|
import java.util.Properties;
|
||
|
import java.util.Set;
|
||
|
import org.ejml.simple.SimpleMatrix;
|
||
|
|
||
|
/*
|
||
|
* To change this license header, choose License Headers in Project Properties.
|
||
|
* To change this template file, choose Tools | Templates
|
||
|
* and open the template in the editor.
|
||
|
*/
|
||
|
/**
|
||
|
*
|
||
|
* @author install1
|
||
|
*/
|
||
|
public class SentimentAnalyzerTest {

    // NOTE(review): despite the "Test" suffix this is not a unit test; it is a
    // parser/sentiment scoring facade used by the FunctionLayer package.

    // Classpath locations of the Stanford CoreNLP model files loaded in
    // shiftReduceParserInitiate().
    private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
    // NOTE(review): sentimentModel and parserModelPathUD are never read anywhere
    // in this class — presumably kept for future use; confirm before removing.
    private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
    private static String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz";
    private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
    private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    // POS tagger and parsers; all null until shiftReduceParserInitiate() runs.
    private static MaxentTagger tagger;
    private static ShiftReduceParser model;
    // Parser options: cap sentences at 100 tokens.
    private static String[] options = {"-maxLength", "100"};
    private static LexicalizedParser lp;
    private static TreebankLanguagePack tlp;
    // Configuration for the two pipelines below; populated in
    // shiftReduceParserInitiate().
    private static Properties props = new Properties();
    private static Properties propsSentiment = new Properties();
    private static GrammaticalStructureFactory gsf;
    // Two pipelines: one for constituency/dependency parsing (shift-reduce model),
    // one for sentiment scoring (RNN lexicalized parser).
    private static StanfordCoreNLP pipeline;
    private static StanfordCoreNLP pipelineSentiment;
|
public static void shiftReduceParserInitiate() {
|
||
|
model = ShiftReduceParser.loadModel(modelPath, options);
|
||
|
tagger = new MaxentTagger(taggerPath);
|
||
|
lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
|
||
|
tlp = lp.getOp().langpack();
|
||
|
gsf = tlp.grammaticalStructureFactory();
|
||
|
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
|
||
|
// set up pipeline properties
|
||
|
props.setProperty("parse.model", modelPath);
|
||
|
// use faster shift reduce parser
|
||
|
props.setProperty("parse.maxlen", "100");
|
||
|
props.setProperty("parse.binaryTrees", "true");
|
||
|
propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
|
||
|
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
|
||
|
propsSentiment.setProperty("parse.maxlen", "100");
|
||
|
pipeline = new StanfordCoreNLP(props);
|
||
|
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
|
||
|
}
|
||
|
|
||
|
/**
 * Returns the shared shift-reduce parser model.
 *
 * @return the parser, or {@code null} if {@link #shiftReduceParserInitiate()}
 *         has not been called yet
 */
public static ShiftReduceParser getModel() {
    return model;
}
|
||
|
|
||
|
/**
 * Returns the shared maximum-entropy POS tagger.
 *
 * @return the tagger, or {@code null} if {@link #shiftReduceParserInitiate()}
 *         has not been called yet
 */
public static MaxentTagger getTagger() {
    return tagger;
}
|
||
|
|
||
|
public static double sentimentanalyzing(String str, String str1) {
|
||
|
double score = -100.0;
|
||
|
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
|
||
|
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
|
||
|
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
|
||
|
for (List<HasWord> sentence : tokenizer) {
|
||
|
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
|
||
|
Tree tree = model.apply(tagged1);
|
||
|
ArrayList<TaggedWord> taggedYield = tree.taggedYield();
|
||
|
taggedwordlist1.add(taggedYield);
|
||
|
}
|
||
|
tokenizer = new DocumentPreprocessor(new StringReader(str));
|
||
|
for (List<HasWord> sentence : tokenizer) {
|
||
|
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
|
||
|
Tree tree = model.apply(tagged1);
|
||
|
ArrayList<TaggedWord> taggedYield = tree.taggedYield();
|
||
|
taggedwordlist2.add(taggedYield);
|
||
|
}
|
||
|
int counter = 0;
|
||
|
int counter1 = 0;
|
||
|
for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
|
||
|
counter += taggedlist2.size();
|
||
|
}
|
||
|
for (List<TaggedWord> taggedlist1 : taggedwordlist1) {
|
||
|
counter1 += taggedlist1.size();
|
||
|
}
|
||
|
int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
|
||
|
overValue *= 16;
|
||
|
while (overValue > 0) {
|
||
|
overValue--;
|
||
|
score--;
|
||
|
}
|
||
|
System.out.println("Score Post overValue: " + score + "\n");
|
||
|
for (List<TaggedWord> TGWList : taggedwordlist1) {
|
||
|
for (TaggedWord TGW : TGWList) {
|
||
|
List<String> tgwlist1 = new ArrayList();
|
||
|
for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
|
||
|
for (TaggedWord TGW1 : taggedlist2) {
|
||
|
if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":") && !tgwlist1.contains(TGW1.tag())) {
|
||
|
score += 64;
|
||
|
tgwlist1.add(TGW.tag());
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// System.out.println("Score: " + score);
|
||
|
Annotation annotation = new Annotation(str1);
|
||
|
pipeline.annotate(annotation);
|
||
|
List<Tree> sentenceConstituencyParseList = new ArrayList();
|
||
|
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
|
||
|
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
|
||
|
sentenceConstituencyParseList.add(sentenceConstituencyParse);
|
||
|
}
|
||
|
Annotation annotation1 = new Annotation(str);
|
||
|
pipeline.annotate(annotation1);
|
||
|
for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
||
|
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
|
||
|
GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
|
||
|
Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
|
||
|
List<String> filerTreeContent = new ArrayList();
|
||
|
for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
|
||
|
Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
|
||
|
Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
|
||
|
List<String> constiLabels = new ArrayList();
|
||
|
for (Constituent consti : inT1notT2) {
|
||
|
for (Constituent consti1 : inT2notT1) {
|
||
|
if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
|
||
|
//System.out.println("consti value: " + consti.value() + "\n");
|
||
|
score += 64; //256
|
||
|
constiLabels.add(consti.value());
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
|
||
|
Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
|
||
|
for (TypedDependency TDY1 : allTypedDependencies1) {
|
||
|
IndexedWord dep = TDY1.dep();
|
||
|
IndexedWord gov = TDY1.gov();
|
||
|
GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
|
||
|
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
|
||
|
score += 900;
|
||
|
//System.out.println("grammaticalRelation applicable: " + score + "\n");
|
||
|
}
|
||
|
GrammaticalRelation reln = TDY1.reln();
|
||
|
if (reln.isApplicable(sentenceConstituencyParse)) {
|
||
|
score += 256;
|
||
|
}
|
||
|
}
|
||
|
for (TypedDependency TDY : allTypedDependencies) {
|
||
|
IndexedWord dep = TDY.dep();
|
||
|
IndexedWord gov = TDY.gov();
|
||
|
GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
|
||
|
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
|
||
|
score += 900;
|
||
|
//System.out.println("grammaticalRelation applicable: " + score + "\n");
|
||
|
}
|
||
|
GrammaticalRelation reln = TDY.reln();
|
||
|
if (reln.isApplicable(sentenceConstituencyParse1)) {
|
||
|
score += 256;
|
||
|
}
|
||
|
}
|
||
|
for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) {
|
||
|
for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) {
|
||
|
if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) {
|
||
|
filerTreeContent.add(LBW.lemma());
|
||
|
score += 1500;
|
||
|
//System.out.println("lemma: " + LBW.lemma() + "\n");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
Annotation annotationSentiment1 = pipelineSentiment.process(str);
|
||
|
List<SimpleMatrix> simpleSMXlist = new ArrayList();
|
||
|
List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
|
||
|
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
||
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
||
|
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
|
||
|
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
|
||
|
simpleSMXlist.add(predictions);
|
||
|
simpleSMXlistVector.add(nodeVector);
|
||
|
}
|
||
|
annotationSentiment1 = pipelineSentiment.process(str1);
|
||
|
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
|
||
|
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
|
||
|
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
|
||
|
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
|
||
|
for (SimpleMatrix simpleSMX : simpleSMXlist) {
|
||
|
double dot = predictions.dot(simpleSMX) * 100;
|
||
|
//System.out.println("dot value: " + dot + "\n");
|
||
|
double subtracter = dot > 50 ? dot - 50 : 50 - dot;
|
||
|
System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n");
|
||
|
subtracter *= 25;
|
||
|
while (subtracter > 0) {
|
||
|
subtracter--;
|
||
|
score--;
|
||
|
}
|
||
|
System.out.println("score post dot: " + score + "\n");
|
||
|
}
|
||
|
for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
|
||
|
double dot = nodeVector.dot(simpleSMX);
|
||
|
double elementSum = nodeVector.kron(simpleSMX).elementSum();
|
||
|
elementSum = Math.round(elementSum * 100.0) / 100.0;
|
||
|
System.out.println("kron SMX elementSum: " + elementSum + "\n");
|
||
|
if (dot < 0.1) {
|
||
|
// System.out.println("\ndot VECTOR: " + dot + "\n");
|
||
|
score += 256;
|
||
|
}
|
||
|
if (elementSum < 0.1 && elementSum > 0.0) {
|
||
|
score += 1300;
|
||
|
} else if (elementSum > 0.1 && elementSum < 1.0) {
|
||
|
score -= 1100;
|
||
|
} else {
|
||
|
score -= 1424;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
int SentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1);
|
||
|
SentenceScoreDiff *= 15;
|
||
|
// System.out.println("score pre levenhstein substract: " + score + "\nSentenceScoreDiff: " + SentenceScoreDiff + "\n");
|
||
|
while (SentenceScoreDiff > 0) {
|
||
|
SentenceScoreDiff--;
|
||
|
score--;
|
||
|
}
|
||
|
System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
|
||
|
return score;
|
||
|
}
|
||
|
}
|