// Source: projects-jenz/ArtificialAutism
// File:   src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
// Snapshot taken 2019-03-02 15:10:46 +01:00 (260 lines, 13 KiB at capture time).
package FunctionLayer.StanfordParser;
import FunctionLayer.LevenshteinDistance;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
import edu.stanford.nlp.util.CoreMap;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import org.ejml.simple.SimpleMatrix;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
*
* @author install1
*/
/**
 * Computes a heuristic similarity score between two sentences by combining
 * POS-tag overlap, constituency-tree diffs, grammatical-relation applicability,
 * shared lemmas, sentiment-RNN vector comparisons, and Levenshtein distance.
 *
 * <p>All state is static; {@link #shiftReduceParserInitiate()} must be called
 * once (it loads several large CoreNLP models) before any scoring call.
 * This class is NOT thread-safe during initialization.
 */
public class SentimentAnalyzerTest {

    // Classpath locations of the CoreNLP models loaded in shiftReduceParserInitiate().
    private static final String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
    private static final String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
    private static final String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz";
    private static final String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
    private static final String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    private static final String[] options = {"-maxLength", "100"};

    // Heavyweight NLP components; populated once by shiftReduceParserInitiate()
    // and reused by every subsequent scoring call.
    private static MaxentTagger tagger;
    private static ShiftReduceParser model;
    private static LexicalizedParser lp;
    private static TreebankLanguagePack tlp;
    private static GrammaticalStructureFactory gsf;
    private static StanfordCoreNLP pipeline;
    private static StanfordCoreNLP pipelineSentiment;

    /**
     * Loads the shift-reduce parser, POS tagger, and RNN lexicalized parser,
     * and builds both annotation pipelines. Expensive; call exactly once at startup.
     */
    public static void shiftReduceParserInitiate() {
        model = ShiftReduceParser.loadModel(modelPath, options);
        tagger = new MaxentTagger(taggerPath);
        lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
        tlp = lp.getOp().langpack();
        gsf = tlp.grammaticalStructureFactory();
        // Constituency-parse pipeline backed by the fast shift-reduce parser.
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
        props.setProperty("parse.model", modelPath);
        props.setProperty("parse.maxlen", "100");
        props.setProperty("parse.binaryTrees", "true");
        pipeline = new StanfordCoreNLP(props);
        // Sentiment pipeline backed by the RNN lexicalized parser.
        Properties propsSentiment = new Properties();
        propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
        propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
        propsSentiment.setProperty("parse.maxlen", "100");
        pipelineSentiment = new StanfordCoreNLP(propsSentiment);
    }

    /** @return the shift-reduce parser loaded by {@link #shiftReduceParserInitiate()}, or null before init. */
    public static ShiftReduceParser getModel() {
        return model;
    }

    /** @return the POS tagger loaded by {@link #shiftReduceParserInitiate()}, or null before init. */
    public static MaxentTagger getTagger() {
        return tagger;
    }

    /**
     * Splits {@code text} into sentences, POS-tags each sentence, parses it with
     * the shift-reduce model, and returns the tagged yield of each parse tree.
     */
    private static List<List<TaggedWord>> tagSentences(String text) {
        List<List<TaggedWord>> tagged = new ArrayList<>();
        for (List<HasWord> sentence : new DocumentPreprocessor(new StringReader(text))) {
            tagged.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
        }
        return tagged;
    }

    /** Sums the word counts across the given tagged sentences. */
    private static int countWords(List<List<TaggedWord>> sentences) {
        int total = 0;
        for (List<TaggedWord> sentence : sentences) {
            total += sentence.size();
        }
        return total;
    }

    /**
     * Scores how similar {@code str} is to {@code str1}. Higher is more similar.
     * The score starts at -100 and is adjusted by several heuristics:
     * word-count mismatch (-16/word), shared POS tags (+64), constituency-diff
     * label matches (+64), applicable grammatical relations (+900 / +256),
     * shared lemmas (+1500), sentiment-RNN prediction distance (-25/point),
     * node-vector comparisons (+/- fixed amounts), and Levenshtein distance (-15/edit).
     *
     * @param str  candidate sentence
     * @param str1 reference sentence
     * @return the accumulated heuristic score (unbounded in both directions)
     */
    public static double sentimentanalyzing(String str, String str1) {
        double score = -100.0;

        List<List<TaggedWord>> taggedwordlist1 = tagSentences(str1);
        List<List<TaggedWord>> taggedwordlist2 = tagSentences(str);

        // Penalize word-count mismatch: 16 points per word of difference.
        // (Replaces the original decrement loop; identical for non-negative ints.)
        int counter = countWords(taggedwordlist2);
        int counter1 = countWords(taggedwordlist1);
        score -= Math.abs(counter - counter1) * 16;
        System.out.println("Score Post overValue: " + score + "\n");

        // Reward shared POS tags (":" excluded). For each word of str1, each
        // distinct tag match against str scores 64 once.
        for (List<TaggedWord> TGWList : taggedwordlist1) {
            for (TaggedWord TGW : TGWList) {
                List<String> seenTags = new ArrayList<>();
                for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
                    for (TaggedWord TGW1 : taggedlist2) {
                        if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":")
                                && !seenTags.contains(TGW1.tag())) {
                            score += 64;
                            seenTags.add(TGW.tag());
                        }
                    }
                }
            }
        }

        // Constituency parses of every sentence in str1.
        Annotation annotation = new Annotation(str1);
        pipeline.annotate(annotation);
        List<Tree> sentenceConstituencyParseList = new ArrayList<>();
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentenceConstituencyParseList.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
        }

        // Compare each sentence of str against each sentence of str1.
        Annotation annotation1 = new Annotation(str);
        pipeline.annotate(annotation1);
        for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
            Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
            List<String> filerTreeContent = new ArrayList<>();
            for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
                // Constituents present in exactly one of the two trees; each
                // matching label between the two diff sets scores 64 once.
                Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
                Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
                List<String> constiLabels = new ArrayList<>();
                for (Constituent consti : inT1notT2) {
                    for (Constituent consti1 : inT2notT1) {
                        if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
                            score += 64;
                            constiLabels.add(consti.value());
                        }
                    }
                }
                GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
                Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
                // Cross-check dependencies of str1's tree against str's structure.
                for (TypedDependency TDY1 : allTypedDependencies1) {
                    GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(TDY1.gov(), TDY1.dep());
                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
                        score += 900;
                    }
                    if (TDY1.reln().isApplicable(sentenceConstituencyParse)) {
                        score += 256;
                    }
                }
                // Cross-check dependencies of str's tree against str1's structure.
                for (TypedDependency TDY : allTypedDependencies) {
                    GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(TDY.gov(), TDY.dep());
                    // NOTE(review): this tests sentenceConstituencyParse, not ...Parse1 —
                    // asymmetric with the mirrored loop above. Preserved as-is; confirm
                    // whether ...Parse1 was intended before changing the scoring.
                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
                        score += 900;
                    }
                    if (TDY.reln().isApplicable(sentenceConstituencyParse1)) {
                        score += 256;
                    }
                }
                // Shared lemmas: each lemma counted once per str-sentence scores 1500.
                for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) {
                    for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) {
                        if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) {
                            filerTreeContent.add(LBW.lemma());
                            score += 1500;
                        }
                    }
                }
            }
        }

        // Sentiment-RNN comparison: collect prediction and node vectors for str...
        Annotation annotationSentiment1 = pipelineSentiment.process(str);
        List<SimpleMatrix> simpleSMXlist = new ArrayList<>();
        List<SimpleMatrix> simpleSMXlistVector = new ArrayList<>();
        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            simpleSMXlist.add(RNNCoreAnnotations.getPredictions(tree));
            simpleSMXlistVector.add(RNNCoreAnnotations.getNodeVector(tree));
        }
        // ...then compare against every sentence of str1.
        annotationSentiment1 = pipelineSentiment.process(str1);
        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
            SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
            for (SimpleMatrix simpleSMX : simpleSMXlist) {
                double dot = predictions.dot(simpleSMX) * 100;
                double subtracter = Math.abs(dot - 50);
                System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n");
                // Equivalent to the original unit-decrement loop over subtracter * 25:
                // the loop ran ceil(subtracter * 25) times, decrementing score each pass.
                score -= Math.ceil(subtracter * 25);
                System.out.println("score post dot: " + score + "\n");
            }
            for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
                double dot = nodeVector.dot(simpleSMX);
                double elementSum = nodeVector.kron(simpleSMX).elementSum();
                // Round to 2 decimal places before banding below.
                elementSum = Math.round(elementSum * 100.0) / 100.0;
                System.out.println("kron SMX elementSum: " + elementSum + "\n");
                if (dot < 0.1) {
                    score += 256;
                }
                if (elementSum < 0.1 && elementSum > 0.0) {
                    score += 1300;
                } else if (elementSum > 0.1 && elementSum < 1.0) {
                    score -= 1100;
                } else {
                    score -= 1424;
                }
            }
        }

        // Penalize raw edit distance between the two strings: 15 points per edit.
        int sentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1);
        score -= sentenceScoreDiff * 15;
        System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
        return score;
    }
}