/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package FunctionLayer.StanfordParser;

import FunctionLayer.LevenshteinDistance;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
import edu.stanford.nlp.util.CoreMap;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import org.ejml.simple.SimpleMatrix;

/**
 * Scores the similarity of two strings by combining POS-tag overlap,
 * constituency- and dependency-parse agreement, RNN sentiment vectors,
 * and Levenshtein distance.
 *
 * @author install1
 */
public class SentimentAnalyzerTest {

    private static final String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
    private static final String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; // not referenced yet
    private static final String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz"; // not referenced yet
    private static final String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
    private static final String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    private static MaxentTagger tagger;
    private static ShiftReduceParser model;
    private static final String[] options = {"-maxLength", "100"};
    private static LexicalizedParser lp;
    private static TreebankLanguagePack tlp;
    private static final Properties props = new Properties();
    private static final Properties propsSentiment = new Properties();
    private static GrammaticalStructureFactory gsf;
    private static StanfordCoreNLP pipeline;
    private static StanfordCoreNLP pipelineSentiment;

    public static void shiftReduceParserInitiate() {
        model = ShiftReduceParser.loadModel(modelPath, options);
        tagger = new MaxentTagger(taggerPath);
        lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
        tlp = lp.getOp().langpack();
        gsf = tlp.grammaticalStructureFactory();
        // Set up pipeline properties; use the faster shift-reduce parser.
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
        props.setProperty("parse.model", modelPath);
        props.setProperty("parse.maxlen", "100");
        props.setProperty("parse.binaryTrees", "true");
        // The sentiment pipeline uses the RNN lexicalized parser instead.
        propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
        propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
        propsSentiment.setProperty("parse.maxlen", "100");
        pipeline = new StanfordCoreNLP(props);
        pipelineSentiment = new StanfordCoreNLP(propsSentiment);
    }
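
    // Expose the loaded models so other layers can reuse them without reloading.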
    public static ShiftReduceParser getModel() {
        return model;
    }

    public static MaxentTagger getTagger() {
        return tagger;
    }

    /**
     * Heuristically scores how similar the two input strings are. The score
     * starts at -100.0 and is adjusted by token-count difference, POS-tag
     * overlap, parse-tree and typed-dependency agreement, sentiment-vector
     * distance, and Levenshtein distance.
     */
    public static double sentimentanalyzing(String str, String str1) {
        double score = -100.0;
        // POS-tag both inputs and parse them with the shift-reduce parser.
        List<List<TaggedWord>> taggedwordlist1 = new ArrayList<>();
        List<List<TaggedWord>> taggedwordlist2 = new ArrayList<>();
        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
            Tree tree = model.apply(tagged1);
            taggedwordlist1.add(tree.taggedYield());
        }
        tokenizer = new DocumentPreprocessor(new StringReader(str));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
            Tree tree = model.apply(tagged1);
            taggedwordlist2.add(tree.taggedYield());
        }
        // Penalize the difference in total token count, 16 points per token.
        int counter = 0;
        int counter1 = 0;
        for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
            counter += taggedlist2.size();
        }
        for (List<TaggedWord> taggedlist1 : taggedwordlist1) {
            counter1 += taggedlist1.size();
        }
        int overValue = Math.abs(counter - counter1) * 16;
        score -= overValue;
        System.out.println("Score Post overValue: " + score + "\n");
        // Reward each POS tag (other than ":") that both inputs share.
        for (List<TaggedWord> TGWList : taggedwordlist1) {
            for (TaggedWord TGW : TGWList) {
                List<String> tgwlist1 = new ArrayList<>();
                for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
                    for (TaggedWord TGW1 : taggedlist2) {
                        if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":")
                                && !tgwlist1.contains(TGW1.tag())) {
                            score += 64;
                            tgwlist1.add(TGW.tag());
                        }
                    }
                }
            }
        }
        // Constituency-parse both inputs with the main pipeline.
        Annotation annotation = new Annotation(str1);
        pipeline.annotate(annotation);
        List<Tree> sentenceConstituencyParseList = new ArrayList<>();
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentenceConstituencyParseList.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
        }
        Annotation annotation1 = new Annotation(str);
        pipeline.annotate(annotation1);
        for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
            Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
            List<String> filerTreeContent = new ArrayList<>();
            for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
                // Constituents unique to each tree; matching labels across the two diffs score points.
                Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
                Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
                List<String> constiLabels = new ArrayList<>();
                for (Constituent consti : inT1notT2) {
                    for (Constituent consti1 : inT2notT1) {
                        if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
                            score += 64;
                            constiLabels.add(consti.value());
                        }
                    }
                }
                // Score the other tree's typed dependencies against this parse.
                GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
                Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
                for (TypedDependency TDY1 : allTypedDependencies1) {
                    IndexedWord dep = TDY1.dep();
                    IndexedWord gov = TDY1.gov();
                    GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
                        score += 900;
                    }
                    GrammaticalRelation reln = TDY1.reln();
                    if (reln.isApplicable(sentenceConstituencyParse)) {
                        score += 256;
                    }
                }
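                // Same check in the opposite direction: this parse's dependencies against the other tree.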
                for (TypedDependency TDY : allTypedDependencies) {
                    IndexedWord dep = TDY.dep();
                    IndexedWord gov = TDY.gov();
                    GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
                        score += 900;
                    }
                    GrammaticalRelation reln = TDY.reln();
                    if (reln.isApplicable(sentenceConstituencyParse1)) {
                        score += 256;
                    }
                }
                // Reward lemmas that occur in both parses, counting each lemma once.
                for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) {
                    for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) {
                        if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) {
                            filerTreeContent.add(LBW.lemma());
                            score += 1500;
                        }
                    }
                }
            }
        }
        // Collect RNN sentiment predictions and node vectors for the first input.
        Annotation annotationSentiment1 = pipelineSentiment.process(str);
        List<SimpleMatrix> simpleSMXlist = new ArrayList<>();
        List<SimpleMatrix> simpleSMXlistVector = new ArrayList<>();
        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            simpleSMXlist.add(RNNCoreAnnotations.getPredictions(tree));
            simpleSMXlistVector.add(RNNCoreAnnotations.getNodeVector(tree));
        }
        // Compare them against the second input's predictions and vectors.
        annotationSentiment1 = pipelineSentiment.process(str1);
        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
            SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
            for (SimpleMatrix simpleSMX : simpleSMXlist) {
                double dot = predictions.dot(simpleSMX) * 100;
                // Penalize the distance of the prediction dot product from 50, scaled by 25.
                double subtracter = Math.abs(dot - 50);
                System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n");
                subtracter *= 25;
                score -= Math.ceil(subtracter); // one point per started unit
                System.out.println("score post dot: " + score + "\n");
            }
            for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
                double dot = nodeVector.dot(simpleSMX);
                double elementSum = nodeVector.kron(simpleSMX).elementSum();
                elementSum = Math.round(elementSum * 100.0) / 100.0;
                System.out.println("kron SMX elementSum: " + elementSum + "\n");
                if (dot < 0.1) {
                    score += 256;
                }
                if (elementSum < 0.1 && elementSum > 0.0) {
                    score += 1300;
                } else if (elementSum > 0.1 && elementSum < 1.0) {
                    score -= 1100;
                } else {
                    score -= 1424;
                }
            }
        }
        // Finally, penalize raw edit distance between the strings, 15 points per edit.
        int sentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1) * 15;
        score -= sentenceScoreDiff;
        System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
        return score;
    }
}
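/*
 * Minimal usage sketch (hypothetical caller; assumes the model files named
 * above are on the classpath):
 *
 *     SentimentAnalyzerTest.shiftReduceParserInitiate();
 *     double score = SentimentAnalyzerTest.sentimentanalyzing(
 *             "What is your name?", "What should I call you?");
 *     // Higher scores indicate closer POS/parse/sentiment agreement.
 */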