diff --git a/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
index 9411cdc7..0651c685 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
@@ -50,7 +50,7 @@ import java.util.logging.Logger;
  * @author install1
  */
 public class MYSQLDatahandler {
-
+
     public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(6, TimeUnit.MINUTES);
     public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
     public static MYSQLDatahandler instance = new MYSQLDatahandler();
@@ -60,107 +60,119 @@ public class MYSQLDatahandler {
     private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
     private final Stopwatch stopwatch;
     private final Stopwatch stopwatch1;
-    private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
+    private ExecutorService executor;
+
+    private static String shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
     private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
     private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
     private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
-    private static String NERModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
+    private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
+    private static String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data
     private static MaxentTagger tagger;
     private static ShiftReduceParser model;
     private static String[] options = {"-maxLength", "100"};
     private static Properties props = new Properties();
     private static Properties propsSentiment = new Properties();
+    private static Properties propsJMWE = new Properties();
     private static GrammaticalStructureFactory gsf;
     private static LexicalizedParser lp;
     private static TreebankLanguagePack tlp;
     private static AbstractSequenceClassifier classifier;
-    private ExecutorService executor;
+    // set up Stanford CoreNLP pipeline
     private static StanfordCoreNLP pipeline;
     private static StanfordCoreNLP pipelineSentiment;
-
+    private static StanfordCoreNLP pipelineJMWE;
+
     public static AbstractSequenceClassifier getClassifier() {
         return classifier;
     }
-
+
     public static void setClassifier(AbstractSequenceClassifier classifier) {
         MYSQLDatahandler.classifier = classifier;
     }
-
+
     public void instantiateExecutor() {
         this.executor = new ForkJoinPool(Runtime.getRuntime().availableProcessors(), ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true);
     }
-
+
     public MYSQLDatahandler() {
         this.stopwatch = Stopwatch.createUnstarted();
         this.stopwatch1 = Stopwatch.createStarted();
         this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
     }
-
+
     public static void shiftReduceParserInitiate() {
         try {
-            classifier = CRFClassifier.getClassifierNoExceptions(NERModel);
+            classifier = CRFClassifier.getClassifierNoExceptions(nerModel);
         } catch (ClassCastException ex) {
             Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
         }
-        model = ShiftReduceParser.loadModel(modelPath, options);
+        model = ShiftReduceParser.loadModel(shiftReduceParserPath, options);
         tagger = new MaxentTagger(taggerPath);
         lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
         tlp = lp.getOp().langpack();
         gsf = tlp.grammaticalStructureFactory();
         // set up pipeline properties
-        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
-        props.setProperty("parse.model", modelPath);
+        props.setProperty("parse.model", shiftReduceParserPath);
         props.setProperty("parse.maxlen", "100");
-        props.setProperty("tokenize.maxlen", "100");
-        props.setProperty("ssplit.maxlen", "100");
-        props.setProperty("lemma.maxlen", "100");
         props.setProperty("parse.binaryTrees", "true");
-        propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
+        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
         propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
+        propsSentiment.setProperty("ner.model", nerModel);
         propsSentiment.setProperty("sentiment.model", sentimentModel);
-        propsSentiment.setProperty("sentiment.maxlen", "100");
         propsSentiment.setProperty("parse.maxlen", "100");
-        propsSentiment.setProperty("tokenize.maxlen", "100");
-        propsSentiment.setProperty("ssplit.maxlen", "100");
+        propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
+        propsJMWE.setProperty("customAnnotatorClass.jmwe", "edu.stanford.nlp.pipeline.JMWEAnnotator");
+        propsJMWE.setProperty("customAnnotatorClass.jmwe.verbose", "false");
+        propsJMWE.setProperty("customAnnotatorClass.jmwe.underscoreReplacement", "-");
+        propsJMWE.setProperty("customAnnotatorClass.jmwe.indexData", jmweIndexData);
+        propsJMWE.setProperty("customAnnotatorClass.jmwe.detector", "Exhaustive");
+        //"Consecutive", "Exhaustive", "ProperNouns", "Complex" and "CompositeConsecutiveProperNouns"
+        propsJMWE.setProperty("annotators", "tokenize, ssplit, pos, lemma, jmwe");
         // set up pipeline
         pipeline = new StanfordCoreNLP(props);
         pipelineSentiment = new StanfordCoreNLP(propsSentiment);
+        pipelineJMWE = new StanfordCoreNLP(propsJMWE);
     }
-
+
+    public static StanfordCoreNLP getPipelineJMWE() {
+        return pipelineJMWE;
+    }
+
     public static GrammaticalStructureFactory getGsf() {
         return gsf;
     }
-
+
     public static StanfordCoreNLP getPipeline() {
         return pipeline;
     }
-
+
     public static StanfordCoreNLP getPipelineSentiment() {
         return pipelineSentiment;
     }
-
+
     public static MaxentTagger getTagger() {
         return tagger;
     }
-
+
     public static ShiftReduceParser getModel() {
         return model;
    }
-
+
     private Map getCache() throws SQLException, IOException, CustomError {
         return DataMapper.getAllStrings();
     }
-
+
     public int getlHMSMXSize() {
         return lHMSMX.size();
     }
-
+
     public int getstringCacheSize() {
         return stringCache.size();
     }
-
+
     public void initiateMYSQL() throws SQLException, IOException {
         try {
             DataMapper.createTables();
@@ -171,7 +183,7 @@ public class MYSQLDatahandler {
                     .getName()).log(Level.SEVERE, null, ex);
         }
     }
-
+
     public synchronized void checkIfUpdateMatrixes() {
         refreshMatrixFromDB = false;
         int counter = 0;
@@ -208,9 +220,8 @@ public class MYSQLDatahandler {
                 selectUpdate = secondaryIterator;
                 secondaryIterator++;
             }
-            int beginindex = selectUpdate;
             ConcurrentMap<Integer, String> strIndexNavigator = new MapMaker().concurrencyLevel(2).makeMap();
-            String get = stringCachelocal.getOrDefault(beginindex, null);
+            String get = stringCachelocal.getOrDefault(selectUpdate, null);
             if (get == null) {
                 get = stringCachelocal.get(new Random().nextInt(stringCachelocal.size() - 1));
             }
@@ -220,7 +231,7 @@ public class MYSQLDatahandler {
             strIndexNavigator.values().forEach((str) -> {
                 stringCachelocal.values().stream().filter((str1) -> (!str.equals(str1))).forEachOrdered((str1) -> {
                     boolean present = false;
-                    LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(str, null);
+                    LinkedHashMap<String, Double> orDefault = lHMSMX.getOrDefault(str, null);
                     if (orDefault != null) {
                         Double orDefault1 = orDefault.getOrDefault(str1, null);
                         if (orDefault1 != null) {
@@ -229,7 +240,7 @@ public class MYSQLDatahandler {
                        }
                    }
                    if (!present) {
-                        orDefault = LHMSMXLocal.getOrDefault(str1, null);
+                        orDefault = lHMSMX.getOrDefault(str1, null);
                        if (orDefault != null) {
                            Double orDefault1 = orDefault.getOrDefault(str, null);
                            if (orDefault1 != null) {
@@ -242,12 +253,12 @@ public class MYSQLDatahandler {
                        SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
                        Callable worker = new SentimentAnalyzerTest(str, str1, SMX);
                        futures.put(futures.size() + 1, executor.submit(worker));
-                        LinkedHashMap<String, Double> orDefault1 = LHMSMXLocal.getOrDefault(str, null);
+                        LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(str, null);
                        if (orDefault1 == null) {
                            orDefault1 = new LinkedHashMap();
                        }
                        orDefault1.put(str1, 0.0);
-                        LHMSMXLocal.put(str, orDefault1);
+                        lHMSMX.put(str, orDefault1);
                    }
                });
            });
@@ -262,12 +273,11 @@ public class MYSQLDatahandler {
                Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
            }
            LinkedHashMap<String, Double> getFuture = lHMSMX.getOrDefault(SMX.getPrimaryString(), null);
-            if (getFuture == null) {
-                getFuture = new LinkedHashMap();
+            if (getFuture != null) {
+                getFuture.put(SMX.getSecondaryString(), SMX.getDistance());
+                lHMSMX.put(SMX.getPrimaryString(), getFuture);
+                matrixUpdateList.put(matrixUpdateList.size() + 1, SMX);
            }
-            getFuture.put(SMX.getSecondaryString(), SMX.getDistance());
-            lHMSMX.put(SMX.getPrimaryString(), getFuture);
-            matrixUpdateList.put(matrixUpdateList.size() + 1, SMX);
        }
        try {
            if (!matrixUpdateList.isEmpty()) {
@@ -280,7 +290,7 @@ public class MYSQLDatahandler {
            }
        }
    }
-
+
    public synchronized void checkIfUpdateStrings() throws CustomError {
        if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
            ConcurrentMap str = MessageResponseHandler.getStr();
@@ -307,7 +317,7 @@ public class MYSQLDatahandler {
            }
        }
    }
-
+
    public synchronized String getResponseMsg(String str) throws CustomError {
        str = str.trim();
        if (str.startsWith("<@")) {
@@ -380,7 +390,7 @@ public class MYSQLDatahandler {
                + "\nScore: " + SMXreturn.getDistance());
        return SMXreturn.getSecondaryString();
    }
-
+
    public String mostSimilar(String toBeCompared, ConcurrentMap concurrentStrings) {
        int minDistance = 8;
        String similar = "";
@@ -403,7 +413,7 @@ public class MYSQLDatahandler {
        }
        return similar;
    }
-
+
    public static ConcurrentMap cutContent(ConcurrentMap str) {
        ConcurrentMap returnlist = new MapMaker().concurrencyLevel(2).makeMap();
        for (String str1 : str.values()) {
@@ -415,7 +425,7 @@ public class MYSQLDatahandler {
        }
        return returnlist;
    }
-
+
    public static ConcurrentMap filterContent(ConcurrentMap str) {
        ConcurrentMap strlistreturn = new MapMaker().concurrencyLevel(2).makeMap();
        for (String str1 : str.values()) {
@@ -523,7 +533,7 @@ public class MYSQLDatahandler {
        }
        return strlistreturn;
    }
-
+
    private ConcurrentMap removeSlacks(ConcurrentMap str) {
        ShiftReduceParser model = getModel();
        MaxentTagger tagger = getTagger();
@@ -590,7 +600,7 @@ public class MYSQLDatahandler {
        }
        return strreturn;
    }
-
+
    private ConcurrentMap verifyCalculationFitness(ConcurrentMap strmap) {
        ConcurrentMap returnmap = new MapMaker().concurrencyLevel(2).makeMap();
        ConcurrentMap allStrings = stringCache;
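The centerpiece of the MYSQLDatahandler change above is a third CoreNLP pipeline: JMWE (multi-word-expression detection) is wired in through CoreNLP's custom-annotator mechanism, where a "customAnnotatorClass.<name>" property maps the annotator name used in the "annotators" list onto the class that implements it. A minimal, self-contained sketch of the same wiring follows; the class name and sample sentence are illustrative only, and the index path mirrors the "./lib/..." hint from the patch and must be adjusted locally.

import java.util.Properties;
import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.JMWEAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class JmwePipelineSketch {

    public static void main(String[] args) {
        Properties props = new Properties();
        // "jmwe" in the annotators list is resolved via customAnnotatorClass.jmwe
        props.setProperty("customAnnotatorClass.jmwe", "edu.stanford.nlp.pipeline.JMWEAnnotator");
        props.setProperty("customAnnotatorClass.jmwe.verbose", "false");
        props.setProperty("customAnnotatorClass.jmwe.underscoreReplacement", "-");
        // path to the JMWE index file; adjust for your machine
        props.setProperty("customAnnotatorClass.jmwe.indexData", "./lib/mweindex_wordnet3.0_semcor1.6.data");
        props.setProperty("customAnnotatorClass.jmwe.detector", "Exhaustive");
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, jmwe");

        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation annotation = new Annotation("He let the cat out of the bag.");
        pipeline.annotate(annotation);
        // each sentence now carries the detected multi-word expressions
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (IMWE<IToken> mwe : sentence.get(JMWEAnnotation.class)) {
                System.out.println(mwe.getForm() + " -> " + mwe.getEntry());
            }
        }
    }
}

Per the comment carried in the patch, "Exhaustive" is one of several detector strategies ("Consecutive", "Exhaustive", "ProperNouns", "Complex", "CompositeConsecutiveProperNouns"); the pipeline is built once and reused, since constructing it loads the whole index.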
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
index d6c2458b..78ae931d 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
@@ -4,11 +4,15 @@ import FunctionLayer.LevenshteinDistance;
 import FunctionLayer.MYSQLDatahandler;
 import FunctionLayer.SimilarityMatrix;
 import com.google.common.collect.MapMaker;
+import edu.mit.jmwe.data.IMWE;
+import edu.mit.jmwe.data.IMWEDesc;
+import edu.mit.jmwe.data.IToken;
 import edu.stanford.nlp.ie.AbstractSequenceClassifier;
 import edu.stanford.nlp.ling.CoreAnnotations;
 import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.ling.HasWord;
 import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.ling.JMWEAnnotation;
 import edu.stanford.nlp.ling.TaggedWord;
 import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
 import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
@@ -48,14 +52,16 @@ import org.ejml.simple.SimpleMatrix;
  */
 public class SentimentAnalyzerTest implements Callable {
 
+    private SimilarityMatrix smxParam;
+
     private String str;
     private String str1;
-    private SimilarityMatrix smxParam;
     private ShiftReduceParser model;
     private MaxentTagger tagger;
     private GrammaticalStructureFactory gsf;
     private StanfordCoreNLP pipeline;
     private StanfordCoreNLP pipelineSentiment;
+    private StanfordCoreNLP pipelineJMWE;
     private AbstractSequenceClassifier classifier;
 
     public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) {
@@ -66,6 +72,7 @@ public class SentimentAnalyzerTest implements Callable {
         tagger = MYSQLDatahandler.getTagger();
         pipeline = MYSQLDatahandler.getPipeline();
         pipelineSentiment = MYSQLDatahandler.getPipelineSentiment();
+        pipelineJMWE = MYSQLDatahandler.getPipelineJMWE();
         gsf = MYSQLDatahandler.getGsf();
         classifier = MYSQLDatahandler.getClassifier();
     }
@@ -231,6 +238,7 @@ public class SentimentAnalyzerTest implements Callable {
                 Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
                 int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
                 String partText = sentence.toString();
+                SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
                 if (partText.length() > longest1) {
                     mainSentiment1 = sentiment;
                     longest1 = partText.length();
@@ -258,6 +266,155 @@ public class SentimentAnalyzerTest implements Callable {
                 score -= (deffLongest - deffshorter) * 50;
             }
         }
+        Annotation jmweStrAnnotation = new Annotation(str);
+        pipelineJMWE.annotate(jmweStrAnnotation);
+        List<CoreMap> sentences = jmweStrAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
+        int tokensCounter1 = 0;
+        int tokensCounter2 = 0;
+        int anotatorcounter1 = 0;
+        int anotatorcounter2 = 0;
+        int inflectedCounterPositive1 = 0;
+        int inflectedCounterPositive2 = 0;
+        int inflectedCounterNegative = 0;
+        int MarkedContinuousCounter1 = 0;
+        int MarkedContinuousCounter2 = 0;
+        int UnmarkedPatternCounter = 0;
+        ConcurrentMap<Integer, String> ITokenMapTag1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> ITokenMapTag2 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenStems1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenStems2 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenForm1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenForm2 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenGetEntry1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenGetEntry2 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenGetiPart1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenGetiPart2 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenEntryPOS1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap<Integer, String> strTokenEntryPOS2 = new MapMaker().concurrencyLevel(2).makeMap();
+        for (CoreMap sentence : sentences) {
+            for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) {
+                if (token.isInflected()) {
+                    inflectedCounterPositive1++;
+                } else {
+                    inflectedCounterNegative++;
+                }
+                strTokenForm1.put(strTokenForm1.size() + 1, token.getForm());
+                strTokenGetEntry1.put(strTokenGetEntry1.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1));
+                Collection<IMWEDesc.IPart> values = token.getPartMap().values();
+                IMWEDesc entry = token.getEntry();
+                MarkedContinuousCounter1 += entry.getMarkedContinuous();
+                UnmarkedPatternCounter += entry.getUnmarkedPattern();
+                for (IMWEDesc.IPart iPart : values) {
+                    strTokenGetiPart1.put(strTokenGetiPart1.size() + 1, iPart.getForm());
+                }
+                for (String strPostPrefix : entry.getPOS().getPrefixes()) {
+                    strTokenEntryPOS1.put(strTokenEntryPOS1.size() + 1, strPostPrefix);
+                }
+                for (IToken tokens : token.getTokens()) {
+                    ITokenMapTag1.put(ITokenMapTag1.size() + 1, tokens.getTag());
+                    for (String strtoken : tokens.getStems()) {
+                        strTokenStems1.put(strTokenStems1.size() + 1, strtoken);
+                    }
+                }
+                tokensCounter1++;
+            }
+            anotatorcounter1++;
+        }
+        jmweStrAnnotation = new Annotation(str1);
+        pipelineJMWE.annotate(jmweStrAnnotation);
+        sentences = jmweStrAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
+        for (CoreMap sentence : sentences) {
+            for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) {
+                if (token.isInflected()) {
+                    inflectedCounterPositive2++;
+                } else {
+                    inflectedCounterNegative--;
+                }
+                strTokenForm2.put(strTokenForm2.size() + 1, token.getForm());
+                strTokenGetEntry2.put(strTokenGetEntry2.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1));
+                Collection<IMWEDesc.IPart> values = token.getPartMap().values();
+                IMWEDesc entry = token.getEntry();
+                MarkedContinuousCounter2 += entry.getMarkedContinuous();
+                UnmarkedPatternCounter += entry.getUnmarkedPattern();
+                for (IMWEDesc.IPart iPart : values) {
+                    strTokenGetiPart2.put(strTokenGetiPart2.size() + 1, iPart.getForm());
+                }
+                for (String strPostPrefix : entry.getPOS().getPrefixes()) {
+                    strTokenEntryPOS2.put(strTokenEntryPOS2.size() + 1, strPostPrefix);
+                }
+                for (IToken tokens : token.getTokens()) {
+                    ITokenMapTag2.put(ITokenMapTag2.size() + 1, tokens.getTag());
+                    for (String strtoken : tokens.getStems()) {
+                        strTokenStems2.put(strTokenStems2.size() + 1, strtoken);
+                    }
+                }
+                tokensCounter2++;
+            }
+            anotatorcounter2++;
+        }
+        for (String strTokenPos1 : strTokenEntryPOS1.values()) {
+            for (String strTokenPos2 : strTokenEntryPOS2.values()) {
+                if (strTokenPos1.equals(strTokenPos2)) {
+                    score += 500;
+                }
+            }
+        }
+        score += UnmarkedPatternCounter * 1600;
+        if (MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0) {
+            score += MarkedContinuousCounter1 > MarkedContinuousCounter2 ? (MarkedContinuousCounter1 - MarkedContinuousCounter2) * 500
+                    : (MarkedContinuousCounter2 - MarkedContinuousCounter1) * 500;
+        }
+        for (String strTokeniPart1 : strTokenGetiPart1.values()) {
+            for (String strTokeniPart2 : strTokenGetiPart2.values()) {
+                if (strTokeniPart1.equals(strTokeniPart2)) {
+                    score += 400;
+                }
+            }
+        }
+        for (String strTokenEntry1 : strTokenGetEntry1.values()) {
+            for (String strTokenEntry2 : strTokenGetEntry2.values()) {
+                if (strTokenEntry1.equals(strTokenEntry2)) {
+                    score += 2500;
+                }
+            }
+        }
+        for (String strmapTag : ITokenMapTag1.values()) {
+            for (String strmapTag1 : ITokenMapTag2.values()) {
+                if (strmapTag.equals(strmapTag1)) {
+                    score += 1450;
+                }
+            }
+        }
+        for (String strTokenForm1itr1 : strTokenForm1.values()) {
+            for (String strTokenForm1itr2 : strTokenForm2.values()) {
+                if (strTokenForm1itr1.equals(strTokenForm1itr2)) {
+                    score += 2600;
+                } else if (strTokenForm1itr1.contains(strTokenForm1itr2)) {
+                    score += 500;
+                }
+            }
+        }
+        for (String strTokenStem : strTokenStems1.values()) {
+            for (String strTokenStem1 : strTokenStems2.values()) {
+                if (strTokenStem.equals(strTokenStem1)) {
+                    score += 1500;
+                }
+            }
+        }
+        if (inflectedCounterPositive1 + inflectedCounterPositive2 > inflectedCounterNegative && inflectedCounterNegative > 0) {
+            score += (inflectedCounterPositive1 - inflectedCounterNegative) * 650;
+        }
+        if (inflectedCounterPositive1 > 0 && inflectedCounterPositive2 > 0) {
+            score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550;
+        }
+        if (anotatorcounter1 > 1 && anotatorcounter2 > 1) {
+            score += (anotatorcounter1 + anotatorcounter2) * 400;
+        }
+        if (tokensCounter1 > 0 && tokensCounter2 > 0) {
+            score += (tokensCounter1 + tokensCounter2) * 400;
+        } else {
+            score -= tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
+        }
         LevenshteinDistance leven = new LevenshteinDistance(str, str1);
         int SentenceScoreDiff = leven.computeLevenshteinDistance();
         SentenceScoreDiff *= 15;
diff --git a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
index 2f2e26e9..3c04eabe 100644
--- a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
+++ b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
@@ -46,7 +46,7 @@ public class DiscordHandler {
         MYSQLDatahandler.shiftReduceParserInitiate();
         MYSQLDatahandler.instance.instantiateExecutor();
         if (MYSQLDatahandler.instance.getstringCacheSize() != 0) {
-            while (MYSQLDatahandler.instance.getlHMSMXSize() * MYSQLDatahandler.instance.getlHMSMXSize() * 1.5
+            while (MYSQLDatahandler.instance.getlHMSMXSize() * MYSQLDatahandler.instance.getlHMSMXSize() * 2
                     < (MYSQLDatahandler.instance.getstringCacheSize()
                     * MYSQLDatahandler.instance.getstringCacheSize())
                     - MYSQLDatahandler.instance.getstringCacheSize()) {
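The long block added to SentimentAnalyzerTest above scores similarity by pairwise overlap: for each kind of JMWE feature collected from the two sentences (POS prefixes, entry suffixes, token tags, surface forms, stems), every match between the two collections adds a fixed weight to the running score. A reduced sketch of that one pattern follows; the weights are the ones from the patch, while the class and method names are illustrative only and not part of the commit.

import java.util.List;

public final class OverlapScoreSketch {

    // Mirrors the nested-loop pattern in the patch: every value collected for the
    // first sentence is compared against every value collected for the second,
    // and each match adds a fixed weight to the running score.
    static double overlapScore(List<String> first, List<String> second, double weight) {
        double score = 0.0;
        for (String a : first) {
            for (String b : second) {
                if (a.equals(b)) {
                    score += weight;
                }
            }
        }
        return score;
    }

    public static void main(String[] args) {
        // the patch weights POS-prefix matches at 500 and stem matches at 1500
        System.out.println(overlapScore(List.of("VB", "NN"), List.of("NN", "JJ"), 500.0));      // 500.0
        System.out.println(overlapScore(List.of("cat", "bag"), List.of("bag", "cat"), 1500.0)); // 3000.0
    }
}

Note that duplicates multiply: because every pair is compared, a value occurring m times on one side and n times on the other contributes m*n*weight, which is presumably why the patch also balances the score with the counter-based penalties at the end of the block.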