diff --git a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java index de3b1938..bfb4376a 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java @@ -12,28 +12,24 @@ import com.google.common.collect.MapMaker; import edu.stanford.nlp.ie.AbstractSequenceClassifier; import edu.stanford.nlp.ie.crf.CRFClassifier; import edu.stanford.nlp.ling.CoreLabel; -import edu.stanford.nlp.ling.HasWord; -import edu.stanford.nlp.ling.TaggedWord; -import edu.stanford.nlp.ling.Word; import edu.stanford.nlp.parser.lexparser.LexicalizedParser; -import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.CoreDocument; import edu.stanford.nlp.pipeline.StanfordCoreNLP; -import edu.stanford.nlp.process.DocumentPreprocessor; import edu.stanford.nlp.tagger.maxent.MaxentTagger; import edu.stanford.nlp.trees.GrammaticalStructureFactory; -import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreebankLanguagePack; import java.io.IOException; -import java.io.StringReader; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; +import java.util.Random; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; @@ -51,20 +47,24 @@ import java.util.logging.Logger; */ public class Datahandler { - public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(6, TimeUnit.MINUTES); + public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES); public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS); public static Datahandler instance = new Datahandler(); private static volatile Double minDistance; private static Annotation strAnno; private static Annotation strAnnoSentiment; private static Annotation strAnnoJMWE; + private static CoreDocument coreDoc; private volatile boolean refreshMatrixFromDB; private static volatile int secondaryIterator = 0; private static volatile Double preRelationCounters = 0.0; + private static volatile Double preRelationUserCounters = 0.0; private final ConcurrentMap stringCache; private static ConcurrentMap pipelineAnnotationCache; private static ConcurrentMap pipelineSentimentAnnotationCache; private static ConcurrentMap jmweAnnotationCache; + private static ConcurrentMap coreDocumentAnnotationCache; + private static ConcurrentMap conversationMatchMap; private LinkedHashMap> lHMSMX = new LinkedHashMap(); private final Stopwatch stopwatch; private final Stopwatch stopwatch1; @@ -74,10 +74,12 @@ public class Datahandler { private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; - private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz"; + private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz"; + private static String nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz"; + private static String nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz"; + private static String nerModelCaseless = "edu/stanford/nlp/models/truecase/truecasing.fast.caseless.qn.ser.gz"; private static MaxentTagger tagger; - private static ShiftReduceParser model; - private static String[] options = {"-maxLength", "90"}; + private static String[] options = {"-maxLength", "100"}; private static Properties props = new Properties(); private static Properties propsSentiment = new Properties(); private static GrammaticalStructureFactory gsf; @@ -92,19 +94,16 @@ public class Datahandler { this.stopwatch = Stopwatch.createUnstarted(); this.stopwatch1 = Stopwatch.createStarted(); this.stringCache = new MapMaker().concurrencyLevel(2).makeMap(); - //cant sadly just have one pipelines for every annotation, one pipeline per annotation is required this.jmweAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); this.pipelineAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); this.pipelineSentimentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); + this.coreDocumentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); + this.conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap(); } public void shiftReduceParserInitiate() { //got 8 cores - CountDownLatch cdl = new CountDownLatch(4); - new Thread(() -> { - model = ShiftReduceParser.loadModel(shiftReduceParserPath, options); - cdl.countDown(); - }).start(); + CountDownLatch cdl = new CountDownLatch(3); new Thread(() -> { try { classifier = CRFClassifier.getClassifierNoExceptions(nerModel); @@ -115,7 +114,6 @@ public class Datahandler { }).start(); new Thread(() -> { propsSentiment.setProperty("parse.model", lexParserEnglishRNN); - propsSentiment.setProperty("ner.model", nerModel); propsSentiment.setProperty("sentiment.model", sentimentModel); propsSentiment.setProperty("parse.maxlen", "90"); propsSentiment.setProperty("threads", "25"); @@ -123,12 +121,13 @@ public class Datahandler { propsSentiment.setProperty("tokenize.maxlen", "90"); propsSentiment.setProperty("ssplit.maxlen", "90"); propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment"); //coref too expensive memorywise, does it need depparse? - propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete"); + propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep"); pipelineSentiment = new StanfordCoreNLP(propsSentiment); tagger = new MaxentTagger(taggerPath); cdl.countDown(); }).start(); new Thread(() -> { + props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,ner"); props.setProperty("parse.model", shiftReduceParserPath); props.setProperty("parse.maxlen", "90"); props.setProperty("parse.binaryTrees", "true"); @@ -137,8 +136,11 @@ public class Datahandler { props.setProperty("tokenize.maxlen", "90"); props.setProperty("ssplit.maxlen", "90"); props.setProperty("lemma.maxlen", "90"); - props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse"); - props.setProperty("tokenize.options", "untokenizable=firstDelete"); + props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3); + props.setProperty("ner.combinationMode", "HIGH_RECALL"); + props.setProperty("regexner.ignorecase", "true"); + props.setProperty("ner.fine.regexner.ignorecase", "true"); + props.setProperty("tokenize.options", "untokenizable=firstKeep"); //firstKeep //firstDelete pipeline = new StanfordCoreNLP(props); cdl.countDown(); }).start(); @@ -191,10 +193,6 @@ public class Datahandler { return tagger; } - public static ShiftReduceParser getModel() { - return model; - } - private Map getCache() throws SQLException, IOException, CustomError { return DataMapper.getAllStrings(); } @@ -258,6 +256,9 @@ public class Datahandler { Annotationspipeline.put(str, strAnno); Annotation strAnno2 = new Annotation(str); AnnotationspipelineSentiment.put(str, strAnno2); + CoreDocument CD = new CoreDocument(str); + pipeline.annotate(CD); + coreDocumentAnnotationCache.put(str, CD); }); pipeline.annotate(Annotationspipeline.values()); pipelineSentiment.annotate(AnnotationspipelineSentiment.values()); @@ -270,7 +271,6 @@ public class Datahandler { } } - //synchronized public synchronized void updateMatrixes() { refreshMatrixFromDB = false; if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) { @@ -278,7 +278,6 @@ public class Datahandler { lHMSMX = DataMapper.getAllRelationScores(); stopwatch1.reset(); } - //requiring atleast 10 entries ensures no issues in case of empty stringcache if (stringCache.values().size() > 10 && !refreshMatrixFromDB) { ConcurrentMap stringCachelocal = stringCache; int selectUpdate = -1; @@ -293,13 +292,22 @@ public class Datahandler { } ij2++; } - if (selectUpdate == -1 || selectUpdate + 1 == stringCachelocal.size()) { - int valueSize = stringCachelocal.size(); - if (secondaryIterator + iteratorCap >= valueSize) { - secondaryIterator = 0; + //secondaryIterator + if (selectUpdate == -1 || selectUpdate + 1 >= stringCachelocal.size() || stringCachelocal.get(selectUpdate) == null) { + Integer iterator = 0; + while (iterator == 0) { + if (secondaryIterator >= stringCachelocal.size()) { + secondaryIterator = 0; + } + String get = stringCachelocal.get(secondaryIterator); + if (get == null) { + secondaryIterator++; + } else { + selectUpdate = secondaryIterator; + iterator++; + } } - selectUpdate = secondaryIterator; - secondaryIterator += iteratorCap; + secondaryIterator++; } String getStringCacheStr = stringCachelocal.get(selectUpdate); ConcurrentMap matrixUpdateMap = new MapMaker().concurrencyLevel(2).makeMap(); @@ -339,7 +347,8 @@ public class Datahandler { SimilarityMatrix SMX = new SimilarityMatrix(getStringCacheStr, str1); Callable worker = new SentimentAnalyzerTest(getStringCacheStr, str1, SMX, jmweAnnotationCache.get(getStringCacheStr), jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(getStringCacheStr), pipelineAnnotationCache.get(str1), - pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1)); + pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1), + coreDocumentAnnotationCache.get(getStringCacheStr), coreDocumentAnnotationCache.get(str1)); futures.put(futures.size() + 1, executor.submit(worker)); } } @@ -375,6 +384,64 @@ public class Datahandler { } } + public ConcurrentMap removeNonSensicalStrings(ConcurrentMap strmap) { + ConcurrentMap strmapreturn = new MapMaker().concurrencyLevel(2).makeMap(); + int relationCap = 20; + ConcurrentMap strCacheLocal = stringCache.size() < 150 ? strmap : stringCache; + ConcurrentMap localJMWEMap = getMultipleJMWEAnnotation(strmap.values()); + ConcurrentMap localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values()); + ConcurrentMap localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values()); + ConcurrentMap localCoreDocumentMap = getMultipleCoreDocuments(strmap.values()); + for (String str : strmap.values()) { + ConcurrentMap> futures = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap> strsmaps = new MapMaker().concurrencyLevel(2).makeMap(); + for (String str1 : strCacheLocal.values()) { + HashMap HM1 = new HashMap(); + HM1.put(str, str1); + if (!str.equals(str1) && !strsmaps.values().contains(HM1)) { + SimilarityMatrix SMX = new SimilarityMatrix(str, str1); + Callable worker; + if (stringCache.size() < 150) { + worker = new SentimentAnalyzerTest(str, str1, SMX, + localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str), + localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str), + localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1)); + } else { + worker = new SentimentAnalyzerTest(str, str1, SMX, + localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str), + pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str), + pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1)); + } + HashMap HM = new HashMap(); + HM.put(SMX.getPrimaryString(), SMX.getSecondaryString()); + strsmaps.put(strsmaps.size() + 1, HM); + futures.put(futures.size() + 1, executor.submit(worker)); + } + } + int positiveRelationCounter = 0; + for (Future future : futures.values()) { + try { + SimilarityMatrix getSMX = future.get(5, TimeUnit.SECONDS); + Double scoreRelationNewMsgToRecentMsg = getSMX.getDistance(); + if (scoreRelationNewMsgToRecentMsg >= 5000.0) { + System.out.println("scoreRelationNewMsgToRecentMsg: " + scoreRelationNewMsgToRecentMsg + "\n"); + positiveRelationCounter++; + if (positiveRelationCounter > relationCap) { + strmapreturn.put(strmapreturn.size() + 1, str); + } + if (positiveRelationCounter > relationCap) { + System.out.println("strmapreturn size: " + strmapreturn.size() + "\n"); + break; + } + } + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex); + } + } + } + return strmapreturn; + } + public synchronized void checkIfUpdateStrings(boolean hlStatsMsg) throws CustomError { if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) { ConcurrentMap str = MessageResponseHandler.getStr(); @@ -382,15 +449,30 @@ public class Datahandler { str = filterContent(str); str = removeSlacks(str); System.out.println("finished removeSlacks \n" + str.size() + "\n"); + str = removeNonSensicalStrings(str); + System.out.println("POST removeNonSensicalStrings size: " + str.size() + "\n"); str = annotationCacheUpdate(str); System.out.println("annotationCacheUpdate str size POST: " + str.size() + "\n"); - try { - DataMapper.InsertMYSQLStrings(str); - } catch (CustomError ex) { - Logger.getLogger(Datahandler.class - .getName()).log(Level.SEVERE, null, ex); + ConcurrentMap strf = str; + if (!stringCache.isEmpty()) { + new Thread(() -> { + try { + DataMapper.InsertMYSQLStrings(strf); + } catch (CustomError ex) { + Logger.getLogger(Datahandler.class + .getName()).log(Level.SEVERE, null, ex); + } + MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap()); + }).start(); + } else { + try { + DataMapper.InsertMYSQLStrings(strf); + } catch (CustomError ex) { + Logger.getLogger(Datahandler.class + .getName()).log(Level.SEVERE, null, ex); + } + MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap()); } - MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap()); if (!stopwatch.isRunning()) { stopwatch.start(); } else { @@ -404,44 +486,7 @@ public class Datahandler { if (str.startsWith("<@")) { str = str.substring(str.indexOf("> ") + 2); } - final LinkedHashMap> LHMSMXLocal = lHMSMX; - ConcurrentMap strArrs = stringCache; - double Score = -10000; SimilarityMatrix SMXreturn = new SimilarityMatrix("", ""); - System.out.println("pre mostSimilarSTR \n"); - String mostSimilarSTR = mostSimilar(str, strArrs, MostRecent); - if (mostSimilarSTR != null) { - LinkedHashMap orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null); - if (orDefault != null) { - for (Entry entrySet : orDefault.entrySet()) { - double smxDistance = entrySet.getValue(); - if (smxDistance > Score) { - Score = smxDistance; - SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance); - } - } - } - for (Entry> values1 : LHMSMXLocal.entrySet()) { - LinkedHashMap value = values1.getValue(); - for (Entry keystr : value.entrySet()) { - if (keystr.getKey().equals(mostSimilarSTR)) { - double smxDistance = keystr.getValue(); - if (smxDistance > Score) { - Score = smxDistance; - SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance); - } - } - } - } - if (!SMXreturn.getPrimaryString().isEmpty()) { - if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) { - return SMXreturn.getSecondaryString(); - } else { - return SMXreturn.getPrimaryString(); - } - } - } - System.out.println("none within 8 range"); ConcurrentMap strCache = stringCache; ConcurrentMap> futureslocal = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap futurereturn = new MapMaker().concurrencyLevel(2).makeMap(); @@ -449,12 +494,21 @@ public class Datahandler { getSingularAnnotation(strF); strCache.values().parallelStream().forEach((str1) -> { if (!strF.equals(str1)) { - SimilarityMatrix SMX = new SimilarityMatrix(strF, str1); - Callable worker = new SentimentAnalyzerTest(strF, str1, SMX, - strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno, - pipelineAnnotationCache.get(str1), strAnnoSentiment, - pipelineSentimentAnnotationCache.get(str1)); - futureslocal.put(futureslocal.size() + 1, executor.submit(worker)); + boolean present = false; + for (String strCons : conversationMatchMap.values()) { + if (strCons.equals(str1)) { + present = true; + break; + } + } + if (!present) { + SimilarityMatrix SMX = new SimilarityMatrix(strF, str1); + Callable worker = new SentimentAnalyzerTest(strF, str1, SMX, + strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno, + pipelineAnnotationCache.get(str1), strAnnoSentiment, + pipelineSentimentAnnotationCache.get(str1), coreDoc, coreDocumentAnnotationCache.get(str1)); + futureslocal.put(futureslocal.size() + 1, executor.submit(worker)); + } } }); futureslocal.values().parallelStream().forEach((future) -> { @@ -466,48 +520,30 @@ public class Datahandler { System.out.println("ex getResponsemsg: " + ex.getMessage() + "\n"); } }); - ConcurrentMap smxUpdateReturn = new MapMaker().concurrencyLevel(2).makeMap(); + preRelationCounters = 0.0; + preRelationUserCounters = 0.0; + conversationMatchMap.put(conversationMatchMap.size() + 1, MostRecent); + Double scoreRelationNewMsgToRecentMsg = 0.0; + for (String conversationStr : conversationMatchMap.values()) { + scoreRelationNewMsgToRecentMsg += getScoreRelationNewMsgToRecentMsg(strF, conversationStr); + } + boolean relatedReponse = scoreRelationNewMsgToRecentMsg >= 250; + if (!relatedReponse) { + conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap(); + } for (SimilarityMatrix SMX : futurereturn.values()) { Double scoreRelation = 500.0; - boolean foundmatch = false; - if (!MostRecent.isEmpty()) { - LinkedHashMap orDefault1 = lHMSMX.getOrDefault(MostRecent, null); - if (orDefault1 != null) { - Collection orDefaultstrs = orDefault1.keySet(); - for (String strs : orDefaultstrs) { - if (strs.equals(SMX.getSecondaryString())) { - scoreRelation = orDefault1.get(SMX.getSecondaryString()); - foundmatch = true; - break; - } - } - } - if (!foundmatch) { - orDefault1 = lHMSMX.getOrDefault(SMX.getSecondaryString(), null); - if (orDefault1 != null) { - Collection orDefaultstrs = orDefault1.keySet(); - for (String strs : orDefaultstrs) { - if (strs.equals(MostRecent)) { - scoreRelation = orDefault1.get(MostRecent); - foundmatch = true; - break; - } - } - } + Double scoreRelationLastUserMsg = SMX.getDistance(); + if (relatedReponse) { + for (String conversationStr : conversationMatchMap.values()) { + scoreRelation += getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), conversationStr); } } - if (!foundmatch) { - scoreRelation = getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), MostRecent); - } - if (scoreRelation > (25 * smxUpdateReturn.size())) { - smxUpdateReturn.put(smxUpdateReturn.size() + 1, SMX); - } - } - for (SimilarityMatrix SMX : smxUpdateReturn.values()) { - double distance = SMX.getDistance(); - if (distance > Score) { - Score = distance; + Double totalRelation = scoreRelation + scoreRelationLastUserMsg; + if (totalRelation > preRelationCounters + preRelationUserCounters && scoreRelationLastUserMsg > preRelationUserCounters) { SMXreturn = SMX; + preRelationCounters = scoreRelation; + preRelationUserCounters = scoreRelationLastUserMsg; } } System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString() @@ -524,6 +560,44 @@ public class Datahandler { notactualList.add(str); ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList); strAnnoJMWE = jmweAnnotation.values().iterator().next(); + CoreDocument coreDocument = new CoreDocument(str); + pipeline.annotate(coreDocument); + coreDoc = coreDocument; + } + + public ConcurrentMap getMultipleJMWEAnnotation(Collection str) { + ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str); + return jmweAnnotation; + } + + public ConcurrentMap getMultiplePipelineAnnotation(Collection str) { + ConcurrentMap pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap(); + for (String str1 : str) { + Annotation strAnno1 = new Annotation(str1); + pipelineAnnotationMap.put(str1, strAnno1); + } + pipeline.annotate(pipelineAnnotationMap.values()); + return pipelineAnnotationMap; + } + + public ConcurrentMap getMultiplePipelineSentimentAnnotation(Collection str) { + ConcurrentMap pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap(); + for (String str1 : str) { + Annotation strAnno1 = new Annotation(str1); + pipelineAnnotationMap.put(str1, strAnno1); + } + pipelineSentiment.annotate(pipelineAnnotationMap.values()); + return pipelineAnnotationMap; + } + + public ConcurrentMap getMultipleCoreDocuments(Collection str) { + ConcurrentMap pipelineCoreDocumentAnnotations = new MapMaker().concurrencyLevel(2).makeMap(); + str.parallelStream().forEach((str1) -> { + CoreDocument coreDocument = new CoreDocument(str1); + pipeline.annotate(coreDocument); + pipelineCoreDocumentAnnotations.put(str1, coreDocument); + }); + return pipelineCoreDocumentAnnotations; } private Double getScoreRelationNewMsgToRecentMsg(String str, String mostRecentMsg) { @@ -531,7 +605,8 @@ public class Datahandler { Callable worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX, jmweAnnotationCache.get(str), jmweAnnotationCache.get(mostRecentMsg), pipelineAnnotationCache.get(str), pipelineAnnotationCache.get(mostRecentMsg), pipelineSentimentAnnotationCache.get(str), - pipelineSentimentAnnotationCache.get(mostRecentMsg)); + pipelineSentimentAnnotationCache.get(mostRecentMsg), coreDocumentAnnotationCache.get(str), + coreDocumentAnnotationCache.get(mostRecentMsg)); SimilarityMatrix callSMX = null; try { callSMX = worker.call(); @@ -548,14 +623,15 @@ public class Datahandler { public String mostSimilar(String toBeCompared, ConcurrentMap concurrentStrings, String MostRecent) { similar = ""; - minDistance = 7.0; - preRelationCounters = 500.0; + minDistance = 6.0; + preRelationCounters = 0.0; + preRelationUserCounters = 0.0; + getSingularAnnotation(toBeCompared); ConcurrentMap similardistances = new MapMaker().concurrencyLevel(2).makeMap(); concurrentStrings.values().parallelStream().forEach((str) -> { LevenshteinDistance leven = new LevenshteinDistance(toBeCompared, str); double distance = leven.computeLevenshteinDistance(); if (distance <= minDistance) { - minDistance = distance; System.out.println("distance: " + distance + "\n"); similardistances.put(similardistances.size() + 1, str); } @@ -571,7 +647,8 @@ public class Datahandler { Double value = defaultEntry.getValue(); String key = defaultEntry.getKey(); if (value > maxDistance) { - Double RelationScore = 500.0; + Double RelationScoreLastMsg = 500.0; + Double RelationScoreLastUserMsg = 500.0; boolean foundmatch = false; if (!MostRecent.isEmpty()) { LinkedHashMap orDefault1 = lHMSMX.getOrDefault(MostRecent, null); @@ -579,7 +656,7 @@ public class Datahandler { Collection orDefaultstrs = orDefault1.keySet(); for (String strs : orDefaultstrs) { if (strs.equals(key)) { - RelationScore = orDefault1.get(key); + RelationScoreLastMsg = orDefault1.get(key); foundmatch = true; break; } @@ -591,7 +668,7 @@ public class Datahandler { Collection orDefaultstrs = orDefault1.keySet(); for (String strs : orDefaultstrs) { if (strs.equals(MostRecent)) { - RelationScore = orDefault1.get(MostRecent); + RelationScoreLastMsg = orDefault1.get(MostRecent); foundmatch = true; break; } @@ -600,12 +677,20 @@ public class Datahandler { } } if (!foundmatch) { - RelationScore = getScoreRelationNewMsgToRecentMsg(key, MostRecent); + RelationScoreLastMsg = getScoreRelationNewMsgToRecentMsg(key, MostRecent); } - if (RelationScore > preRelationCounters) { - maxDistance = value; - similar = defaultEntry.getKey(); - preRelationCounters = RelationScore; + RelationScoreLastUserMsg = getScoreRelationNewMsgToRecentMsg(key, toBeCompared); + Double totalRelation = RelationScoreLastMsg + RelationScoreLastUserMsg; + if (totalRelation > preRelationCounters + preRelationUserCounters) { + if (RelationScoreLastMsg + 500 > preRelationUserCounters && RelationScoreLastUserMsg > preRelationCounters + || RelationScoreLastUserMsg + 500 > preRelationCounters && RelationScoreLastMsg > preRelationUserCounters) { + if (RelationScoreLastMsg > preRelationCounters && RelationScoreLastUserMsg > preRelationUserCounters) { + maxDistance = value; + similar = defaultEntry.getKey(); + preRelationCounters = RelationScoreLastMsg; + preRelationUserCounters = RelationScoreLastUserMsg; + } + } } } } @@ -736,76 +821,26 @@ public class Datahandler { } private ConcurrentMap removeSlacks(ConcurrentMap str) { - ShiftReduceParser modelLocal = getModel(); - MaxentTagger taggerLocal = getTagger(); ConcurrentMap strreturn = new MapMaker().concurrencyLevel(2).makeMap(); + if (stringCache.isEmpty()) { + return str; + } + Collection values = stringCache.values(); str.values().parallelStream().forEach(str1 -> { - ConcurrentMap TGWList = new MapMaker().concurrencyLevel(2).makeMap(); - DocumentPreprocessor tokenizer = null; - try { - tokenizer = new DocumentPreprocessor(new StringReader(str1)); - } catch (Exception ex) { - System.out.println("failed tokenizer removeslacks: " + ex.getLocalizedMessage() + "\n"); - tokenizer = null; - } - if (tokenizer != null) { - for (List sentence : tokenizer) { - int counter = 0; - List taggedWords; - List tagged1 = taggerLocal.tagSentence(sentence); - Tree tree = modelLocal.apply(tagged1); - taggedWords = tree.taggedYield(); - for (TaggedWord TGW : taggedWords) { - if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) { - TGWList.put(TGWList.size() + 1, TGW.tag()); - counter++; - } - if (counter > 3) { - int addCounter = 0; - ConcurrentMap wordList = new MapMaker().concurrencyLevel(2).makeMap(); - for (Word lab : tree.yieldWords()) { - if (lab != null && lab.word() != null) { - if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) { - wordList.put(wordList.size() + 1, lab); - addCounter++; - } - } - } - if (addCounter > 3) { - addCounter = 0; - ConcurrentMap HWlist = new MapMaker().concurrencyLevel(2).makeMap(); - for (HasWord HW : tree.yieldHasWord()) { - if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) { - addCounter++; - HWlist.put(HWlist.size() + 1, HW); - } - } - if (addCounter > 3) { - boolean tooclosematch = false; - Collection values = stringCache.values(); - for (String strVals : values) { - LevenshteinDistance leven = new LevenshteinDistance(strVals, str1); - double Distance = leven.computeLevenshteinDistance(); - int maxpermittedDistance = 5; - if (Distance < maxpermittedDistance) { - tooclosematch = true; - break; - } - } - if (!tooclosematch) { - strreturn.put(strreturn.size() + 1, str1); - System.out.println("adding strreturn str1: " + str1 + "\n"); - } - } - } - break; - } - } - if (counter > 3) { - break; - } + boolean tooclosematch = false; + for (String strVals : values) { + LevenshteinDistance leven = new LevenshteinDistance(strVals, str1); + double Distance = leven.computeLevenshteinDistance(); + Double maxpermittedDistance = 2.5; + if (Distance < maxpermittedDistance) { + tooclosematch = true; + break; } } + if (!tooclosematch) { + strreturn.put(strreturn.size() + 1, str1); + System.out.println("adding strreturn str1: " + str1 + "\n"); + } }); return strreturn; } @@ -818,22 +853,36 @@ public class Datahandler { ConcurrentMap Annotationspipeline = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(2).makeMap(); strmap.values().parallelStream().forEach(str -> { - Annotation strAnno = new Annotation(str); - Annotationspipeline.put(str, strAnno); + Annotation strAnno1 = new Annotation(str); + Annotationspipeline.put(str, strAnno1); Annotation strAnno2 = new Annotation(str); AnnotationspipelineSentiment.put(str, strAnno2); + try { + CoreDocument CD = new CoreDocument(str); + pipeline.annotate(CD); + coreDocumentAnnotationCache.put(str, CD); + } catch (Exception e) { + System.out.println("failed document annotation: " + e + "\n"); + } stringCache.put(stringCache.size() + 1, str); }); System.out.println("pre iterator annotation update \n"); pipeline.annotate(Annotationspipeline.values()); pipelineSentiment.annotate(AnnotationspipelineSentiment.values()); Annotationspipeline.entrySet().forEach(pipelineEntry -> { - pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue()); + if (pipelineEntry != null) { + pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue()); + } else { + System.out.println("failed pipeline cache \n"); + } }); AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> { - pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue()); + if (pipelineEntry != null) { + pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue()); + } else { + System.out.println("failed sentiment cache \n"); + } }); return strmap; } - } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java b/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java index 40e0f292..bca6469e 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java @@ -6,6 +6,8 @@ package FunctionLayer; import com.google.common.collect.MapMaker; +import edu.stanford.nlp.pipeline.CoreDocument; +import edu.stanford.nlp.pipeline.CoreEntityMention; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentMap; @@ -15,17 +17,17 @@ import java.util.concurrent.ConcurrentMap; * @author install1 */ public class MessageResponseHandler { - + private static ConcurrentMap str = new MapMaker().concurrencyLevel(2).makeMap(); - + public static ConcurrentMap getStr() { return str; } - + public static void setStr(ConcurrentMap str) { MessageResponseHandler.str = str; } - + public static void getMessage(String message) { if (message != null && !message.isEmpty()) { message = message.replace("@", ""); @@ -38,8 +40,8 @@ public class MessageResponseHandler { str.put(str.size() + 1, message); } } - - public synchronized static String selectReponseMessage(String toString, String mostRecentMsg) throws CustomError { + + public synchronized static String selectReponseMessage(String toString, String mostRecentMsg, String personName) throws CustomError { ConcurrentMap str1 = new MapMaker().concurrencyLevel(2).makeMap(); str1.put(str1.size() + 1, toString); str1 = Datahandler.cutContent(str1, false); @@ -50,6 +52,21 @@ public class MessageResponseHandler { } } String getResponseMsg = Datahandler.instance.getResponseMsg(strreturn, mostRecentMsg); + getResponseMsg = checkPersonPresnetInSentence(personName, getResponseMsg); return getResponseMsg; } + + private static String checkPersonPresnetInSentence(String personName, String responseMsg) { + String strreturn = responseMsg; + CoreDocument pipelineCoreDcoument = new CoreDocument(responseMsg); + Datahandler.getPipeline().annotate(pipelineCoreDcoument); + for (CoreEntityMention em : pipelineCoreDcoument.entityMentions()) { + String entityType = em.entityType(); + if (entityType.equals("PERSON")) { + String replace = strreturn.replaceFirst(em.text(), personName); + return replace; + } + } + return responseMsg; + } } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java index dfa93fcc..f585d09b 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java @@ -17,6 +17,8 @@ import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.CoreDocument; +import edu.stanford.nlp.pipeline.CoreEntityMention; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.process.CoreLabelTokenFactory; import edu.stanford.nlp.process.DocumentPreprocessor; @@ -38,6 +40,7 @@ import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.OptionalDouble; import java.util.Set; @@ -62,7 +65,6 @@ public class SentimentAnalyzerTest implements Callable { private SimilarityMatrix smxParam; private String str; private String str1; - private ShiftReduceParser model; private MaxentTagger tagger; private GrammaticalStructureFactory gsf; private StanfordCoreNLP pipeline; @@ -74,13 +76,15 @@ public class SentimentAnalyzerTest implements Callable { private Annotation pipelineAnnotation2; private Annotation pipelineAnnotation1Sentiment; private Annotation pipelineAnnotation2Sentiment; + private CoreDocument pipelineCoreDcoument1; + private CoreDocument pipelineCoreDcoument2; public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam, Annotation str1Annotation, Annotation str2Annotation, - Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2) { + Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2, + CoreDocument pipelineCoreDcoument1, CoreDocument pipelineCoreDcoument2) { this.str = str; this.str1 = str1; this.smxParam = smxParam; - this.model = Datahandler.getModel(); this.tagger = Datahandler.getTagger(); this.pipeline = Datahandler.getPipeline(); this.pipelineSentiment = Datahandler.getPipelineSentiment(); @@ -90,8 +94,10 @@ public class SentimentAnalyzerTest implements Callable { this.jmweStrAnnotation2 = str2Annotation; this.pipelineAnnotation1 = strPipeline1; this.pipelineAnnotation2 = strPipeline2; - this.pipelineAnnotation1Sentiment = strPipeSentiment1; //maybe process? + this.pipelineAnnotation1Sentiment = strPipeSentiment1; this.pipelineAnnotation2Sentiment = strPipeSentiment2; + this.pipelineCoreDcoument1 = pipelineCoreDcoument1; + this.pipelineCoreDcoument2 = pipelineCoreDcoument2; } @Override @@ -106,12 +112,14 @@ public class SentimentAnalyzerTest implements Callable { = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete"); tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { - taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); + taggedwordlist1.add(tagger.tagSentence(sentence)); + //taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } tokenizer = new DocumentPreprocessor(new StringReader(str)); tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { - taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); + taggedwordlist2.add(tagger.tagSentence(sentence)); + //taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } int counter = 0; int counter1 = 0; @@ -817,6 +825,74 @@ public class SentimentAnalyzerTest implements Callable { double SentenceScoreDiff = leven.computeLevenshteinDistance(); SentenceScoreDiff *= 15; score -= SentenceScoreDiff; + ConcurrentMap nerEntities1 = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap nerEntities2 = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap nerEntities3 = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap nerEntities4 = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap nerEntityTokenTags1 = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap nerEntityTokenTags2 = new MapMaker().concurrencyLevel(2).makeMap(); + for (CoreEntityMention em : pipelineCoreDcoument1.entityMentions()) { + Set> entrySet = em.entityTypeConfidences().entrySet(); + String entityType = em.entityType(); + Double EntityConfidences = 0.0; + for (Map.Entry entries : entrySet) { + EntityConfidences = entries.getValue(); + } + List tokens = em.tokens(); + for (CoreLabel token : tokens) { + if (!nerEntityTokenTags1.values().contains(token.tag())) { + if (entityType.equals("PERSON") && EntityConfidences < 0.80) { + score -= 6000; + } else { + nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag()); + } + } + } + if (!nerEntities1.values().contains(em.text())) { + nerEntities1.put(nerEntities1.size() + 1, em.text()); + nerEntities3.put(nerEntities3.size() + 1, em.entityType()); + } + } + for (CoreEntityMention em : pipelineCoreDcoument2.entityMentions()) { + Set> entrySet = em.entityTypeConfidences().entrySet(); + String entityType = em.entityType(); + Double EntityConfidences = 0.0; + for (Map.Entry entries : entrySet) { + EntityConfidences = entries.getValue(); + } + List tokens = em.tokens(); + for (CoreLabel token : tokens) { + if (!nerEntityTokenTags2.values().contains(token.tag())) { + if (entityType.equals("PERSON") && EntityConfidences < 0.80) { + score -= 6000; + } else { + nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag()); + } + } + } + if (!nerEntities2.values().contains(em.text())) { + nerEntities2.put(nerEntities2.size() + 1, em.text()); + nerEntities4.put(nerEntities4.size() + 1, em.entityType()); + } + } + for (String strEnts1 : nerEntities1.values()) { + Collection values = nerEntities2.values(); + for (String strEnts2 : values) { + if (strEnts1.equalsIgnoreCase(strEnts2)) { + score += 7500; + } + } + } + for (String strEnts1 : nerEntities3.values()) { + if (nerEntities4.values().contains(strEnts1)) { + score += 3500; + } + } + for (String strToken : nerEntityTokenTags1.values()) { + if (nerEntityTokenTags2.values().contains(strToken)) { + score += 2500; + } + } } catch (Exception ex) { System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n"); } diff --git a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java index b4afcfbb..2dd7fb3f 100644 --- a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java +++ b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java @@ -33,7 +33,7 @@ import org.javacord.api.entity.user.User; */ public class DiscordHandler { - private static String MostRecentMsg = "what do you think of humanzz"; + private static String MostRecentMsg = "how are you today bot"; public static void main(String[] args) { System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "25"); @@ -51,15 +51,6 @@ public class DiscordHandler { System.out.println("FINISHED ALL ANNOTATIONS"); Datahandler.instance.addHLstatsMessages(); Datahandler.instance.updateStringCache(); - //order matters - if (Datahandler.instance.getstringCacheSize() != 0) { - while (Datahandler.instance.getlHMSMXSize() * Datahandler.instance.getlHMSMXSize() * 2.5 - < (Datahandler.instance.getstringCacheSize() - * Datahandler.instance.getstringCacheSize()) - - Datahandler.instance.getstringCacheSize()) { - Datahandler.instance.updateMatrixes(); - } - } String token = "NTI5NzAxNTk5NjAyMjc4NDAx.Dw0vDg.7-aMjVWdQMYPl8qVNyvTCPS5F_A"; DiscordApi api = new DiscordApiBuilder().setToken(token).login().join(); api.addMessageCreateListener(event -> { @@ -94,7 +85,8 @@ public class DiscordHandler { || event.getServerTextChannel().get().toString().contains("general-autism")) { String ResponseStr; try { - ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg); + String person = event.getMessageAuthor().getName(); + ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg, person); if (!ResponseStr.isEmpty()) { System.out.print("\nResponseStr3: " + ResponseStr + "\n"); event.getChannel().sendMessage(ResponseStr); @@ -103,7 +95,6 @@ public class DiscordHandler { new Thread(() -> { try { Datahandler.instance.checkIfUpdateStrings(false); - Datahandler.instance.updateMatrixes(); } catch (CustomError ex) { Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex); }