With annotation caching, storing results in the DB becomes obsolete; quite a few changes, but overall improvements.

This commit is contained in:
jenzur 2019-04-14 14:18:01 +02:00
parent 17fe22b7ea
commit 5552a20eb6
4 changed files with 360 additions and 227 deletions

View File

@ -12,28 +12,24 @@ import com.google.common.collect.MapMaker;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
@ -51,20 +47,24 @@ import java.util.logging.Logger;
*/
public class Datahandler {
public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(6, TimeUnit.MINUTES);
public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
public static Datahandler instance = new Datahandler();
private static volatile Double minDistance;
private static Annotation strAnno;
private static Annotation strAnnoSentiment;
private static Annotation strAnnoJMWE;
private static CoreDocument coreDoc;
private volatile boolean refreshMatrixFromDB;
private static volatile int secondaryIterator = 0;
private static volatile Double preRelationCounters = 0.0;
private static volatile Double preRelationUserCounters = 0.0;
private final ConcurrentMap<Integer, String> stringCache;
private static ConcurrentMap<String, Annotation> pipelineAnnotationCache;
private static ConcurrentMap<String, Annotation> pipelineSentimentAnnotationCache;
private static ConcurrentMap<String, Annotation> jmweAnnotationCache;
private static ConcurrentMap<String, CoreDocument> coreDocumentAnnotationCache;
private static ConcurrentMap<Integer, String> conversationMatchMap;
private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
private final Stopwatch stopwatch;
private final Stopwatch stopwatch1;
@ -74,10 +74,12 @@ public class Datahandler {
private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz";
private static String nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz";
private static String nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz";
private static String nerModelCaseless = "edu/stanford/nlp/models/truecase/truecasing.fast.caseless.qn.ser.gz";
private static MaxentTagger tagger;
private static ShiftReduceParser model;
private static String[] options = {"-maxLength", "90"};
private static String[] options = {"-maxLength", "100"};
private static Properties props = new Properties();
private static Properties propsSentiment = new Properties();
private static GrammaticalStructureFactory gsf;
@ -92,19 +94,16 @@ public class Datahandler {
this.stopwatch = Stopwatch.createUnstarted();
this.stopwatch1 = Stopwatch.createStarted();
this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
//cant sadly just have one pipelines for every annotation, one pipeline per annotation is required
this.jmweAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
this.pipelineAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
this.pipelineSentimentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
this.coreDocumentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
this.conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap();
}
public void shiftReduceParserInitiate() {
//got 8 cores
CountDownLatch cdl = new CountDownLatch(4);
new Thread(() -> {
model = ShiftReduceParser.loadModel(shiftReduceParserPath, options);
cdl.countDown();
}).start();
CountDownLatch cdl = new CountDownLatch(3);
new Thread(() -> {
try {
classifier = CRFClassifier.getClassifierNoExceptions(nerModel);
@ -115,7 +114,6 @@ public class Datahandler {
}).start();
new Thread(() -> {
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
propsSentiment.setProperty("ner.model", nerModel);
propsSentiment.setProperty("sentiment.model", sentimentModel);
propsSentiment.setProperty("parse.maxlen", "90");
propsSentiment.setProperty("threads", "25");
@ -123,12 +121,13 @@ public class Datahandler {
propsSentiment.setProperty("tokenize.maxlen", "90");
propsSentiment.setProperty("ssplit.maxlen", "90");
propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment"); //coref too expensive memorywise, does it need depparse?
propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep");
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
tagger = new MaxentTagger(taggerPath);
cdl.countDown();
}).start();
new Thread(() -> {
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,ner");
props.setProperty("parse.model", shiftReduceParserPath);
props.setProperty("parse.maxlen", "90");
props.setProperty("parse.binaryTrees", "true");
@ -137,8 +136,11 @@ public class Datahandler {
props.setProperty("tokenize.maxlen", "90");
props.setProperty("ssplit.maxlen", "90");
props.setProperty("lemma.maxlen", "90");
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
props.setProperty("tokenize.options", "untokenizable=firstDelete");
props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3);
props.setProperty("ner.combinationMode", "HIGH_RECALL");
props.setProperty("regexner.ignorecase", "true");
props.setProperty("ner.fine.regexner.ignorecase", "true");
props.setProperty("tokenize.options", "untokenizable=firstKeep"); //firstKeep //firstDelete
pipeline = new StanfordCoreNLP(props);
cdl.countDown();
}).start();
@ -191,10 +193,6 @@ public class Datahandler {
return tagger;
}
/**
 * Returns the shared shift-reduce parser model.
 *
 * @return the {@link ShiftReduceParser} instance, or {@code null} if the
 *         background loading thread has not assigned it yet
 */
public static ShiftReduceParser getModel() {
return model;
}
/**
 * Loads all persisted strings from the database via {@link DataMapper}.
 *
 * @return map of id to string as stored in the DB
 * @throws SQLException if the database access fails
 * @throws IOException  declared by the underlying mapper call
 * @throws CustomError  declared by the underlying mapper call
 */
private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
return DataMapper.getAllStrings();
}
@ -258,6 +256,9 @@ public class Datahandler {
Annotationspipeline.put(str, strAnno);
Annotation strAnno2 = new Annotation(str);
AnnotationspipelineSentiment.put(str, strAnno2);
CoreDocument CD = new CoreDocument(str);
pipeline.annotate(CD);
coreDocumentAnnotationCache.put(str, CD);
});
pipeline.annotate(Annotationspipeline.values());
pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
@ -270,7 +271,6 @@ public class Datahandler {
}
}
//synchronized
public synchronized void updateMatrixes() {
refreshMatrixFromDB = false;
if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) {
@ -278,7 +278,6 @@ public class Datahandler {
lHMSMX = DataMapper.getAllRelationScores();
stopwatch1.reset();
}
//requiring atleast 10 entries ensures no issues in case of empty stringcache
if (stringCache.values().size() > 10 && !refreshMatrixFromDB) {
ConcurrentMap<Integer, String> stringCachelocal = stringCache;
int selectUpdate = -1;
@ -293,13 +292,22 @@ public class Datahandler {
}
ij2++;
}
if (selectUpdate == -1 || selectUpdate + 1 == stringCachelocal.size()) {
int valueSize = stringCachelocal.size();
if (secondaryIterator + iteratorCap >= valueSize) {
secondaryIterator = 0;
//secondaryIterator
if (selectUpdate == -1 || selectUpdate + 1 >= stringCachelocal.size() || stringCachelocal.get(selectUpdate) == null) {
Integer iterator = 0;
while (iterator == 0) {
if (secondaryIterator >= stringCachelocal.size()) {
secondaryIterator = 0;
}
String get = stringCachelocal.get(secondaryIterator);
if (get == null) {
secondaryIterator++;
} else {
selectUpdate = secondaryIterator;
iterator++;
}
}
selectUpdate = secondaryIterator;
secondaryIterator += iteratorCap;
secondaryIterator++;
}
String getStringCacheStr = stringCachelocal.get(selectUpdate);
ConcurrentMap<Integer, SimilarityMatrix> matrixUpdateMap = new MapMaker().concurrencyLevel(2).makeMap();
@ -339,7 +347,8 @@ public class Datahandler {
SimilarityMatrix SMX = new SimilarityMatrix(getStringCacheStr, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(getStringCacheStr, str1, SMX, jmweAnnotationCache.get(getStringCacheStr),
jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(getStringCacheStr), pipelineAnnotationCache.get(str1),
pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1));
pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1),
coreDocumentAnnotationCache.get(getStringCacheStr), coreDocumentAnnotationCache.get(str1));
futures.put(futures.size() + 1, executor.submit(worker));
}
}
@ -375,6 +384,64 @@ public class Datahandler {
}
}
/**
 * Filters out candidate strings that do not relate strongly enough to the rest of
 * the corpus. Each candidate is scored (asynchronously) against either the other
 * candidates (small cache) or the long-lived string cache; a candidate is kept
 * once it accumulates more than {@code relationCap} comparisons scoring >= 5000.0.
 *
 * @param strmap candidate strings keyed by insertion index
 * @return the subset of {@code strmap} values considered sensical
 */
public ConcurrentMap<Integer, String> removeNonSensicalStrings(ConcurrentMap<Integer, String> strmap) {
    ConcurrentMap<Integer, String> strmapreturn = new MapMaker().concurrencyLevel(2).makeMap();
    int relationCap = 20;
    // With a small cache, compare candidates against each other; otherwise against the cache.
    ConcurrentMap<Integer, String> strCacheLocal = stringCache.size() < 150 ? strmap : stringCache;
    ConcurrentMap<String, Annotation> localJMWEMap = getMultipleJMWEAnnotation(strmap.values());
    ConcurrentMap<String, Annotation> localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values());
    ConcurrentMap<String, Annotation> localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values());
    ConcurrentMap<String, CoreDocument> localCoreDocumentMap = getMultipleCoreDocuments(strmap.values());
    for (String str : strmap.values()) {
        ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
        ConcurrentMap<Integer, HashMap<String, String>> strsmaps = new MapMaker().concurrencyLevel(2).makeMap();
        for (String str1 : strCacheLocal.values()) {
            HashMap<String, String> pair = new HashMap<>();
            pair.put(str, str1);
            // Skip self-comparisons and pairs already submitted in this pass.
            if (!str.equals(str1) && !strsmaps.values().contains(pair)) {
                SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
                Callable<SimilarityMatrix> worker;
                if (stringCache.size() < 150) {
                    worker = new SentimentAnalyzerTest(str, str1, SMX,
                            localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str),
                            localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str),
                            localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1));
                } else {
                    // str1 comes from the long-lived cache, so reuse its cached annotations.
                    worker = new SentimentAnalyzerTest(str, str1, SMX,
                            localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str),
                            pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str),
                            pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1));
                }
                HashMap<String, String> submitted = new HashMap<>();
                submitted.put(SMX.getPrimaryString(), SMX.getSecondaryString());
                strsmaps.put(strsmaps.size() + 1, submitted);
                futures.put(futures.size() + 1, executor.submit(worker));
            }
        }
        int positiveRelationCounter = 0;
        for (Future<SimilarityMatrix> future : futures.values()) {
            try {
                SimilarityMatrix getSMX = future.get(5, TimeUnit.SECONDS);
                Double scoreRelationNewMsgToRecentMsg = getSMX.getDistance();
                if (scoreRelationNewMsgToRecentMsg >= 5000.0) {
                    System.out.println("scoreRelationNewMsgToRecentMsg: " + scoreRelationNewMsgToRecentMsg + "\n");
                    positiveRelationCounter++;
                    // Merged the two back-to-back identical threshold checks of the
                    // original: once enough positive relations are seen, keep the
                    // string and stop scanning the remaining futures early.
                    if (positiveRelationCounter > relationCap) {
                        strmapreturn.put(strmapreturn.size() + 1, str);
                        System.out.println("strmapreturn size: " + strmapreturn.size() + "\n");
                        break;
                    }
                }
            } catch (InterruptedException | ExecutionException | TimeoutException ex) {
                Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return strmapreturn;
}
public synchronized void checkIfUpdateStrings(boolean hlStatsMsg) throws CustomError {
if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
@ -382,15 +449,30 @@ public class Datahandler {
str = filterContent(str);
str = removeSlacks(str);
System.out.println("finished removeSlacks \n" + str.size() + "\n");
str = removeNonSensicalStrings(str);
System.out.println("POST removeNonSensicalStrings size: " + str.size() + "\n");
str = annotationCacheUpdate(str);
System.out.println("annotationCacheUpdate str size POST: " + str.size() + "\n");
try {
DataMapper.InsertMYSQLStrings(str);
} catch (CustomError ex) {
Logger.getLogger(Datahandler.class
.getName()).log(Level.SEVERE, null, ex);
ConcurrentMap<Integer, String> strf = str;
if (!stringCache.isEmpty()) {
new Thread(() -> {
try {
DataMapper.InsertMYSQLStrings(strf);
} catch (CustomError ex) {
Logger.getLogger(Datahandler.class
.getName()).log(Level.SEVERE, null, ex);
}
MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
}).start();
} else {
try {
DataMapper.InsertMYSQLStrings(strf);
} catch (CustomError ex) {
Logger.getLogger(Datahandler.class
.getName()).log(Level.SEVERE, null, ex);
}
MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
}
MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
if (!stopwatch.isRunning()) {
stopwatch.start();
} else {
@ -404,44 +486,7 @@ public class Datahandler {
if (str.startsWith("<@")) {
str = str.substring(str.indexOf("> ") + 2);
}
final LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
ConcurrentMap<Integer, String> strArrs = stringCache;
double Score = -10000;
SimilarityMatrix SMXreturn = new SimilarityMatrix("", "");
System.out.println("pre mostSimilarSTR \n");
String mostSimilarSTR = mostSimilar(str, strArrs, MostRecent);
if (mostSimilarSTR != null) {
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null);
if (orDefault != null) {
for (Entry<String, Double> entrySet : orDefault.entrySet()) {
double smxDistance = entrySet.getValue();
if (smxDistance > Score) {
Score = smxDistance;
SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance);
}
}
}
for (Entry<String, LinkedHashMap<String, Double>> values1 : LHMSMXLocal.entrySet()) {
LinkedHashMap<String, Double> value = values1.getValue();
for (Entry<String, Double> keystr : value.entrySet()) {
if (keystr.getKey().equals(mostSimilarSTR)) {
double smxDistance = keystr.getValue();
if (smxDistance > Score) {
Score = smxDistance;
SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance);
}
}
}
}
if (!SMXreturn.getPrimaryString().isEmpty()) {
if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) {
return SMXreturn.getSecondaryString();
} else {
return SMXreturn.getPrimaryString();
}
}
}
System.out.println("none within 8 range");
ConcurrentMap<Integer, String> strCache = stringCache;
ConcurrentMap<Integer, Future<SimilarityMatrix>> futureslocal = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, SimilarityMatrix> futurereturn = new MapMaker().concurrencyLevel(2).makeMap();
@ -449,12 +494,21 @@ public class Datahandler {
getSingularAnnotation(strF);
strCache.values().parallelStream().forEach((str1) -> {
if (!strF.equals(str1)) {
SimilarityMatrix SMX = new SimilarityMatrix(strF, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, SMX,
strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno,
pipelineAnnotationCache.get(str1), strAnnoSentiment,
pipelineSentimentAnnotationCache.get(str1));
futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
boolean present = false;
for (String strCons : conversationMatchMap.values()) {
if (strCons.equals(str1)) {
present = true;
break;
}
}
if (!present) {
SimilarityMatrix SMX = new SimilarityMatrix(strF, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, SMX,
strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno,
pipelineAnnotationCache.get(str1), strAnnoSentiment,
pipelineSentimentAnnotationCache.get(str1), coreDoc, coreDocumentAnnotationCache.get(str1));
futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
}
}
});
futureslocal.values().parallelStream().forEach((future) -> {
@ -466,48 +520,30 @@ public class Datahandler {
System.out.println("ex getResponsemsg: " + ex.getMessage() + "\n");
}
});
ConcurrentMap<Integer, SimilarityMatrix> smxUpdateReturn = new MapMaker().concurrencyLevel(2).makeMap();
preRelationCounters = 0.0;
preRelationUserCounters = 0.0;
conversationMatchMap.put(conversationMatchMap.size() + 1, MostRecent);
Double scoreRelationNewMsgToRecentMsg = 0.0;
for (String conversationStr : conversationMatchMap.values()) {
scoreRelationNewMsgToRecentMsg += getScoreRelationNewMsgToRecentMsg(strF, conversationStr);
}
boolean relatedReponse = scoreRelationNewMsgToRecentMsg >= 250;
if (!relatedReponse) {
conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap();
}
for (SimilarityMatrix SMX : futurereturn.values()) {
Double scoreRelation = 500.0;
boolean foundmatch = false;
if (!MostRecent.isEmpty()) {
LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(MostRecent, null);
if (orDefault1 != null) {
Collection<String> orDefaultstrs = orDefault1.keySet();
for (String strs : orDefaultstrs) {
if (strs.equals(SMX.getSecondaryString())) {
scoreRelation = orDefault1.get(SMX.getSecondaryString());
foundmatch = true;
break;
}
}
}
if (!foundmatch) {
orDefault1 = lHMSMX.getOrDefault(SMX.getSecondaryString(), null);
if (orDefault1 != null) {
Collection<String> orDefaultstrs = orDefault1.keySet();
for (String strs : orDefaultstrs) {
if (strs.equals(MostRecent)) {
scoreRelation = orDefault1.get(MostRecent);
foundmatch = true;
break;
}
}
}
Double scoreRelationLastUserMsg = SMX.getDistance();
if (relatedReponse) {
for (String conversationStr : conversationMatchMap.values()) {
scoreRelation += getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), conversationStr);
}
}
if (!foundmatch) {
scoreRelation = getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), MostRecent);
}
if (scoreRelation > (25 * smxUpdateReturn.size())) {
smxUpdateReturn.put(smxUpdateReturn.size() + 1, SMX);
}
}
for (SimilarityMatrix SMX : smxUpdateReturn.values()) {
double distance = SMX.getDistance();
if (distance > Score) {
Score = distance;
Double totalRelation = scoreRelation + scoreRelationLastUserMsg;
if (totalRelation > preRelationCounters + preRelationUserCounters && scoreRelationLastUserMsg > preRelationUserCounters) {
SMXreturn = SMX;
preRelationCounters = scoreRelation;
preRelationUserCounters = scoreRelationLastUserMsg;
}
}
System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString()
@ -524,6 +560,44 @@ public class Datahandler {
notactualList.add(str);
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList);
strAnnoJMWE = jmweAnnotation.values().iterator().next();
CoreDocument coreDocument = new CoreDocument(str);
pipeline.annotate(coreDocument);
coreDoc = coreDocument;
}
/**
 * Builds JMWE (multi-word-expression) annotations for each supplied string.
 *
 * @param str strings to annotate
 * @return map from each string to its JMWE {@link Annotation}
 */
public ConcurrentMap<String, Annotation> getMultipleJMWEAnnotation(Collection<String> str) {
    return PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str);
}
/**
 * Annotates every supplied string with the main StanfordCoreNLP pipeline.
 *
 * @param str strings to annotate
 * @return map from each string to its (annotated) {@link Annotation}
 */
public ConcurrentMap<String, Annotation> getMultiplePipelineAnnotation(Collection<String> str) {
    ConcurrentMap<String, Annotation> annotated = new MapMaker().concurrencyLevel(2).makeMap();
    str.forEach(text -> annotated.put(text, new Annotation(text)));
    // Batch-annotate all values in one pipeline call.
    pipeline.annotate(annotated.values());
    return annotated;
}
/**
 * Annotates every supplied string with the sentiment StanfordCoreNLP pipeline.
 *
 * @param str strings to annotate
 * @return map from each string to its sentiment-annotated {@link Annotation}
 */
public ConcurrentMap<String, Annotation> getMultiplePipelineSentimentAnnotation(Collection<String> str) {
    ConcurrentMap<String, Annotation> annotated = new MapMaker().concurrencyLevel(2).makeMap();
    str.forEach(text -> annotated.put(text, new Annotation(text)));
    // Batch-annotate all values in one sentiment-pipeline call.
    pipelineSentiment.annotate(annotated.values());
    return annotated;
}
/**
 * Creates and annotates a {@link CoreDocument} for each supplied string, in parallel.
 *
 * @param str strings to process
 * @return map from each string to its annotated CoreDocument
 */
public ConcurrentMap<String, CoreDocument> getMultipleCoreDocuments(Collection<String> str) {
    ConcurrentMap<String, CoreDocument> documents = new MapMaker().concurrencyLevel(2).makeMap();
    str.parallelStream().forEach(text -> {
        CoreDocument document = new CoreDocument(text);
        pipeline.annotate(document);
        documents.put(text, document);
    });
    return documents;
}
private Double getScoreRelationNewMsgToRecentMsg(String str, String mostRecentMsg) {
@ -531,7 +605,8 @@ public class Datahandler {
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX,
jmweAnnotationCache.get(str), jmweAnnotationCache.get(mostRecentMsg), pipelineAnnotationCache.get(str),
pipelineAnnotationCache.get(mostRecentMsg), pipelineSentimentAnnotationCache.get(str),
pipelineSentimentAnnotationCache.get(mostRecentMsg));
pipelineSentimentAnnotationCache.get(mostRecentMsg), coreDocumentAnnotationCache.get(str),
coreDocumentAnnotationCache.get(mostRecentMsg));
SimilarityMatrix callSMX = null;
try {
callSMX = worker.call();
@ -548,14 +623,15 @@ public class Datahandler {
public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings, String MostRecent) {
similar = "";
minDistance = 7.0;
preRelationCounters = 500.0;
minDistance = 6.0;
preRelationCounters = 0.0;
preRelationUserCounters = 0.0;
getSingularAnnotation(toBeCompared);
ConcurrentMap<Integer, String> similardistances = new MapMaker().concurrencyLevel(2).makeMap();
concurrentStrings.values().parallelStream().forEach((str) -> {
LevenshteinDistance leven = new LevenshteinDistance(toBeCompared, str);
double distance = leven.computeLevenshteinDistance();
if (distance <= minDistance) {
minDistance = distance;
System.out.println("distance: " + distance + "\n");
similardistances.put(similardistances.size() + 1, str);
}
@ -571,7 +647,8 @@ public class Datahandler {
Double value = defaultEntry.getValue();
String key = defaultEntry.getKey();
if (value > maxDistance) {
Double RelationScore = 500.0;
Double RelationScoreLastMsg = 500.0;
Double RelationScoreLastUserMsg = 500.0;
boolean foundmatch = false;
if (!MostRecent.isEmpty()) {
LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(MostRecent, null);
@ -579,7 +656,7 @@ public class Datahandler {
Collection<String> orDefaultstrs = orDefault1.keySet();
for (String strs : orDefaultstrs) {
if (strs.equals(key)) {
RelationScore = orDefault1.get(key);
RelationScoreLastMsg = orDefault1.get(key);
foundmatch = true;
break;
}
@ -591,7 +668,7 @@ public class Datahandler {
Collection<String> orDefaultstrs = orDefault1.keySet();
for (String strs : orDefaultstrs) {
if (strs.equals(MostRecent)) {
RelationScore = orDefault1.get(MostRecent);
RelationScoreLastMsg = orDefault1.get(MostRecent);
foundmatch = true;
break;
}
@ -600,12 +677,20 @@ public class Datahandler {
}
}
if (!foundmatch) {
RelationScore = getScoreRelationNewMsgToRecentMsg(key, MostRecent);
RelationScoreLastMsg = getScoreRelationNewMsgToRecentMsg(key, MostRecent);
}
if (RelationScore > preRelationCounters) {
maxDistance = value;
similar = defaultEntry.getKey();
preRelationCounters = RelationScore;
RelationScoreLastUserMsg = getScoreRelationNewMsgToRecentMsg(key, toBeCompared);
Double totalRelation = RelationScoreLastMsg + RelationScoreLastUserMsg;
if (totalRelation > preRelationCounters + preRelationUserCounters) {
if (RelationScoreLastMsg + 500 > preRelationUserCounters && RelationScoreLastUserMsg > preRelationCounters
|| RelationScoreLastUserMsg + 500 > preRelationCounters && RelationScoreLastMsg > preRelationUserCounters) {
if (RelationScoreLastMsg > preRelationCounters && RelationScoreLastUserMsg > preRelationUserCounters) {
maxDistance = value;
similar = defaultEntry.getKey();
preRelationCounters = RelationScoreLastMsg;
preRelationUserCounters = RelationScoreLastUserMsg;
}
}
}
}
}
@ -736,76 +821,26 @@ public class Datahandler {
}
private ConcurrentMap<Integer, String> removeSlacks(ConcurrentMap<Integer, String> str) {
ShiftReduceParser modelLocal = getModel();
MaxentTagger taggerLocal = getTagger();
ConcurrentMap<Integer, String> strreturn = new MapMaker().concurrencyLevel(2).makeMap();
if (stringCache.isEmpty()) {
return str;
}
Collection<String> values = stringCache.values();
str.values().parallelStream().forEach(str1 -> {
ConcurrentMap<Integer, String> TGWList = new MapMaker().concurrencyLevel(2).makeMap();
DocumentPreprocessor tokenizer = null;
try {
tokenizer = new DocumentPreprocessor(new StringReader(str1));
} catch (Exception ex) {
System.out.println("failed tokenizer removeslacks: " + ex.getLocalizedMessage() + "\n");
tokenizer = null;
}
if (tokenizer != null) {
for (List<HasWord> sentence : tokenizer) {
int counter = 0;
List<TaggedWord> taggedWords;
List<TaggedWord> tagged1 = taggerLocal.tagSentence(sentence);
Tree tree = modelLocal.apply(tagged1);
taggedWords = tree.taggedYield();
for (TaggedWord TGW : taggedWords) {
if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) {
TGWList.put(TGWList.size() + 1, TGW.tag());
counter++;
}
if (counter > 3) {
int addCounter = 0;
ConcurrentMap<Integer, Word> wordList = new MapMaker().concurrencyLevel(2).makeMap();
for (Word lab : tree.yieldWords()) {
if (lab != null && lab.word() != null) {
if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) {
wordList.put(wordList.size() + 1, lab);
addCounter++;
}
}
}
if (addCounter > 3) {
addCounter = 0;
ConcurrentMap<Integer, HasWord> HWlist = new MapMaker().concurrencyLevel(2).makeMap();
for (HasWord HW : tree.yieldHasWord()) {
if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) {
addCounter++;
HWlist.put(HWlist.size() + 1, HW);
}
}
if (addCounter > 3) {
boolean tooclosematch = false;
Collection<String> values = stringCache.values();
for (String strVals : values) {
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
double Distance = leven.computeLevenshteinDistance();
int maxpermittedDistance = 5;
if (Distance < maxpermittedDistance) {
tooclosematch = true;
break;
}
}
if (!tooclosematch) {
strreturn.put(strreturn.size() + 1, str1);
System.out.println("adding strreturn str1: " + str1 + "\n");
}
}
}
break;
}
}
if (counter > 3) {
break;
}
boolean tooclosematch = false;
for (String strVals : values) {
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
double Distance = leven.computeLevenshteinDistance();
Double maxpermittedDistance = 2.5;
if (Distance < maxpermittedDistance) {
tooclosematch = true;
break;
}
}
if (!tooclosematch) {
strreturn.put(strreturn.size() + 1, str1);
System.out.println("adding strreturn str1: " + str1 + "\n");
}
});
return strreturn;
}
@ -818,22 +853,36 @@ public class Datahandler {
ConcurrentMap<String, Annotation> Annotationspipeline = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<String, Annotation> AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(2).makeMap();
strmap.values().parallelStream().forEach(str -> {
Annotation strAnno = new Annotation(str);
Annotationspipeline.put(str, strAnno);
Annotation strAnno1 = new Annotation(str);
Annotationspipeline.put(str, strAnno1);
Annotation strAnno2 = new Annotation(str);
AnnotationspipelineSentiment.put(str, strAnno2);
try {
CoreDocument CD = new CoreDocument(str);
pipeline.annotate(CD);
coreDocumentAnnotationCache.put(str, CD);
} catch (Exception e) {
System.out.println("failed document annotation: " + e + "\n");
}
stringCache.put(stringCache.size() + 1, str);
});
System.out.println("pre iterator annotation update \n");
pipeline.annotate(Annotationspipeline.values());
pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
Annotationspipeline.entrySet().forEach(pipelineEntry -> {
pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
if (pipelineEntry != null) {
pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
} else {
System.out.println("failed pipeline cache \n");
}
});
AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> {
pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
if (pipelineEntry != null) {
pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
} else {
System.out.println("failed sentiment cache \n");
}
});
return strmap;
}
}

View File

@ -6,6 +6,8 @@
package FunctionLayer;
import com.google.common.collect.MapMaker;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentMap;
@ -15,17 +17,17 @@ import java.util.concurrent.ConcurrentMap;
* @author install1
*/
public class MessageResponseHandler {
private static ConcurrentMap<Integer, String> str = new MapMaker().concurrencyLevel(2).makeMap();
/**
 * Returns the live map of pending incoming messages.
 *
 * @return the backing message map (not a copy; callers share mutations)
 */
public static ConcurrentMap<Integer, String> getStr() {
return str;
}
/**
 * Replaces the pending-message map, e.g. to clear it after a DB flush.
 *
 * @param str the new message map to use
 */
public static void setStr(ConcurrentMap<Integer, String> str) {
MessageResponseHandler.str = str;
}
public static void getMessage(String message) {
if (message != null && !message.isEmpty()) {
message = message.replace("@", "");
@ -38,8 +40,8 @@ public class MessageResponseHandler {
str.put(str.size() + 1, message);
}
}
public synchronized static String selectReponseMessage(String toString, String mostRecentMsg) throws CustomError {
public synchronized static String selectReponseMessage(String toString, String mostRecentMsg, String personName) throws CustomError {
ConcurrentMap<Integer, String> str1 = new MapMaker().concurrencyLevel(2).makeMap();
str1.put(str1.size() + 1, toString);
str1 = Datahandler.cutContent(str1, false);
@ -50,6 +52,21 @@ public class MessageResponseHandler {
}
}
String getResponseMsg = Datahandler.instance.getResponseMsg(strreturn, mostRecentMsg);
getResponseMsg = checkPersonPresnetInSentence(personName, getResponseMsg);
return getResponseMsg;
}
/**
 * Replaces the first PERSON entity mention found in {@code responseMsg} with
 * {@code personName}, so replies address the actual conversation partner.
 *
 * @param personName  name to substitute in
 * @param responseMsg candidate response text
 * @return the response with the first detected person name swapped, or the
 *         original message if no PERSON entity is detected
 */
private static String checkPersonPresnetInSentence(String personName, String responseMsg) {
    CoreDocument document = new CoreDocument(responseMsg);
    Datahandler.getPipeline().annotate(document);
    for (CoreEntityMention em : document.entityMentions()) {
        if ("PERSON".equals(em.entityType())) {
            // Literal substring replacement. The original used String.replaceFirst,
            // which interprets the mention text as a regex and can throw
            // PatternSyntaxException or mis-replace when the detected name
            // contains metacharacters (e.g. "." or "?").
            String mention = em.text();
            int idx = responseMsg.indexOf(mention);
            if (idx < 0) {
                return responseMsg;
            }
            return responseMsg.substring(0, idx) + personName
                    + responseMsg.substring(idx + mention.length());
        }
    }
    return responseMsg;
}
}

View File

@ -17,6 +17,8 @@ import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
@ -38,6 +40,7 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.OptionalDouble;
import java.util.Set;
@ -62,7 +65,6 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
private SimilarityMatrix smxParam;
private String str;
private String str1;
private ShiftReduceParser model;
private MaxentTagger tagger;
private GrammaticalStructureFactory gsf;
private StanfordCoreNLP pipeline;
@ -74,13 +76,15 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
private Annotation pipelineAnnotation2;
private Annotation pipelineAnnotation1Sentiment;
private Annotation pipelineAnnotation2Sentiment;
private CoreDocument pipelineCoreDcoument1;
private CoreDocument pipelineCoreDcoument2;
public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam, Annotation str1Annotation, Annotation str2Annotation,
Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2) {
Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2,
CoreDocument pipelineCoreDcoument1, CoreDocument pipelineCoreDcoument2) {
this.str = str;
this.str1 = str1;
this.smxParam = smxParam;
this.model = Datahandler.getModel();
this.tagger = Datahandler.getTagger();
this.pipeline = Datahandler.getPipeline();
this.pipelineSentiment = Datahandler.getPipelineSentiment();
@ -90,8 +94,10 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
this.jmweStrAnnotation2 = str2Annotation;
this.pipelineAnnotation1 = strPipeline1;
this.pipelineAnnotation2 = strPipeline2;
this.pipelineAnnotation1Sentiment = strPipeSentiment1; //maybe process?
this.pipelineAnnotation1Sentiment = strPipeSentiment1;
this.pipelineAnnotation2Sentiment = strPipeSentiment2;
this.pipelineCoreDcoument1 = pipelineCoreDcoument1;
this.pipelineCoreDcoument2 = pipelineCoreDcoument2;
}
@Override
@ -106,12 +112,14 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
taggedwordlist1.add(tagger.tagSentence(sentence));
//taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
tokenizer = new DocumentPreprocessor(new StringReader(str));
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
taggedwordlist2.add(tagger.tagSentence(sentence));
//taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
int counter = 0;
int counter1 = 0;
@ -817,6 +825,74 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
double SentenceScoreDiff = leven.computeLevenshteinDistance();
SentenceScoreDiff *= 15;
score -= SentenceScoreDiff;
ConcurrentMap<Integer, String> nerEntities1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities2 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities3 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntities4 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntityTokenTags1 = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, String> nerEntityTokenTags2 = new MapMaker().concurrencyLevel(2).makeMap();
for (CoreEntityMention em : pipelineCoreDcoument1.entityMentions()) {
Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
String entityType = em.entityType();
Double EntityConfidences = 0.0;
for (Map.Entry<String, Double> entries : entrySet) {
EntityConfidences = entries.getValue();
}
List<CoreLabel> tokens = em.tokens();
for (CoreLabel token : tokens) {
if (!nerEntityTokenTags1.values().contains(token.tag())) {
if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
score -= 6000;
} else {
nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag());
}
}
}
if (!nerEntities1.values().contains(em.text())) {
nerEntities1.put(nerEntities1.size() + 1, em.text());
nerEntities3.put(nerEntities3.size() + 1, em.entityType());
}
}
for (CoreEntityMention em : pipelineCoreDcoument2.entityMentions()) {
Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
String entityType = em.entityType();
Double EntityConfidences = 0.0;
for (Map.Entry<String, Double> entries : entrySet) {
EntityConfidences = entries.getValue();
}
List<CoreLabel> tokens = em.tokens();
for (CoreLabel token : tokens) {
if (!nerEntityTokenTags2.values().contains(token.tag())) {
if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
score -= 6000;
} else {
nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag());
}
}
}
if (!nerEntities2.values().contains(em.text())) {
nerEntities2.put(nerEntities2.size() + 1, em.text());
nerEntities4.put(nerEntities4.size() + 1, em.entityType());
}
}
for (String strEnts1 : nerEntities1.values()) {
Collection<String> values = nerEntities2.values();
for (String strEnts2 : values) {
if (strEnts1.equalsIgnoreCase(strEnts2)) {
score += 7500;
}
}
}
for (String strEnts1 : nerEntities3.values()) {
if (nerEntities4.values().contains(strEnts1)) {
score += 3500;
}
}
for (String strToken : nerEntityTokenTags1.values()) {
if (nerEntityTokenTags2.values().contains(strToken)) {
score += 2500;
}
}
} catch (Exception ex) {
System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n");
}

View File

@ -33,7 +33,7 @@ import org.javacord.api.entity.user.User;
*/
public class DiscordHandler {
private static String MostRecentMsg = "what do you think of humanzz";
private static String MostRecentMsg = "how are you today bot";
public static void main(String[] args) {
System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "25");
@ -51,15 +51,6 @@ public class DiscordHandler {
System.out.println("FINISHED ALL ANNOTATIONS");
Datahandler.instance.addHLstatsMessages();
Datahandler.instance.updateStringCache();
//order matters
if (Datahandler.instance.getstringCacheSize() != 0) {
while (Datahandler.instance.getlHMSMXSize() * Datahandler.instance.getlHMSMXSize() * 2.5
< (Datahandler.instance.getstringCacheSize()
* Datahandler.instance.getstringCacheSize())
- Datahandler.instance.getstringCacheSize()) {
Datahandler.instance.updateMatrixes();
}
}
String token = "NTI5NzAxNTk5NjAyMjc4NDAx.Dw0vDg.7-aMjVWdQMYPl8qVNyvTCPS5F_A";
DiscordApi api = new DiscordApiBuilder().setToken(token).login().join();
api.addMessageCreateListener(event -> {
@ -94,7 +85,8 @@ public class DiscordHandler {
|| event.getServerTextChannel().get().toString().contains("general-autism")) {
String ResponseStr;
try {
ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg);
String person = event.getMessageAuthor().getName();
ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg, person);
if (!ResponseStr.isEmpty()) {
System.out.print("\nResponseStr3: " + ResponseStr + "\n");
event.getChannel().sendMessage(ResponseStr);
@ -103,7 +95,6 @@ public class DiscordHandler {
new Thread(() -> {
try {
Datahandler.instance.checkIfUpdateStrings(false);
Datahandler.instance.updateMatrixes();
} catch (CustomError ex) {
Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
}