With annotation caching, storing results in the DB becomes obsolete; quite a few changes, but overall improvements

This commit is contained in:
jenzur 2019-04-14 14:18:01 +02:00
parent 17fe22b7ea
commit 5552a20eb6
4 changed files with 360 additions and 227 deletions

View File

@@ -12,28 +12,24 @@ import com.google.common.collect.MapMaker;
 import edu.stanford.nlp.ie.AbstractSequenceClassifier;
 import edu.stanford.nlp.ie.crf.CRFClassifier;
 import edu.stanford.nlp.ling.CoreLabel;
-import edu.stanford.nlp.ling.HasWord;
-import edu.stanford.nlp.ling.TaggedWord;
-import edu.stanford.nlp.ling.Word;
 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
-import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
 import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.CoreDocument;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import edu.stanford.nlp.process.DocumentPreprocessor;
 import edu.stanford.nlp.tagger.maxent.MaxentTagger;
 import edu.stanford.nlp.trees.GrammaticalStructureFactory;
-import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreebankLanguagePack;
 import java.io.IOException;
-import java.io.StringReader;
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Properties;
-import java.util.Random;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.CountDownLatch;
@@ -51,20 +47,24 @@ import java.util.logging.Logger;
  */
 public class Datahandler {

-    public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(6, TimeUnit.MINUTES);
+    public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
     public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
     public static Datahandler instance = new Datahandler();
     private static volatile Double minDistance;
     private static Annotation strAnno;
     private static Annotation strAnnoSentiment;
     private static Annotation strAnnoJMWE;
+    private static CoreDocument coreDoc;
     private volatile boolean refreshMatrixFromDB;
     private static volatile int secondaryIterator = 0;
     private static volatile Double preRelationCounters = 0.0;
+    private static volatile Double preRelationUserCounters = 0.0;
     private final ConcurrentMap<Integer, String> stringCache;
     private static ConcurrentMap<String, Annotation> pipelineAnnotationCache;
     private static ConcurrentMap<String, Annotation> pipelineSentimentAnnotationCache;
     private static ConcurrentMap<String, Annotation> jmweAnnotationCache;
+    private static ConcurrentMap<String, CoreDocument> coreDocumentAnnotationCache;
+    private static ConcurrentMap<Integer, String> conversationMatchMap;
     private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
     private final Stopwatch stopwatch;
     private final Stopwatch stopwatch1;
@@ -74,10 +74,12 @@
     private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
     private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
     private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
-    private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
+    private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz";
+    private static String nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz";
+    private static String nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz";
+    private static String nerModelCaseless = "edu/stanford/nlp/models/truecase/truecasing.fast.caseless.qn.ser.gz";
     private static MaxentTagger tagger;
-    private static ShiftReduceParser model;
-    private static String[] options = {"-maxLength", "90"};
+    private static String[] options = {"-maxLength", "100"};
     private static Properties props = new Properties();
     private static Properties propsSentiment = new Properties();
     private static GrammaticalStructureFactory gsf;
@@ -92,19 +94,16 @@
         this.stopwatch = Stopwatch.createUnstarted();
         this.stopwatch1 = Stopwatch.createStarted();
         this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
-        //cant sadly just have one pipelines for every annotation, one pipeline per annotation is required
         this.jmweAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
         this.pipelineAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
         this.pipelineSentimentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
+        this.coreDocumentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap();
+        this.conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap();
     }

     public void shiftReduceParserInitiate() {
         //got 8 cores
-        CountDownLatch cdl = new CountDownLatch(4);
-        new Thread(() -> {
-            model = ShiftReduceParser.loadModel(shiftReduceParserPath, options);
-            cdl.countDown();
-        }).start();
+        CountDownLatch cdl = new CountDownLatch(3);
         new Thread(() -> {
             try {
                 classifier = CRFClassifier.getClassifierNoExceptions(nerModel);
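Note: the hunk above drops the ShiftReduceParser loader thread, so the latch count falls from 4 to 3; each remaining thread loads one heavy model and counts down, and startup blocks until all three finish. A minimal, self-contained sketch of that pattern, with placeholder work standing in for the real model loads (class name hypothetical):

    import java.util.concurrent.CountDownLatch;

    public class ParallelInitSketch {
        public static void main(String[] args) throws InterruptedException {
            CountDownLatch cdl = new CountDownLatch(3);
            String[] jobs = {"ner-classifier", "sentiment-pipeline", "main-pipeline"};
            for (String job : jobs) {
                new Thread(() -> {
                    // stand-in for CRFClassifier / StanfordCoreNLP construction
                    System.out.println(job + " initialized");
                    cdl.countDown();
                }).start();
            }
            cdl.await(); // the real method presumably awaits the latch the same way
            System.out.println("all pipelines ready");
        }
    }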
@@ -115,7 +114,6 @@
         }).start();
         new Thread(() -> {
             propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
-            propsSentiment.setProperty("ner.model", nerModel);
             propsSentiment.setProperty("sentiment.model", sentimentModel);
             propsSentiment.setProperty("parse.maxlen", "90");
             propsSentiment.setProperty("threads", "25");
@@ -123,12 +121,13 @@
             propsSentiment.setProperty("tokenize.maxlen", "90");
             propsSentiment.setProperty("ssplit.maxlen", "90");
             propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment"); //coref too expensive memorywise, does it need depparse?
-            propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
+            propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep");
             pipelineSentiment = new StanfordCoreNLP(propsSentiment);
             tagger = new MaxentTagger(taggerPath);
             cdl.countDown();
         }).start();
         new Thread(() -> {
+            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,ner");
             props.setProperty("parse.model", shiftReduceParserPath);
             props.setProperty("parse.maxlen", "90");
             props.setProperty("parse.binaryTrees", "true");
@@ -137,8 +136,11 @@
             props.setProperty("tokenize.maxlen", "90");
             props.setProperty("ssplit.maxlen", "90");
             props.setProperty("lemma.maxlen", "90");
-            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
-            props.setProperty("tokenize.options", "untokenizable=firstDelete");
+            props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3);
+            props.setProperty("ner.combinationMode", "HIGH_RECALL");
+            props.setProperty("regexner.ignorecase", "true");
+            props.setProperty("ner.fine.regexner.ignorecase", "true");
+            props.setProperty("tokenize.options", "untokenizable=firstKeep"); //firstKeep //firstDelete
             pipeline = new StanfordCoreNLP(props);
             cdl.countDown();
         }).start();
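Note: the properties added above chain three caseless CRF models into one ner annotator; HIGH_RECALL keeps labels that any of the models proposes rather than requiring agreement. A reduced sketch of just the NER-relevant configuration, assuming the standard CoreNLP English models jar is on the classpath (class name hypothetical; the parse annotator is omitted to keep it small):

    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import java.util.Properties;

    public class NerConfigSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
            // same three caseless models the commit wires into Datahandler
            props.setProperty("ner.model",
                    "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,"
                    + "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz,"
                    + "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz");
            props.setProperty("ner.combinationMode", "HIGH_RECALL");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            System.out.println("NER pipeline ready: " + pipeline);
        }
    }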
@@ -191,10 +193,6 @@
         return tagger;
     }

-    public static ShiftReduceParser getModel() {
-        return model;
-    }
-
     private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
         return DataMapper.getAllStrings();
     }
@@ -258,6 +256,9 @@
             Annotationspipeline.put(str, strAnno);
             Annotation strAnno2 = new Annotation(str);
             AnnotationspipelineSentiment.put(str, strAnno2);
+            CoreDocument CD = new CoreDocument(str);
+            pipeline.annotate(CD);
+            coreDocumentAnnotationCache.put(str, CD);
         });
         pipeline.annotate(Annotationspipeline.values());
         pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
@@ -270,7 +271,6 @@
         }
     }

-    //synchronized
     public synchronized void updateMatrixes() {
         refreshMatrixFromDB = false;
         if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) {
@@ -278,7 +278,6 @@
             lHMSMX = DataMapper.getAllRelationScores();
             stopwatch1.reset();
         }
-        //requiring atleast 10 entries ensures no issues in case of empty stringcache
         if (stringCache.values().size() > 10 && !refreshMatrixFromDB) {
             ConcurrentMap<Integer, String> stringCachelocal = stringCache;
             int selectUpdate = -1;
@@ -293,13 +292,22 @@
                 }
                 ij2++;
             }
-            if (selectUpdate == -1 || selectUpdate + 1 == stringCachelocal.size()) {
-                int valueSize = stringCachelocal.size();
-                if (secondaryIterator + iteratorCap >= valueSize) {
-                    secondaryIterator = 0;
+            //secondaryIterator
+            if (selectUpdate == -1 || selectUpdate + 1 >= stringCachelocal.size() || stringCachelocal.get(selectUpdate) == null) {
+                Integer iterator = 0;
+                while (iterator == 0) {
+                    if (secondaryIterator >= stringCachelocal.size()) {
+                        secondaryIterator = 0;
+                    }
+                    String get = stringCachelocal.get(secondaryIterator);
+                    if (get == null) {
+                        secondaryIterator++;
+                    } else {
+                        selectUpdate = secondaryIterator;
+                        iterator++;
+                    }
                 }
-                selectUpdate = secondaryIterator;
-                secondaryIterator += iteratorCap;
+                secondaryIterator++;
             }
             String getStringCacheStr = stringCachelocal.get(selectUpdate);
             ConcurrentMap<Integer, SimilarityMatrix> matrixUpdateMap = new MapMaker().concurrencyLevel(2).makeMap();
@@ -339,7 +347,8 @@
                     SimilarityMatrix SMX = new SimilarityMatrix(getStringCacheStr, str1);
                     Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(getStringCacheStr, str1, SMX, jmweAnnotationCache.get(getStringCacheStr),
                             jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(getStringCacheStr), pipelineAnnotationCache.get(str1),
-                            pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1));
+                            pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1),
+                            coreDocumentAnnotationCache.get(getStringCacheStr), coreDocumentAnnotationCache.get(str1));
                     futures.put(futures.size() + 1, executor.submit(worker));
                 }
             }
@@ -375,6 +384,64 @@
         }
     }

+    public ConcurrentMap<Integer, String> removeNonSensicalStrings(ConcurrentMap<Integer, String> strmap) {
+        ConcurrentMap<Integer, String> strmapreturn = new MapMaker().concurrencyLevel(2).makeMap();
+        int relationCap = 20;
+        ConcurrentMap<Integer, String> strCacheLocal = stringCache.size() < 150 ? strmap : stringCache;
+        ConcurrentMap<String, Annotation> localJMWEMap = getMultipleJMWEAnnotation(strmap.values());
+        ConcurrentMap<String, Annotation> localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values());
+        ConcurrentMap<String, Annotation> localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values());
+        ConcurrentMap<String, CoreDocument> localCoreDocumentMap = getMultipleCoreDocuments(strmap.values());
+        for (String str : strmap.values()) {
+            ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, HashMap<String, String>> strsmaps = new MapMaker().concurrencyLevel(2).makeMap();
+            for (String str1 : strCacheLocal.values()) {
+                HashMap HM1 = new HashMap();
+                HM1.put(str, str1);
+                if (!str.equals(str1) && !strsmaps.values().contains(HM1)) {
+                    SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
+                    Callable<SimilarityMatrix> worker;
+                    if (stringCache.size() < 150) {
+                        worker = new SentimentAnalyzerTest(str, str1, SMX,
+                                localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str),
+                                localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str),
+                                localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1));
+                    } else {
+                        worker = new SentimentAnalyzerTest(str, str1, SMX,
+                                localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str),
+                                pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str),
+                                pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1));
+                    }
+                    HashMap HM = new HashMap();
+                    HM.put(SMX.getPrimaryString(), SMX.getSecondaryString());
+                    strsmaps.put(strsmaps.size() + 1, HM);
+                    futures.put(futures.size() + 1, executor.submit(worker));
+                }
+            }
+            int positiveRelationCounter = 0;
+            for (Future<SimilarityMatrix> future : futures.values()) {
+                try {
+                    SimilarityMatrix getSMX = future.get(5, TimeUnit.SECONDS);
+                    Double scoreRelationNewMsgToRecentMsg = getSMX.getDistance();
+                    if (scoreRelationNewMsgToRecentMsg >= 5000.0) {
+                        System.out.println("scoreRelationNewMsgToRecentMsg: " + scoreRelationNewMsgToRecentMsg + "\n");
+                        positiveRelationCounter++;
+                        if (positiveRelationCounter > relationCap) {
+                            strmapreturn.put(strmapreturn.size() + 1, str);
+                        }
+                        if (positiveRelationCounter > relationCap) {
+                            System.out.println("strmapreturn size: " + strmapreturn.size() + "\n");
+                            break;
+                        }
+                    }
+                } catch (InterruptedException | ExecutionException | TimeoutException ex) {
+                    Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        return strmapreturn;
+    }
+
     public synchronized void checkIfUpdateStrings(boolean hlStatsMsg) throws CustomError {
         if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
             ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
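Note: removeNonSensicalStrings above fans every candidate pair out to the executor, then harvests each Future with a hard five-second timeout, so one slow comparison cannot stall the whole filter. The same harvest pattern in isolation, with a trivial callable standing in for SentimentAnalyzerTest:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    public class TimeoutHarvestSketch {
        public static void main(String[] args) throws InterruptedException {
            ExecutorService executor = Executors.newFixedThreadPool(4);
            List<Future<Double>> futures = new ArrayList<>();
            for (int i = 0; i < 8; i++) {
                final double fakeScore = i * 1500.0; // stand-in for SimilarityMatrix.getDistance()
                futures.add(executor.submit(() -> fakeScore));
            }
            int positiveRelationCounter = 0;
            for (Future<Double> future : futures) {
                try {
                    double score = future.get(5, TimeUnit.SECONDS); // per-result timeout
                    if (score >= 5000.0) {
                        positiveRelationCounter++;
                    }
                } catch (ExecutionException | TimeoutException ex) {
                    System.out.println("skipped slow/failed comparison: " + ex);
                }
            }
            System.out.println("positive relations: " + positiveRelationCounter);
            executor.shutdown();
        }
    }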
@@ -382,15 +449,30 @@
             str = filterContent(str);
             str = removeSlacks(str);
             System.out.println("finished removeSlacks \n" + str.size() + "\n");
+            str = removeNonSensicalStrings(str);
+            System.out.println("POST removeNonSensicalStrings size: " + str.size() + "\n");
             str = annotationCacheUpdate(str);
             System.out.println("annotationCacheUpdate str size POST: " + str.size() + "\n");
-            try {
-                DataMapper.InsertMYSQLStrings(str);
-            } catch (CustomError ex) {
-                Logger.getLogger(Datahandler.class
-                        .getName()).log(Level.SEVERE, null, ex);
-            }
-            MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
+            ConcurrentMap<Integer, String> strf = str;
+            if (!stringCache.isEmpty()) {
+                new Thread(() -> {
+                    try {
+                        DataMapper.InsertMYSQLStrings(strf);
+                    } catch (CustomError ex) {
+                        Logger.getLogger(Datahandler.class
+                                .getName()).log(Level.SEVERE, null, ex);
+                    }
+                    MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
+                }).start();
+            } else {
+                try {
+                    DataMapper.InsertMYSQLStrings(strf);
+                } catch (CustomError ex) {
+                    Logger.getLogger(Datahandler.class
+                            .getName()).log(Level.SEVERE, null, ex);
+                }
+                MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
+            }
             if (!stopwatch.isRunning()) {
                 stopwatch.start();
             } else {
@@ -404,44 +486,7 @@
         if (str.startsWith("<@")) {
             str = str.substring(str.indexOf("> ") + 2);
         }
-        final LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
-        ConcurrentMap<Integer, String> strArrs = stringCache;
-        double Score = -10000;
         SimilarityMatrix SMXreturn = new SimilarityMatrix("", "");
-        System.out.println("pre mostSimilarSTR \n");
-        String mostSimilarSTR = mostSimilar(str, strArrs, MostRecent);
-        if (mostSimilarSTR != null) {
-            LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null);
-            if (orDefault != null) {
-                for (Entry<String, Double> entrySet : orDefault.entrySet()) {
-                    double smxDistance = entrySet.getValue();
-                    if (smxDistance > Score) {
-                        Score = smxDistance;
-                        SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance);
-                    }
-                }
-            }
-            for (Entry<String, LinkedHashMap<String, Double>> values1 : LHMSMXLocal.entrySet()) {
-                LinkedHashMap<String, Double> value = values1.getValue();
-                for (Entry<String, Double> keystr : value.entrySet()) {
-                    if (keystr.getKey().equals(mostSimilarSTR)) {
-                        double smxDistance = keystr.getValue();
-                        if (smxDistance > Score) {
-                            Score = smxDistance;
-                            SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance);
-                        }
-                    }
-                }
-            }
-            if (!SMXreturn.getPrimaryString().isEmpty()) {
-                if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) {
-                    return SMXreturn.getSecondaryString();
-                } else {
-                    return SMXreturn.getPrimaryString();
-                }
-            }
-        }
-        System.out.println("none within 8 range");
         ConcurrentMap<Integer, String> strCache = stringCache;
         ConcurrentMap<Integer, Future<SimilarityMatrix>> futureslocal = new MapMaker().concurrencyLevel(2).makeMap();
         ConcurrentMap<Integer, SimilarityMatrix> futurereturn = new MapMaker().concurrencyLevel(2).makeMap();
@@ -449,12 +494,21 @@
         getSingularAnnotation(strF);
         strCache.values().parallelStream().forEach((str1) -> {
             if (!strF.equals(str1)) {
-                SimilarityMatrix SMX = new SimilarityMatrix(strF, str1);
-                Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, SMX,
-                        strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno,
-                        pipelineAnnotationCache.get(str1), strAnnoSentiment,
-                        pipelineSentimentAnnotationCache.get(str1));
-                futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
+                boolean present = false;
+                for (String strCons : conversationMatchMap.values()) {
+                    if (strCons.equals(str1)) {
+                        present = true;
+                        break;
+                    }
+                }
+                if (!present) {
+                    SimilarityMatrix SMX = new SimilarityMatrix(strF, str1);
+                    Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, SMX,
+                            strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno,
+                            pipelineAnnotationCache.get(str1), strAnnoSentiment,
+                            pipelineSentimentAnnotationCache.get(str1), coreDoc, coreDocumentAnnotationCache.get(str1));
+                    futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
+                }
             }
         });
         futureslocal.values().parallelStream().forEach((future) -> {
@@ -466,48 +520,30 @@
                 System.out.println("ex getResponsemsg: " + ex.getMessage() + "\n");
             }
         });
-        ConcurrentMap<Integer, SimilarityMatrix> smxUpdateReturn = new MapMaker().concurrencyLevel(2).makeMap();
+        preRelationCounters = 0.0;
+        preRelationUserCounters = 0.0;
+        conversationMatchMap.put(conversationMatchMap.size() + 1, MostRecent);
+        Double scoreRelationNewMsgToRecentMsg = 0.0;
+        for (String conversationStr : conversationMatchMap.values()) {
+            scoreRelationNewMsgToRecentMsg += getScoreRelationNewMsgToRecentMsg(strF, conversationStr);
+        }
+        boolean relatedReponse = scoreRelationNewMsgToRecentMsg >= 250;
+        if (!relatedReponse) {
+            conversationMatchMap = new MapMaker().concurrencyLevel(2).makeMap();
+        }
         for (SimilarityMatrix SMX : futurereturn.values()) {
             Double scoreRelation = 500.0;
-            boolean foundmatch = false;
-            if (!MostRecent.isEmpty()) {
-                LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(MostRecent, null);
-                if (orDefault1 != null) {
-                    Collection<String> orDefaultstrs = orDefault1.keySet();
-                    for (String strs : orDefaultstrs) {
-                        if (strs.equals(SMX.getSecondaryString())) {
-                            scoreRelation = orDefault1.get(SMX.getSecondaryString());
-                            foundmatch = true;
-                            break;
-                        }
-                    }
-                }
-                if (!foundmatch) {
-                    orDefault1 = lHMSMX.getOrDefault(SMX.getSecondaryString(), null);
-                    if (orDefault1 != null) {
-                        Collection<String> orDefaultstrs = orDefault1.keySet();
-                        for (String strs : orDefaultstrs) {
-                            if (strs.equals(MostRecent)) {
-                                scoreRelation = orDefault1.get(MostRecent);
-                                foundmatch = true;
-                                break;
-                            }
-                        }
-                    }
-                }
-            }
-            if (!foundmatch) {
-                scoreRelation = getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), MostRecent);
-            }
-            if (scoreRelation > (25 * smxUpdateReturn.size())) {
-                smxUpdateReturn.put(smxUpdateReturn.size() + 1, SMX);
-            }
-        }
-        for (SimilarityMatrix SMX : smxUpdateReturn.values()) {
-            double distance = SMX.getDistance();
-            if (distance > Score) {
-                Score = distance;
+            Double scoreRelationLastUserMsg = SMX.getDistance();
+            if (relatedReponse) {
+                for (String conversationStr : conversationMatchMap.values()) {
+                    scoreRelation += getScoreRelationNewMsgToRecentMsg(SMX.getSecondaryString(), conversationStr);
+                }
+            }
+            Double totalRelation = scoreRelation + scoreRelationLastUserMsg;
+            if (totalRelation > preRelationCounters + preRelationUserCounters && scoreRelationLastUserMsg > preRelationUserCounters) {
                 SMXreturn = SMX;
+                preRelationCounters = scoreRelation;
+                preRelationUserCounters = scoreRelationLastUserMsg;
             }
         }
         System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString()
@@ -524,6 +560,44 @@
         notactualList.add(str);
         ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList);
         strAnnoJMWE = jmweAnnotation.values().iterator().next();
+        CoreDocument coreDocument = new CoreDocument(str);
+        pipeline.annotate(coreDocument);
+        coreDoc = coreDocument;
+    }
+
+    public ConcurrentMap<String, Annotation> getMultipleJMWEAnnotation(Collection<String> str) {
+        ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str);
+        return jmweAnnotation;
+    }
+
+    public ConcurrentMap<String, Annotation> getMultiplePipelineAnnotation(Collection<String> str) {
+        ConcurrentMap<String, Annotation> pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap();
+        for (String str1 : str) {
+            Annotation strAnno1 = new Annotation(str1);
+            pipelineAnnotationMap.put(str1, strAnno1);
+        }
+        pipeline.annotate(pipelineAnnotationMap.values());
+        return pipelineAnnotationMap;
+    }
+
+    public ConcurrentMap<String, Annotation> getMultiplePipelineSentimentAnnotation(Collection<String> str) {
+        ConcurrentMap<String, Annotation> pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap();
+        for (String str1 : str) {
+            Annotation strAnno1 = new Annotation(str1);
+            pipelineAnnotationMap.put(str1, strAnno1);
+        }
+        pipelineSentiment.annotate(pipelineAnnotationMap.values());
+        return pipelineAnnotationMap;
+    }
+
+    public ConcurrentMap<String, CoreDocument> getMultipleCoreDocuments(Collection<String> str) {
+        ConcurrentMap<String, CoreDocument> pipelineCoreDocumentAnnotations = new MapMaker().concurrencyLevel(2).makeMap();
+        str.parallelStream().forEach((str1) -> {
+            CoreDocument coreDocument = new CoreDocument(str1);
+            pipeline.annotate(coreDocument);
+            pipelineCoreDocumentAnnotations.put(str1, coreDocument);
+        });
+        return pipelineCoreDocumentAnnotations;
     }

     private Double getScoreRelationNewMsgToRecentMsg(String str, String mostRecentMsg) {
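Note: the getMultiple* helpers above all follow CoreNLP's batch idiom: wrap each string in an Annotation, then make a single annotate(...) call over the whole collection so the pipeline can parallelize internally via its "threads" setting. Usage sketch with a deliberately small pipeline (class name and annotator list are illustrative):

    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Properties;

    public class BatchAnnotateSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            List<Annotation> batch = new ArrayList<>();
            for (String s : new String[]{"first message", "second message"}) {
                batch.add(new Annotation(s));
            }
            pipeline.annotate(batch); // one call; each Annotation is filled in place
            System.out.println("annotated " + batch.size() + " strings");
        }
    }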
@@ -531,7 +605,8 @@
         Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX,
                 jmweAnnotationCache.get(str), jmweAnnotationCache.get(mostRecentMsg), pipelineAnnotationCache.get(str),
                 pipelineAnnotationCache.get(mostRecentMsg), pipelineSentimentAnnotationCache.get(str),
-                pipelineSentimentAnnotationCache.get(mostRecentMsg));
+                pipelineSentimentAnnotationCache.get(mostRecentMsg), coreDocumentAnnotationCache.get(str),
+                coreDocumentAnnotationCache.get(mostRecentMsg));
         SimilarityMatrix callSMX = null;
         try {
             callSMX = worker.call();
@@ -548,14 +623,15 @@
     public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings, String MostRecent) {
         similar = "";
-        minDistance = 7.0;
-        preRelationCounters = 500.0;
+        minDistance = 6.0;
+        preRelationCounters = 0.0;
+        preRelationUserCounters = 0.0;
+        getSingularAnnotation(toBeCompared);
         ConcurrentMap<Integer, String> similardistances = new MapMaker().concurrencyLevel(2).makeMap();
         concurrentStrings.values().parallelStream().forEach((str) -> {
             LevenshteinDistance leven = new LevenshteinDistance(toBeCompared, str);
             double distance = leven.computeLevenshteinDistance();
             if (distance <= minDistance) {
-                minDistance = distance;
                 System.out.println("distance: " + distance + "\n");
                 similardistances.put(similardistances.size() + 1, str);
             }
@@ -571,7 +647,8 @@
             Double value = defaultEntry.getValue();
             String key = defaultEntry.getKey();
             if (value > maxDistance) {
-                Double RelationScore = 500.0;
+                Double RelationScoreLastMsg = 500.0;
+                Double RelationScoreLastUserMsg = 500.0;
                 boolean foundmatch = false;
                 if (!MostRecent.isEmpty()) {
                     LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(MostRecent, null);
@@ -579,7 +656,7 @@
                         Collection<String> orDefaultstrs = orDefault1.keySet();
                         for (String strs : orDefaultstrs) {
                             if (strs.equals(key)) {
-                                RelationScore = orDefault1.get(key);
+                                RelationScoreLastMsg = orDefault1.get(key);
                                 foundmatch = true;
                                 break;
                             }
@@ -591,7 +668,7 @@
                             Collection<String> orDefaultstrs = orDefault1.keySet();
                             for (String strs : orDefaultstrs) {
                                 if (strs.equals(MostRecent)) {
-                                    RelationScore = orDefault1.get(MostRecent);
+                                    RelationScoreLastMsg = orDefault1.get(MostRecent);
                                     foundmatch = true;
                                     break;
                                 }
@@ -600,12 +677,20 @@
                     }
                 }
                 if (!foundmatch) {
-                    RelationScore = getScoreRelationNewMsgToRecentMsg(key, MostRecent);
+                    RelationScoreLastMsg = getScoreRelationNewMsgToRecentMsg(key, MostRecent);
                 }
-                if (RelationScore > preRelationCounters) {
-                    maxDistance = value;
-                    similar = defaultEntry.getKey();
-                    preRelationCounters = RelationScore;
+                RelationScoreLastUserMsg = getScoreRelationNewMsgToRecentMsg(key, toBeCompared);
+                Double totalRelation = RelationScoreLastMsg + RelationScoreLastUserMsg;
+                if (totalRelation > preRelationCounters + preRelationUserCounters) {
+                    if (RelationScoreLastMsg + 500 > preRelationUserCounters && RelationScoreLastUserMsg > preRelationCounters
+                            || RelationScoreLastUserMsg + 500 > preRelationCounters && RelationScoreLastMsg > preRelationUserCounters) {
+                        if (RelationScoreLastMsg > preRelationCounters && RelationScoreLastUserMsg > preRelationUserCounters) {
+                            maxDistance = value;
+                            similar = defaultEntry.getKey();
+                            preRelationCounters = RelationScoreLastMsg;
+                            preRelationUserCounters = RelationScoreLastUserMsg;
+                        }
+                    }
                 }
             }
         }
@@ -736,76 +821,26 @@
     }

     private ConcurrentMap<Integer, String> removeSlacks(ConcurrentMap<Integer, String> str) {
-        ShiftReduceParser modelLocal = getModel();
-        MaxentTagger taggerLocal = getTagger();
         ConcurrentMap<Integer, String> strreturn = new MapMaker().concurrencyLevel(2).makeMap();
+        if (stringCache.isEmpty()) {
+            return str;
+        }
+        Collection<String> values = stringCache.values();
         str.values().parallelStream().forEach(str1 -> {
-            ConcurrentMap<Integer, String> TGWList = new MapMaker().concurrencyLevel(2).makeMap();
-            DocumentPreprocessor tokenizer = null;
-            try {
-                tokenizer = new DocumentPreprocessor(new StringReader(str1));
-            } catch (Exception ex) {
-                System.out.println("failed tokenizer removeslacks: " + ex.getLocalizedMessage() + "\n");
-                tokenizer = null;
-            }
-            if (tokenizer != null) {
-                for (List<HasWord> sentence : tokenizer) {
-                    int counter = 0;
-                    List<TaggedWord> taggedWords;
-                    List<TaggedWord> tagged1 = taggerLocal.tagSentence(sentence);
-                    Tree tree = modelLocal.apply(tagged1);
-                    taggedWords = tree.taggedYield();
-                    for (TaggedWord TGW : taggedWords) {
-                        if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) {
-                            TGWList.put(TGWList.size() + 1, TGW.tag());
-                            counter++;
-                        }
-                        if (counter > 3) {
-                            int addCounter = 0;
-                            ConcurrentMap<Integer, Word> wordList = new MapMaker().concurrencyLevel(2).makeMap();
-                            for (Word lab : tree.yieldWords()) {
-                                if (lab != null && lab.word() != null) {
-                                    if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) {
-                                        wordList.put(wordList.size() + 1, lab);
-                                        addCounter++;
-                                    }
-                                }
-                            }
-                            if (addCounter > 3) {
-                                addCounter = 0;
-                                ConcurrentMap<Integer, HasWord> HWlist = new MapMaker().concurrencyLevel(2).makeMap();
-                                for (HasWord HW : tree.yieldHasWord()) {
-                                    if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) {
-                                        addCounter++;
-                                        HWlist.put(HWlist.size() + 1, HW);
-                                    }
-                                }
-                                if (addCounter > 3) {
-                                    boolean tooclosematch = false;
-                                    Collection<String> values = stringCache.values();
-                                    for (String strVals : values) {
-                                        LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
-                                        double Distance = leven.computeLevenshteinDistance();
-                                        int maxpermittedDistance = 5;
-                                        if (Distance < maxpermittedDistance) {
-                                            tooclosematch = true;
-                                            break;
-                                        }
-                                    }
-                                    if (!tooclosematch) {
-                                        strreturn.put(strreturn.size() + 1, str1);
-                                        System.out.println("adding strreturn str1: " + str1 + "\n");
-                                    }
-                                }
-                            }
-                            break;
-                        }
-                    }
-                    if (counter > 3) {
-                        break;
-                    }
-                }
-            }
+            boolean tooclosematch = false;
+            for (String strVals : values) {
+                LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
+                double Distance = leven.computeLevenshteinDistance();
+                Double maxpermittedDistance = 2.5;
+                if (Distance < maxpermittedDistance) {
+                    tooclosematch = true;
+                    break;
+                }
+            }
+            if (!tooclosematch) {
+                strreturn.put(strreturn.size() + 1, str1);
+                System.out.println("adding strreturn str1: " + str1 + "\n");
+            }
         });
         return strreturn;
     }
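Note: the rewritten removeSlacks above drops the parser entirely; a candidate survives only if its Levenshtein distance to every cached string is at least 2.5, i.e. at least three edits away. Self-contained sketch with an inline distance function (the project uses its own LevenshteinDistance class; the 2.5 threshold is copied from the diff):

    import java.util.List;

    public class DedupeSketch {

        static int levenshtein(String a, String b) {
            int[][] d = new int[a.length() + 1][b.length() + 1];
            for (int i = 0; i <= a.length(); i++) {
                d[i][0] = i;
            }
            for (int j = 0; j <= b.length(); j++) {
                d[0][j] = j;
            }
            for (int i = 1; i <= a.length(); i++) {
                for (int j = 1; j <= b.length(); j++) {
                    int cost = a.charAt(i - 1) == b.charAt(j - 1) ? 0 : 1;
                    d[i][j] = Math.min(Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
                }
            }
            return d[a.length()][b.length()];
        }

        public static void main(String[] args) {
            List<String> cache = List.of("hello there bot", "how are you");
            String candidate = "hello there bots"; // one edit away from a cached string
            boolean tooCloseMatch = cache.stream().anyMatch(s -> levenshtein(s, candidate) < 2.5);
            System.out.println(tooCloseMatch ? "dropped: too close to cache" : "kept");
        }
    }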
@@ -818,22 +853,36 @@
         ConcurrentMap<String, Annotation> Annotationspipeline = new MapMaker().concurrencyLevel(2).makeMap();
         ConcurrentMap<String, Annotation> AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(2).makeMap();
         strmap.values().parallelStream().forEach(str -> {
-            Annotation strAnno = new Annotation(str);
-            Annotationspipeline.put(str, strAnno);
+            Annotation strAnno1 = new Annotation(str);
+            Annotationspipeline.put(str, strAnno1);
             Annotation strAnno2 = new Annotation(str);
             AnnotationspipelineSentiment.put(str, strAnno2);
+            try {
+                CoreDocument CD = new CoreDocument(str);
+                pipeline.annotate(CD);
+                coreDocumentAnnotationCache.put(str, CD);
+            } catch (Exception e) {
+                System.out.println("failed document annotation: " + e + "\n");
+            }
             stringCache.put(stringCache.size() + 1, str);
         });
         System.out.println("pre iterator annotation update \n");
         pipeline.annotate(Annotationspipeline.values());
         pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
         Annotationspipeline.entrySet().forEach(pipelineEntry -> {
-            pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
+            if (pipelineEntry != null) {
+                pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
+            } else {
+                System.out.println("failed pipeline cache \n");
+            }
         });
         AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> {
-            pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
+            if (pipelineEntry != null) {
+                pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
+            } else {
+                System.out.println("failed sentiment cache \n");
+            }
         });
         return strmap;
     }
 }

View File

@@ -6,6 +6,8 @@
 package FunctionLayer;

 import com.google.common.collect.MapMaker;
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreEntityMention;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.ConcurrentMap;
@@ -15,17 +17,17 @@
  * @author install1
  */
 public class MessageResponseHandler {

     private static ConcurrentMap<Integer, String> str = new MapMaker().concurrencyLevel(2).makeMap();

     public static ConcurrentMap<Integer, String> getStr() {
         return str;
     }

     public static void setStr(ConcurrentMap<Integer, String> str) {
         MessageResponseHandler.str = str;
     }

     public static void getMessage(String message) {
         if (message != null && !message.isEmpty()) {
             message = message.replace("@", "");
@@ -38,8 +40,8 @@
             str.put(str.size() + 1, message);
         }
     }

-    public synchronized static String selectReponseMessage(String toString, String mostRecentMsg) throws CustomError {
+    public synchronized static String selectReponseMessage(String toString, String mostRecentMsg, String personName) throws CustomError {
         ConcurrentMap<Integer, String> str1 = new MapMaker().concurrencyLevel(2).makeMap();
         str1.put(str1.size() + 1, toString);
         str1 = Datahandler.cutContent(str1, false);
@@ -50,6 +52,21 @@
             }
         }
         String getResponseMsg = Datahandler.instance.getResponseMsg(strreturn, mostRecentMsg);
+        getResponseMsg = checkPersonPresnetInSentence(personName, getResponseMsg);
         return getResponseMsg;
     }

+    private static String checkPersonPresnetInSentence(String personName, String responseMsg) {
+        String strreturn = responseMsg;
+        CoreDocument pipelineCoreDcoument = new CoreDocument(responseMsg);
+        Datahandler.getPipeline().annotate(pipelineCoreDcoument);
+        for (CoreEntityMention em : pipelineCoreDcoument.entityMentions()) {
+            String entityType = em.entityType();
+            if (entityType.equals("PERSON")) {
+                String replace = strreturn.replaceFirst(em.text(), personName);
+                return replace;
+            }
+        }
+        return responseMsg;
+    }
 }
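Note: checkPersonPresnetInSentence above personalizes a canned reply by swapping the first PERSON entity mention for the addressee's name. The same lookup-and-replace in standalone form, building its own small pipeline instead of borrowing Datahandler.getPipeline() (example strings are illustrative; replaceFirst treats the mention text as a regex, as the committed code also does):

    import edu.stanford.nlp.pipeline.CoreDocument;
    import edu.stanford.nlp.pipeline.CoreEntityMention;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import java.util.Properties;

    public class PersonSwapSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            String response = "thanks Alice, that was helpful";
            CoreDocument doc = new CoreDocument(response);
            pipeline.annotate(doc);
            for (CoreEntityMention em : doc.entityMentions()) {
                if (em.entityType().equals("PERSON")) {
                    response = response.replaceFirst(em.text(), "Bob");
                    break; // only the first PERSON mention is rewritten
                }
            }
            System.out.println(response); // expected: "thanks Bob, that was helpful"
        }
    }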

View File

@@ -17,6 +17,8 @@ import edu.stanford.nlp.ling.TaggedWord;
 import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
 import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
 import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreEntityMention;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.process.CoreLabelTokenFactory;
 import edu.stanford.nlp.process.DocumentPreprocessor;
@@ -38,6 +40,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.OptionalDouble;
 import java.util.Set;
@@ -62,7 +65,6 @@
     private SimilarityMatrix smxParam;
     private String str;
     private String str1;
-    private ShiftReduceParser model;
     private MaxentTagger tagger;
     private GrammaticalStructureFactory gsf;
     private StanfordCoreNLP pipeline;
@@ -74,13 +76,15 @@
     private Annotation pipelineAnnotation2;
     private Annotation pipelineAnnotation1Sentiment;
     private Annotation pipelineAnnotation2Sentiment;
+    private CoreDocument pipelineCoreDcoument1;
+    private CoreDocument pipelineCoreDcoument2;

     public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam, Annotation str1Annotation, Annotation str2Annotation,
-            Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2) {
+            Annotation strPipeline1, Annotation strPipeline2, Annotation strPipeSentiment1, Annotation strPipeSentiment2,
+            CoreDocument pipelineCoreDcoument1, CoreDocument pipelineCoreDcoument2) {
         this.str = str;
         this.str1 = str1;
         this.smxParam = smxParam;
-        this.model = Datahandler.getModel();
         this.tagger = Datahandler.getTagger();
         this.pipeline = Datahandler.getPipeline();
         this.pipelineSentiment = Datahandler.getPipelineSentiment();
@@ -90,8 +94,10 @@
         this.jmweStrAnnotation2 = str2Annotation;
         this.pipelineAnnotation1 = strPipeline1;
         this.pipelineAnnotation2 = strPipeline2;
-        this.pipelineAnnotation1Sentiment = strPipeSentiment1; //maybe process?
+        this.pipelineAnnotation1Sentiment = strPipeSentiment1;
         this.pipelineAnnotation2Sentiment = strPipeSentiment2;
+        this.pipelineCoreDcoument1 = pipelineCoreDcoument1;
+        this.pipelineCoreDcoument2 = pipelineCoreDcoument2;
     }

     @Override
@@ -106,12 +112,14 @@
                 = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
         tokenizer.setTokenizerFactory(ptbTokenizerFactory);
         for (List<HasWord> sentence : tokenizer) {
-            taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
+            taggedwordlist1.add(tagger.tagSentence(sentence));
+            //taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
         }
         tokenizer = new DocumentPreprocessor(new StringReader(str));
         tokenizer.setTokenizerFactory(ptbTokenizerFactory);
         for (List<HasWord> sentence : tokenizer) {
-            taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
+            taggedwordlist2.add(tagger.tagSentence(sentence));
+            //taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
         }
         int counter = 0;
         int counter1 = 0;
@@ -817,6 +825,74 @@
             double SentenceScoreDiff = leven.computeLevenshteinDistance();
             SentenceScoreDiff *= 15;
             score -= SentenceScoreDiff;
+            ConcurrentMap<Integer, String> nerEntities1 = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, String> nerEntities2 = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, String> nerEntities3 = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, String> nerEntities4 = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, String> nerEntityTokenTags1 = new MapMaker().concurrencyLevel(2).makeMap();
+            ConcurrentMap<Integer, String> nerEntityTokenTags2 = new MapMaker().concurrencyLevel(2).makeMap();
+            for (CoreEntityMention em : pipelineCoreDcoument1.entityMentions()) {
+                Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
+                String entityType = em.entityType();
+                Double EntityConfidences = 0.0;
+                for (Map.Entry<String, Double> entries : entrySet) {
+                    EntityConfidences = entries.getValue();
+                }
+                List<CoreLabel> tokens = em.tokens();
+                for (CoreLabel token : tokens) {
+                    if (!nerEntityTokenTags1.values().contains(token.tag())) {
+                        if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
+                            score -= 6000;
+                        } else {
+                            nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag());
+                        }
+                    }
+                }
+                if (!nerEntities1.values().contains(em.text())) {
+                    nerEntities1.put(nerEntities1.size() + 1, em.text());
+                    nerEntities3.put(nerEntities3.size() + 1, em.entityType());
+                }
+            }
+            for (CoreEntityMention em : pipelineCoreDcoument2.entityMentions()) {
+                Set<Map.Entry<String, Double>> entrySet = em.entityTypeConfidences().entrySet();
+                String entityType = em.entityType();
+                Double EntityConfidences = 0.0;
+                for (Map.Entry<String, Double> entries : entrySet) {
+                    EntityConfidences = entries.getValue();
+                }
+                List<CoreLabel> tokens = em.tokens();
+                for (CoreLabel token : tokens) {
+                    if (!nerEntityTokenTags2.values().contains(token.tag())) {
+                        if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
+                            score -= 6000;
+                        } else {
+                            nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag());
+                        }
+                    }
+                }
+                if (!nerEntities2.values().contains(em.text())) {
+                    nerEntities2.put(nerEntities2.size() + 1, em.text());
+                    nerEntities4.put(nerEntities4.size() + 1, em.entityType());
+                }
+            }
+            for (String strEnts1 : nerEntities1.values()) {
+                Collection<String> values = nerEntities2.values();
+                for (String strEnts2 : values) {
+                    if (strEnts1.equalsIgnoreCase(strEnts2)) {
+                        score += 7500;
+                    }
+                }
+            }
+            for (String strEnts1 : nerEntities3.values()) {
+                if (nerEntities4.values().contains(strEnts1)) {
+                    score += 3500;
+                }
+            }
+            for (String strToken : nerEntityTokenTags1.values()) {
+                if (nerEntityTokenTags2.values().contains(strToken)) {
+                    score += 2500;
+                }
+            }
         } catch (Exception ex) {
             System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n");
         }
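Note: the new scoring block above rewards the pair when both texts mention the same entity (+7500 per exact text match, +3500 per shared entity type, +2500 per shared token tag) and docks 6000 when a PERSON mention's confidence is below 0.80. A reduced sketch of just the text-overlap reward (the 7500 constant is the diff's own; pipeline setup illustrative):

    import edu.stanford.nlp.pipeline.CoreDocument;
    import edu.stanford.nlp.pipeline.CoreEntityMention;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import java.util.Properties;

    public class EntityOverlapSketch {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            CoreDocument doc1 = new CoreDocument("Stanford is in California");
            CoreDocument doc2 = new CoreDocument("I visited Stanford last year");
            pipeline.annotate(doc1);
            pipeline.annotate(doc2);
            double score = 0.0;
            for (CoreEntityMention em1 : doc1.entityMentions()) {
                for (CoreEntityMention em2 : doc2.entityMentions()) {
                    if (em1.text().equalsIgnoreCase(em2.text())) {
                        score += 7500; // same reward the committed loop applies
                    }
                }
            }
            System.out.println("entity overlap score: " + score);
        }
    }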

View File

@@ -33,7 +33,7 @@ import org.javacord.api.entity.user.User;
  */
 public class DiscordHandler {

-    private static String MostRecentMsg = "what do you think of humanzz";
+    private static String MostRecentMsg = "how are you today bot";

     public static void main(String[] args) {
         System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "25");
@@ -51,15 +51,6 @@
         System.out.println("FINISHED ALL ANNOTATIONS");
         Datahandler.instance.addHLstatsMessages();
         Datahandler.instance.updateStringCache();
-        //order matters
-        if (Datahandler.instance.getstringCacheSize() != 0) {
-            while (Datahandler.instance.getlHMSMXSize() * Datahandler.instance.getlHMSMXSize() * 2.5
-                    < (Datahandler.instance.getstringCacheSize()
-                    * Datahandler.instance.getstringCacheSize())
-                    - Datahandler.instance.getstringCacheSize()) {
-                Datahandler.instance.updateMatrixes();
-            }
-        }
         String token = "NTI5NzAxNTk5NjAyMjc4NDAx.Dw0vDg.7-aMjVWdQMYPl8qVNyvTCPS5F_A";
         DiscordApi api = new DiscordApiBuilder().setToken(token).login().join();
         api.addMessageCreateListener(event -> {
@@ -94,7 +85,8 @@
                     || event.getServerTextChannel().get().toString().contains("general-autism")) {
                 String ResponseStr;
                 try {
-                    ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg);
+                    String person = event.getMessageAuthor().getName();
+                    ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString(), MostRecentMsg, person);
                     if (!ResponseStr.isEmpty()) {
                         System.out.print("\nResponseStr3: " + ResponseStr + "\n");
                         event.getChannel().sendMessage(ResponseStr);
@@ -103,7 +95,6 @@
                     new Thread(() -> {
                         try {
                             Datahandler.instance.checkIfUpdateStrings(false);
-                            Datahandler.instance.updateMatrixes();
                         } catch (CustomError ex) {
                             Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
                         }