2019-03-20 22:38:28 +01:00
|
|
|
/*
|
|
|
|
* To change this license header, choose License Headers in Project Properties.
|
|
|
|
* To change this template file, choose Tools | Templates
|
|
|
|
* and open the template in the editor.
|
|
|
|
*/
|
|
|
|
package FunctionLayer;
|
|
|
|
|
|
|
|
import DataLayer.DataMapper;
|
|
|
|
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
|
2019-05-19 20:35:18 +02:00
|
|
|
import FunctionLayer.StanfordParser.SentimentValueCache;
|
2019-03-20 22:38:28 +01:00
|
|
|
import com.google.common.base.Stopwatch;
|
|
|
|
import com.google.common.collect.MapMaker;
|
|
|
|
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
|
|
|
|
import edu.stanford.nlp.ie.crf.CRFClassifier;
|
|
|
|
import edu.stanford.nlp.ling.CoreLabel;
|
|
|
|
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
|
|
|
|
import edu.stanford.nlp.pipeline.Annotation;
|
2019-04-14 14:18:01 +02:00
|
|
|
import edu.stanford.nlp.pipeline.CoreDocument;
|
2019-03-20 22:38:28 +01:00
|
|
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
|
|
|
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
|
|
|
|
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
|
|
|
|
import edu.stanford.nlp.trees.TreebankLanguagePack;
|
|
|
|
import java.io.IOException;
|
2019-05-24 16:08:25 +02:00
|
|
|
import static java.lang.Math.random;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.sql.SQLException;
|
2019-04-20 00:17:18 +02:00
|
|
|
import java.util.AbstractMap;
|
2019-03-24 23:04:19 +01:00
|
|
|
import java.util.ArrayList;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.Collection;
|
2019-05-24 16:08:25 +02:00
|
|
|
import java.util.Collections;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.LinkedHashMap;
|
|
|
|
import java.util.List;
|
2019-08-03 14:35:09 +02:00
|
|
|
import java.util.ListIterator;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Map.Entry;
|
|
|
|
import java.util.Properties;
|
2019-05-12 19:06:22 +02:00
|
|
|
import java.util.Set;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.concurrent.Callable;
|
2020-03-07 18:40:59 +01:00
|
|
|
import java.util.concurrent.CompletionService;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.concurrent.ConcurrentMap;
|
|
|
|
import java.util.concurrent.CountDownLatch;
|
|
|
|
import java.util.concurrent.ExecutionException;
|
2020-03-07 18:40:59 +01:00
|
|
|
import java.util.concurrent.ExecutorCompletionService;
|
2019-08-03 14:35:09 +02:00
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
|
import java.util.concurrent.Executors;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.concurrent.ForkJoinPool;
|
2019-05-24 16:08:25 +02:00
|
|
|
import java.util.concurrent.ForkJoinTask;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.concurrent.Future;
|
2019-08-03 14:35:09 +02:00
|
|
|
import java.util.concurrent.ThreadLocalRandom;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
|
import java.util.concurrent.TimeoutException;
|
2019-05-09 23:00:27 +02:00
|
|
|
import java.util.function.Consumer;
|
2019-03-20 22:38:28 +01:00
|
|
|
import java.util.logging.Level;
|
|
|
|
import java.util.logging.Logger;
|
2019-05-09 23:00:27 +02:00
|
|
|
import java.util.stream.Collectors;
|
2019-08-03 14:35:09 +02:00
|
|
|
import java.util.stream.Stream;
|
2019-03-20 22:38:28 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @author install1
|
|
|
|
*/
|
|
|
|
public class Datahandler {
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-04-14 14:18:01 +02:00
|
|
|
public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
|
2019-03-20 22:38:28 +01:00
|
|
|
public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
|
|
|
|
public static Datahandler instance = new Datahandler();
|
2019-04-06 23:58:18 +02:00
|
|
|
private static Annotation strAnno;
|
|
|
|
private static Annotation strAnnoSentiment;
|
|
|
|
private static Annotation strAnnoJMWE;
|
2019-04-14 14:18:01 +02:00
|
|
|
private static CoreDocument coreDoc;
|
2020-03-21 18:43:46 +01:00
|
|
|
private static final ConcurrentMap<Integer, String> stringCache = new MapMaker().concurrencyLevel(6).makeMap();
|
2019-03-20 22:38:28 +01:00
|
|
|
private static ConcurrentMap<String, Annotation> pipelineAnnotationCache;
|
|
|
|
private static ConcurrentMap<String, Annotation> pipelineSentimentAnnotationCache;
|
|
|
|
private static ConcurrentMap<String, Annotation> jmweAnnotationCache;
|
2019-04-14 14:18:01 +02:00
|
|
|
private static ConcurrentMap<String, CoreDocument> coreDocumentAnnotationCache;
|
2019-05-19 20:35:18 +02:00
|
|
|
private static ConcurrentMap<String, SentimentValueCache> sentimentCachingMap = new MapMaker().concurrencyLevel(6).makeMap();
|
2019-03-20 22:38:28 +01:00
|
|
|
private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
|
|
|
|
private final Stopwatch stopwatch;
|
2019-03-29 12:34:40 +01:00
|
|
|
private static String similar = "";
|
2019-03-20 22:38:28 +01:00
|
|
|
private static String shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
|
|
|
|
private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
|
|
|
|
private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
|
|
|
|
private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
|
2019-04-14 14:18:01 +02:00
|
|
|
private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz";
|
|
|
|
private static String nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz";
|
|
|
|
private static String nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz";
|
2019-05-09 23:00:27 +02:00
|
|
|
private static final String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with";
|
2019-03-20 22:38:28 +01:00
|
|
|
private static MaxentTagger tagger;
|
2019-04-14 14:18:01 +02:00
|
|
|
private static String[] options = {"-maxLength", "100"};
|
2019-03-20 22:38:28 +01:00
|
|
|
private static Properties props = new Properties();
|
|
|
|
private static Properties propsSentiment = new Properties();
|
|
|
|
private static GrammaticalStructureFactory gsf;
|
|
|
|
private static LexicalizedParser lp;
|
|
|
|
private static TreebankLanguagePack tlp;
|
|
|
|
private static AbstractSequenceClassifier<CoreLabel> classifier;
|
|
|
|
// set up Stanford CoreNLP pipeline
|
2019-05-09 23:00:27 +02:00
|
|
|
private static final StanfordCoreNLP pipeline = getPipeLineSetUp();
|
2019-03-20 22:38:28 +01:00
|
|
|
private static StanfordCoreNLP pipelineSentiment;
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
/**
 * Creates a handler with an unstarted stopwatch and fresh, empty annotation caches.
 *
 * <p>The four caches assigned here are static fields, so each construction replaces
 * them for every user of this class.
 */
public Datahandler() {
    stopwatch = Stopwatch.createUnstarted();
    jmweAnnotationCache = new MapMaker().concurrencyLevel(3).makeMap();
    pipelineAnnotationCache = new MapMaker().concurrencyLevel(4).makeMap();
    pipelineSentimentAnnotationCache = new MapMaker().concurrencyLevel(4).makeMap();
    coreDocumentAnnotationCache = new MapMaker().concurrencyLevel(5).makeMap();
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
|
|
|
public static StanfordCoreNLP getPipeline() {
|
|
|
|
return pipeline;
|
|
|
|
}
|
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static StanfordCoreNLP getPipeLineSetUp() {
|
|
|
|
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
|
|
|
|
props.setProperty("parse.model", shiftReduceParserPath);
|
|
|
|
props.setProperty("parse.maxlen", "90");
|
|
|
|
props.setProperty("parse.binaryTrees", "true");
|
|
|
|
props.setProperty("threads", "25");
|
|
|
|
props.setProperty("pos.maxlen", "90");
|
|
|
|
props.setProperty("tokenize.maxlen", "90");
|
|
|
|
props.setProperty("ssplit.maxlen", "90");
|
|
|
|
props.setProperty("lemma.maxlen", "90");
|
|
|
|
props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3);
|
|
|
|
props.setProperty("ner.combinationMode", "HIGH_RECALL");
|
|
|
|
props.setProperty("regexner.ignorecase", "true");
|
|
|
|
props.setProperty("ner.fine.regexner.ignorecase", "true");
|
|
|
|
props.setProperty("tokenize.options", "untokenizable=firstDelete");
|
|
|
|
return new StanfordCoreNLP(props);
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public void shiftReduceParserInitiate() {
|
|
|
|
//got 8 cores
|
2019-05-09 23:00:27 +02:00
|
|
|
CountDownLatch cdl = new CountDownLatch(2);
|
2019-03-20 22:38:28 +01:00
|
|
|
new Thread(() -> {
|
|
|
|
try {
|
|
|
|
classifier = CRFClassifier.getClassifierNoExceptions(nerModel);
|
|
|
|
} catch (ClassCastException ex) {
|
|
|
|
Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
cdl.countDown();
|
|
|
|
}).start();
|
|
|
|
new Thread(() -> {
|
|
|
|
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
|
|
|
|
propsSentiment.setProperty("sentiment.model", sentimentModel);
|
2019-03-26 21:38:03 +01:00
|
|
|
propsSentiment.setProperty("parse.maxlen", "90");
|
|
|
|
propsSentiment.setProperty("threads", "25");
|
|
|
|
propsSentiment.setProperty("pos.maxlen", "90");
|
|
|
|
propsSentiment.setProperty("tokenize.maxlen", "90");
|
|
|
|
propsSentiment.setProperty("ssplit.maxlen", "90");
|
2019-05-09 23:00:27 +02:00
|
|
|
propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment,lemma,stopword"); //coref too expensive memorywise
|
|
|
|
propsSentiment.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator");
|
|
|
|
propsSentiment.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
|
|
|
|
propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
|
2019-03-20 22:38:28 +01:00
|
|
|
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
|
|
|
|
tagger = new MaxentTagger(taggerPath);
|
|
|
|
cdl.countDown();
|
|
|
|
}).start();
|
|
|
|
lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
|
|
|
|
tlp = lp.getOp().langpack();
|
|
|
|
gsf = tlp.grammaticalStructureFactory();
|
|
|
|
try {
|
|
|
|
cdl.await();
|
|
|
|
} catch (InterruptedException ex) {
|
2020-03-07 18:40:59 +01:00
|
|
|
//System.out.println("cdl await interrupted: " + ex.getLocalizedMessage() + "\n");
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
System.out.println("finished shiftReduceParserInitiate\n");
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static AbstractSequenceClassifier<CoreLabel> getClassifier() {
|
|
|
|
return classifier;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static void setClassifier(AbstractSequenceClassifier<CoreLabel> classifier) {
|
|
|
|
Datahandler.classifier = classifier;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public void updateStringCache() {
|
|
|
|
try {
|
|
|
|
checkIfUpdateStrings(true);
|
|
|
|
} catch (CustomError ex) {
|
|
|
|
Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static GrammaticalStructureFactory getGsf() {
|
|
|
|
return gsf;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static MaxentTagger getTagger() {
|
|
|
|
return tagger;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
|
|
|
|
return DataMapper.getAllStrings();
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public int getlHMSMXSize() {
|
|
|
|
return lHMSMX.size();
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public int getstringCacheSize() {
|
|
|
|
return stringCache.size();
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public void initiateMYSQL() throws SQLException, IOException {
|
|
|
|
try {
|
|
|
|
DataMapper.createTables();
|
|
|
|
stringCache.putAll(getCache());
|
2019-05-09 23:00:27 +02:00
|
|
|
// lHMSMX = DataMapper.getAllRelationScores();
|
2019-03-20 22:38:28 +01:00
|
|
|
} catch (CustomError ex) {
|
|
|
|
Logger.getLogger(Datahandler.class
|
|
|
|
.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public void addHLstatsMessages() {
|
2019-04-20 00:17:18 +02:00
|
|
|
ConcurrentMap<String, Integer> hlStatsMessages = new MapMaker().concurrencyLevel(2).makeMap();
|
2019-03-20 22:38:28 +01:00
|
|
|
ConcurrentMap<Integer, String> strCacheLocal = stringCache;
|
2019-04-20 00:17:18 +02:00
|
|
|
Collection<String> strs = DataMapper.getHLstatsMessages().values();
|
|
|
|
for (String str : strs) {
|
|
|
|
if (hlStatsMessages.get(str) == null) {
|
|
|
|
hlStatsMessages.put(str, hlStatsMessages.size());
|
2019-03-26 21:38:03 +01:00
|
|
|
}
|
|
|
|
}
|
2020-03-21 18:43:46 +01:00
|
|
|
int capacity = 1250;
|
2019-05-09 23:00:27 +02:00
|
|
|
hlStatsMessages.keySet().forEach(str -> {
|
|
|
|
if (!str.startsWith("!") && MessageResponseHandler.getStr().values().size() < capacity) {
|
2019-03-24 23:04:19 +01:00
|
|
|
String orElse = strCacheLocal.values().parallelStream().filter(e -> e.equals(str)).findAny().orElse(null);
|
|
|
|
if (orElse == null) {
|
2019-03-20 22:38:28 +01:00
|
|
|
MessageResponseHandler.getMessage(str);
|
|
|
|
}
|
|
|
|
}
|
2019-03-24 23:04:19 +01:00
|
|
|
});
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-24 23:04:19 +01:00
|
|
|
public void instantiateAnnotationMapJMWE() {
|
|
|
|
if (!stringCache.isEmpty()) {
|
|
|
|
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(stringCache.values());
|
|
|
|
for (Entry<String, Annotation> entries : jmweAnnotation.entrySet()) {
|
|
|
|
jmweAnnotationCache.put(entries.getKey(), entries.getValue());
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public void instantiateAnnotationMap() {
|
|
|
|
if (!stringCache.isEmpty()) {
|
2019-03-26 21:38:03 +01:00
|
|
|
ConcurrentMap<String, Annotation> Annotationspipeline = new MapMaker().concurrencyLevel(2).makeMap();
|
|
|
|
ConcurrentMap<String, Annotation> AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(2).makeMap();
|
2019-03-20 22:38:28 +01:00
|
|
|
stringCache.values().parallelStream().forEach(str -> {
|
|
|
|
Annotation strAnno = new Annotation(str);
|
2019-08-07 14:00:00 +02:00
|
|
|
strAnno.compact();
|
2019-03-26 21:38:03 +01:00
|
|
|
Annotationspipeline.put(str, strAnno);
|
2019-03-20 22:38:28 +01:00
|
|
|
Annotation strAnno2 = new Annotation(str);
|
2019-08-07 14:00:00 +02:00
|
|
|
strAnno2.compact();
|
2019-03-26 21:38:03 +01:00
|
|
|
AnnotationspipelineSentiment.put(str, strAnno2);
|
|
|
|
});
|
2019-05-09 23:00:27 +02:00
|
|
|
ConcurrentMap<String, CoreDocument> coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(stringCache.values(), pipeline);
|
2019-03-26 21:38:03 +01:00
|
|
|
pipeline.annotate(Annotationspipeline.values());
|
|
|
|
pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
|
|
|
|
Annotationspipeline.entrySet().forEach(pipelineEntry -> {
|
2019-08-07 14:00:00 +02:00
|
|
|
//relatively experimental change
|
|
|
|
pipelineEntry.getValue().compact();
|
2019-03-26 21:38:03 +01:00
|
|
|
pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
|
|
|
|
});
|
|
|
|
AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> {
|
2019-08-07 14:00:00 +02:00
|
|
|
pipelineEntry.getValue().compact();
|
2019-03-26 21:38:03 +01:00
|
|
|
pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
|
2019-03-20 22:38:28 +01:00
|
|
|
});
|
2019-05-09 23:00:27 +02:00
|
|
|
coreDocumentpipelineMap.entrySet().stream().forEach(CD -> {
|
|
|
|
coreDocumentAnnotationCache.put(CD.getKey(), CD.getValue());
|
|
|
|
});
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2020-03-21 18:43:46 +01:00
|
|
|
private ConcurrentMap<Integer, String> futuresReturnOverallEvaluation(List<SimilarityMatrix> similarityMatrixes) {
|
|
|
|
ConcurrentMap<Integer, String> strmapreturn = new MapMaker().concurrencyLevel(6).makeMap();
|
2020-03-07 18:40:59 +01:00
|
|
|
if (!similarityMatrixes.isEmpty()) {
|
|
|
|
String newPrimary = similarityMatrixes.get(0).getPrimaryString();
|
2020-03-21 18:43:46 +01:00
|
|
|
int evaluationCap = 500;
|
2020-03-07 18:40:59 +01:00
|
|
|
int iterator = 0;
|
|
|
|
for (SimilarityMatrix SMX : similarityMatrixes) {
|
2020-03-21 18:43:46 +01:00
|
|
|
final Double scoreRelationNewMsgToRecentMsg = SMX.getDistance();
|
|
|
|
if (scoreRelationNewMsgToRecentMsg > evaluationCap) {
|
2019-08-03 14:35:09 +02:00
|
|
|
strmapreturn = addSMXToMapReturn(strmapreturn, SMX);
|
2019-05-19 20:35:18 +02:00
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
//System.out.println("similarityMatrixes size: " + similarityMatrixes.size() + "\niterator: " + iterator);
|
|
|
|
iterator++;
|
2019-05-19 20:35:18 +02:00
|
|
|
}
|
2019-05-11 20:33:45 +02:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
return strmapreturn;
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-08-03 14:35:09 +02:00
|
|
|
private ConcurrentMap<Integer, String> addSMXToMapReturn(ConcurrentMap<Integer, String> strmapreturn, SimilarityMatrix SMX) {
|
2020-03-21 18:43:46 +01:00
|
|
|
if (!strmapreturn.containsValue(SMX.getPrimaryString())) {
|
2019-08-03 14:35:09 +02:00
|
|
|
strmapreturn.put(strmapreturn.size(), SMX.getPrimaryString());
|
|
|
|
String transmittedStr = SMX.getSecondaryString();
|
|
|
|
SentimentValueCache cacheValue1 = SMX.getCacheValue1();
|
|
|
|
SentimentValueCache cacheValue2 = SMX.getCacheValue2();
|
|
|
|
if (cacheValue1 != null && !sentimentCachingMap.keySet().contains(SMX.getPrimaryString())) {
|
2020-03-21 18:43:46 +01:00
|
|
|
sentimentCachingMap.put(SMX.getSecondaryString(), SMX.getCacheValue1());
|
2019-04-14 14:18:01 +02:00
|
|
|
}
|
2019-08-03 14:35:09 +02:00
|
|
|
if (cacheValue2 != null && !sentimentCachingMap.keySet().contains(transmittedStr)) {
|
|
|
|
sentimentCachingMap.put(transmittedStr, SMX.getCacheValue2());
|
2019-05-12 19:06:22 +02:00
|
|
|
}
|
2019-05-11 20:33:45 +02:00
|
|
|
}
|
2019-08-03 14:35:09 +02:00
|
|
|
return strmapreturn;
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-08-03 14:35:09 +02:00
|
|
|
private List<SimilarityMatrix> StrComparringNoSentenceRelationMap(
|
2020-03-21 18:43:46 +01:00
|
|
|
ConcurrentMap<Integer, String> strCacheLocal, Collection<String> str, ConcurrentMap<String, Annotation> localJMWEMap,
|
2019-05-19 20:35:18 +02:00
|
|
|
ConcurrentMap<String, Annotation> localPipelineAnnotation, ConcurrentMap<String, Annotation> localPipelineSentimentAnnotation,
|
2020-03-07 18:40:59 +01:00
|
|
|
ConcurrentMap<String, CoreDocument> localCoreDocumentMap, CompletionService<SimilarityMatrix> ecs, int index) {
|
|
|
|
int prefix_size = 125;
|
2019-05-19 20:35:18 +02:00
|
|
|
SentimentValueCache sentimentCacheStr = sentimentCachingMap.getOrDefault(str, null);
|
2019-08-03 14:35:09 +02:00
|
|
|
List<SimilarityMatrix> smxReturnList = new ArrayList();
|
2019-08-03 18:08:34 +02:00
|
|
|
List<String> randomIterationComparision = new ArrayList();
|
2020-03-07 18:40:59 +01:00
|
|
|
int iteratecap = strCacheLocal.size() > prefix_size ? strCacheLocal.size() - prefix_size : strCacheLocal.size();
|
2019-08-03 18:08:34 +02:00
|
|
|
int iterator = ThreadLocalRandom.current().nextInt(0, iteratecap);
|
|
|
|
int iterated = 0;
|
2019-05-09 23:00:27 +02:00
|
|
|
for (String str1 : strCacheLocal.values()) {
|
2020-03-07 18:40:59 +01:00
|
|
|
if (iterated >= iterator && iterated < iterator + prefix_size) {
|
2019-08-03 18:08:34 +02:00
|
|
|
randomIterationComparision.add(str1);
|
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
if (iterated > iterator + prefix_size) {
|
2019-08-03 18:08:34 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
iterated++;
|
|
|
|
}
|
|
|
|
for (String str1 : randomIterationComparision) {
|
2020-03-21 18:43:46 +01:00
|
|
|
for (String str2 : str) {
|
|
|
|
if (!str2.equals(str1)) {
|
|
|
|
SimilarityMatrix SMXInit = new SimilarityMatrix(str2, str1);
|
|
|
|
SentimentValueCache sentimentCacheStr1 = sentimentCachingMap.getOrDefault(str1, null);
|
|
|
|
Callable<SimilarityMatrix> worker;
|
|
|
|
if (stringCache.size() < prefix_size) {
|
|
|
|
worker = new SentimentAnalyzerTest(str2, str1, SMXInit,
|
|
|
|
localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str),
|
|
|
|
localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str),
|
|
|
|
localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1), sentimentCacheStr, sentimentCacheStr1);
|
|
|
|
} else {
|
|
|
|
worker = new SentimentAnalyzerTest(str2, str1, SMXInit,
|
|
|
|
localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str),
|
|
|
|
pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str),
|
|
|
|
pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1), sentimentCacheStr, sentimentCacheStr1);
|
|
|
|
}
|
|
|
|
ecs.submit(worker);
|
|
|
|
index++;
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
2019-08-04 00:34:30 +02:00
|
|
|
}
|
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
for (int i = 0; i < index; i++) {
|
2019-08-04 00:34:30 +02:00
|
|
|
try {
|
2020-03-07 18:40:59 +01:00
|
|
|
Future<SimilarityMatrix> take = ecs.take();
|
|
|
|
SimilarityMatrix smx = take.get();
|
2020-03-21 18:43:46 +01:00
|
|
|
if (smx != null && !smxReturnList.contains(smx)) {
|
|
|
|
smxReturnList.add(smx);
|
|
|
|
}
|
2019-08-04 00:34:30 +02:00
|
|
|
} catch (InterruptedException | ExecutionException ex) {
|
2020-03-07 18:40:59 +01:00
|
|
|
//
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
index = 0;
|
2020-03-21 18:43:46 +01:00
|
|
|
System.out.println("smxReturnList size: " + smxReturnList.size());
|
2019-08-03 14:35:09 +02:00
|
|
|
return smxReturnList;
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
|
|
|
private ConcurrentMap<Integer, String> stringIteratorComparator(ConcurrentMap<Integer, String> strmap,
|
2019-05-09 23:00:27 +02:00
|
|
|
ConcurrentMap<Integer, String> strCacheLocal, ConcurrentMap<String, Annotation> localJMWEMap,
|
|
|
|
ConcurrentMap<String, Annotation> localPipelineAnnotation, ConcurrentMap<String, Annotation> localPipelineSentimentAnnotation,
|
|
|
|
ConcurrentMap<String, CoreDocument> localCoreDocumentMap) {
|
2020-03-07 18:40:59 +01:00
|
|
|
ExecutorService threadPool = Executors.newCachedThreadPool();
|
|
|
|
CompletionService<SimilarityMatrix> ecs = new ExecutorCompletionService<>(threadPool);
|
|
|
|
int index = 0;
|
2020-03-21 18:43:46 +01:00
|
|
|
//System.out.println("strmap siuze: " + strmap.size());
|
|
|
|
List<SimilarityMatrix> StrComparringNoSentenceRelationMap = StrComparringNoSentenceRelationMap(strCacheLocal, strmap.values(),
|
|
|
|
localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap, ecs, index);
|
2020-03-07 18:40:59 +01:00
|
|
|
threadPool.shutdown();
|
2020-03-21 18:43:46 +01:00
|
|
|
//System.out.println("StrComparringNoSentenceRelationMap size: " + StrComparringNoSentenceRelationMap.size());
|
|
|
|
Collections.sort(StrComparringNoSentenceRelationMap, (e1, e2) -> e1.getPrimaryString().compareTo(e2.getPrimaryString()));
|
|
|
|
ConcurrentMap<Integer, String> strmapreturn = futuresReturnOverallEvaluation(StrComparringNoSentenceRelationMap);
|
|
|
|
//System.out.println("strmapreturn size: " + strmapreturn.size());
|
2019-04-14 14:18:01 +02:00
|
|
|
return strmapreturn;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-08-03 14:35:09 +02:00
|
|
|
private ConcurrentMap<Integer, String> removeNonSensicalStrings(ConcurrentMap<Integer, String> strmap) {
|
2019-05-09 23:00:27 +02:00
|
|
|
final ConcurrentMap<Integer, String> strCacheLocal = stringCache.size() < 150 ? strmap : stringCache;
|
|
|
|
final ConcurrentMap<String, Annotation> localJMWEMap = getMultipleJMWEAnnotation(strmap.values());
|
|
|
|
final ConcurrentMap<String, Annotation> localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values());
|
|
|
|
final ConcurrentMap<String, Annotation> localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values());
|
|
|
|
final ConcurrentMap<String, CoreDocument> localCoreDocumentMap = getMultipleCoreDocumentsWaySuggestion(strmap.values(), pipeline);
|
|
|
|
return stringIteratorComparator(strmap, strCacheLocal, localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap);
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public synchronized void checkIfUpdateStrings(boolean hlStatsMsg) throws CustomError {
|
|
|
|
if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
|
|
|
|
ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
|
2020-03-21 18:43:46 +01:00
|
|
|
System.out.println("str size: " + str.size());
|
2019-03-20 22:38:28 +01:00
|
|
|
str = cutContent(str, hlStatsMsg);
|
|
|
|
str = filterContent(str);
|
|
|
|
str = removeSlacks(str);
|
2020-03-07 18:40:59 +01:00
|
|
|
//System.out.println("finished removeSlacks \n" + str.size() + "\n");
|
2019-04-14 14:18:01 +02:00
|
|
|
str = removeNonSensicalStrings(str);
|
2020-03-21 18:43:46 +01:00
|
|
|
System.out.println("removeNonSensicalStrings str size POST: " + str.size() + "\n");
|
2019-03-26 21:38:03 +01:00
|
|
|
str = annotationCacheUpdate(str);
|
2020-03-21 18:43:46 +01:00
|
|
|
System.out.println("annotationCacheUpdate str size POST: " + str.size() + "\n");
|
2019-04-14 14:18:01 +02:00
|
|
|
ConcurrentMap<Integer, String> strf = str;
|
|
|
|
if (!stringCache.isEmpty()) {
|
|
|
|
new Thread(() -> {
|
|
|
|
try {
|
|
|
|
DataMapper.InsertMYSQLStrings(strf);
|
|
|
|
} catch (CustomError ex) {
|
|
|
|
Logger.getLogger(Datahandler.class
|
|
|
|
.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(6).makeMap());
|
2019-04-14 14:18:01 +02:00
|
|
|
}).start();
|
|
|
|
} else {
|
|
|
|
try {
|
|
|
|
DataMapper.InsertMYSQLStrings(strf);
|
|
|
|
} catch (CustomError ex) {
|
|
|
|
Logger.getLogger(Datahandler.class
|
|
|
|
.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
MessageResponseHandler.setStr(new MapMaker().concurrencyLevel(2).makeMap());
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
if (!stopwatch.isRunning()) {
|
|
|
|
stopwatch.start();
|
|
|
|
} else {
|
|
|
|
stopwatch.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
|
|
|
private String trimString(String str) {
|
2019-03-20 22:38:28 +01:00
|
|
|
str = str.trim();
|
|
|
|
if (str.startsWith("<@")) {
|
|
|
|
str = str.substring(str.indexOf("> ") + 2);
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
2020-03-07 18:40:59 +01:00
|
|
|
private String getResponseFutures(String strF) {
|
|
|
|
List<String> values_copy = new ArrayList<String>(stringCache.values());
|
|
|
|
int maxsize = values_copy.size() > 500 ? 500 : values_copy.size();
|
|
|
|
Collections.shuffle(values_copy);
|
|
|
|
List<String> strCache = values_copy.subList(0, maxsize);
|
|
|
|
double preRelationUserCounters = -150000.0;
|
2019-05-24 16:08:25 +02:00
|
|
|
//WHY THE FUCK CANT YOU JUST TRANSFER A SimilarityMatrix OBJECT LIST LIKE ANY OTHER NORMAL COLLECTION, WHY DOES IT HAVE TO BE A FUCKING STRING LIST
|
|
|
|
List<String> concurrentRelations = new ArrayList();
|
2020-03-07 18:40:59 +01:00
|
|
|
List<Callable<SimilarityMatrix>> call_able_list = new ArrayList();
|
|
|
|
for (String str1 : strCache) {
|
2019-03-24 23:04:19 +01:00
|
|
|
if (!strF.equals(str1)) {
|
2019-05-24 16:08:25 +02:00
|
|
|
SentimentValueCache sentimentCacheStr1 = sentimentCachingMap.getOrDefault(str1, null);
|
|
|
|
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, new SimilarityMatrix(strF, str1),
|
|
|
|
strAnnoJMWE, jmweAnnotationCache.get(str1), strAnno,
|
|
|
|
pipelineAnnotationCache.get(str1), strAnnoSentiment,
|
|
|
|
pipelineSentimentAnnotationCache.get(str1), coreDoc, coreDocumentAnnotationCache.get(str1), null, sentimentCacheStr1);
|
2020-03-07 18:40:59 +01:00
|
|
|
call_able_list.add(worker);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (Callable<SimilarityMatrix> callSMX : call_able_list) {
|
|
|
|
try {
|
|
|
|
SimilarityMatrix getSMX = callSMX.call();
|
|
|
|
if (getSMX != null) {
|
|
|
|
Double scoreRelationLastUserMsg = getSMX.getDistance();
|
|
|
|
if (scoreRelationLastUserMsg > preRelationUserCounters) {
|
|
|
|
preRelationUserCounters = scoreRelationLastUserMsg;
|
|
|
|
concurrentRelations.add(getSMX.getSecondaryString());
|
|
|
|
//System.out.println("secondary: " + getSMX.getSecondaryString() + "\nDistance: " + getSMX.getDistance() + "\n");
|
|
|
|
//System.out.println("SUCESS concurrentRelationsMap size: " + concurrentRelations.size() + "\n");
|
2019-04-14 14:18:01 +02:00
|
|
|
}
|
|
|
|
}
|
2020-03-07 18:40:59 +01:00
|
|
|
} catch (Exception ex) {
|
|
|
|
Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
2019-04-20 00:17:18 +02:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
StringBuilder SB = new StringBuilder();
|
2020-03-07 18:40:59 +01:00
|
|
|
double randomLenghtPermit = strF.length() * (Math.random() * Math.random() * Math.random());
|
2019-05-24 16:08:25 +02:00
|
|
|
Collections.reverse(concurrentRelations);
|
2020-03-07 18:40:59 +01:00
|
|
|
if (concurrentRelations.isEmpty()) {
|
|
|
|
return "failure, preventing stuckness";
|
|
|
|
}
|
2019-08-07 14:00:00 +02:00
|
|
|
String firstRelation = concurrentRelations.get(0);
|
2019-05-24 16:08:25 +02:00
|
|
|
for (String secondaryRelation : concurrentRelations) {
|
|
|
|
if (SB.toString().length() > randomLenghtPermit && !SB.toString().isEmpty()) {
|
|
|
|
break;
|
2019-04-20 00:17:18 +02:00
|
|
|
}
|
2019-08-07 14:00:00 +02:00
|
|
|
boolean append = appendToString(firstRelation, secondaryRelation);
|
|
|
|
if (append) {
|
|
|
|
SB.append(secondaryRelation).append(" ");
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
}
|
|
|
|
return SB.toString();
|
|
|
|
}
|
|
|
|
|
2019-08-07 14:00:00 +02:00
|
|
|
private boolean appendToString(String firstRelation, String secondaryRelation) {
|
|
|
|
if (firstRelation.equals(secondaryRelation)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
Double scoreRelationStrF = getScoreRelationStrF(firstRelation, secondaryRelation);
|
|
|
|
if (scoreRelationStrF > 1900) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-03-07 18:40:59 +01:00
|
|
|
public String getResponseMsg(String str) throws CustomError {
|
2019-05-24 16:08:25 +02:00
|
|
|
String strF = trimString(str);
|
|
|
|
getSingularAnnotation(strF);
|
2020-03-07 18:40:59 +01:00
|
|
|
return getResponseFutures(strF);
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-04-06 23:58:18 +02:00
|
|
|
public void getSingularAnnotation(String str) {
|
|
|
|
strAnno = new Annotation(str);
|
2019-08-07 14:00:00 +02:00
|
|
|
strAnno.compact();
|
2019-04-06 23:58:18 +02:00
|
|
|
pipeline.annotate(strAnno);
|
|
|
|
strAnnoSentiment = new Annotation(str);
|
2019-08-07 14:00:00 +02:00
|
|
|
strAnnoSentiment.compact();
|
2019-04-06 23:58:18 +02:00
|
|
|
pipelineSentiment.annotate(strAnnoSentiment);
|
|
|
|
List<String> notactualList = new ArrayList();
|
|
|
|
notactualList.add(str);
|
|
|
|
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList);
|
|
|
|
strAnnoJMWE = jmweAnnotation.values().iterator().next();
|
2019-08-07 14:00:00 +02:00
|
|
|
strAnnoJMWE.compact();
|
2019-04-14 14:18:01 +02:00
|
|
|
CoreDocument coreDocument = new CoreDocument(str);
|
|
|
|
pipeline.annotate(coreDocument);
|
|
|
|
coreDoc = coreDocument;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static ConcurrentMap<String, Annotation> getMultipleJMWEAnnotation(Collection<String> str) {
|
2019-04-14 14:18:01 +02:00
|
|
|
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str);
|
|
|
|
return jmweAnnotation;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static ConcurrentMap<String, Annotation> getMultiplePipelineAnnotation(Collection<String> str) {
|
2019-04-14 14:18:01 +02:00
|
|
|
ConcurrentMap<String, Annotation> pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap();
|
|
|
|
for (String str1 : str) {
|
|
|
|
Annotation strAnno1 = new Annotation(str1);
|
|
|
|
pipelineAnnotationMap.put(str1, strAnno1);
|
|
|
|
}
|
|
|
|
pipeline.annotate(pipelineAnnotationMap.values());
|
|
|
|
return pipelineAnnotationMap;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static ConcurrentMap<String, Annotation> getMultiplePipelineSentimentAnnotation(Collection<String> str) {
|
2019-04-14 14:18:01 +02:00
|
|
|
ConcurrentMap<String, Annotation> pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap();
|
|
|
|
for (String str1 : str) {
|
|
|
|
Annotation strAnno1 = new Annotation(str1);
|
|
|
|
pipelineAnnotationMap.put(str1, strAnno1);
|
|
|
|
}
|
|
|
|
pipelineSentiment.annotate(pipelineAnnotationMap.values());
|
|
|
|
return pipelineAnnotationMap;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-04-06 23:58:18 +02:00
|
|
|
private Double getScoreRelationNewMsgToRecentMsg(String str, String mostRecentMsg) {
|
|
|
|
SimilarityMatrix SMX = new SimilarityMatrix(str, mostRecentMsg);
|
2019-05-19 20:35:18 +02:00
|
|
|
SentimentValueCache cacheSentiment1 = sentimentCachingMap.getOrDefault(str, null);
|
|
|
|
SentimentValueCache cacheSentiment2 = sentimentCachingMap.getOrDefault(mostRecentMsg, null);
|
2019-04-06 23:58:18 +02:00
|
|
|
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX,
|
|
|
|
jmweAnnotationCache.get(str), jmweAnnotationCache.get(mostRecentMsg), pipelineAnnotationCache.get(str),
|
|
|
|
pipelineAnnotationCache.get(mostRecentMsg), pipelineSentimentAnnotationCache.get(str),
|
2019-04-14 14:18:01 +02:00
|
|
|
pipelineSentimentAnnotationCache.get(mostRecentMsg), coreDocumentAnnotationCache.get(str),
|
2019-05-19 20:35:18 +02:00
|
|
|
coreDocumentAnnotationCache.get(mostRecentMsg), cacheSentiment1, cacheSentiment2);
|
2019-04-06 23:58:18 +02:00
|
|
|
SimilarityMatrix callSMX = null;
|
|
|
|
try {
|
|
|
|
callSMX = worker.call();
|
|
|
|
} catch (Exception ex) {
|
|
|
|
Logger.getLogger(Datahandler.class
|
|
|
|
.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
if (callSMX != null) {
|
|
|
|
double smxDistance = callSMX.getDistance();
|
|
|
|
return smxDistance;
|
|
|
|
}
|
|
|
|
return 0.0;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-04-20 00:17:18 +02:00
|
|
|
private Double getScoreRelationStrF(String str, String mostRecentMsg) {
|
|
|
|
SimilarityMatrix SMX = new SimilarityMatrix(str, mostRecentMsg);
|
2019-05-19 20:35:18 +02:00
|
|
|
SentimentValueCache cacheSentiment1 = sentimentCachingMap.getOrDefault(str, null);
|
|
|
|
SentimentValueCache cacheSentiment2 = sentimentCachingMap.getOrDefault(mostRecentMsg, null);
|
2019-04-20 00:17:18 +02:00
|
|
|
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX,
|
|
|
|
strAnnoJMWE, jmweAnnotationCache.get(mostRecentMsg), strAnno,
|
|
|
|
pipelineAnnotationCache.get(mostRecentMsg), strAnnoSentiment,
|
2019-05-19 20:35:18 +02:00
|
|
|
pipelineSentimentAnnotationCache.get(mostRecentMsg), coreDoc, coreDocumentAnnotationCache.get(mostRecentMsg), cacheSentiment1, cacheSentiment2);
|
2019-04-20 00:17:18 +02:00
|
|
|
SimilarityMatrix callSMX = null;
|
|
|
|
try {
|
|
|
|
callSMX = worker.call();
|
|
|
|
} catch (Exception ex) {
|
|
|
|
Logger.getLogger(Datahandler.class
|
|
|
|
.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
|
|
|
if (callSMX != null) {
|
|
|
|
double smxDistance = callSMX.getDistance();
|
|
|
|
return smxDistance;
|
|
|
|
}
|
|
|
|
return 0.0;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static ConcurrentMap<Integer, String> cutContent(ConcurrentMap<Integer, String> str, boolean hlStatsMsg) {
|
|
|
|
ConcurrentMap<Integer, String> returnlist = new MapMaker().concurrencyLevel(2).makeMap();
|
2020-03-07 18:40:59 +01:00
|
|
|
str.values().forEach(str1 -> {
|
2019-03-20 22:38:28 +01:00
|
|
|
int iend = str1.indexOf("content: ");
|
|
|
|
if (iend != -1) {
|
|
|
|
String trs = str1.substring(iend + 9);
|
|
|
|
returnlist.put(returnlist.size() + 1, trs.substring(0, trs.length() - 1));
|
|
|
|
} else if (hlStatsMsg) {
|
|
|
|
returnlist.put(returnlist.size() + 1, str1);
|
|
|
|
}
|
2019-03-24 23:04:19 +01:00
|
|
|
});
|
2019-03-20 22:38:28 +01:00
|
|
|
return returnlist;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
public static ConcurrentMap<Integer, String> filterContent(ConcurrentMap<Integer, String> str) {
|
|
|
|
ConcurrentMap<Integer, String> strlistreturn = new MapMaker().concurrencyLevel(2).makeMap();
|
2020-03-07 18:40:59 +01:00
|
|
|
str.values().forEach(str1 -> {
|
2019-03-24 23:04:19 +01:00
|
|
|
if (!str1.isEmpty() && str1.length() > 3) {
|
|
|
|
str1 = str1.trim();
|
|
|
|
if (str1.contains("PM*")) {
|
2019-03-25 10:43:54 +01:00
|
|
|
str1 = str1.substring(str1.indexOf("PM*") + 3);
|
2019-03-24 23:04:19 +01:00
|
|
|
}
|
|
|
|
if (str1.contains("AM*")) {
|
2019-03-25 10:43:54 +01:00
|
|
|
str1 = str1.substring(str1.indexOf("AM*") + 3);
|
2019-03-24 23:04:19 +01:00
|
|
|
}
|
|
|
|
for (Character c : str1.toCharArray()) {
|
|
|
|
if (c == '?' || c == '°') {
|
|
|
|
str1 = str1.replace("?", " <:wlenny:514861023002624001> ");
|
|
|
|
str1 = str1.replace("°", " <:wlenny:514861023002624001> ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (str1.contains("(Counter-Terrorist)")) {
|
|
|
|
str1 = str1.replace("(Counter-Terrorist)", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("(Terrorist)")) {
|
|
|
|
str1 = str1.replace("(Terrorist)", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("(Spectator)")) {
|
|
|
|
str1 = str1.replace("(Spectator)", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("*DEAD*")) {
|
|
|
|
str1 = str1.replace("*DEAD*", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{red}")) {
|
|
|
|
str1 = str1.replace("{red}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{orange}")) {
|
|
|
|
str1 = str1.replace("{orange}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{yellow}")) {
|
|
|
|
str1 = str1.replace("{yellow}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{green}")) {
|
|
|
|
str1 = str1.replace("{green}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{lightblue}")) {
|
|
|
|
str1 = str1.replace("{lightblue}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{blue}")) {
|
|
|
|
str1 = str1.replace("{blue}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{purple}")) {
|
|
|
|
str1 = str1.replace("{purple}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{white}")) {
|
|
|
|
str1 = str1.replace("{white}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{fullblue}")) {
|
|
|
|
str1 = str1.replace("{fullblue}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{cyan}")) {
|
|
|
|
str1 = str1.replace("{cyan}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{lime}")) {
|
|
|
|
str1 = str1.replace("{lime}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{deeppink}")) {
|
|
|
|
str1 = str1.replace("{deeppink}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{slategray}")) {
|
|
|
|
str1 = str1.replace("{slategray}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{dodgerblue}")) {
|
|
|
|
str1 = str1.replace("{dodgerblue}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{black}")) {
|
|
|
|
str1 = str1.replace("{black}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{orangered}")) {
|
|
|
|
str1 = str1.replace("{orangered}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{darkorchid}")) {
|
|
|
|
str1 = str1.replace("{darkorchid}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{pink}")) {
|
|
|
|
str1 = str1.replace("{pink}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{lightyellow}")) {
|
|
|
|
str1 = str1.replace("{lightyellow}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{chocolate}")) {
|
|
|
|
str1 = str1.replace("{chocolate}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{beige}")) {
|
|
|
|
str1 = str1.replace("{beige}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{azure}")) {
|
|
|
|
str1 = str1.replace("{azure}", " ");
|
|
|
|
}
|
|
|
|
if (str1.contains("{yellowgreen}")) {
|
|
|
|
str1 = str1.replace("{yellowgreen}", " ");
|
|
|
|
}
|
|
|
|
str1 = str1.trim();
|
|
|
|
if (str1.length() > 2 && (!str1.startsWith("!"))) {
|
|
|
|
strlistreturn.put(strlistreturn.size() + 1, str1);
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
}
|
2019-03-24 23:04:19 +01:00
|
|
|
});
|
2019-03-20 22:38:28 +01:00
|
|
|
return strlistreturn;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-20 22:38:28 +01:00
|
|
|
private ConcurrentMap<Integer, String> removeSlacks(ConcurrentMap<Integer, String> str) {
|
|
|
|
ConcurrentMap<Integer, String> strreturn = new MapMaker().concurrencyLevel(2).makeMap();
|
2019-04-14 14:18:01 +02:00
|
|
|
if (stringCache.isEmpty()) {
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
Collection<String> values = stringCache.values();
|
2020-03-07 18:40:59 +01:00
|
|
|
str.values().forEach(str1 -> {
|
2019-04-14 14:18:01 +02:00
|
|
|
boolean tooclosematch = false;
|
|
|
|
for (String strVals : values) {
|
|
|
|
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
|
|
|
|
double Distance = leven.computeLevenshteinDistance();
|
|
|
|
Double maxpermittedDistance = 2.5;
|
|
|
|
if (Distance < maxpermittedDistance) {
|
|
|
|
tooclosematch = true;
|
|
|
|
break;
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
|
|
|
}
|
2019-04-14 14:18:01 +02:00
|
|
|
if (!tooclosematch) {
|
|
|
|
strreturn.put(strreturn.size() + 1, str1);
|
2019-08-03 14:35:09 +02:00
|
|
|
//System.out.println("adding strreturn str1: " + str1 + "\n");
|
2019-04-14 14:18:01 +02:00
|
|
|
}
|
2019-03-24 23:04:19 +01:00
|
|
|
});
|
2019-03-20 22:38:28 +01:00
|
|
|
return strreturn;
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-03-26 21:38:03 +01:00
|
|
|
private ConcurrentMap<Integer, String> annotationCacheUpdate(ConcurrentMap<Integer, String> strmap) {
|
2019-03-24 23:04:19 +01:00
|
|
|
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strmap.values());
|
|
|
|
for (Entry<String, Annotation> jmweitr : jmweAnnotation.entrySet()) {
|
2019-03-26 21:38:03 +01:00
|
|
|
jmweAnnotationCache.put(jmweitr.getKey(), jmweitr.getValue());
|
2019-03-24 23:04:19 +01:00
|
|
|
}
|
2019-05-09 23:00:27 +02:00
|
|
|
ConcurrentMap<String, Annotation> Annotationspipeline = new MapMaker().concurrencyLevel(4).makeMap();
|
|
|
|
ConcurrentMap<String, Annotation> AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(4).makeMap();
|
|
|
|
ConcurrentMap<String, CoreDocument> coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(strmap.values(), pipeline);
|
2020-03-07 18:40:59 +01:00
|
|
|
strmap.values().forEach(str -> {
|
2019-04-14 14:18:01 +02:00
|
|
|
Annotation strAnno1 = new Annotation(str);
|
|
|
|
Annotationspipeline.put(str, strAnno1);
|
2019-03-26 21:38:03 +01:00
|
|
|
Annotation strAnno2 = new Annotation(str);
|
|
|
|
AnnotationspipelineSentiment.put(str, strAnno2);
|
|
|
|
stringCache.put(stringCache.size() + 1, str);
|
2019-03-20 22:38:28 +01:00
|
|
|
});
|
2019-03-26 21:38:03 +01:00
|
|
|
pipeline.annotate(Annotationspipeline.values());
|
|
|
|
pipelineSentiment.annotate(AnnotationspipelineSentiment.values());
|
|
|
|
Annotationspipeline.entrySet().forEach(pipelineEntry -> {
|
2019-04-14 14:18:01 +02:00
|
|
|
if (pipelineEntry != null) {
|
|
|
|
pipelineAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
|
|
|
|
}
|
2019-03-20 22:38:28 +01:00
|
|
|
});
|
2019-03-26 21:38:03 +01:00
|
|
|
AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> {
|
2019-04-14 14:18:01 +02:00
|
|
|
if (pipelineEntry != null) {
|
|
|
|
pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue());
|
|
|
|
}
|
2019-03-20 22:38:28 +01:00
|
|
|
});
|
2019-05-09 23:00:27 +02:00
|
|
|
coreDocumentpipelineMap.entrySet().forEach(coreDocumentEntry -> {
|
|
|
|
coreDocumentAnnotationCache.put(coreDocumentEntry.getKey(), coreDocumentEntry.getValue());
|
|
|
|
});
|
2019-03-26 21:38:03 +01:00
|
|
|
return strmap;
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2020-03-07 18:40:59 +01:00
|
|
|
public int getMessageOverHead() {
|
|
|
|
return stringCache.values().size() - (stringCache.values().size() / 10);
|
|
|
|
}
|
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static class AnnotationCollector<T> implements Consumer<T> {
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
private static int i = 0;
|
2019-08-03 14:35:09 +02:00
|
|
|
private List<T> annotationsT = new ArrayList();
|
2019-05-24 16:08:25 +02:00
|
|
|
|
2019-05-09 23:00:27 +02:00
|
|
|
@Override
|
2019-08-03 14:35:09 +02:00
|
|
|
public void accept(T ann) {
|
|
|
|
//System.out.println("adding ann: " + ann.toString());
|
2019-05-09 23:00:27 +02:00
|
|
|
annotationsT.add(ann);
|
|
|
|
}
|
|
|
|
}
|
2019-05-24 16:08:25 +02:00
|
|
|
|
|
|
|
public static ConcurrentMap<String, CoreDocument> getMultipleCoreDocumentsWaySuggestion(Collection<String> str, StanfordCoreNLP localNLP) {
|
2019-05-09 23:00:27 +02:00
|
|
|
AnnotationCollector<Annotation> annCollector = new AnnotationCollector();
|
2019-08-04 00:34:30 +02:00
|
|
|
for (String exampleString : str) {
|
2019-05-09 23:00:27 +02:00
|
|
|
localNLP.annotate(new Annotation(exampleString), annCollector);
|
|
|
|
annCollector.i++;
|
2019-08-04 00:34:30 +02:00
|
|
|
//System.out.println("iterator: " + annCollector.i + "\nstr size: " + str.size() + "\n");
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
|
|
|
try {
|
2019-08-04 00:34:30 +02:00
|
|
|
Thread.sleep(8000);
|
2019-05-09 23:00:27 +02:00
|
|
|
} catch (InterruptedException ex) {
|
|
|
|
Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
}
|
2019-08-03 14:35:09 +02:00
|
|
|
ConcurrentMap<String, CoreDocument> annotationreturnMap = new MapMaker().concurrencyLevel(6).makeMap();
|
|
|
|
for (Annotation ann : annCollector.annotationsT) {
|
|
|
|
if (ann != null) {
|
2019-08-04 00:34:30 +02:00
|
|
|
ann.compact();
|
2019-08-03 14:35:09 +02:00
|
|
|
CoreDocument CD = new CoreDocument(ann);
|
2019-05-09 23:00:27 +02:00
|
|
|
annotationreturnMap.put(CD.text(), CD);
|
2019-08-04 00:34:30 +02:00
|
|
|
//System.out.println("CD text:" + CD.text() + "\niterator: " + iterator + "\nsize: " + annCollector.annotationsT.size());
|
2019-05-09 23:00:27 +02:00
|
|
|
}
|
2019-08-03 14:35:09 +02:00
|
|
|
}
|
2019-05-09 23:00:27 +02:00
|
|
|
return annotationreturnMap;
|
|
|
|
}
|
2019-03-20 22:38:28 +01:00
|
|
|
}
|