package FunctionLayer;

import DataLayer.DataMapper;
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CoreMap;
import org.ejml.simple.SimpleMatrix;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

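/**
 * Central handler for chat responses: owns the worker pool, caches per-string
 * CoreNLP/JMWE annotation artefacts, and scores candidate responses from the
 * data layer via SentimentAnalyzerTest. The caches appear to be unbounded apart
 * from DataMapper.checkStringsToDelete(), and they are plain HashMaps shared
 * with pool threads, which looks racy. Typical use (hypothetical caller):
 * <pre>
 *   Datahandler handler = new Datahandler();
 *   StanfordCoreNLP nlp = handler.pipeLineSetUp();
 *   StanfordCoreNLP sentimentNlp = handler.shiftReduceParserInitiate();
 *   String reply = handler.getResponseMsg("hello", "Alice", nlp, sentimentNlp, false);
 * </pre>
 */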
public class Datahandler {

    // Wanted to put the thread count in config too, but couldn't be bothered to set that up here.
    // Four threads for the rest of eternity it is (the executor rebuild in
    // getResponseFutures does read app.thread_count, though).
    private ExecutorService pool = Executors.newFixedThreadPool(4);
    private CompletionService<SentimentAnalyzerTest> completionService = new ExecutorCompletionService<>(pool);

    private HashMap<String, Annotation> pipelineAnnotationCache;
    private HashMap<String, Annotation> pipelineSentimentAnnotationCache;
    private HashMap<String, CoreDocument> coreDocumentAnnotationCache;
    private HashMap<String, Annotation> jmweAnnotationCache;

    private MaxentTagger tagger = new MaxentTagger();

    private GrammaticalStructureFactory gsf;
    private AbstractSequenceClassifier<CoreLabel> classifier;

    // SentimentAnalyzer caches, keyed by the candidate string.
    private HashMap<String, Integer> tokenizeCountingHashMap = new HashMap<>();
    private HashMap<String, List<List<TaggedWord>>> taggedWordListHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> retrieveTGWListHashMap = new HashMap<>();
    private HashMap<String, List<CoreMap>> sentences1HashMap = new HashMap<>();
    private HashMap<String, List<CoreMap>> sentencesSentimentHashMap = new HashMap<>();
    private HashMap<String, ArrayList<Tree>> trees1HashMap = new HashMap<>();
    private HashMap<String, ArrayList<GrammaticalStructure>> grammaticalStructureHashMap = new HashMap<>();
    private HashMap<String, ArrayList<TypedDependency>> typedDependenciesHashMap = new HashMap<>();
    private HashMap<String, ArrayList<Integer>> rnnCoreAnnotationsPredictedHashMap = new HashMap<>();
    private HashMap<String, ArrayList<SimpleMatrix>> simpleMatricesHashMap = new HashMap<>();
    private HashMap<String, ArrayList<SimpleMatrix>> simpleMatricesNodevectorsHashMap = new HashMap<>();
    private HashMap<String, List> listHashMap = new HashMap<>();
    private HashMap<String, Integer> longestHashMap = new HashMap<>();
    private HashMap<String, Integer> sentimentHashMap = new HashMap<>();
    private HashMap<String, List<IMWE<IToken>>> imwesHashMap = new HashMap<>();
    private HashMap<String, Integer> InflectedCounterNegativeHashMap = new HashMap<>();
    private HashMap<String, Integer> InflectedCounterPositiveHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> tokenEntryHashMap = new HashMap<>();
    private HashMap<String, Integer> MarkedContinuousCounterHashMap = new HashMap<>();
    private HashMap<String, Integer> UnmarkedPatternCounterHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> strTokensIpartFormHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> tokenFormsHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> strTokenEntryGetPOSHashMap = new HashMap<>();
    private HashMap<String, ArrayList<Integer>> intTokenEntyCountsHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> ITokenTagsHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> strTokenStemsHashMap = new HashMap<>();
    private HashMap<String, Integer> AnotatorcounterHashMap = new HashMap<>();
    private HashMap<String, Integer> TokensCounterHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> entityTokenTagsHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> nerEntitiesHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> nerEntitiesTypeHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> stopWordTokenHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> stopWordLemmaHashMap = new HashMap<>();
    private HashMap<String, Integer> PairCounterHashMap = new HashMap<>();
    private HashMap<String, ArrayList<String>> strResponses = new HashMap<>();

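    // Eagerly loads the 3-class English NER CRF model and the PCFG-based
    // grammatical-structure factory, so construction is slow but later lookups are cheap.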
    public Datahandler() {
        jmweAnnotationCache = new HashMap<>();
        pipelineAnnotationCache = new HashMap<>();
        pipelineSentimentAnnotationCache = new HashMap<>();
        coreDocumentAnnotationCache = new HashMap<>();
        gsf = initiateGrammaticalStructureFactory();
        String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
        classifier = CRFClassifier.getClassifierNoExceptions(nerModel);
    }

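    // Builds the GrammaticalStructureFactory from the English PCFG parser's language
    // pack; only the factory is kept, the LexicalizedParser itself is discarded.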
    private GrammaticalStructureFactory initiateGrammaticalStructureFactory() {
        // alternative model: "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"
        String lexParserEnglishPCFG = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
        LexicalizedParser lp = LexicalizedParser.loadModel(lexParserEnglishPCFG, "-maxLength", "100");
        TreebankLanguagePack tlp = lp.getOp().langpack();
        return tlp.grammaticalStructureFactory();
    }

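    /**
     * Builds the main annotation pipeline (tokenize, ssplit, pos, lemma, ner, parse),
     * using the shift-reduce parser for speed and three stacked CRF NER models combined
     * in HIGH_RECALL mode. The maxlen properties cap each stage at 90 tokens to bound cost.
     */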
    public StanfordCoreNLP pipeLineSetUp() {
        Properties props = new Properties();
        String shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
        // caseless NER alternatives, if ever needed:
        // "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz"
        // "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz"
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
        props.setProperty("parse.model", shiftReduceParserPath);
        props.setProperty("parse.maxlen", "90");
        props.setProperty("parse.binaryTrees", "true");
        props.setProperty("threads", "1");
        props.setProperty("pos.maxlen", "90");
        props.setProperty("tokenize.maxlen", "90");
        props.setProperty("ssplit.maxlen", "90");
        props.setProperty("lemma.maxlen", "90");
        props.setProperty("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz" +
                ",edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz" +
                ",edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz");
        props.setProperty("ner.combinationMode", "HIGH_RECALL");
        props.setProperty("regexner.ignorecase", "true");
        props.setProperty("ner.fine.regexner.ignorecase", "true");
        props.setProperty("tokenize.options", "untokenizable=firstKeep");
        return new StanfordCoreNLP(props);
    }

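    /**
     * Builds the sentiment pipeline (tokenize, ssplit, pos, parse, sentiment, lemma,
     * stopword) with a custom stop-word annotator, and initialises the shared
     * MaxentTagger as a side effect. Despite the method name, this pipeline uses the
     * PCFG parser model, not the shift-reduce parser.
     */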
    public StanfordCoreNLP shiftReduceParserInitiate() {
        Properties propsSentiment = new Properties();
        // alternative model: "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"
        String lexParserEnglishPCFG = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
        String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
        // older path: "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"
        String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger";
        String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of," +
                "on,or,such,that,the,their,then,there,these,they,this,to,was,will,with";
        propsSentiment.setProperty("parse.model", lexParserEnglishPCFG);
        propsSentiment.setProperty("sentiment.model", sentimentModel);
        propsSentiment.setProperty("parse.maxlen", "90");
        propsSentiment.setProperty("threads", "1");
        propsSentiment.setProperty("pos.maxlen", "90");
        propsSentiment.setProperty("tokenize.maxlen", "90");
        propsSentiment.setProperty("ssplit.maxlen", "90");
        propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment,lemma,stopword"); // coref is too expensive memory-wise
        propsSentiment.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator");
        propsSentiment.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
        propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep");
        tagger = new MaxentTagger(taggerPath);
        return new StanfordCoreNLP(propsSentiment);
    }

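    // Strips chat-client mention markup from an incoming message. For a hypothetical
    // input "<@12345> hello there" the leading mention is cut away and "hello there"
    // remains; any further "@" characters are removed outright.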
    public String trimString(String str) {
        String message = str.trim();
        if (message.startsWith("<@")) {
            message = message.substring(message.indexOf("> ") + 2);
        }
        if (!message.isEmpty()) {
            message = message.replace("@", "");
            if (message.contains("<>")) {
                message = message.substring(message.indexOf(">"));
            }
            if (message.startsWith("[ *")) {
                message = message.substring(message.indexOf("]"));
            }
        }
        return message;
    }

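    // Annotates str with the given pipeline and stores the result in either the
    // sentiment or the plain annotation cache, logging (but swallowing) pipeline failures.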
    private void createStrAnnotation(String str, StanfordCoreNLP stanfordCoreNLP, Boolean sentimentBool) {
        Annotation strAnno2 = new Annotation(str);
        strAnno2.compact();
        try {
            stanfordCoreNLP.annotate(strAnno2);
            if (sentimentBool) {
                pipelineSentimentAnnotationCache.put(str, strAnno2);
            } else {
                pipelineAnnotationCache.put(str, strAnno2);
            }
        } catch (Exception e) {
            System.out.println("StanfordCoreNLP annotate failed: " + e.getMessage());
        }
    }

    private SentimentAnalyzerTest getResponseFuturesHelper(String strF, String str1, StanfordCoreNLP stanfordCoreNLP,
                                                           StanfordCoreNLP stanfordCoreNLPSentiment,
                                                           List<CoreMap> coreMaps1, Annotation strAnno,
                                                           Annotation strAnnoSentiment, CoreDocument coreDocument,
                                                           Integer tokenizeCountingF, List<List<TaggedWord>> taggedWordListF,
                                                           ArrayList<TypedDependency> typedDependenciesF,
                                                           ArrayList<Integer> rnnCoreAnnotationsPredictedF,
                                                           ArrayList<SimpleMatrix> simpleMatricesF,
                                                           ArrayList<SimpleMatrix> simpleMatricesNodevectorsF,
                                                           List<String> listF, Integer longestF, List<CoreMap> sentencesF,
                                                           List<CoreMap> sentencesSentimentF, ArrayList<Tree> treesF,
                                                           ArrayList<GrammaticalStructure> grammaticalStructuresF,
                                                           Integer sentimentLongestF, List<IMWE<IToken>> imwesF,
                                                           Integer inflectedCounterNegativeF, Integer inflectedCounterPositiveF,
                                                           ArrayList<String> tokenEntryF, Integer unmarkedPatternCounterF,
                                                           ArrayList<String> strTokensIpartFormF, ArrayList<String> tokenFormsF,
                                                           ArrayList<Integer> intTokenEntyCountsF, Integer markedContinuousCounterF,
                                                           ArrayList<String> ITokenTagsF, ArrayList<String> strTokenEntryGetPOSF,
                                                           ArrayList<String> retrieveTGWListF, Integer pairCounterF,
                                                           Integer tokensCounterF, ArrayList<String> stopWordLemmaF,
                                                           ArrayList<String> nerEntitiesF, ArrayList<String> stopWordTokenF,
                                                           ArrayList<String> entityTokenTagsF, ArrayList<String> nerEntitiesTypeF,
                                                           Integer anotatorcounterF, ArrayList<String> strTokenStemsF) {
        Annotation annotation2 = pipelineSentimentAnnotationCache.get(str1);
        Annotation annotation4 = pipelineAnnotationCache.get(str1);
        CoreDocument coreDocument1 = coreDocumentAnnotationCache.get(str1);
        Annotation jmweAnnotation = jmweAnnotationCache.get(str1);
        if (annotation2 == null) {
            createStrAnnotation(str1, stanfordCoreNLPSentiment, true);
        }
        if (annotation4 == null) {
            createStrAnnotation(str1, stanfordCoreNLP, false);
        }
        if (coreDocument1 == null) {
            getCoreDocumentsSuggested(stanfordCoreNLP, str1);
        }
        if (jmweAnnotation == null) {
            getJMWEAnnotation(str1);
            jmweAnnotation = jmweAnnotationCache.get(str1);
        }
        Integer tokenizeCounting = tokenizeCountingHashMap.get(str1);
        List<List<TaggedWord>> taggedWordList1 = taggedWordListHashMap.get(str1);
        ArrayList<String> retrieveTGWList1 = retrieveTGWListHashMap.get(str1);
        List<CoreMap> sentence1 = sentences1HashMap.get(str1);
        List<CoreMap> sentenceSentiment1 = sentencesSentimentHashMap.get(str1);
        ArrayList<Tree> trees1 = trees1HashMap.get(str1);
        List<CoreMap> coreMaps2 = new ArrayList<>();
        ArrayList<GrammaticalStructure> grammaticalStructures1 = grammaticalStructureHashMap.get(str1);
        if (jmweAnnotation != null) {
            coreMaps2 = jmweAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
        }
        ArrayList<TypedDependency> typedDependencies1 = typedDependenciesHashMap.get(str1);
        ArrayList<Integer> rnnCoreAnnotationsPredicted1 = rnnCoreAnnotationsPredictedHashMap.get(str1);
        ArrayList<SimpleMatrix> simpleMatrices1 = simpleMatricesHashMap.get(str1);
        ArrayList<SimpleMatrix> simpleMatricesNodevectors1 = simpleMatricesNodevectorsHashMap.get(str1);
        List list1 = listHashMap.get(str1);
        Integer longest1 = longestHashMap.get(str1);
        Integer sentimentLongest1 = sentimentHashMap.get(str1);
        List<IMWE<IToken>> imwes1 = imwesHashMap.get(str1);
        Integer InflectedCounterNegative1 = InflectedCounterNegativeHashMap.get(str1);
        Integer InflectedCounterPositive1 = InflectedCounterPositiveHashMap.get(str1);
        ArrayList<String> tokenEntry1 = tokenEntryHashMap.get(str1);
        Integer MarkedContinuousCounter1 = MarkedContinuousCounterHashMap.get(str1);
        Integer UnmarkedPatternCounter1 = UnmarkedPatternCounterHashMap.get(str1);
        ArrayList<String> strTokensIpartForm1 = strTokensIpartFormHashMap.get(str1);
        ArrayList<String> tokenForms1 = tokenFormsHashMap.get(str1);
        ArrayList<String> strTokenEntryGetPOS1 = strTokenEntryGetPOSHashMap.get(str1);
        ArrayList<Integer> intTokenEntyCounts1 = intTokenEntyCountsHashMap.get(str1);
        ArrayList<String> ITokenTags1 = ITokenTagsHashMap.get(str1);
        ArrayList<String> strTokenStems1 = strTokenStemsHashMap.get(str1);
        Integer Anotatorcounter1 = AnotatorcounterHashMap.get(str1);
        Integer TokensCounter1 = TokensCounterHashMap.get(str1);
        ArrayList<String> entityTokenTags1 = entityTokenTagsHashMap.get(str1);
        ArrayList<String> nerEntities1 = nerEntitiesHashMap.get(str1);
        ArrayList<String> nerEntitiesType1 = nerEntitiesTypeHashMap.get(str1);
        ArrayList<String> stopWordToken1 = stopWordTokenHashMap.get(str1);
        ArrayList<String> stopWordLemma1 = stopWordLemmaHashMap.get(str1);
        Integer PairCounter1 = PairCounterHashMap.get(str1);

        Annotation annotationStrPipeLine1 = pipelineAnnotationCache.get(str1);
        Annotation annotationStrPipeLineSentiment1 = pipelineSentimentAnnotationCache.get(str1);

        SentimentAnalyzerTest SMX = new SentimentAnalyzerTest(strF, str1,
                coreMaps1, coreMaps2, strAnno,
                // sometimes the plain annotation ends up null, so fall back to the sentiment one
                annotationStrPipeLine1 == null ? annotationStrPipeLineSentiment1 : annotationStrPipeLine1,
                strAnnoSentiment,
                annotationStrPipeLineSentiment1, coreDocument, coreDocumentAnnotationCache.get(str1),
                tagger, gsf, classifier, tokenizeCounting, tokenizeCountingF,
                taggedWordListF, taggedWordList1, retrieveTGWListF, retrieveTGWList1,
                sentencesF, sentence1, sentencesSentimentF, sentenceSentiment1, treesF, trees1,
                grammaticalStructuresF, grammaticalStructures1, typedDependenciesF,
                typedDependencies1, rnnCoreAnnotationsPredictedF, rnnCoreAnnotationsPredicted1,
                simpleMatricesF, simpleMatrices1, simpleMatricesNodevectorsF, simpleMatricesNodevectors1,
                listF, list1, longestF, longest1, sentimentLongestF, sentimentLongest1, imwesF,
                imwes1, inflectedCounterNegativeF, InflectedCounterNegative1, inflectedCounterPositiveF,
                InflectedCounterPositive1, tokenEntryF, tokenEntry1, markedContinuousCounterF,
                MarkedContinuousCounter1, unmarkedPatternCounterF, UnmarkedPatternCounter1,
                strTokensIpartFormF, strTokensIpartForm1, tokenFormsF, tokenForms1,
                strTokenEntryGetPOSF, strTokenEntryGetPOS1, intTokenEntyCountsF,
                intTokenEntyCounts1, ITokenTagsF, ITokenTags1, strTokenStemsF, strTokenStems1,
                anotatorcounterF, Anotatorcounter1, tokensCounterF, TokensCounter1,
                entityTokenTagsF, entityTokenTags1, nerEntitiesF, nerEntities1, nerEntitiesTypeF,
                nerEntitiesType1, stopWordTokenF, stopWordToken1, stopWordLemmaF, stopWordLemma1,
                pairCounterF, PairCounter1
        );
        // Write anything that was computed for the first time back into the caches.
        if (tokenizeCounting == null) {
            tokenizeCountingHashMap.put(str1, SMX.getTokenizeCounting());
        }
        if (taggedWordList1 == null) {
            taggedWordListHashMap.put(str1, SMX.getTaggedWordList1());
        }
        if (retrieveTGWList1 == null) {
            retrieveTGWListHashMap.put(str1, SMX.getRetrieveTGWList1());
        }
        if (sentence1 == null) {
            sentences1HashMap.put(str1, SMX.getSentences1());
        }
        if (sentenceSentiment1 == null) {
            sentencesSentimentHashMap.put(str1, SMX.getSentencesSentiment1());
        }
        if (trees1 == null) {
            trees1HashMap.put(str1, SMX.getTrees1());
        }
        if (grammaticalStructures1 == null) {
            grammaticalStructureHashMap.put(str1, SMX.getGrammaticalStructures1());
        }
        if (typedDependencies1 == null) {
            typedDependenciesHashMap.put(str1, SMX.getTypedDependencies1());
        }
        if (rnnCoreAnnotationsPredicted1 == null) {
            rnnCoreAnnotationsPredictedHashMap.put(str1, SMX.getRnnCoreAnnotationsPredicted1());
        }
        if (simpleMatrices1 == null) {
            simpleMatricesHashMap.put(str1, SMX.getSimpleMatrices1());
        }
        if (simpleMatricesNodevectors1 == null) {
            simpleMatricesNodevectorsHashMap.put(str1, SMX.getSimpleMatricesNodevectors1());
        }
        if (list1 == null) {
            listHashMap.put(str1, SMX.getList1());
        }
        if (longest1 == null) {
            longestHashMap.put(str1, SMX.getLongest1());
        }
        if (sentimentLongest1 == null) {
            sentimentHashMap.put(str1, SMX.getSentimentLongest1());
        }
        if (imwes1 == null) {
            imwesHashMap.put(str1, SMX.getImwes1());
        }
        if (InflectedCounterNegative1 == null) {
            InflectedCounterNegativeHashMap.put(str1, SMX.getInflectedCounterNegative1());
        }
        if (InflectedCounterPositive1 == null) {
            InflectedCounterPositiveHashMap.put(str1, SMX.getInflectedCounterPositive1());
        }
        if (tokenEntry1 == null) {
            tokenEntryHashMap.put(str1, SMX.getTokenEntry1());
        }
        if (MarkedContinuousCounter1 == null) {
            MarkedContinuousCounterHashMap.put(str1, SMX.getMarkedContinuousCounter1());
        }
        if (UnmarkedPatternCounter1 == null) {
            UnmarkedPatternCounterHashMap.put(str1, SMX.getUnmarkedPatternCounter1());
        }
        if (strTokensIpartForm1 == null) {
            strTokensIpartFormHashMap.put(str1, SMX.getStrTokensIpartForm1());
        }
        if (tokenForms1 == null) {
            tokenFormsHashMap.put(str1, SMX.getTokenForms1());
        }
        if (strTokenEntryGetPOS1 == null) {
            strTokenEntryGetPOSHashMap.put(str1, SMX.getStrTokenEntryGetPOS1());
        }
        if (intTokenEntyCounts1 == null) {
            intTokenEntyCountsHashMap.put(str1, SMX.getIntTokenEntyCounts1());
        }
        if (ITokenTags1 == null) {
            ITokenTagsHashMap.put(str1, SMX.getITokenTags1());
        }
        if (strTokenStems1 == null) {
            strTokenStemsHashMap.put(str1, SMX.getStrTokenStems1());
        }
        if (Anotatorcounter1 == null) {
            AnotatorcounterHashMap.put(str1, SMX.getAnotatorcounter1());
        }
        if (TokensCounter1 == null) {
            TokensCounterHashMap.put(str1, SMX.getTokensCounter1());
        }
        if (entityTokenTags1 == null) {
            entityTokenTagsHashMap.put(str1, SMX.getEntityTokenTags1());
        }
        if (nerEntities1 == null) {
            nerEntitiesHashMap.put(str1, SMX.getNerEntities1());
        }
        if (nerEntitiesType1 == null) {
            nerEntitiesTypeHashMap.put(str1, SMX.getNerEntitiesType1());
        }
        if (stopWordToken1 == null) {
            stopWordTokenHashMap.put(str1, SMX.getStopWordToken1());
        }
        if (stopWordLemma1 == null) {
            stopWordLemmaHashMap.put(str1, SMX.getStopWordLemma1());
        }
        if (PairCounter1 == null) {
            PairCounterHashMap.put(str1, SMX.getPairCounter1());
        }
        return SMX;
    }

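    // Callable wrapper so getResponseFuturesHelper can run on the completion service;
    // the class only captures the arguments and delegates in call().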
    private class GetResponseTask implements Callable<SentimentAnalyzerTest> {

        private final String strF;
        private final String str1;
        private final StanfordCoreNLP stanfordCoreNLP;
        private final StanfordCoreNLP stanfordCoreNLPSentiment;
        private final List<CoreMap> coreMaps1;
        private final Annotation strAnno;
        private final Annotation strAnnoSentiment;
        private final CoreDocument coreDocument;
        private final Integer tokenizeCountingF;
        private final List<List<TaggedWord>> taggedWordListF;
        private final ArrayList<TypedDependency> typedDependenciesF;
        private final ArrayList<Integer> rnnCoreAnnotationsPredictedF;
        private final ArrayList<SimpleMatrix> simpleMatricesF;
        private final ArrayList<SimpleMatrix> simpleMatricesNodevectorsF;
        private final List<String> listF;
        private final Integer longestF;
        private final List<CoreMap> sentencesF;
        private final List<CoreMap> sentencesSentimentF;
        private final ArrayList<Tree> treesF;
        private final ArrayList<GrammaticalStructure> grammaticalStructuresF;
        private final Integer sentimentLongestF;
        private final List<IMWE<IToken>> imwesF;
        private final Integer inflectedCounterNegativeF;
        private final Integer inflectedCounterPositiveF;
        private final ArrayList<String> tokenEntryF;
        private final Integer unmarkedPatternCounterF;
        private final ArrayList<String> strTokensIpartFormF;
        private final ArrayList<String> tokenFormsF;
        private final ArrayList<Integer> intTokenEntyCountsF;
        private final Integer markedContinuousCounterF;
        private final ArrayList<String> iTokenTagsF;
        private final ArrayList<String> strTokenEntryGetPOSF;
        private final ArrayList<String> retrieveTGWListF;
        private final Integer pairCounterF;
        private final Integer tokensCounterF;
        private final ArrayList<String> stopWordLemmaF;
        private final ArrayList<String> nerEntitiesF;
        private final ArrayList<String> stopWordTokenF;
        private final ArrayList<String> entityTokenTagsF;
        private final ArrayList<String> nerEntitiesTypeF;
        private final Integer anotatorcounterF;
        private final ArrayList<String> strTokenStemsF;

        public GetResponseTask(String strF, String str1, StanfordCoreNLP stanfordCoreNLP,
                               StanfordCoreNLP stanfordCoreNLPSentiment, List<CoreMap> coreMaps1, Annotation strAnno,
                               Annotation strAnnoSentiment, CoreDocument coreDocument, Integer tokenizeCountingF,
                               List<List<TaggedWord>> taggedWordListF, ArrayList<TypedDependency> typedDependenciesF,
                               ArrayList<Integer> rnnCoreAnnotationsPredictedF, ArrayList<SimpleMatrix> simpleMatricesF,
                               ArrayList<SimpleMatrix> simpleMatricesNodevectorsF, List<String> listF, Integer longestF,
                               List<CoreMap> sentencesF, List<CoreMap> sentencesSentimentF, ArrayList<Tree> treesF,
                               ArrayList<GrammaticalStructure> grammaticalStructuresF, Integer sentimentLongestF,
                               List<IMWE<IToken>> imwesF, Integer inflectedCounterNegativeF, Integer inflectedCounterPositiveF,
                               ArrayList<String> tokenEntryF, Integer unmarkedPatternCounterF,
                               ArrayList<String> strTokensIpartFormF, ArrayList<String> tokenFormsF,
                               ArrayList<Integer> intTokenEntyCountsF, Integer markedContinuousCounterF,
                               ArrayList<String> iTokenTagsF, ArrayList<String> strTokenEntryGetPOSF,
                               ArrayList<String> retrieveTGWListF, Integer pairCounterF, Integer tokensCounterF,
                               ArrayList<String> stopWordLemmaF, ArrayList<String> nerEntitiesF,
                               ArrayList<String> stopWordTokenF, ArrayList<String> entityTokenTagsF,
                               ArrayList<String> nerEntitiesTypeF, Integer anotatorcounterF,
                               ArrayList<String> strTokenStemsF) {
            this.strF = strF;
            this.str1 = str1;
            this.stanfordCoreNLP = stanfordCoreNLP;
            this.stanfordCoreNLPSentiment = stanfordCoreNLPSentiment;
            this.coreMaps1 = coreMaps1;
            this.strAnno = strAnno;
            this.strAnnoSentiment = strAnnoSentiment;
            this.coreDocument = coreDocument;
            this.tokenizeCountingF = tokenizeCountingF;
            this.taggedWordListF = taggedWordListF;
            this.typedDependenciesF = typedDependenciesF;
            this.rnnCoreAnnotationsPredictedF = rnnCoreAnnotationsPredictedF;
            this.simpleMatricesF = simpleMatricesF;
            this.simpleMatricesNodevectorsF = simpleMatricesNodevectorsF;
            this.listF = listF;
            this.longestF = longestF;
            this.sentencesF = sentencesF;
            this.sentencesSentimentF = sentencesSentimentF;
            this.treesF = treesF;
            this.grammaticalStructuresF = grammaticalStructuresF;
            this.sentimentLongestF = sentimentLongestF;
            this.imwesF = imwesF;
            this.inflectedCounterNegativeF = inflectedCounterNegativeF;
            this.inflectedCounterPositiveF = inflectedCounterPositiveF;
            this.tokenEntryF = tokenEntryF;
            this.unmarkedPatternCounterF = unmarkedPatternCounterF;
            this.strTokensIpartFormF = strTokensIpartFormF;
            this.tokenFormsF = tokenFormsF;
            this.intTokenEntyCountsF = intTokenEntyCountsF;
            this.markedContinuousCounterF = markedContinuousCounterF;
            this.iTokenTagsF = iTokenTagsF;
            this.strTokenEntryGetPOSF = strTokenEntryGetPOSF;
            this.retrieveTGWListF = retrieveTGWListF;
            this.pairCounterF = pairCounterF;
            this.tokensCounterF = tokensCounterF;
            this.stopWordLemmaF = stopWordLemmaF;
            this.nerEntitiesF = nerEntitiesF;
            this.stopWordTokenF = stopWordTokenF;
            this.entityTokenTagsF = entityTokenTagsF;
            this.nerEntitiesTypeF = nerEntitiesTypeF;
            this.anotatorcounterF = anotatorcounterF;
            this.strTokenStemsF = strTokenStemsF;
        }

        @Override
        public SentimentAnalyzerTest call() throws Exception {
            return getResponseFuturesHelper(strF, str1, stanfordCoreNLP, stanfordCoreNLPSentiment,
                    coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF,
                    typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF,
                    listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF,
                    imwesF, inflectedCounterNegativeF, inflectedCounterPositiveF, tokenEntryF, unmarkedPatternCounterF,
                    strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, markedContinuousCounterF, iTokenTagsF,
                    strTokenEntryGetPOSF, retrieveTGWListF, pairCounterF, tokensCounterF, stopWordLemmaF, nerEntitiesF,
                    stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, anotatorcounterF, strTokenStemsF);
        }
    }

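    /**
     * Scores strF against every stored string concurrently, keeping candidates whose
     * score beats the running maximum (so the list ends with the best match), then
     * optionally persists strF when it relates strongly enough to the existing data,
     * and finally stitches a reply together from the top-scoring relations.
     */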
    public String getResponseFutures(String strF, StanfordCoreNLP stanfordCoreNLP, StanfordCoreNLP stanfordCoreNLPSentiment) {
        strResponses.putIfAbsent(strF, new ArrayList<>());

        Annotation strAnno = new Annotation(strF);
        strAnno.compact();
        stanfordCoreNLP.annotate(strAnno);

        Annotation strAnnoSentiment = new Annotation(strF);
        strAnnoSentiment.compact();
        stanfordCoreNLPSentiment.annotate(strAnnoSentiment);

        Annotation annotation = new Annotation(strF);
        stanfordCoreNLP.annotate(annotation);
        CoreDocument coreDocument = new CoreDocument(annotation);
        Annotation jmweAnnotationF = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strF);
        List<CoreMap> coreMaps1 = jmweAnnotationF.get(CoreAnnotations.SentencesAnnotation.class);

        // Artefacts for strF itself start out null and are adopted from the first
        // completed task that computed them, so later helper calls can reuse them.
        Integer tokenizeCountingF = null;
        List<List<TaggedWord>> taggedWordListF = null;
        ArrayList<String> retrieveTGWListF = null;
        List<CoreMap> sentencesF = null;
        List<CoreMap> sentencesSentimentF = null;
        ArrayList<Tree> treesF = null;
        ArrayList<GrammaticalStructure> grammaticalStructuresF = null;
        ArrayList<TypedDependency> typedDependenciesF = null;
        ArrayList<Integer> rnnCoreAnnotationsPredictedF = null;
        ArrayList<SimpleMatrix> simpleMatricesF = null;
        ArrayList<SimpleMatrix> simpleMatricesNodevectorsF = null;
        List<String> listF = null;
        Integer longestF = null;
        Integer sentimentLongestF = null;
        List<IMWE<IToken>> imwesF = null;
        Integer InflectedCounterNegativeF = null;
        Integer InflectedCounterPositiveF = null;
        ArrayList<String> tokenEntryF = null;
        Integer MarkedContinuousCounterF = null;
        Integer UnmarkedPatternCounterF = null;
        ArrayList<String> strTokensIpartFormF = null;
        ArrayList<String> tokenFormsF = null;
        ArrayList<String> strTokenEntryGetPOSF = null;
        ArrayList<Integer> intTokenEntyCountsF = null;
        ArrayList<String> ITokenTagsF = null;
        ArrayList<String> strTokenStemsF = null;
        Integer AnotatorcounterF = null;
        Integer TokensCounterF = null;
        ArrayList<String> entityTokenTagsF = null;
        ArrayList<String> nerEntitiesF = null;
        ArrayList<String> nerEntitiesTypeF = null;
        ArrayList<String> stopWordTokenF = null;
        ArrayList<String> stopWordLemmaF = null;
        Integer PairCounterF = null;

        ArrayList<String> concurrentRelations = new ArrayList<>();
        StringBuilder SB = new StringBuilder();
        List<String> ues_copy = new ArrayList<>(DataMapper.getAllStrings());
        double preRelationUserCounters = -155000.0;

        ArrayList<Future<SentimentAnalyzerTest>> futures = new ArrayList<>();

        for (String str1 : ues_copy) {
            if (!strF.equals(str1)) {
                futures.add(completionService.submit(new GetResponseTask(strF, str1, stanfordCoreNLP, stanfordCoreNLPSentiment,
                        coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF,
                        typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF,
                        listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF,
                        imwesF, InflectedCounterNegativeF, InflectedCounterPositiveF, tokenEntryF, UnmarkedPatternCounterF,
                        strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, MarkedContinuousCounterF, ITokenTagsF,
                        strTokenEntryGetPOSF, retrieveTGWListF, PairCounterF, TokensCounterF, stopWordLemmaF, nerEntitiesF,
                        stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, AnotatorcounterF, strTokenStemsF)));
            }
        }

        int pending = futures.size();
        while (pending > 0) {
            try {
                Future<SentimentAnalyzerTest> completed = completionService.poll(100, TimeUnit.MILLISECONDS);
                if (completed != null) {
                    --pending;
                    SentimentAnalyzerTest SMX = completed.get();
                    if (SMX == null) continue;
                    double scoreRelationLastUserMsg = SMX.getScore();
                    if (scoreRelationLastUserMsg > preRelationUserCounters) {
                        preRelationUserCounters = scoreRelationLastUserMsg;
                        concurrentRelations.add(SMX.getSecondaryString());
                    }

                    // Sequential part: adopt the strF-side artefacts the first time they appear.
                    if (tokenizeCountingF == null) {
                        tokenizeCountingF = SMX.getTokenizeCountingF();
                    }
                    if (taggedWordListF == null) {
                        taggedWordListF = SMX.getTaggedWordListF();
                    }
                    if (typedDependenciesF == null) {
                        typedDependenciesF = SMX.getTypedDependenciesF();
                    }
                    if (rnnCoreAnnotationsPredictedF == null) {
                        rnnCoreAnnotationsPredictedF = SMX.getRnnCoreAnnotationsPredictedF();
                    }
                    if (simpleMatricesF == null) {
                        simpleMatricesF = SMX.getSimpleMatricesF();
                    }
                    if (simpleMatricesNodevectorsF == null) {
                        simpleMatricesNodevectorsF = SMX.getSimpleMatricesNodevectorsF();
                    }
                    if (listF == null) {
                        listF = SMX.getListF();
                    }
                    if (longestF == null) {
                        longestF = SMX.getLongestF();
                    }
                    if (sentencesF == null) {
                        sentencesF = SMX.getSentencesF();
                    }
                    if (sentencesSentimentF == null) {
                        sentencesSentimentF = SMX.getSentencesSentimentF();
                    }
                    if (treesF == null) {
                        treesF = SMX.getTreesF();
                    }
                    if (grammaticalStructuresF == null) {
                        grammaticalStructuresF = SMX.getGrammaticalStructuresF();
                    }
                    if (sentimentLongestF == null) {
                        sentimentLongestF = SMX.getSentimentLongestF();
                    }
                    if (imwesF == null) {
                        imwesF = SMX.getImwesF();
                    }
                    if (InflectedCounterNegativeF == null) {
                        InflectedCounterNegativeF = SMX.getInflectedCounterNegativeF();
                    }
                    if (InflectedCounterPositiveF == null) {
                        InflectedCounterPositiveF = SMX.getInflectedCounterPositiveF();
                    }
                    if (tokenEntryF == null) {
                        tokenEntryF = SMX.getTokenEntryF();
                    }
                    if (UnmarkedPatternCounterF == null) {
                        UnmarkedPatternCounterF = SMX.getUnmarkedPatternCounterF();
                    }
                    if (strTokensIpartFormF == null) {
                        strTokensIpartFormF = SMX.getStrTokensIpartFormF();
                    }
                    if (tokenFormsF == null) {
                        tokenFormsF = SMX.getTokenFormsF();
                    }
                    if (intTokenEntyCountsF == null) {
                        intTokenEntyCountsF = SMX.getIntTokenEntyCountsF();
                    }
                    if (MarkedContinuousCounterF == null) {
                        MarkedContinuousCounterF = SMX.getMarkedContinuousCounterF();
                    }
                    if (ITokenTagsF == null) {
                        ITokenTagsF = SMX.getITokenTagsF();
                    }
                    if (strTokenEntryGetPOSF == null) {
                        strTokenEntryGetPOSF = SMX.getStrTokenEntryGetPOSF();
                    }
                    if (retrieveTGWListF == null) {
                        retrieveTGWListF = SMX.getRetrieveTGWListF();
                    }
                    if (PairCounterF == null) {
                        PairCounterF = SMX.getPairCounterF();
                    }
                    if (TokensCounterF == null) {
                        TokensCounterF = SMX.getTokensCounterF();
                    }
                    if (stopWordLemmaF == null) {
                        stopWordLemmaF = SMX.getStopWordLemmaF();
                    }
                    if (nerEntitiesF == null) {
                        nerEntitiesF = SMX.getNerEntitiesF();
                    }
                    if (stopWordTokenF == null) {
                        stopWordTokenF = SMX.getStopWordTokenF();
                    }
                    if (entityTokenTagsF == null) {
                        entityTokenTagsF = SMX.getEntityTokenTagsF();
                    }
                    if (nerEntitiesTypeF == null) {
                        nerEntitiesTypeF = SMX.getNerEntitiesTypeF();
                    }
                    if (AnotatorcounterF == null) {
                        AnotatorcounterF = SMX.getAnotatorcounterF();
                    }
                    if (strTokenStemsF == null) {
                        strTokenStemsF = SMX.getStrTokenStemsF();
                    }
                }
            } catch (InterruptedException | ExecutionException e) {
                // Abandon the remaining futures and rebuild the pool from config.
                if (e instanceof InterruptedException) {
                    Thread.currentThread().interrupt(); // preserve interrupt status
                }
                pending = 0;
                System.out.println(Arrays.toString(e.getStackTrace()));
                Properties prop = new Properties();
                String fileName = "app.config";
                try (FileInputStream fis = new FileInputStream(fileName)) {
                    prop.load(fis);
                } catch (IOException ex) {
                    System.out.println("failed loading " + fileName + ": " + ex.getMessage());
                }
                pool.shutdown();
                pool = Executors.newFixedThreadPool(Integer.valueOf(prop.getProperty("app.thread_count")));
                completionService = new ExecutorCompletionService<>(pool);
            }
        }

        int cacheRequirement = 8500;
        if (preRelationUserCounters > cacheRequirement && !ues_copy.contains(strF) && filterContent(strF)) {
            DataMapper.InsertMYSQLStrings(strF);
            DataMapper.checkStringsToDelete();
        }
        double randomLengthPermit = strF.length() * (Math.random() * Math.random() * (Math.random() * 10));
        Collections.reverse(concurrentRelations);
        ArrayList<String> mysqlUpdateLastUsed = new ArrayList<>();
        if (!concurrentRelations.isEmpty()) {
            for (String secondaryRelation : concurrentRelations) {
                if (SB.toString().length() > randomLengthPermit && !SB.toString().isEmpty()) {
                    break;
                }

                // Skip relations that were recently used as a response to strF,
                // occasionally clearing the history so responses do not starve.
                ArrayList<String> orDefault = strResponses.get(strF);
                boolean skip = false;
                for (String strItr : orDefault) {
                    if (secondaryRelation.equalsIgnoreCase(strItr)) {
                        skip = true;
                        if (orDefault.size() + 3 >= concurrentRelations.size()) {
                            orDefault = new ArrayList<>();
                            strResponses.put(strF, orDefault);
                        } else if (orDefault.size() > 5) {
                            double v = Math.random() * 10;
                            if (v > 5.6) {
                                orDefault = new ArrayList<>();
                                strResponses.put(strF, orDefault);
                            }
                        }
                        break;
                    }
                }
                if (skip) continue;

                if (!SB.isEmpty()) {
                    // Only extend the reply if the concatenation still scores at least as well.
                    String testSTR = SB.toString() + " " + secondaryRelation;
                    SentimentAnalyzerTest SMX = getResponseFuturesHelper(strF, testSTR, stanfordCoreNLP, stanfordCoreNLPSentiment,
                            coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF,
                            typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF,
                            listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF,
                            imwesF, InflectedCounterNegativeF, InflectedCounterPositiveF, tokenEntryF, UnmarkedPatternCounterF,
                            strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, MarkedContinuousCounterF, ITokenTagsF,
                            strTokenEntryGetPOSF, retrieveTGWListF, PairCounterF, TokensCounterF, stopWordLemmaF, nerEntitiesF,
                            stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, AnotatorcounterF, strTokenStemsF);
                    double scoreRelationLastUserMsg = SMX.getScore();
                    if (preRelationUserCounters > scoreRelationLastUserMsg) {
                        break;
                    }
                }

                SB.append(secondaryRelation).append(" ");
                mysqlUpdateLastUsed.add(secondaryRelation);
                orDefault.add(secondaryRelation);
                strResponses.put(strF, orDefault);
            }
        }
        if (SB.toString().isEmpty()) {
            return "failure, preventing stuckness";
        }
        DataMapper.updateLastUsed(mysqlUpdateLastUsed);
        return SB.toString();
    }

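    // Fetches the JMWE (multi-word-expression) annotation for str1 and caches it.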
    private void getJMWEAnnotation(String str1) {
        Annotation jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str1);
        jmweAnnotationCache.put(str1, jmweAnnotation);
    }

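    /**
     * Public entry point: trims the raw message, generates a reply, and, outside of
     * in-game mode, tries to weave the addressed person's name into the reply.
     */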
    public String getResponseMsg(String str, String personName, StanfordCoreNLP stanfordCoreNLP,
                                 StanfordCoreNLP stanfordCoreNLPSentiment, Boolean ingameResponse) {
        String strF = trimString(str);
        String responseFutures = getResponseFutures(strF, stanfordCoreNLP, stanfordCoreNLPSentiment);
        if (!ingameResponse) {
            responseFutures = checkPersonPresentInSentence(personName, responseFutures, strF, stanfordCoreNLP,
                    stanfordCoreNLPSentiment);
        }
        return responseFutures;
    }

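    // If the generated reply mentions a PERSON entity other than the addressee, splice
    // in a non-numeric person mention from the user's last message and append the
    // addressee's name. Best effort: any NLP failure falls back to the original reply.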
    private String checkPersonPresentInSentence(String personName, String responseMsg, String userLastMessage,
                                                StanfordCoreNLP stanfordCoreNLP, StanfordCoreNLP stanfordCoreNLPSentiment) {
        try {
            CoreDocument pipelineCoreDocument = new CoreDocument(responseMsg);
            CoreDocument pipelineCoreDocumentLastMsg = new CoreDocument(userLastMessage);
            stanfordCoreNLP.annotate(pipelineCoreDocument);
            stanfordCoreNLPSentiment.annotate(pipelineCoreDocumentLastMsg);
            String regex = "(.*?\\d){10,}";
            if (pipelineCoreDocument.entityMentions() != null) {
                for (CoreEntityMention em : pipelineCoreDocument.entityMentions()) {
                    String entityType = em.entityType();
                    if ("PERSON".equals(entityType)) {
                        String str = responseMsg;
                        String emText = em.text();
                        Pattern pattern = Pattern.compile(regex);
                        Matcher matcher = pattern.matcher(personName);
                        boolean isMatched = matcher.matches();
                        if (!emText.equals(personName) && !isMatched) {
                            if (pipelineCoreDocumentLastMsg.entityMentions() != null) {
                                for (CoreEntityMention emLastMsg : pipelineCoreDocumentLastMsg.entityMentions()) {
                                    // was: !Character.isDigit(Integer.parseInt(emLastMsg.text().trim())), which
                                    // threw for any non-numeric mention; check for a numeric mention directly.
                                    if (!emText.equals(emLastMsg.text()) && !emLastMsg.text().trim().matches("\\d+")) {
                                        str = responseMsg.substring(0, responseMsg.indexOf(emText)) + " "
                                                + emLastMsg.text() + " " + responseMsg.substring(responseMsg.indexOf(emText));
                                    }
                                }
                            }
                            str += personName;
                            return str;
                        }
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("SCUFFED JAYZ: " + e.getMessage());
        }
        return responseMsg;
    }

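    // Gatekeeper for persisting new strings: must be longer than three characters
    // and, after trimming, must not look like a bot command ("!...").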
    public boolean filterContent(String str) {
        if (str.length() > 3) {
            String str1Local = str.trim();
            return str1Local.length() > 2 && !str1Local.startsWith("!");
        }
        return false;
    }

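    // Annotates str into a CoreDocument and caches it for later entity lookups.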
    public void getCoreDocumentsSuggested(StanfordCoreNLP pipeline, String str) {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        CoreDocument coreDocument = new CoreDocument(annotation);
        coreDocumentAnnotationCache.put(str, coreDocument);
    }
}