projects-jenz/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java

892 lines
48 KiB
Java

package FunctionLayer;
import DataLayer.DataMapper;
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CoreMap;
import org.ejml.simple.SimpleMatrix;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Datahandler {
// Worker pool used by getResponseFutures to run the per-string comparisons in parallel.
// It starts with a fixed size of 4; the exception handlers in getResponseFutures replace it
// with a pool sized from the "app.thread_count" entry of app.config.
private ExecutorService pool = Executors.newFixedThreadPool(4);
// Kept as a raw type: getResponseFutures reassigns this field with a raw ExecutorCompletionService.
private CompletionService completionService = new ExecutorCompletionService(pool);
// Annotation caches keyed by the annotated string. All four are allocated in the constructor.
// createStrAnnotation fills the two pipeline caches.
// NOTE(review): the other two are presumably filled by getCoreDocumentsSuggested and
// getJMWEAnnotation, which are not visible in this part of the file - confirm.
private HashMap<String, Annotation> pipelineAnnotationCache;
private HashMap<String, Annotation> pipelineSentimentAnnotationCache;
private HashMap<String, CoreDocument> coreDocumentAnnotationCache;
private HashMap<String, Annotation> jmweAnnotationCache;
// Placeholder tagger; shiftReduceParserInitiate replaces it with one loaded from a model file.
private MaxentTagger tagger = new MaxentTagger();
// Both are assigned in the constructor.
private GrammaticalStructureFactory gsf;
private AbstractSequenceClassifier<CoreLabel> classifier;
// Per-string caches of the values produced by SentimentAnalyzerTest. getReponseFuturesHelper
// reads each of them for the candidate string and stores the analyzer's value when the entry
// is still missing.
// NOTE(review): getReponseFuturesHelper is invoked from get_res tasks submitted to the thread
// pool, so these plain HashMaps are read and written from several threads; no synchronization
// is visible in this part of the file - confirm whether that is intended.
private HashMap<String, Integer> tokenizeCountingHashMap = new HashMap<>();
private HashMap<String, List<List<TaggedWord>>> taggedWordListHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> retrieveTGWListHashMap = new HashMap<>();
private HashMap<String, List<CoreMap>> sentences1HashMap = new HashMap<>();
private HashMap<String, List<CoreMap>> sentencesSentimentHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<Tree>> trees1HashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<GrammaticalStructure>> grammaticalStructureHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<TypedDependency>> typedDependenciesHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<Integer>> rnnCoreAnnotationsPredictedHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<SimpleMatrix>> simpleMatricesHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<SimpleMatrix>> simpleMatricesNodevectorsHashMap = new HashMap<>();
private HashMap<String, List> listHashMap = new HashMap<>();
private HashMap<String, Integer> longestHashMap = new HashMap<>();
private HashMap<String, Integer> sentimentHashMap = new HashMap<>();
private HashMap<String, List<IMWE<IToken>>> imwesHashMap = new HashMap<>();
private HashMap<String, Integer> InflectedCounterNegativeHashMap = new HashMap<>();
private HashMap<String, Integer> InflectedCounterPositiveHashMap = new HashMap<>();
private HashMap<String, ArrayList<String>> tokenEntryHashMap = new HashMap<>();
private HashMap<String, Integer> MarkedContinuousCounterHashMap = new HashMap<>();
private HashMap<String, Integer> UnmarkedPatternCounterHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> strTokensIpartFormHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> tokenFormsHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> strTokenEntryGetPOSHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<Integer>> intTokenEntyCountsHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> ITokenTagsHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> strTokenStemsHashMap = new HashMap<>();
private HashMap<String, Integer> AnotatorcounterHashMap = new HashMap<>();
private HashMap<String, Integer> TokensCounterHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> entityTokenTagsHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> nerEntitiesHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> nerEntitiesTypeHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> stopWordTokenHashMap = new HashMap<>();
private HashMap<String, java.util.ArrayList<String>> stopWordLemmaHashMap = new HashMap<>();
private HashMap<String, Integer> PairCounterHashMap = new HashMap<>();
// Keyed by the input string of getResponseFutures; holds a list of response strings for it.
private HashMap<String, ArrayList<String>> strResponses = new HashMap<>();
/**
 * Creates the handler: allocates the four annotation caches, builds the grammatical
 * structure factory and loads the English 3-class NER classifier.
 */
public Datahandler() {
    pipelineAnnotationCache = new HashMap<>();
    pipelineSentimentAnnotationCache = new HashMap<>();
    coreDocumentAnnotationCache = new HashMap<>();
    jmweAnnotationCache = new HashMap<>();
    gsf = initiateGrammaticalStructureFactory();
    classifier = CRFClassifier.getClassifierNoExceptions(
            "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
}
/**
 * Loads the English PCFG lexicalized parser with the options "-maxLength" "100" and returns
 * the grammatical structure factory of that parser's language pack.
 *
 * @return the factory obtained from the loaded parser's TreebankLanguagePack
 */
private GrammaticalStructureFactory initiateGrammaticalStructureFactory() {
    // Alternative model path noted by the original author:
    // "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"
    final String pcfgModelPath = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    LexicalizedParser parser = LexicalizedParser.loadModel(pcfgModelPath, "-maxLength", "100");
    return parser.getOp().langpack().grammaticalStructureFactory();
}
/**
 * Builds the general-purpose CoreNLP pipeline.
 *
 * <p>The pipeline runs tokenize, ssplit, pos, lemma, ner and parse on one thread, parses with
 * the English shift-reduce model, and combines three NER models in HIGH_RECALL mode.
 *
 * @return a new StanfordCoreNLP instance configured with the properties set below
 */
public StanfordCoreNLP pipeLineSetUp() {
    final String lengthLimit = "90";
    // Caseless variants of the conll.4class and muc.7class models also exist
    // (english.conll.4class.caseless / english.muc.7class.caseless); they are not used here.
    final String nerModelList = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz" +
            ",edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz" +
            ",edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz";

    Properties config = new Properties();

    // Which annotators run, and on how many threads.
    config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
    config.setProperty("threads", "1");

    // Tokenizer and per-annotator length limits.
    config.setProperty("tokenize.options", "untokenizable=firstKeep");
    config.setProperty("tokenize.maxlen", lengthLimit);
    config.setProperty("ssplit.maxlen", lengthLimit);
    config.setProperty("pos.maxlen", lengthLimit);
    config.setProperty("lemma.maxlen", lengthLimit);

    // Parser settings.
    config.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
    config.setProperty("parse.maxlen", lengthLimit);
    config.setProperty("parse.binaryTrees", "true");

    // Named-entity recognition settings.
    config.setProperty("ner.model", nerModelList);
    config.setProperty("ner.combinationMode", "HIGH_RECALL");
    config.setProperty("regexner.ignorecase", "true");
    config.setProperty("ner.fine.regexner.ignorecase", "true");

    return new StanfordCoreNLP(config);
}
/**
 * Builds the sentiment CoreNLP pipeline and, as a side effect, replaces the {@code tagger}
 * field with a MaxentTagger loaded from the english-left3words-distsim model.
 *
 * <p>Despite the method name, the parse model configured here is the English PCFG model.
 * The pipeline runs tokenize, ssplit, pos, parse, sentiment, lemma and the custom "stopword"
 * annotator (FunctionLayer.StopwordAnnotator) with the stop word list defined below.
 *
 * @return a new StanfordCoreNLP instance configured with the properties set below
 */
public StanfordCoreNLP shiftReduceParserInitiate() {
    final String lengthLimit = "90";
    // Alternative tagger path noted by the original author:
    // "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"
    final String posTaggerModel = "edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger";
    final String stopWords = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of," +
            "on,or,such,that,the,their,then,there,these,they,this,to,was,will,with";

    Properties sentimentConfig = new Properties();

    // Annotator chain; coref was left out by the original author as too memory-hungry.
    sentimentConfig.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment,lemma,stopword");
    sentimentConfig.setProperty("threads", "1");

    // Tokenizer and per-annotator length limits.
    sentimentConfig.setProperty("tokenize.options", "untokenizable=firstKeep");
    sentimentConfig.setProperty("tokenize.maxlen", lengthLimit);
    sentimentConfig.setProperty("ssplit.maxlen", lengthLimit);
    sentimentConfig.setProperty("pos.maxlen", lengthLimit);
    sentimentConfig.setProperty("parse.maxlen", lengthLimit);

    // Models. An RNN lexparser model (englishRNN.ser.gz) exists as an alternative to the PCFG one.
    sentimentConfig.setProperty("parse.model", "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    sentimentConfig.setProperty("sentiment.model", "edu/stanford/nlp/models/sentiment/sentiment.ser.gz");

    // Custom stop word annotator and its word list.
    sentimentConfig.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator");
    sentimentConfig.setProperty(StopwordAnnotator.STOPWORDS_LIST, stopWords);

    tagger = new MaxentTagger(posTaggerModel);
    return new StanfordCoreNLP(sentimentConfig);
}
/**
 * Cleans up a raw message string.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Surrounding whitespace is trimmed.</li>
 *   <li>If the message starts with "&lt;@", everything up to and including the first "&gt; "
 *       is dropped. When there is no "&gt; " but there is a "&gt;", everything through that
 *       "&gt;" is dropped instead. When there is no "&gt;" at all the message is left as is.
 *       (Previously a missing "&gt; " silently removed just the first character.)</li>
 *   <li>If anything is left, every "@" is removed.</li>
 *   <li>If the message contains "&lt;&gt;", it is cut to start at its first "&gt;".</li>
 *   <li>If the message starts with "[ *", it is cut to start at its first "]". When there is
 *       no "]" the message is left as is. (Previously this case threw
 *       StringIndexOutOfBoundsException.)</li>
 * </ol>
 *
 * @param str the raw message; must not be null
 * @return the cleaned message, possibly empty
 */
public String trimString(String str) {
    String message = str.trim();
    if (message.startsWith("<@")) {
        int mentionEnd = message.indexOf("> ");
        if (mentionEnd >= 0) {
            message = message.substring(mentionEnd + 2);
        } else {
            // No "> " separator: fall back to the closing bracket alone, if there is one.
            int closingBracket = message.indexOf('>');
            if (closingBracket >= 0) {
                message = message.substring(closingBracket + 1);
            }
        }
    }
    if (message.isEmpty()) {
        return message;
    }
    message = message.replace("@", "");
    if (message.contains("<>")) {
        // contains("<>") guarantees that indexOf(">") is non-negative here.
        message = message.substring(message.indexOf(">"));
    }
    if (message.startsWith("[ *")) {
        int closingSquare = message.indexOf("]");
        // Guard against a missing "]": substring(-1) would throw.
        if (closingSquare >= 0) {
            message = message.substring(closingSquare);
        }
    }
    return message;
}
/**
 * Annotates {@code str} with the given pipeline and stores the result in one of the two
 * pipeline caches, keyed by {@code str}.
 *
 * <p>Any exception raised while annotating or caching is caught and reported on standard
 * output; in that case nothing is stored by this call.
 *
 * @param str             the text to annotate; also the cache key
 * @param stanfordCoreNLP the pipeline that performs the annotation
 * @param sentimentBool   true to store into pipelineSentimentAnnotationCache,
 *                        false to store into pipelineAnnotationCache
 */
private void createStrAnnotation(String str, StanfordCoreNLP stanfordCoreNLP, Boolean sentimentBool) {
    Annotation annotated = new Annotation(str);
    annotated.compact();
    try {
        stanfordCoreNLP.annotate(annotated);
        // The flag is evaluated only after a successful annotate call, inside the try.
        HashMap<String, Annotation> targetCache =
                sentimentBool ? pipelineSentimentAnnotationCache : pipelineAnnotationCache;
        targetCache.put(str, annotated);
    } catch (Exception e) {
        System.out.println("stanfordcorenlp annotate failed" + e.getMessage());
    }
}
/**
 * Builds the SentimentAnalyzerTest for the pair ({@code strF}, {@code str1}) and records the
 * values it produced for {@code str1} in the per-string caches.
 *
 * <p>Flow:
 * <ol>
 *   <li>Ensure the four annotation caches have been asked to produce an entry for
 *       {@code str1} (sentiment pipeline, plain pipeline, core document, JMWE).</li>
 *   <li>Read every cached per-string value for {@code str1}; a missing entry yields null.</li>
 *   <li>Construct the analyzer, passing the {@code ...F} arguments (values belonging to
 *       {@code strF}, which may be null) next to the cached {@code ...1} values.</li>
 *   <li>For each cache that had no entry, store the value reported by the analyzer.</li>
 * </ol>
 *
 * <p>NOTE(review): this method is invoked from get_res tasks running on the thread pool, and
 * the caches are plain HashMaps. No synchronization is visible in this part of the file;
 * confirm whether concurrent access is intended.
 *
 * <p>All parameters from {@code tokenizeCountingF} onwards are passed through to the
 * SentimentAnalyzerTest constructor unchanged.
 *
 * @param strF                     the primary string
 * @param str1                     the candidate string; key for all cache reads and writes here
 * @param stanfordCoreNLP          pipeline used to fill the plain annotation / core document caches
 * @param stanfordCoreNLPSentiment pipeline used to fill the sentiment annotation cache
 * @param coreMaps1                sentence list of the JMWE annotation of {@code strF}
 * @param strAnno                  annotation of {@code strF} from {@code stanfordCoreNLP}
 * @param strAnnoSentiment         annotation of {@code strF} from {@code stanfordCoreNLPSentiment}
 * @param coreDocument             core document of {@code strF}
 * @return the newly constructed analyzer
 */
private SentimentAnalyzerTest getReponseFuturesHelper(String strF, String str1, StanfordCoreNLP stanfordCoreNLP,
        StanfordCoreNLP stanfordCoreNLPSentiment,
        List<CoreMap> coreMaps1, Annotation strAnno,
        Annotation strAnnoSentiment, CoreDocument coreDocument,
        Integer tokenizeCountingF, List<List<TaggedWord>> taggedWordListF, ArrayList<TypedDependency> typedDependenciesF,
        ArrayList<Integer> rnnCoreAnnotationsPredictedF, ArrayList<SimpleMatrix> simpleMatricesF,
        ArrayList<SimpleMatrix> simpleMatricesNodevectorsF, List<String> listF, Integer longestF, List<CoreMap> sentencesF,
        List<CoreMap> sentencesSentimentF, ArrayList<Tree> treesF, ArrayList<GrammaticalStructure> grammaticalStructuresF,
        Integer sentimentLongestF, List<IMWE<IToken>> imwesF, Integer inflectedCounterNegativeF, Integer inflectedCounterPositiveF,
        ArrayList<String> tokenEntryF, Integer unmarkedPatternCounterF, ArrayList<String> strTokensIpartFormF, ArrayList<String> tokenFormsF,
        ArrayList<Integer> intTokenEntyCountsF, Integer markedContinuousCounterF, ArrayList<String> ITokenTagsF,
        ArrayList<String> strTokenEntryGetPOSF, ArrayList<String> retrieveTGWListF, Integer pairCounterF,
        Integer tokensCounterF, ArrayList<String> stopWordLemmaF, ArrayList<String> nerEntitiesF,
        ArrayList<String> stopWordTokenF, ArrayList<String> entityTokenTagsF, ArrayList<String> nerEntitiesTypeF,
        Integer anotatorcounterF, ArrayList<String> strTokenStemsF) {
    // --- 1. Make sure the annotation caches have been populated for str1. ---
    // All four lookups happen before any of the fill calls.
    Annotation cachedSentimentAnnotation = pipelineSentimentAnnotationCache.get(str1);
    Annotation cachedPipelineAnnotation = pipelineAnnotationCache.get(str1);
    CoreDocument cachedCoreDocument = coreDocumentAnnotationCache.get(str1);
    Annotation jmweAnnotation = jmweAnnotationCache.get(str1);
    if (cachedSentimentAnnotation == null) {
        createStrAnnotation(str1, stanfordCoreNLPSentiment, true);
    }
    if (cachedPipelineAnnotation == null) {
        createStrAnnotation(str1, stanfordCoreNLP, false);
    }
    if (cachedCoreDocument == null) {
        getCoreDocumentsSuggested(stanfordCoreNLP, str1);
    }
    if (jmweAnnotation == null) {
        getJMWEAnnotation(str1);
        jmweAnnotation = jmweAnnotationCache.get(str1);
    }

    // --- 2. Read whatever has already been cached for str1 (null when absent). ---
    Integer tokenizeCounting = tokenizeCountingHashMap.get(str1);
    List<List<TaggedWord>> taggedWordList1 = taggedWordListHashMap.get(str1);
    ArrayList<String> retrieveTGWList1 = retrieveTGWListHashMap.get(str1);
    List<CoreMap> sentence1 = sentences1HashMap.get(str1);
    List<CoreMap> sentenceSentiment1 = sentencesSentimentHashMap.get(str1);
    ArrayList<Tree> trees1 = trees1HashMap.get(str1);
    ArrayList<GrammaticalStructure> grammaticalStructures1 = grammaticalStructureHashMap.get(str1);
    // Sentences of the JMWE annotation; stays an empty list when no annotation is available.
    List<CoreMap> coreMaps2 = new ArrayList<>();
    if (jmweAnnotation != null) {
        coreMaps2 = jmweAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
    }
    ArrayList<TypedDependency> typedDependencies1 = typedDependenciesHashMap.get(str1);
    ArrayList<Integer> rnnCoreAnnotationsPredicted1 = rnnCoreAnnotationsPredictedHashMap.get(str1);
    ArrayList<SimpleMatrix> simpleMatrices1 = simpleMatricesHashMap.get(str1);
    ArrayList<SimpleMatrix> simpleMatricesNodevectors1 = simpleMatricesNodevectorsHashMap.get(str1);
    List list1 = listHashMap.get(str1);
    Integer longest1 = longestHashMap.get(str1);
    Integer sentimentLongest1 = sentimentHashMap.get(str1);
    List<IMWE<IToken>> imwes1 = imwesHashMap.get(str1);
    Integer InflectedCounterNegative1 = InflectedCounterNegativeHashMap.get(str1);
    Integer InflectedCounterPositive1 = InflectedCounterPositiveHashMap.get(str1);
    ArrayList<String> tokenEntry1 = tokenEntryHashMap.get(str1);
    Integer MarkedContinuousCounter1 = MarkedContinuousCounterHashMap.get(str1);
    Integer UnmarkedPatternCounter1 = UnmarkedPatternCounterHashMap.get(str1);
    ArrayList<String> strTokensIpartForm1 = strTokensIpartFormHashMap.get(str1);
    ArrayList<String> tokenForms1 = tokenFormsHashMap.get(str1);
    ArrayList<String> strTokenEntryGetPOS1 = strTokenEntryGetPOSHashMap.get(str1);
    ArrayList<Integer> intTokenEntyCounts1 = intTokenEntyCountsHashMap.get(str1);
    ArrayList<String> ITokenTags1 = ITokenTagsHashMap.get(str1);
    ArrayList<String> strTokenStems1 = strTokenStemsHashMap.get(str1);
    Integer Anotatorcounter1 = AnotatorcounterHashMap.get(str1);
    Integer TokensCounter1 = TokensCounterHashMap.get(str1);
    ArrayList<String> entityTokenTags1 = entityTokenTagsHashMap.get(str1);
    ArrayList<String> nerEntities1 = nerEntitiesHashMap.get(str1);
    ArrayList<String> nerEntitiesType1 = nerEntitiesTypeHashMap.get(str1);
    ArrayList<String> stopWordToken1 = stopWordTokenHashMap.get(str1);
    ArrayList<String> stopWordLemma1 = stopWordLemmaHashMap.get(str1);
    Integer PairCounter1 = PairCounterHashMap.get(str1);
    // Re-read the pipeline caches: step 1 may just have filled them.
    Annotation annotationStrPipeLine1 = pipelineAnnotationCache.get(str1);
    Annotation annotationStrPipeLineSentiment1 = pipelineSentimentAnnotationCache.get(str1);

    // --- 3. Build the analyzer. ---
    // The argument list is positional. Most values come as an (F, 1) pair; the first pair
    // (tokenizeCounting, tokenizeCountingF) is in the opposite order. Do not reorder.
    SentimentAnalyzerTest SMX = new SentimentAnalyzerTest(strF, str1,
            coreMaps1, coreMaps2, strAnno,
            //sometimes Annotation(str) returns null so in that case better use result of sentiment
            annotationStrPipeLine1 == null ? annotationStrPipeLineSentiment1 : annotationStrPipeLine1,
            strAnnoSentiment,
            annotationStrPipeLineSentiment1, coreDocument, coreDocumentAnnotationCache.get(str1),
            tagger, gsf, classifier, tokenizeCounting, tokenizeCountingF,
            taggedWordListF, taggedWordList1, retrieveTGWListF, retrieveTGWList1,
            sentencesF, sentence1, sentencesSentimentF, sentenceSentiment1, treesF, trees1,
            grammaticalStructuresF, grammaticalStructures1, typedDependenciesF,
            typedDependencies1, rnnCoreAnnotationsPredictedF, rnnCoreAnnotationsPredicted1,
            simpleMatricesF, simpleMatrices1, simpleMatricesNodevectorsF, simpleMatricesNodevectors1,
            listF, list1, longestF, longest1, sentimentLongestF, sentimentLongest1, imwesF,
            imwes1, inflectedCounterNegativeF, InflectedCounterNegative1, inflectedCounterPositiveF,
            InflectedCounterPositive1, tokenEntryF, tokenEntry1, markedContinuousCounterF,
            MarkedContinuousCounter1, unmarkedPatternCounterF, UnmarkedPatternCounter1,
            strTokensIpartFormF, strTokensIpartForm1, tokenFormsF, tokenForms1,
            strTokenEntryGetPOSF, strTokenEntryGetPOS1, intTokenEntyCountsF,
            intTokenEntyCounts1, ITokenTagsF, ITokenTags1, strTokenStemsF, strTokenStems1,
            anotatorcounterF, Anotatorcounter1, tokensCounterF, TokensCounter1,
            entityTokenTagsF, entityTokenTags1, nerEntitiesF, nerEntities1, nerEntitiesTypeF,
            nerEntitiesType1, stopWordTokenF, stopWordToken1, stopWordLemmaF, stopWordLemma1,
            pairCounterF, PairCounter1
    );

    // --- 4. Store the analyzer's values for every cache entry that was missing. ---
    // Each getter is called only when its cache entry was absent.
    if (tokenizeCounting == null) {
        tokenizeCountingHashMap.put(str1, SMX.getTokenizeCounting());
    }
    if (taggedWordList1 == null) {
        taggedWordListHashMap.put(str1, SMX.getTaggedWordList1());
    }
    if (retrieveTGWList1 == null) {
        retrieveTGWListHashMap.put(str1, SMX.getRetrieveTGWList1());
    }
    if (sentence1 == null) {
        sentences1HashMap.put(str1, SMX.getSentences1());
    }
    if (sentenceSentiment1 == null) {
        sentencesSentimentHashMap.put(str1, SMX.getSentencesSentiment1());
    }
    if (trees1 == null) {
        trees1HashMap.put(str1, SMX.getTrees1());
    }
    if (grammaticalStructures1 == null) {
        grammaticalStructureHashMap.put(str1, SMX.getGrammaticalStructures1());
    }
    if (typedDependencies1 == null) {
        typedDependenciesHashMap.put(str1, SMX.getTypedDependencies1());
    }
    if (rnnCoreAnnotationsPredicted1 == null) {
        rnnCoreAnnotationsPredictedHashMap.put(str1, SMX.getRnnCoreAnnotationsPredicted1());
    }
    if (simpleMatrices1 == null) {
        simpleMatricesHashMap.put(str1, SMX.getSimpleMatrices1());
    }
    if (simpleMatricesNodevectors1 == null) {
        simpleMatricesNodevectorsHashMap.put(str1, SMX.getSimpleMatricesNodevectors1());
    }
    if (list1 == null) {
        listHashMap.put(str1, SMX.getList1());
    }
    if (longest1 == null) {
        longestHashMap.put(str1, SMX.getLongest1());
    }
    if (sentimentLongest1 == null) {
        sentimentHashMap.put(str1, SMX.getSentimentLongest1());
    }
    if (imwes1 == null) {
        imwesHashMap.put(str1, SMX.getImwes1());
    }
    if (InflectedCounterNegative1 == null) {
        InflectedCounterNegativeHashMap.put(str1, SMX.getInflectedCounterNegative1());
    }
    if (InflectedCounterPositive1 == null) {
        InflectedCounterPositiveHashMap.put(str1, SMX.getInflectedCounterPositive1());
    }
    if (tokenEntry1 == null) {
        tokenEntryHashMap.put(str1, SMX.getTokenEntry1());
    }
    if (MarkedContinuousCounter1 == null) {
        MarkedContinuousCounterHashMap.put(str1, SMX.getMarkedContinuousCounter1());
    }
    if (UnmarkedPatternCounter1 == null) {
        UnmarkedPatternCounterHashMap.put(str1, SMX.getUnmarkedPatternCounter1());
    }
    if (strTokensIpartForm1 == null) {
        strTokensIpartFormHashMap.put(str1, SMX.getStrTokensIpartForm1());
    }
    if (tokenForms1 == null) {
        tokenFormsHashMap.put(str1, SMX.getTokenForms1());
    }
    if (strTokenEntryGetPOS1 == null) {
        strTokenEntryGetPOSHashMap.put(str1, SMX.getStrTokenEntryGetPOS1());
    }
    if (intTokenEntyCounts1 == null) {
        intTokenEntyCountsHashMap.put(str1, SMX.getIntTokenEntyCounts1());
    }
    if (ITokenTags1 == null) {
        ITokenTagsHashMap.put(str1, SMX.getITokenTags1());
    }
    if (strTokenStems1 == null) {
        strTokenStemsHashMap.put(str1, SMX.getStrTokenStems1());
    }
    if (Anotatorcounter1 == null) {
        AnotatorcounterHashMap.put(str1, SMX.getAnotatorcounter1());
    }
    if (TokensCounter1 == null) {
        TokensCounterHashMap.put(str1, SMX.getTokensCounter1());
    }
    if (entityTokenTags1 == null) {
        entityTokenTagsHashMap.put(str1, SMX.getEntityTokenTags1());
    }
    if (nerEntities1 == null) {
        nerEntitiesHashMap.put(str1, SMX.getNerEntities1());
    }
    if (nerEntitiesType1 == null) {
        nerEntitiesTypeHashMap.put(str1, SMX.getNerEntitiesType1());
    }
    if (stopWordToken1 == null) {
        stopWordTokenHashMap.put(str1, SMX.getStopWordToken1());
    }
    if (stopWordLemma1 == null) {
        stopWordLemmaHashMap.put(str1, SMX.getStopWordLemma1());
    }
    if (PairCounter1 == null) {
        PairCounterHashMap.put(str1, SMX.getPairCounter1());
    }
    return SMX;
}
/**
 * Task submitted to the completion service by getResponseFutures.
 *
 * <p>An instance captures every argument of one getReponseFuturesHelper invocation in final
 * fields; {@link #call()} forwards them unchanged, in the same positional order. The class is
 * a non-static inner class because call() invokes getReponseFuturesHelper on the enclosing
 * Datahandler instance.
 */
private class get_res implements Callable<SentimentAnalyzerTest> {
// The two strings handed to getReponseFuturesHelper as strF and str1.
private final String strF;
private final String str1;
// The two pipelines handed through to the helper.
private final StanfordCoreNLP stanfordCoreNLP;
private final StanfordCoreNLP stanfordCoreNLPSentiment;
// Annotation data prepared by the caller for strF.
private final List<CoreMap> coreMaps1;
private final Annotation strAnno;
private final Annotation strAnnoSentiment;
private final CoreDocument coreDocument;
// Remaining "...F" values; each is passed through to the helper as is (may be null).
private final Integer tokenizeCountingF;
private final List<List<TaggedWord>> taggedWordListF;
private final ArrayList<TypedDependency> typedDependenciesF;
private final ArrayList<Integer> rnnCoreAnnotationsPredictedF;
private final ArrayList<SimpleMatrix> simpleMatricesF;
private final ArrayList<SimpleMatrix> simpleMatricesNodevectorsF;
private final List<String> listF;
private final Integer longestF;
private final List<CoreMap> sentencesF;
private final List<CoreMap> sentencesSentimentF;
private final ArrayList<Tree> treesF;
private final ArrayList<GrammaticalStructure> grammaticalStructuresF;
private final Integer sentimentLongestF;
private final List<IMWE<IToken>> imwesF;
private final Integer inflectedCounterNegativeF;
private final Integer inflectedCounterPositiveF;
private final ArrayList<String> tokenEntryF;
private final Integer unmarkedPatternCounterF;
private final ArrayList<String> strTokensIpartFormF;
private final ArrayList<String> tokenFormsF;
private final ArrayList<Integer> intTokenEntyCountsF;
private final Integer markedContinuousCounterF;
private final ArrayList<String> iTokenTagsF;
private final ArrayList<String> strTokenEntryGetPOSF;
private final ArrayList<String> retrieveTGWListF;
private final Integer pairCounterF;
private final Integer tokensCounterF;
private final ArrayList<String> stopWordLemmaF;
private final ArrayList<String> nerEntitiesF;
private final ArrayList<String> stopWordTokenF;
private final ArrayList<String> entityTokenTagsF;
private final ArrayList<String> nerEntitiesTypeF;
private final Integer anotatorcounterF;
private final ArrayList<String> strTokenStemsF;
/**
 * Stores every argument in the field of the same name. The parameter order matches the
 * parameter order of getReponseFuturesHelper.
 */
public get_res(String strF, String str1, StanfordCoreNLP stanfordCoreNLP, StanfordCoreNLP stanfordCoreNLPSentiment, List<CoreMap> coreMaps1, Annotation strAnno, Annotation strAnnoSentiment, CoreDocument coreDocument, Integer tokenizeCountingF, List<List<TaggedWord>> taggedWordListF, ArrayList<TypedDependency> typedDependenciesF, ArrayList<Integer> rnnCoreAnnotationsPredictedF, ArrayList<SimpleMatrix> simpleMatricesF, ArrayList<SimpleMatrix> simpleMatricesNodevectorsF, List<String> listF, Integer longestF, List<CoreMap> sentencesF, List<CoreMap> sentencesSentimentF, ArrayList<Tree> treesF, ArrayList<GrammaticalStructure> grammaticalStructuresF, Integer sentimentLongestF, List<IMWE<IToken>> imwesF, Integer inflectedCounterNegativeF, Integer inflectedCounterPositiveF, ArrayList<String> tokenEntryF, Integer unmarkedPatternCounterF, ArrayList<String> strTokensIpartFormF, ArrayList<String> tokenFormsF, ArrayList<Integer> intTokenEntyCountsF, Integer markedContinuousCounterF, ArrayList<String> iTokenTagsF, ArrayList<String> strTokenEntryGetPOSF, ArrayList<String> retrieveTGWListF, Integer pairCounterF, Integer tokensCounterF, ArrayList<String> stopWordLemmaF, ArrayList<String> nerEntitiesF, ArrayList<String> stopWordTokenF, ArrayList<String> entityTokenTagsF, ArrayList<String> nerEntitiesTypeF, Integer anotatorcounterF, ArrayList<String> strTokenStemsF) {
this.strF = strF;
this.str1 = str1;
this.stanfordCoreNLP = stanfordCoreNLP;
this.stanfordCoreNLPSentiment = stanfordCoreNLPSentiment;
this.coreMaps1 = coreMaps1;
this.strAnno = strAnno;
this.strAnnoSentiment = strAnnoSentiment;
this.coreDocument = coreDocument;
this.tokenizeCountingF = tokenizeCountingF;
this.taggedWordListF = taggedWordListF;
this.typedDependenciesF = typedDependenciesF;
this.rnnCoreAnnotationsPredictedF = rnnCoreAnnotationsPredictedF;
this.simpleMatricesF = simpleMatricesF;
this.simpleMatricesNodevectorsF = simpleMatricesNodevectorsF;
this.listF = listF;
this.longestF = longestF;
this.sentencesF = sentencesF;
this.sentencesSentimentF = sentencesSentimentF;
this.treesF = treesF;
this.grammaticalStructuresF = grammaticalStructuresF;
this.sentimentLongestF = sentimentLongestF;
this.imwesF = imwesF;
this.inflectedCounterNegativeF = inflectedCounterNegativeF;
this.inflectedCounterPositiveF = inflectedCounterPositiveF;
this.tokenEntryF = tokenEntryF;
this.unmarkedPatternCounterF = unmarkedPatternCounterF;
this.strTokensIpartFormF = strTokensIpartFormF;
this.tokenFormsF = tokenFormsF;
this.intTokenEntyCountsF = intTokenEntyCountsF;
this.markedContinuousCounterF = markedContinuousCounterF;
this.iTokenTagsF = iTokenTagsF;
this.strTokenEntryGetPOSF = strTokenEntryGetPOSF;
this.retrieveTGWListF = retrieveTGWListF;
this.pairCounterF = pairCounterF;
this.tokensCounterF = tokensCounterF;
this.stopWordLemmaF = stopWordLemmaF;
this.nerEntitiesF = nerEntitiesF;
this.stopWordTokenF = stopWordTokenF;
this.entityTokenTagsF = entityTokenTagsF;
this.nerEntitiesTypeF = nerEntitiesTypeF;
this.anotatorcounterF = anotatorcounterF;
this.strTokenStemsF = strTokenStemsF;
}
/**
 * Runs getReponseFuturesHelper on the enclosing instance with the captured arguments.
 *
 * @return the SentimentAnalyzerTest returned by the helper
 * @throws Exception declared by Callable; whatever the helper throws propagates to the caller
 */
@Override
public SentimentAnalyzerTest call() throws Exception {
return getReponseFuturesHelper(strF, str1, stanfordCoreNLP, stanfordCoreNLPSentiment,
coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF
, typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF
, listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF
, imwesF, inflectedCounterNegativeF, inflectedCounterPositiveF, tokenEntryF, unmarkedPatternCounterF
, strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, markedContinuousCounterF, iTokenTagsF
, strTokenEntryGetPOSF, retrieveTGWListF, pairCounterF, tokensCounterF, stopWordLemmaF, nerEntitiesF
, stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, anotatorcounterF, strTokenStemsF);
}
}
public String getResponseFutures(String strF, StanfordCoreNLP stanfordCoreNLP, StanfordCoreNLP stanfordCoreNLPSentiment) {
if (strResponses.getOrDefault(strF, null) == null) {
strResponses.put(strF, new ArrayList<>());
}
Annotation strAnno = new Annotation(strF);
strAnno.compact();
stanfordCoreNLP.annotate(strAnno);
Annotation strAnnoSentiment = new Annotation(strF);
strAnnoSentiment.compact();
stanfordCoreNLPSentiment.annotate(strAnnoSentiment);
Annotation annotation = new Annotation(strF);
stanfordCoreNLP.annotate(annotation);
CoreDocument coreDocument = new CoreDocument(annotation);
Annotation jmweAnnotationF = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strF);
List<CoreMap> coreMaps1 = jmweAnnotationF.get(CoreAnnotations.SentencesAnnotation.class);
Integer tokenizeCountingF = null;
List<List<TaggedWord>> taggedWordListF = null;
java.util.ArrayList<String> retrieveTGWListF = null;
List<CoreMap> sentencesF = null;
List<CoreMap> sentencesSentimentF = null;
java.util.ArrayList<Tree> treesF = null;
ArrayList<GrammaticalStructure> grammaticalStructuresF = null;
java.util.ArrayList<TypedDependency> typedDependenciesF = null;
java.util.ArrayList<Integer> rnnCoreAnnotationsPredictedF = null;
java.util.ArrayList<SimpleMatrix> simpleMatricesF = null;
java.util.ArrayList<SimpleMatrix> simpleMatricesNodevectorsF = null;
List<String> listF = null;
Integer longestF = null;
Integer sentimentLongestF = null;
List<IMWE<IToken>> imwesF = null;
Integer InflectedCounterNegativeF = null;
Integer InflectedCounterPositiveF = null;
ArrayList<String> tokenEntryF = null;
Integer MarkedContinuousCounterF = null;
Integer UnmarkedPatternCounterF = null;
ArrayList<String> strTokensIpartFormF = null;
java.util.ArrayList<String> tokenFormsF = null;
ArrayList<String> strTokenEntryGetPOSF = null;
java.util.ArrayList<Integer> intTokenEntyCountsF = null;
ArrayList<String> ITokenTagsF = null;
java.util.ArrayList<String> strTokenStemsF = null;
Integer AnotatorcounterF = null;
Integer TokensCounterF = null;
java.util.ArrayList<String> entityTokenTagsF = null;
java.util.ArrayList<String> nerEntitiesF = null;
java.util.ArrayList<String> nerEntitiesTypeF = null;
java.util.ArrayList<String> stopWordTokenF = null;
java.util.ArrayList<String> stopWordLemmaF = null;
Integer PairCounterF = null;
ArrayList<String> concurrentRelations = new ArrayList();
StringBuilder SB = new StringBuilder();
List<String> ues_copy = new ArrayList(DataMapper.getAllStrings());
double preRelationUserCounters = -155000.0;
//System.out.println(ues_copy.toString());
ArrayList<Future<SentimentAnalyzerTest>> futures = new ArrayList<>();
for (String str1 : ues_copy) {
if (strF != str1) {
//critical section
Future<SentimentAnalyzerTest> submit = completionService.submit(new get_res(strF, str1, stanfordCoreNLP, stanfordCoreNLPSentiment,
coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF
, typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF
, listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF
, imwesF, InflectedCounterNegativeF, InflectedCounterPositiveF, tokenEntryF, UnmarkedPatternCounterF
, strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, MarkedContinuousCounterF, ITokenTagsF
, strTokenEntryGetPOSF, retrieveTGWListF, PairCounterF, TokensCounterF, stopWordLemmaF, nerEntitiesF
, stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, AnotatorcounterF, strTokenStemsF));
futures.add(submit);
//end of critical section, do the rest sequential.
}
}
int pending = futures.size();
while (pending > 0) {
try {
Future<SentimentAnalyzerTest> completed = completionService.poll(100, TimeUnit.MILLISECONDS);
if (completed != null) {
--pending;
SentimentAnalyzerTest SMX = completed.get();
if (SMX == null) continue;
double scoreRelationLastUserMsg = SMX.getScore();
if (scoreRelationLastUserMsg > preRelationUserCounters) {
preRelationUserCounters = scoreRelationLastUserMsg;
concurrentRelations.add(SMX.getSecondaryString());
}
//this part below should be sequential hopefully
if (tokenizeCountingF == null) {
tokenizeCountingF = SMX.getTokenizeCountingF();
}
if (taggedWordListF == null) {
taggedWordListF = SMX.getTaggedWordListF();
}
if (typedDependenciesF == null) {
typedDependenciesF = SMX.getTypedDependenciesF();
}
if (rnnCoreAnnotationsPredictedF == null) {
rnnCoreAnnotationsPredictedF = SMX.getRnnCoreAnnotationsPredictedF();
}
if (simpleMatricesF == null) {
simpleMatricesF = SMX.getSimpleMatricesF();
}
if (simpleMatricesNodevectorsF == null) {
simpleMatricesNodevectorsF = SMX.getSimpleMatricesNodevectorsF();
}
if (listF == null) {
listF = SMX.getListF();
}
if (longestF == null) {
longestF = SMX.getLongestF();
}
if (sentencesF == null) {
sentencesF = SMX.getSentencesF();
}
if (sentencesSentimentF == null) {
sentencesSentimentF = SMX.getSentencesSentimentF();
}
if (treesF == null) {
treesF = SMX.getTreesF();
}
if (grammaticalStructuresF == null) {
grammaticalStructuresF = SMX.getGrammaticalStructuresF();
}
if (sentimentLongestF == null) {
sentimentLongestF = SMX.getSentimentLongestF();
}
if (imwesF == null) {
imwesF = SMX.getImwesF();
}
if (InflectedCounterNegativeF == null) {
InflectedCounterNegativeF = SMX.getInflectedCounterNegativeF();
}
if (InflectedCounterPositiveF == null) {
InflectedCounterPositiveF = SMX.getInflectedCounterPositiveF();
}
if (tokenEntryF == null) {
tokenEntryF = SMX.getTokenEntryF();
}
if (UnmarkedPatternCounterF == null) {
UnmarkedPatternCounterF = SMX.getUnmarkedPatternCounterF();
}
if (strTokensIpartFormF == null) {
strTokensIpartFormF = SMX.getStrTokensIpartFormF();
}
if (tokenFormsF == null) {
tokenFormsF = SMX.getTokenFormsF();
}
if (intTokenEntyCountsF == null) {
intTokenEntyCountsF = SMX.getIntTokenEntyCountsF();
}
if (MarkedContinuousCounterF == null) {
MarkedContinuousCounterF = SMX.getMarkedContinuousCounterF();
}
if (ITokenTagsF == null) {
ITokenTagsF = SMX.getITokenTagsF();
}
if (strTokenEntryGetPOSF == null) {
strTokenEntryGetPOSF = SMX.getStrTokenEntryGetPOSF();
}
if (retrieveTGWListF == null) {
retrieveTGWListF = SMX.getRetrieveTGWListF();
}
if (PairCounterF == null) {
PairCounterF = SMX.getPairCounterF();
}
if (TokensCounterF == null) {
TokensCounterF = SMX.getTokensCounterF();
}
if (stopWordLemmaF == null) {
stopWordLemmaF = SMX.getStopWordLemmaF();
}
if (nerEntitiesF == null) {
nerEntitiesF = SMX.getNerEntitiesF();
}
if (stopWordTokenF == null) {
stopWordTokenF = SMX.getStopWordTokenF();
}
if (entityTokenTagsF == null) {
entityTokenTagsF = SMX.getEntityTokenTagsF();
}
if (nerEntitiesTypeF == null) {
nerEntitiesTypeF = SMX.getNerEntitiesTypeF();
}
if (AnotatorcounterF == null) {
AnotatorcounterF = SMX.getAnotatorcounterF();
}
if (strTokenStemsF == null) {
strTokenStemsF = SMX.getStrTokenStemsF();
}
}
} catch (InterruptedException e) {
//throw new RuntimeException(e);
pending = 0;
Properties prop = new Properties();
String fileName = "app.config";
try (FileInputStream fis = new FileInputStream(fileName)) {
prop.load(fis);
} catch (FileNotFoundException ex) {
} catch (IOException ex) {
}
System.out.printf(Arrays.toString(e.getStackTrace()));
pool.shutdown();
pool = Executors.newFixedThreadPool(Integer.valueOf(prop.getProperty("app.thread_count")));
completionService = new ExecutorCompletionService(pool);
} catch (ExecutionException e) {
//throw new RuntimeException(e);
pending = 0;
Properties prop = new Properties();
String fileName = "app.config";
try (FileInputStream fis = new FileInputStream(fileName)) {
prop.load(fis);
} catch (FileNotFoundException ex) {
} catch (IOException ex) {
}
System.out.printf(Arrays.toString(e.getStackTrace()));
pool.shutdown();
pool = Executors.newFixedThreadPool(Integer.valueOf(prop.getProperty("app.thread_count")));
completionService = new ExecutorCompletionService(pool);
}
}
int cacheRequirement = 8500;
if (preRelationUserCounters > cacheRequirement && !ues_copy.contains(strF) && filterContent(strF)) {
DataMapper.InsertMYSQLStrings(strF);
DataMapper.checkStringsToDelete();
}
double randomLenghtPermit = strF.length() * (Math.random() * Math.random() * (Math.random() * 10));
Collections.reverse(concurrentRelations);
ArrayList<String> mysqlUpdateLastUsed = new ArrayList();
if (!concurrentRelations.isEmpty()) {
for (String secondaryRelation : concurrentRelations) {
if (SB.toString().length() > randomLenghtPermit && !SB.toString().isEmpty()) {
break;
}
ArrayList<String> orDefault = strResponses.getOrDefault(strF, null);
boolean skip = false;
for (String strItr : orDefault) {
if (secondaryRelation.equalsIgnoreCase(strItr)) {
skip = true;
if (orDefault.size() + 3 >= concurrentRelations.size()) {
orDefault = new ArrayList<>();
strResponses.put(strF, orDefault);
} else if (orDefault.size() > 5) {
double v = Math.random() * 10;
if (v > 5.6) {
orDefault = new ArrayList<>();
strResponses.put(strF, orDefault);
}
}
break;
}
}
if (skip) continue;
if (!SB.isEmpty()) {
String testSTR = SB.toString() + " " + secondaryRelation;
SentimentAnalyzerTest SMX = getReponseFuturesHelper(strF, testSTR, stanfordCoreNLP, stanfordCoreNLPSentiment,
coreMaps1, strAnno, strAnnoSentiment, coreDocument, tokenizeCountingF, taggedWordListF
, typedDependenciesF, rnnCoreAnnotationsPredictedF, simpleMatricesF, simpleMatricesNodevectorsF
, listF, longestF, sentencesF, sentencesSentimentF, treesF, grammaticalStructuresF, sentimentLongestF
, imwesF, InflectedCounterNegativeF, InflectedCounterPositiveF, tokenEntryF, UnmarkedPatternCounterF
, strTokensIpartFormF, tokenFormsF, intTokenEntyCountsF, MarkedContinuousCounterF, ITokenTagsF
, strTokenEntryGetPOSF, retrieveTGWListF, PairCounterF, TokensCounterF, stopWordLemmaF, nerEntitiesF
, stopWordTokenF, entityTokenTagsF, nerEntitiesTypeF, AnotatorcounterF, strTokenStemsF);
double scoreRelationLastUserMsg = SMX.getScore();
if (preRelationUserCounters > scoreRelationLastUserMsg) {
break;
}
}
SB.append(secondaryRelation).append(" ");
mysqlUpdateLastUsed.add(secondaryRelation);
orDefault.add(secondaryRelation);
strResponses.put(strF, orDefault);
}
}
if (SB.toString().isEmpty()) {
return "failure, preventing stuckness";
}
DataMapper.updateLastUsed(mysqlUpdateLastUsed);
return SB.toString();
}
/**
 * Asks {@code PipelineJMWESingleton.INSTANCE} for the JMWE annotation of the given text
 * and records the result in {@code jmweAnnotationCache} under that same text.
 *
 * @param str1 text to annotate; also used as the cache key
 */
private void getJMWEAnnotation(String str1) {
    jmweAnnotationCache.put(str1, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str1));
}
/**
 * Produces the reply for an incoming message.
 * <p>
 * The message is first passed through {@code trimString}, then handed to
 * {@code getResponseFutures}. Unless {@code ingameResponse} is true, the result is
 * additionally run through {@code checkPersonPresentInSentence}.
 *
 * @param str                      raw incoming message
 * @param personName               name forwarded to {@code checkPersonPresentInSentence}
 * @param stanfordCoreNLP          pipeline forwarded to the helpers
 * @param stanfordCoreNLPSentiment second pipeline forwarded to the helpers
 * @param ingameResponse           when true the person check is skipped; unboxed, so must not be null
 * @return the reply text
 */
public String getResponseMsg(String str, String personName, StanfordCoreNLP stanfordCoreNLP,
        StanfordCoreNLP stanfordCoreNLPSentiment, Boolean ingameResponse) {
    final String cleanedInput = trimString(str);
    final String reply = getResponseFutures(cleanedInput, stanfordCoreNLP, stanfordCoreNLPSentiment);
    if (ingameResponse) {
        // In-game replies go out exactly as generated.
        return reply;
    }
    return checkPersonPresentInSentence(personName, reply, cleanedInput, stanfordCoreNLP,
            stanfordCoreNLPSentiment);
}
/**
 * Adjusts a generated response that mentions a PERSON entity other than the addressee.
 * <p>
 * Both texts are annotated ({@code responseMsg} with {@code stanfordCoreNLP},
 * {@code userLastMessage} with {@code stanfordCoreNLPSentiment}). For the first PERSON
 * mention in the response whose text differs from {@code personName} (and provided
 * {@code personName} does not contain ten or more digits), an entity mention taken from the
 * user's last message is spliced in front of that PERSON mention, and {@code personName}
 * is appended to the end. When several mentions of the last message qualify, the last one wins,
 * because each splice starts again from the unmodified response.
 * <p>
 * Fixes relative to the previous version:
 * <ul>
 *   <li>String contents are compared with {@code equals}; the former {@code ==}/{@code !=}
 *       compared object identity, so the checks did not test the text itself.</li>
 *   <li>The numeric filter used {@code Character.isDigit(Integer.parseInt(text))}, which threw
 *       {@code NumberFormatException} for every non-numeric mention (aborting via the catch-all)
 *       and tested the parsed value as a code point otherwise. It now simply skips mentions
 *       whose trimmed text is empty or consists only of digits.</li>
 *   <li>The mention's {@code text()} is concatenated explicitly instead of relying on the
 *       mention object's {@code toString()}.</li>
 * </ul>
 * Any exception is reported on stdout and the response is returned unchanged (best effort).
 *
 * @param personName               name of the person being answered
 * @param responseMsg              generated response to inspect
 * @param userLastMessage          the user's last message, source of replacement mentions
 * @param stanfordCoreNLP          pipeline used to annotate {@code responseMsg}
 * @param stanfordCoreNLPSentiment pipeline used to annotate {@code userLastMessage}
 * @return the adjusted response, or {@code responseMsg} unchanged when nothing applies or an error occurs
 */
private String checkPersonPresentInSentence(String personName, String responseMsg, String userLastMessage,
        StanfordCoreNLP stanfordCoreNLP, StanfordCoreNLP stanfordCoreNLPSentiment) {
    try {
        CoreDocument responseDocument = new CoreDocument(responseMsg);
        CoreDocument lastMessageDocument = new CoreDocument(userLastMessage);
        stanfordCoreNLP.annotate(responseDocument);
        stanfordCoreNLPSentiment.annotate(lastMessageDocument);

        List<CoreEntityMention> responseMentions = responseDocument.entityMentions();
        if (responseMentions == null) {
            return responseMsg;
        }

        // Compiled once per call instead of once per PERSON mention.
        Pattern tenOrMoreDigits = Pattern.compile("(.*?\\d){10,}");

        for (CoreEntityMention em : responseMentions) {
            if (!"PERSON".equals(em.entityType())) {
                continue;
            }
            String emText = em.text();
            boolean nameHasManyDigits = tenOrMoreDigits.matcher(personName).matches();
            if (emText.equals(personName) || nameHasManyDigits) {
                continue;
            }

            String str = responseMsg;
            List<CoreEntityMention> lastMentions = lastMessageDocument.entityMentions();
            if (lastMentions != null) {
                // Position of the PERSON mention inside the response; looked up once.
                int mentionStart = responseMsg.indexOf(emText);
                for (CoreEntityMention emLastMsg : lastMentions) {
                    String lastText = emLastMsg.text();
                    // "\\d*" matches empty or all-digit text: such mentions are not spliced in.
                    boolean blankOrNumeric = lastText.trim().matches("\\d*");
                    if (!emText.equals(lastText) && !blankOrNumeric) {
                        str = responseMsg.substring(0, mentionStart) + " "
                                + lastText + " " + responseMsg.substring(mentionStart);
                    }
                }
            }
            str += personName;
            return str;
        }
    } catch (Exception e) {
        // Deliberately best effort: a failed adjustment must not lose the response.
        System.out.println("SCUFFED JAYZ: " + e.getMessage());
    }
    return responseMsg;
}
/**
 * Decides whether a message passes the content filter.
 * <p>
 * A message passes when its raw length exceeds 3 characters and, after trimming
 * surrounding whitespace, it is still longer than 2 characters and does not begin with "!".
 *
 * @param str message to check; must not be null
 * @return {@code true} when the message passes, {@code false} otherwise
 */
public boolean filterContent(String str) {
    // The raw (untrimmed) length gate comes first; an empty string fails it as well.
    if (str.length() <= 3) {
        return false;
    }
    final String trimmed = str.trim();
    return trimmed.length() > 2 && !trimmed.startsWith("!");
}
/**
 * Runs the supplied pipeline over the given text, wraps the annotated result in a
 * {@code CoreDocument} and stores it in {@code coreDocumentAnnotationCache} keyed by the text.
 *
 * @param pipeline pipeline used to annotate the text
 * @param str      text to annotate; also used as the cache key
 */
public void getCoreDocumentsSuggested(StanfordCoreNLP pipeline, String str) {
    final Annotation annotated = new Annotation(str);
    // Annotate before wrapping: the document is built from the already-annotated object.
    pipeline.annotate(annotated);
    coreDocumentAnnotationCache.put(str, new CoreDocument(annotated));
}
}