From b1aa449b59b03b3a0d9c088a402ea19033e671c7 Mon Sep 17 00:00:00 2001 From: jenzur Date: Thu, 9 May 2019 23:00:27 +0200 Subject: [PATCH] you know that feeling when you can't tell if you forgot to add something --- .../src/main/java/DataLayer/DataMapper.java | 4 +- .../main/java/FunctionLayer/Datahandler.java | 441 ++++++++--------- .../java/FunctionLayer/SimilarityMatrix.java | 6 - .../StanfordParser/SentimentAnalyzerTest.java | 448 +++++++++++++----- .../java/FunctionLayer/StopwordAnnotator.java | 108 +++++ .../PresentationLayer/DiscordHandler.java | 5 +- 6 files changed, 637 insertions(+), 375 deletions(-) create mode 100644 ArtificialAutism/src/main/java/FunctionLayer/StopwordAnnotator.java diff --git a/ArtificialAutism/src/main/java/DataLayer/DataMapper.java b/ArtificialAutism/src/main/java/DataLayer/DataMapper.java index ef992cb9..96a234fa 100644 --- a/ArtificialAutism/src/main/java/DataLayer/DataMapper.java +++ b/ArtificialAutism/src/main/java/DataLayer/DataMapper.java @@ -137,7 +137,7 @@ public class DataMapper { CloseConnections(l_pStatement, l_rsSearch, l_cCon); } } - + /* public static LinkedHashMap> getAllRelationScores() { int count = getSementicsDBRows(); LinkedHashMap> LHMSMX = new LinkedHashMap(); @@ -173,7 +173,7 @@ public class DataMapper { } return LHMSMX; } - + */ public static ConcurrentMap getHLstatsMessages() { ConcurrentMap hlStatsMessages = new MapMaker().concurrencyLevel(2).makeMap(); try (Connection l_cCon = DBCPDataSourceHLstats.getConnection()) { diff --git a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java index 3f4ce460..381bf8b0 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java @@ -24,17 +24,11 @@ import java.sql.SQLException; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import static java.util.Collections.reverseOrder; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import static java.util.Map.Entry.comparingByValue; import java.util.Properties; -import java.util.Random; -import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; @@ -43,9 +37,10 @@ import java.util.concurrent.ForkJoinPool; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; -import static java.util.stream.Collectors.toList; +import java.util.stream.Collectors; /** * @@ -62,11 +57,11 @@ public class Datahandler { private static Annotation strAnnoJMWE; private static CoreDocument coreDoc; private static ConcurrentMap> sentenceRelationMap; - private volatile boolean refreshMatrixFromDB; - private static volatile int secondaryIterator = 0; private static volatile Double preRelationCounters = 0.0; private static volatile Double preRelationUserCounters = 0.0; - private final ConcurrentMap stringCache; + private static final ConcurrentMap stringCache = new MapMaker().concurrencyLevel(2).makeMap(); + private static int positiveRelationCounter = 0; + private static int negativeRelationCounter = 0; private static ConcurrentMap pipelineAnnotationCache; private static ConcurrentMap pipelineSentimentAnnotationCache; private
static ConcurrentMap jmweAnnotationCache; @@ -75,8 +70,7 @@ public class Datahandler { private static ConcurrentMap conversationUserMatchMap; private LinkedHashMap> lHMSMX = new LinkedHashMap(); private final Stopwatch stopwatch; - private final Stopwatch stopwatch1; - private ForkJoinPool executor; + private static final ForkJoinPool executor = instantiateExecutor(); private static String similar = ""; private static String shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"; private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; @@ -85,7 +79,7 @@ public class Datahandler { private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz"; private static String nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz"; private static String nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz"; - private static String nerModelCaseless = "edu/stanford/nlp/models/truecase/truecasing.fast.caseless.qn.ser.gz"; + private static final String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with"; private static MaxentTagger tagger; private static String[] options = {"-maxLength", "100"}; private static Properties props = new Properties(); @@ -95,13 +89,11 @@ public class Datahandler { private static TreebankLanguagePack tlp; private static AbstractSequenceClassifier classifier; // set up Stanford CoreNLP pipeline - private static StanfordCoreNLP pipeline; + private static final StanfordCoreNLP pipeline = getPipeLineSetUp(); private static StanfordCoreNLP pipelineSentiment; public Datahandler() { this.stopwatch = Stopwatch.createUnstarted(); - this.stopwatch1 = Stopwatch.createStarted(); - this.stringCache = new MapMaker().concurrencyLevel(2).makeMap(); this.jmweAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); this.pipelineAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); this.pipelineSentimentAnnotationCache = new MapMaker().concurrencyLevel(2).makeMap(); @@ -111,9 +103,27 @@ public class Datahandler { this.conversationUserMatchMap = new MapMaker().concurrencyLevel(2).makeMap(); } + private static StanfordCoreNLP getPipeLineSetUp() { + props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse"); + props.setProperty("parse.model", shiftReduceParserPath); + props.setProperty("parse.maxlen", "90"); + props.setProperty("parse.binaryTrees", "true"); + props.setProperty("threads", "25"); + props.setProperty("pos.maxlen", "90"); + props.setProperty("tokenize.maxlen", "90"); + props.setProperty("ssplit.maxlen", "90"); + props.setProperty("lemma.maxlen", "90"); + props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3); + props.setProperty("ner.combinationMode", "HIGH_RECALL"); + props.setProperty("regexner.ignorecase", "true"); + props.setProperty("ner.fine.regexner.ignorecase", "true"); + props.setProperty("tokenize.options", "untokenizable=firstDelete"); + return new StanfordCoreNLP(props); + } + public void shiftReduceParserInitiate() { //got 8 cores - CountDownLatch cdl = new CountDownLatch(3); + CountDownLatch cdl = new CountDownLatch(2); new Thread(() -> { try { classifier = CRFClassifier.getClassifierNoExceptions(nerModel); @@ -130,30 +140,14 @@ public class Datahandler { propsSentiment.setProperty("pos.maxlen", "90"); 
propsSentiment.setProperty("tokenize.maxlen", "90"); propsSentiment.setProperty("ssplit.maxlen", "90"); - propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment"); //coref too expensive memorywise, does it need depparse? - propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep"); + propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment,lemma,stopword"); //coref too expensive memorywise + propsSentiment.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator"); + propsSentiment.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); + propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete"); pipelineSentiment = new StanfordCoreNLP(propsSentiment); tagger = new MaxentTagger(taggerPath); cdl.countDown(); }).start(); - new Thread(() -> { - props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,ner"); - props.setProperty("parse.model", shiftReduceParserPath); - props.setProperty("parse.maxlen", "90"); - props.setProperty("parse.binaryTrees", "true"); - props.setProperty("threads", "25"); - props.setProperty("pos.maxlen", "90"); - props.setProperty("tokenize.maxlen", "90"); - props.setProperty("ssplit.maxlen", "90"); - props.setProperty("lemma.maxlen", "90"); - props.setProperty("ner.model", nerModel + "," + nerModel2 + "," + nerModel3); - props.setProperty("ner.combinationMode", "HIGH_RECALL"); - props.setProperty("regexner.ignorecase", "true"); - props.setProperty("ner.fine.regexner.ignorecase", "true"); - props.setProperty("tokenize.options", "untokenizable=firstKeep"); //firstKeep //firstDelete - pipeline = new StanfordCoreNLP(props); - cdl.countDown(); - }).start(); lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options); tlp = lp.getOp().langpack(); gsf = tlp.grammaticalStructureFactory(); @@ -181,8 +175,9 @@ public class Datahandler { } } - public void instantiateExecutor() { - this.executor = new ForkJoinPool(25, + private static ForkJoinPool instantiateExecutor() { + //Runtime.getRuntime().availableProcessors() or static value like 25 + return new ForkJoinPool(Runtime.getRuntime().availableProcessors(), ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, false); } @@ -219,7 +214,7 @@ public class Datahandler { try { DataMapper.createTables(); stringCache.putAll(getCache()); - lHMSMX = DataMapper.getAllRelationScores(); + // lHMSMX = DataMapper.getAllRelationScores(); } catch (CustomError ex) { Logger.getLogger(Datahandler.class .getName()).log(Level.SEVERE, null, ex); @@ -235,8 +230,9 @@ public class Datahandler { hlStatsMessages.put(str, hlStatsMessages.size()); } } - hlStatsMessages.keySet().parallelStream().forEach(str -> { - if (!str.startsWith("!")) { + int capacity = 35000; + hlStatsMessages.keySet().forEach(str -> { + if (!str.startsWith("!") && MessageResponseHandler.getStr().values().size() < capacity) { String orElse = strCacheLocal.values().parallelStream().filter(e -> e.equals(str)).findAny().orElse(null); if (orElse == null) { MessageResponseHandler.getMessage(str); @@ -263,10 +259,8 @@ public class Datahandler { Annotationspipeline.put(str, strAnno); Annotation strAnno2 = new Annotation(str); AnnotationspipelineSentiment.put(str, strAnno2); - CoreDocument CD = new CoreDocument(str); - pipeline.annotate(CD); - coreDocumentAnnotationCache.put(str, CD); }); + ConcurrentMap coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(stringCache.values(), pipeline); pipeline.annotate(Annotationspipeline.values()); 
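// The "stopword" annotator wired in just above through
// "customAnnotatorClass.stopword" is loaded reflectively: CoreNLP looks up the
// named class and calls a public (String name, Properties props) constructor
// with the pipeline's own Properties. A minimal, self-contained sketch of that
// contract follows, assuming the CoreNLP 3.9-style Annotator interface; the
// class name, the value behind the STOPWORDS_LIST key, and the annotate() body
// are illustrative stand-ins, not the 108-line FunctionLayer.StopwordAnnotator
// that this patch actually adds.

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

public class StopwordAnnotatorSketch implements Annotator {

    public static final String STOPWORDS_LIST = "stopword-list"; // assumed property key

    private final Set<String> stopwords;

    // CoreNLP invokes this constructor reflectively with the annotator's name
    // and the pipeline Properties, which carry the comma-separated stop list.
    public StopwordAnnotatorSketch(String name, Properties props) {
        this.stopwords = new HashSet<>(
                Arrays.asList(props.getProperty(STOPWORDS_LIST, "").toLowerCase().split(",")));
    }

    @Override
    public void annotate(Annotation annotation) {
        // Illustrative behavior: drop stop-word tokens before later annotators run.
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens != null) {
            tokens.removeIf(token -> stopwords.contains(token.word().toLowerCase()));
        }
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.emptySet();
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        Set<Class<? extends CoreAnnotation>> required = new HashSet<>();
        required.add(CoreAnnotations.TokensAnnotation.class);
        return required;
    }
}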
pipelineSentiment.annotate(AnnotationspipelineSentiment.values()); Annotationspipeline.entrySet().forEach(pipelineEntry -> { @@ -275,210 +269,130 @@ public class Datahandler { AnnotationspipelineSentiment.entrySet().forEach(pipelineEntry -> { pipelineSentimentAnnotationCache.put(pipelineEntry.getKey(), pipelineEntry.getValue()); }); + coreDocumentpipelineMap.entrySet().stream().forEach(CD -> { + coreDocumentAnnotationCache.put(CD.getKey(), CD.getValue()); + }); } } - public synchronized void updateMatrixes() { - refreshMatrixFromDB = false; - if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) { - refreshMatrixFromDB = true; - lHMSMX = DataMapper.getAllRelationScores(); - stopwatch1.reset(); - } - if (stringCache.values().size() > 10 && !refreshMatrixFromDB) { - ConcurrentMap stringCachelocal = stringCache; - int selectUpdate = -1; - int iteratorCap = 25; - LinkedHashMap> LHMSMXLocal = lHMSMX; - int ij2 = 0; - for (String str : stringCachelocal.values()) { - LinkedHashMap orDefault = LHMSMXLocal.getOrDefault(str, null); - if (orDefault == null) { - selectUpdate = ij2; - break; - } - ij2++; - } - //secondaryIterator - if (selectUpdate == -1 || selectUpdate + 1 >= stringCachelocal.size() || stringCachelocal.get(selectUpdate) == null) { - Integer iterator = 0; - while (iterator == 0) { - if (secondaryIterator >= stringCachelocal.size()) { - secondaryIterator = 0; - } - String get = stringCachelocal.get(secondaryIterator); - if (get == null) { - secondaryIterator++; - } else { - selectUpdate = secondaryIterator; - iterator++; - } - } - secondaryIterator++; - } - String getStringCacheStr = stringCachelocal.get(selectUpdate); - ConcurrentMap matrixUpdateMap = new MapMaker().concurrencyLevel(2).makeMap(); - ConcurrentMap> futures = new MapMaker().concurrencyLevel(2).makeMap(); - stringCachelocal.values().forEach((str1) -> { - if (!getStringCacheStr.equals(str1)) { - boolean present = false; - LinkedHashMap orDefault = lHMSMX.getOrDefault(getStringCacheStr, null); - if (orDefault != null) { - Collection strkeys = orDefault.keySet(); - for (String strkey : strkeys) { - if (strkey.equals(str1)) { - present = true; - break; - } - } - } - if (!present) { - orDefault = lHMSMX.getOrDefault(str1, null); - if (orDefault != null) { - Collection strkeys = orDefault.keySet(); - for (String strkey : strkeys) { - if (strkey.equals(getStringCacheStr)) { - present = true; - break; - } - } - } - } - if (!present) { - LinkedHashMap orDefault1 = lHMSMX.getOrDefault(getStringCacheStr, null); - if (orDefault1 == null) { - orDefault1 = new LinkedHashMap(); - } - orDefault1.put(str1, 0.0); - lHMSMX.put(getStringCacheStr, orDefault1); - SimilarityMatrix SMX = new SimilarityMatrix(getStringCacheStr, str1); - Callable worker = new SentimentAnalyzerTest(getStringCacheStr, str1, SMX, jmweAnnotationCache.get(getStringCacheStr), - jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(getStringCacheStr), pipelineAnnotationCache.get(str1), - pipelineSentimentAnnotationCache.get(getStringCacheStr), pipelineSentimentAnnotationCache.get(str1), - coreDocumentAnnotationCache.get(getStringCacheStr), coreDocumentAnnotationCache.get(str1)); - futures.put(futures.size() + 1, executor.submit(worker)); - } - } - }); - System.out.println("finished worker assignment, futures size: " + futures.size() + "\n"); - futures.values().parallelStream().forEach((future) -> { - SimilarityMatrix SMX = new SimilarityMatrix("", ""); - try { - SMX = future.get(5, TimeUnit.SECONDS); - } catch (InterruptedException | ExecutionException 
| TimeoutException ex) { - Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex); - SMX = null; - } - if (SMX != null) { - LinkedHashMap getFuture = lHMSMX.getOrDefault(SMX.getPrimaryString(), null); - getFuture.put(SMX.getSecondaryString(), SMX.getDistance()); - lHMSMX.put(SMX.getPrimaryString(), getFuture); - matrixUpdateMap.put(matrixUpdateMap.size() + 1, SMX); - } - }); - - new Thread(() -> { - try { - if (!matrixUpdateMap.isEmpty()) { - DataMapper.insertSementicMatrixes(matrixUpdateMap); - System.out.println("finished datamapper semetic insert"); - } - } catch (CustomError ex) { - Logger.getLogger(Datahandler.class - .getName()).log(Level.SEVERE, null, ex); - } - }).start(); - } - } - - /** - * sentenceRelationMap only catches prior strF or already computed results - * from same operation, so alot of times its null if msg from other channel - * - * @param strmap - * @return - */ - public ConcurrentMap removeNonSensicalStrings(ConcurrentMap strmap) { - ConcurrentMap strmapreturn = new MapMaker().concurrencyLevel(2).makeMap(); + private final static ConcurrentMap cachedReturnEvaluations(ConcurrentMap getMap, ConcurrentMap strmapreturn, String str) { + List dummy = new ArrayList(); int relationCap = 20; - ConcurrentMap strCacheLocal = stringCache.size() < 150 ? strmap : stringCache; - ConcurrentMap localJMWEMap = getMultipleJMWEAnnotation(strmap.values()); - ConcurrentMap localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values()); - ConcurrentMap localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values()); - ConcurrentMap localCoreDocumentMap = getMultipleCoreDocuments(strmap.values()); - for (String str : strmap.values()) { - int positiveRelationCounter = 0; - int negativeRelationCounter = 0; - ConcurrentMap> futures = new MapMaker().concurrencyLevel(2).makeMap(); - ConcurrentMap getMap = sentenceRelationMap.get(str); - if (getMap == null) { - ConcurrentMap mapUdate = new MapMaker().concurrencyLevel(2).makeMap(); - for (String str1 : strCacheLocal.values()) { - if (!str.equals(str1)) { - SimilarityMatrix SMX = new SimilarityMatrix(str, str1); - Callable worker; - if (stringCache.size() < 150) { - worker = new SentimentAnalyzerTest(str, str1, SMX, - localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str), - localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str), - localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1)); - futures.put(futures.size() + 1, executor.submit(worker)); - } else { - worker = new SentimentAnalyzerTest(str, str1, SMX, - localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str), - pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str), - pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1)); - } - futures.put(futures.size() + 1, executor.submit(worker)); + getMap.entrySet().parallelStream().forEach(mapValues -> { + if (!strmapreturn.values().contains(str) && dummy.isEmpty()) { + Double scoreRelationNewMsgToRecentMsg = mapValues.getValue(); + System.out.println("strmapreturn size: " + strmapreturn.size() + "\nscoreRelationNewMsgToRecentMsg: " + + scoreRelationNewMsgToRecentMsg + "\n"); + if (scoreRelationNewMsgToRecentMsg >= 4100.0) { + positiveRelationCounter++; + if (positiveRelationCounter > relationCap) { + strmapreturn.put(strmapreturn.size() + 1, str); + } + } else if 
(scoreRelationNewMsgToRecentMsg <= -5000.0) { + negativeRelationCounter++; + if (negativeRelationCounter > relationCap * 2) { + dummy.add(1); } } - for (Future future : futures.values()) { - try { - SimilarityMatrix getSMX = future.get(5, TimeUnit.SECONDS); - Double scoreRelationNewMsgToRecentMsg = getSMX.getDistance(); - System.out.println("strmapreturn size: " + strmapreturn.size() + "\nscoreRelationNewMsgToRecentMsg: " - + scoreRelationNewMsgToRecentMsg + "\n"); - mapUdate.put(getSMX.getSecondaryString(), getSMX.getDistance()); - if (scoreRelationNewMsgToRecentMsg >= 5000.0) { - positiveRelationCounter++; - if (positiveRelationCounter > relationCap) { - strmapreturn.put(strmapreturn.size() + 1, str); - break; - } - } else if (scoreRelationNewMsgToRecentMsg <= -5000.0) { - negativeRelationCounter++; - if (negativeRelationCounter > relationCap * 2) { - break; - } - } - } catch (InterruptedException | ExecutionException | TimeoutException ex) { - Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex); - } - } - sentenceRelationMap.put(str, mapUdate); - } else { - for (Entry mapValues : getMap.entrySet()) { - Double scoreRelationNewMsgToRecentMsg = mapValues.getValue(); + } + }); + return strmapreturn; + } + + private final static ConcurrentMap futuresReturnOverallEvaluation(ConcurrentMap> futures, ConcurrentMap strmapreturn, String str) { + int relationCap = 20; + final ConcurrentMap mapUdate = new MapMaker().concurrencyLevel(2).makeMap(); + final List dummy = new ArrayList(); + futures.values().parallelStream().forEach(future -> { + if (!strmapreturn.values().contains(str) && dummy.isEmpty()) { + final SimilarityMatrix getSMX = retrieveFutureSMX(future); + if (getSMX != null) { + Double scoreRelationNewMsgToRecentMsg = getSMX.getDistance(); System.out.println("strmapreturn size: " + strmapreturn.size() + "\nscoreRelationNewMsgToRecentMsg: " + scoreRelationNewMsgToRecentMsg + "\n"); - if (scoreRelationNewMsgToRecentMsg >= 5000.0) { + mapUdate.put(getSMX.getSecondaryString(), scoreRelationNewMsgToRecentMsg); + if (scoreRelationNewMsgToRecentMsg >= 4100.0) { positiveRelationCounter++; if (positiveRelationCounter > relationCap) { strmapreturn.put(strmapreturn.size() + 1, str); - break; } } else if (scoreRelationNewMsgToRecentMsg <= -5000.0) { negativeRelationCounter++; if (negativeRelationCounter > relationCap * 2) { - break; + dummy.add(1); } } } } + }); + sentenceRelationMap.put(str, mapUdate); + return strmapreturn; + } + + private final static ConcurrentMap> StrComparringNoSentenceRelationMap(ConcurrentMap strCacheLocal, + String str, ConcurrentMap localJMWEMap, ConcurrentMap localPipelineAnnotation, + ConcurrentMap localPipelineSentimentAnnotation, ConcurrentMap localCoreDocumentMap, int strmapSize) { + final ConcurrentMap> futures = new MapMaker().concurrencyLevel(4).makeMap(); + for (String str1 : strCacheLocal.values()) { + if (!str.equals(str1)) { + final SimilarityMatrix SMX = new SimilarityMatrix(str, str1); + final Callable worker; + if (stringCache.size() < 150) { + worker = new SentimentAnalyzerTest(str, str1, SMX, + localJMWEMap.get(str), localJMWEMap.get(str1), localPipelineAnnotation.get(str), + localPipelineAnnotation.get(str1), localPipelineSentimentAnnotation.get(str), + localPipelineSentimentAnnotation.get(str1), localCoreDocumentMap.get(str), localCoreDocumentMap.get(str1)); + } else { + worker = new SentimentAnalyzerTest(str, str1, SMX, + localJMWEMap.get(str), jmweAnnotationCache.get(str1), localPipelineAnnotation.get(str), + 
pipelineAnnotationCache.get(str1), localPipelineSentimentAnnotation.get(str), + pipelineSentimentAnnotationCache.get(str1), localCoreDocumentMap.get(str), coreDocumentAnnotationCache.get(str1)); + } + futures.put(futures.size() + 1, executor.submit(worker)); + System.out.println("futures.size(): " + futures.size() + "\nstrmap.values().size(): " + strmapSize + "\n"); + } + } + return futures; + } + + private final static ConcurrentMap stringIteratorComparator(ConcurrentMap strmap, + ConcurrentMap strCacheLocal, ConcurrentMap localJMWEMap, + ConcurrentMap localPipelineAnnotation, ConcurrentMap localPipelineSentimentAnnotation, + ConcurrentMap localCoreDocumentMap) { + ConcurrentMap strmapreturn = new MapMaker().concurrencyLevel(4).makeMap(); + for (String str : strmap.values()) { + final ConcurrentMap getMap = sentenceRelationMap.get(str); + positiveRelationCounter = 0; + negativeRelationCounter = 0; + if (getMap == null) { + final ConcurrentMap> futures = StrComparringNoSentenceRelationMap(strCacheLocal, str, localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap, strmap.size()); + strmapreturn = futuresReturnOverallEvaluation(futures, strmapreturn, str); + } else { + strmapreturn = cachedReturnEvaluations(getMap, strmapreturn, str); + } } return strmapreturn; } + private static ConcurrentMap removeNonSensicalStrings(ConcurrentMap strmap) { + final ConcurrentMap strCacheLocal = stringCache.size() < 150 ? strmap : stringCache; + final ConcurrentMap localJMWEMap = getMultipleJMWEAnnotation(strmap.values()); + final ConcurrentMap localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values()); + final ConcurrentMap localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values()); + final ConcurrentMap localCoreDocumentMap = getMultipleCoreDocumentsWaySuggestion(strmap.values(), pipeline); + System.out.println("finished removeNonSensicalStrings annotations \n"); + return stringIteratorComparator(strmap, strCacheLocal, localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap); + } + + private static SimilarityMatrix retrieveFutureSMX(Future future) { + try { + return future.get(5, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex); + } + return null; + } + public synchronized void checkIfUpdateStrings(boolean hlStatsMsg) throws CustomError { if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) { ConcurrentMap str = MessageResponseHandler.getStr(); @@ -677,7 +591,7 @@ public class Datahandler { iterator--; System.out.println("result: " + result + "\ngetRelation.getKey(): " + getRelation.getKey() + "\npreRelationUserCounters: " + preRelationUserCounters + "\npreRelationUserCounterDouble: " + preRelationUserCounterDouble + "\n"); - if (getRelation.getKey() * 2 < result) { + if (getRelation.getKey() < result) { break; } } @@ -699,12 +613,12 @@ public class Datahandler { coreDoc = coreDocument; } - public ConcurrentMap getMultipleJMWEAnnotation(Collection str) { + private static ConcurrentMap getMultipleJMWEAnnotation(Collection str) { ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str); return jmweAnnotation; } - public ConcurrentMap getMultiplePipelineAnnotation(Collection str) { + private static ConcurrentMap getMultiplePipelineAnnotation(Collection str) { ConcurrentMap pipelineAnnotationMap = new 
MapMaker().concurrencyLevel(2).makeMap(); for (String str1 : str) { Annotation strAnno1 = new Annotation(str1); @@ -714,7 +628,7 @@ public class Datahandler { return pipelineAnnotationMap; } - public ConcurrentMap getMultiplePipelineSentimentAnnotation(Collection str) { + private static ConcurrentMap getMultiplePipelineSentimentAnnotation(Collection str) { ConcurrentMap pipelineAnnotationMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String str1 : str) { Annotation strAnno1 = new Annotation(str1); @@ -724,16 +638,6 @@ public class Datahandler { return pipelineAnnotationMap; } - public ConcurrentMap getMultipleCoreDocuments(Collection str) { - ConcurrentMap pipelineCoreDocumentAnnotations = new MapMaker().concurrencyLevel(2).makeMap(); - str.parallelStream().forEach((str1) -> { - CoreDocument coreDocument = new CoreDocument(str1); - pipeline.annotate(coreDocument); - pipelineCoreDocumentAnnotations.put(str1, coreDocument); - }); - return pipelineCoreDocumentAnnotations; - } - private Double getScoreRelationNewMsgToRecentMsg(String str, String mostRecentMsg) { SimilarityMatrix SMX = new SimilarityMatrix(str, mostRecentMsg); Callable worker = new SentimentAnalyzerTest(str, mostRecentMsg, SMX, @@ -979,7 +883,6 @@ public class Datahandler { if (stringCache.isEmpty()) { return str; } - Collection values = stringCache.values(); str.values().parallelStream().forEach(str1 -> { boolean tooclosematch = false; @@ -1005,20 +908,14 @@ public class Datahandler { for (Entry jmweitr : jmweAnnotation.entrySet()) { jmweAnnotationCache.put(jmweitr.getKey(), jmweitr.getValue()); } - ConcurrentMap Annotationspipeline = new MapMaker().concurrencyLevel(2).makeMap(); - ConcurrentMap AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap Annotationspipeline = new MapMaker().concurrencyLevel(4).makeMap(); + ConcurrentMap AnnotationspipelineSentiment = new MapMaker().concurrencyLevel(4).makeMap(); + ConcurrentMap coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(strmap.values(), pipeline); strmap.values().parallelStream().forEach(str -> { Annotation strAnno1 = new Annotation(str); Annotationspipeline.put(str, strAnno1); Annotation strAnno2 = new Annotation(str); AnnotationspipelineSentiment.put(str, strAnno2); - try { - CoreDocument CD = new CoreDocument(str); - pipeline.annotate(CD); - coreDocumentAnnotationCache.put(str, CD); - } catch (Exception e) { - System.out.println("failed document annotation: " + e + "\n"); - } stringCache.put(stringCache.size() + 1, str); }); System.out.println("pre iterator annotation update \n"); @@ -1038,6 +935,50 @@ public class Datahandler { System.out.println("failed sentiment cache \n"); } }); + coreDocumentpipelineMap.entrySet().forEach(coreDocumentEntry -> { + coreDocumentAnnotationCache.put(coreDocumentEntry.getKey(), coreDocumentEntry.getValue()); + }); return strmap; } + + private static class AnnotationCollector implements Consumer { + + private static int i = 0; + private final List annotationsT = new ArrayList(); + + @Override + public final void accept(T ann) { + annotationsT.add(ann); + } + } + + public final static ConcurrentMap getMultipleCoreDocumentsWaySuggestion(Collection str, StanfordCoreNLP localNLP) { + AnnotationCollector annCollector = new AnnotationCollector(); + for (final String exampleString : str) { + System.out.println("exampleString: " + exampleString + "\n"); + localNLP.annotate(new Annotation(exampleString), annCollector); + annCollector.i++; + System.out.println("iterator: " + annCollector.i 
+ "\nstr size: " + str.size() + "\n"); + } + try { + Thread.sleep(10000); + } catch (InterruptedException ex) { + Logger.getLogger(Datahandler.class.getName()).log(Level.SEVERE, null, ex); + } + ConcurrentMap annotationreturnMap = new MapMaker().concurrencyLevel(2).makeMap(); + List coreDocs = annCollector.annotationsT.stream().map(ann -> { + try { + return new CoreDocument(ann); + } catch (Exception ex) { + System.out.println(ex.getLocalizedMessage()); + return null; + } + }).collect(Collectors.toList()); + coreDocs.stream().forEach(CD -> { + if (CD != null) { + annotationreturnMap.put(CD.text(), CD); + } + }); + return annotationreturnMap; + } } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/SimilarityMatrix.java b/ArtificialAutism/src/main/java/FunctionLayer/SimilarityMatrix.java index 1445a0e4..8c8c20da 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/SimilarityMatrix.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/SimilarityMatrix.java @@ -5,12 +5,6 @@ */ package FunctionLayer; -import com.google.common.collect.Multimap; -import com.google.common.collect.Multiset; -import java.util.Collection; -import java.util.Map; -import java.util.Set; - /** * diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java index f585d09b..382e0618 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java @@ -3,6 +3,7 @@ package FunctionLayer.StanfordParser; import FunctionLayer.LevenshteinDistance; import FunctionLayer.Datahandler; import FunctionLayer.SimilarityMatrix; +import FunctionLayer.StopwordAnnotator; import com.google.common.collect.MapMaker; import edu.mit.jmwe.data.IMWE; import edu.mit.jmwe.data.IMWEDesc; @@ -36,6 +37,7 @@ import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.trees.TypedDependency; import edu.stanford.nlp.trees.tregex.gui.Tdiff; import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.Pair; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; @@ -49,6 +51,7 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BinaryOperator; import java.util.function.Function; +import org.apache.lucene.analysis.core.StopAnalyzer; import org.ejml.simple.SimpleMatrix; /* @@ -146,6 +149,7 @@ public class SentimentAnalyzerTest implements Callable { }); }); score += runCount.get() * 64; + ////System.out.println("score post runCountGet: " + score + "\n"); ConcurrentMap sentenceConstituencyParseList = new MapMaker().concurrencyLevel(2).makeMap(); try { for (CoreMap sentence : pipelineAnnotation1.get(CoreAnnotations.SentencesAnnotation.class)) { @@ -174,10 +178,14 @@ public class SentimentAnalyzerTest implements Callable { } int constituents1 = constinuent1.size() - constiRelationsize; int constituents2 = constinuent2.size() - constiRelationsize; - if (constituents1 > 0 && constituents2 > 0) { + if (constituents1 * 5 < constituents2 || constituents2 * 5 < constituents1) { score -= (constituents1 + constituents2) * 200; + } else if (constituents1 == 0 || constituents2 == 0) { + score -= constiRelationsize * 200; } else { - score += constiRelationsize * 200; + score += constiRelationsize * 160; + //System.out.println("score post constiRelationsize: " + score + "\nconstituents1: " + 
constituents1 + // + "\nconstituents2: " + constituents2 + "\nconstiRelationsize: " + constiRelationsize + "\n"); } GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1); Collection allTypedDependencies1 = gs1.allTypedDependencies(); @@ -190,7 +198,8 @@ public class SentimentAnalyzerTest implements Callable { IndexedWord gov = TDY1.gov(); GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { - score += 1900; + score += 700; + //System.out.println("grammaticalRelation applicable score: " + score + "\n"); grammaticalRelation1++; } GrammaticalRelation reln = TDY1.reln(); @@ -205,49 +214,85 @@ public class SentimentAnalyzerTest implements Callable { GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep); if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { score += 900; + //System.out.println("grammaticalRelation appliceable score: " + score + "\n"); grammaticalRelation2++; } GrammaticalRelation reln = TDY.reln(); if (reln.isApplicable(sentenceConstituencyParse1)) { score += 525; + //System.out.println("reln appliceable score: " + score + "\n"); relationApplicable2++; } } - if ((grammaticalRelation1 == 0 && grammaticalRelation2 > 0) || (grammaticalRelation2 == 0 && grammaticalRelation1 > 0)) { + if ((grammaticalRelation1 == 0 && grammaticalRelation2 > 4) || (grammaticalRelation2 == 0 && grammaticalRelation1 > 4)) { score -= 3450; + //System.out.println("grammaticalRelation1 score trim: " + score + "\ngrammaticalRelation1: " + grammaticalRelation1 + // + "\ngrammaticalRelation2: " + grammaticalRelation2 + "\n"); } if (!allTypedDependencies.isEmpty() || !allTypedDependencies1.isEmpty()) { int allTypeDep1 = allTypedDependencies.size(); int allTypeDep2 = allTypedDependencies1.size(); if (allTypeDep1 <= allTypeDep2 * 5 && allTypeDep2 <= allTypeDep1 * 5) { - if (!alltypeDepsSizeMap.values().contains(allTypeDep1)) { - score += allTypeDep1 * 600; + if (allTypeDep1 > 0 && allTypeDep2 > 0) { + if (allTypeDep1 * 2 <= allTypeDep2 || allTypeDep2 * 2 <= allTypeDep1) { + score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 160 : (allTypeDep2 - allTypeDep1) * 160; + //System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: " + // + allTypeDep2 + "\n"); + } else { + score += allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * 600 : (allTypeDep2 - allTypeDep1) * 600; + //System.out.println(" allTypeDep score: " + score + "\nallTypeDep1: " + allTypeDep1 + "\nallTypeDep2: " + // + allTypeDep2 + "\n"); + } alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep1); - } - if (!alltypeDepsSizeMap.values().contains(allTypeDep1)) { - score += allTypeDep2 * 600; alltypeDepsSizeMap.put(alltypeDepsSizeMap.size() + 1, allTypeDep2); } } if (allTypeDep1 >= 5 && allTypeDep2 >= 5) { int largerTypeDep = allTypeDep1 > allTypeDep2 ? allTypeDep1 : allTypeDep2; int smallerTypeDep = allTypeDep1 < allTypeDep2 ? 
allTypeDep1 : allTypeDep2; - int summation = largerTypeDep * largerTypeDep - smallerTypeDep * smallerTypeDep; - if (summation > 50 && summation < 75) { + int summation = (largerTypeDep * largerTypeDep) - (smallerTypeDep * smallerTypeDep); + if (summation / largerTypeDep < 15.0 && summation / largerTypeDep > 10.0 && smallerTypeDep * 2 > largerTypeDep + && !summationMap.values().contains(summation)) { score += summation * 80; - } else if (!summationMap.values().contains(summation)) { - score -= largerTypeDep * 500; summationMap.put(summationMap.size() + 1, summation); + //System.out.println("score post summation: " + score + "\nsummation: " + summation + "\n"); + } else if (largerTypeDep == smallerTypeDep) { + score += 2500; + //System.out.println("score largerTypeDep equals smallerTypeDep: " + score + "\nlargerTypeDep: " + largerTypeDep + "\n"); } } if (relationApplicable1 > 0 && relationApplicable2 > 0 && relationApplicable1 == relationApplicable2 && grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 == grammaticalRelation2) { score += 3500; - } else { - score += allTypeDep1 > allTypeDep2 - ? (allTypeDep2 - allTypeDep1) * (allTypeDep2 * 50) - : (allTypeDep1 - allTypeDep2) * (allTypeDep1 * 50); + //System.out.println("score relationApplicable equal: " + score + "\n"); + } else if (allTypeDep1 * 5 < allTypeDep2 || allTypeDep2 * 5 < allTypeDep1) { + score -= allTypeDep1 > allTypeDep2 ? (allTypeDep1 - allTypeDep2) * (allTypeDep2 * 450) + : (allTypeDep2 - allTypeDep1) * (allTypeDep1 * 450); + //System.out.println("score minus grammaticalRelation equal: " + score + "\n"); } + if (relationApplicable1 > 1 && relationApplicable2 > 1 && relationApplicable1 * 3 > relationApplicable2 + && relationApplicable2 * 3 > relationApplicable1) { + score += relationApplicable1 > relationApplicable2 ? (relationApplicable1 - relationApplicable2) * 1500 + : (relationApplicable2 - relationApplicable1) * 1500; + //System.out.println("score relationApplicable plus: " + score + "\n"); + } else if (relationApplicable1 * 5 < relationApplicable2 || relationApplicable2 * 5 < relationApplicable1) { + score -= relationApplicable1 > relationApplicable2 ? (relationApplicable1 - relationApplicable2) * 500 + : (relationApplicable2 - relationApplicable1) * 500; + //System.out.println("score relationApplicable minus: " + score + "\n"); + } + if (grammaticalRelation1 > 0 && grammaticalRelation2 > 0 && grammaticalRelation1 * 3 > grammaticalRelation2 + && grammaticalRelation2 * 3 > grammaticalRelation1) { + score += grammaticalRelation1 > grammaticalRelation2 ? (grammaticalRelation1 - grammaticalRelation2) * 1500 + : (grammaticalRelation2 - grammaticalRelation1) * 1500; + //System.out.println("score grammaticalRelation plus: " + score + "\n"); + } else if (grammaticalRelation1 * 5 < grammaticalRelation2 || grammaticalRelation2 * 5 < grammaticalRelation1) { + score -= grammaticalRelation1 > grammaticalRelation2 ? 
(grammaticalRelation1 - grammaticalRelation2) * 500 + : (grammaticalRelation2 - grammaticalRelation1) * 500; + //System.out.println("score grammaticalRelation minus: " + score + "\n"); + } + //System.out.println("score post relationApplicable1 veri: " + score + "\nrelationApplicable1: " + relationApplicable1 + // + "\nrelationApplicable2: " + relationApplicable2 + "\ngrammaticalRelation1: " + grammaticalRelation1 + "\n" + // + "grammaticalRelation2: " + grammaticalRelation2 + "\n"); } AtomicInteger runCount1 = new AtomicInteger(0); sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> { @@ -259,9 +304,10 @@ public class SentimentAnalyzerTest implements Callable { runCount1.getAndIncrement(); }); }); - score += runCount1.get() * 1500; + score += runCount1.get() * 250; } } + //System.out.println("score pre typeSizeSmallest: " + score + "\n"); int typeSizeSmallest = 100; int typeSizeLargest = 0; for (Integer i : alltypeDepsSizeMap.values()) { @@ -273,7 +319,7 @@ public class SentimentAnalyzerTest implements Callable { } } if (typeSizeLargest >= typeSizeSmallest * 3) { - score -= typeSizeLargest * 1600; + score -= typeSizeLargest * 160; } typeSizeLargest = 0; typeSizeSmallest = 100; @@ -286,10 +332,10 @@ public class SentimentAnalyzerTest implements Callable { } } if (typeSizeLargest >= typeSizeSmallest * 3) { - score -= typeSizeLargest * 1600; + score -= typeSizeLargest * 160; } } catch (Exception ex) { - System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n"); + //System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n"); } sentenceConstituencyParseList.clear(); ConcurrentMap simpleSMXlist = new MapMaker().concurrencyLevel(2).makeMap(); @@ -308,6 +354,7 @@ public class SentimentAnalyzerTest implements Callable { ConcurrentMap dotMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap elementSumMap = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap dotSumMap = new MapMaker().concurrencyLevel(2).makeMap(); + //System.out.println("score pre pipelineAnnotation2Sentiment: " + score + "\n"); for (CoreMap sentence : pipelineAnnotation2Sentiment.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); sentiment2.put(sentiment2.size() + 1, RNNCoreAnnotations.getPredictedClass(tree)); @@ -332,59 +379,77 @@ public class SentimentAnalyzerTest implements Callable { } Double dotPredictionIntervalDifference = largest - shortest; subtracter *= 25; + //System.out.println("subtracter: " + subtracter + "\n"); if (dotPredictionIntervalDifference < 5.0) { if (dotPredictions.values().size() > 0) { - score += subtracter; + if (subtracter > 0) { + score -= subtracter; + } else { + score += subtracter; + //System.out.println("score + subtracter: " + score + "\nsubtracter: " + subtracter + "\n"); + } } } else { - score -= subtracter; + score -= subtracter / 10; } } else { subtracter -= 100; subtracter *= 25; score += subtracter * dotPrediction; + //System.out.println("score + subtracter * dotPrediction: " + score + "\nsubtracter: " + subtracter + "\ndotPrediction: " + //+ dotPrediction + "\n"); } dotPredictions.put(dotPredictions.size() + 1, dotPrediction); } + //System.out.println("score post subtracter1: " + score + "\n"); Double subTracPre = 0.0; for (Double subtractors : subtractorMap.values()) { if (Objects.equals(subTracPre, subtractors)) { - score -= 2000; + score -= 1500; + //System.out.println("score minus subTracPre equals: " + score + 
"\nsubTracPre: " + subTracPre + "\n"); } subTracPre = subtractors; } ConcurrentMap DotOverTransfer = dotPredictions; dotPredictions = new MapMaker().concurrencyLevel(2).makeMap(); + Double totalSubtraction = 0.0; for (SimpleMatrix simpleSMX : simpleSMXlist.values()) { double dotPrediction = simpleSMX.dot(predictions) * 100; AccumulateDotMap.put(AccumulateDotMap.size() + 1, dotPrediction); double subtracter = dotPrediction > 50 ? dotPrediction - 100 : dotPrediction > 0 ? 100 - dotPrediction : 0; + //System.out.println("dotPrediction: " + dotPrediction + "\nsubtracter: " + subtracter + "\n"); subtractorMap.put(subtractorMap.size() + 1, subtracter); if (!dotPredictions.values().contains(dotPrediction)) { - subtracter *= 25; - int match = 0; for (Double transferDots : DotOverTransfer.values()) { if (transferDots == dotPrediction) { - score += subtracter; - match++; + totalSubtraction += transferDots; + } else { + score -= subtracter * 25; + //System.out.println("score minus subtracter: " + score + "\nsubtracter: " + subtracter + "\n"); } - } - if (match == 0) { - score -= subtracter; + //System.out.println("transferDots: " + transferDots + "\n"); } } else { subtracter -= 100; subtracter *= 25; - score += subtracter * dotPrediction; + score -= subtracter * dotPrediction; + //System.out.println("score minus subtracter * dotPrediction 2: " + score + "\ndotPrediction: " + // + dotPrediction + "\n"); } dotPredictions.put(dotPredictions.size() + 1, dotPrediction); } + if (totalSubtraction > 45.0) { + score -= totalSubtraction * 25; + } else { + score += totalSubtraction * 25; + } + //System.out.println("score post totalSubtraction: " + score + "\ntotalSubtraction: " + totalSubtraction + "\n"); Double preAccumulatorDot = 0.0; Double postAccumulatorDot = 0.0; for (Double accumulators : AccumulateDotMap.values()) { if (Objects.equals(preAccumulatorDot, accumulators)) { if (Objects.equals(postAccumulatorDot, accumulators)) { - score -= 4000; + score -= 1400; } postAccumulatorDot = accumulators; } @@ -393,7 +458,7 @@ public class SentimentAnalyzerTest implements Callable { subTracPre = 0.0; for (Double subtractors : subtractorMap.values()) { if (Objects.equals(subTracPre, subtractors)) { - score -= 2000; + score -= 500; } subTracPre = subtractors; } @@ -404,7 +469,7 @@ public class SentimentAnalyzerTest implements Callable { double elementSum = nodeVector.kron(simpleSMX).elementSum(); if (preDot == dot) { if (postDot == dot) { - score -= 4000; + score -= 500; } postDot = dot; } @@ -414,28 +479,34 @@ public class SentimentAnalyzerTest implements Callable { dotMap.put(dotMap.size() + 1, dot); if (!dotSumMap.values().contains(dot)) { if (dot < 0.000) { - score += dot * 1500; + score += dot * 500; + //System.out.println("score + dot * 500: " + score + "\ndot: " + dot + "\n"); } else if (dot < 0.1) { score += 256; + //System.out.println("score + 256: " + score + "\ndot: " + dot + " 0.50) { - score -= 2400; + score -= 1200; } dotSumMap.put(dotSumMap.size() + 1, dot); } else { - score -= 750; + score -= 250; } if (!elementSumMap.values().contains(elementSum)) { if (elementSum < 0.01 && elementSum > 0.00) { score += 3300; + //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " + // + elementSum + "\n"); } else if (elementSum > 0.1 && elementSum < 0.2) { score += 1100; + //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " + // + elementSum + "\n"); } else { - score -= elementSum * 1424; + score -= elementSum * 1024; } 
elementSumMap.put(elementSumMap.size() + 1, elementSum); } else { - score -= 750; + score -= 250; } } for (SimpleMatrix simpleSMX : simpleSMXlistVector.values()) { @@ -443,7 +514,7 @@ public class SentimentAnalyzerTest implements Callable { double elementSum = simpleSMX.kron(nodeVector).elementSum(); if (preDot == dot) { if (postDot == dot) { - score -= 4000; + score -= 500; } postDot = dot; } @@ -454,28 +525,35 @@ public class SentimentAnalyzerTest implements Callable { if (!dotSumMap.values().contains(dot)) { if (dot < 0.1) { score += 256; + //System.out.println("score dot < 0.1: " + score + "\ndot: " + // + dot + "\n"); } if (dot > 0.50) { - score -= 2400; + score -= 1400; } dotSumMap.put(dotSumMap.size() + 1, dot); } else { - score -= 750; + score -= 250; } if (!elementSumMap.values().contains(elementSum)) { if (elementSum < 0.01 && elementSum > 0.00) { score += 1300; + //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " + // + elementSum + "\n"); } else if (elementSum > 0.1 && elementSum < 1.0) { score += 1100; + //System.out.println("score elementSum < 0.01 && elementSum > 0.00: " + score + "\nelementSum: " + // + elementSum + "\n"); } else { - score -= elementSum * 1424; + score -= elementSum * 1024; } elementSumMap.put(elementSumMap.size() + 1, elementSum); } else { - score -= 750; + score -= 250; } } } + //System.out.println("score post sentiment analyzer2: " + score + "\n"); OptionalDouble minvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).min(); OptionalDouble maxvalueDots = dotMap.values().stream().mapToDouble(Double::doubleValue).max(); double total = minvalueDots.getAsDouble() + maxvalueDots.getAsDouble(); @@ -485,36 +563,43 @@ public class SentimentAnalyzerTest implements Callable { } if (permitted) { Double dotsVariance = maxvalueDots.getAsDouble() - minvalueDots.getAsDouble(); + //System.out.println("maxvalueDots.getAsDouble():" + maxvalueDots.getAsDouble() + "\nminvalueDots.getAsDouble():" + // + minvalueDots.getAsDouble() + "\ndotsVariance: " + dotsVariance + "\n"); if (maxvalueDots.getAsDouble() > minvalueDots.getAsDouble() * 10) { score -= 5500; } else if (minvalueDots.getAsDouble() < -0.10) { score -= 3500; - } else if (dotsVariance < 0.5) { - score += 3500; + } else if (dotsVariance < 0.5 && dotsVariance > 0.1) { + score -= 3500; } else if (dotsVariance > minvalueDots.getAsDouble() * 2) { score += 3500; + //System.out.println("varians 4 score. 
" + score + "\n"); + } else if (minvalueDots.getAsDouble() * 3 > maxvalueDots.getAsDouble() && maxvalueDots.getAsDouble() < 0.1001) { + score += dotsVariance * 200000; } } + //System.out.println("score post dotsVariance: " + score + "\n"); OptionalDouble minvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).min(); OptionalDouble maxvalueElements = elementSumCounter.values().stream().mapToDouble(Double::doubleValue).max(); Double elementsVariance = maxvalueElements.getAsDouble() - minvalueElements.getAsDouble(); + //System.out.println("elementsVariance: " + elementsVariance + "\nmaxvalueElements.getAsDouble(): " + // + maxvalueElements.getAsDouble() + "\nminvalueElements.getAsDouble(): " + minvalueElements.getAsDouble() + "\n"); if (elementsVariance == 0.0) { score -= 550; } else if (elementsVariance < 0.02 && elementsVariance > -0.01) { score += 3500; - } else if (elementsVariance < 0.5 && maxvalueElements.getAsDouble() > 0.0 && minvalueElements.getAsDouble() > 0.0 && elementsVariance > 0.000) { - score += 3500; } else if (minvalueElements.getAsDouble() < 0.0 && minvalueElements.getAsDouble() - maxvalueElements.getAsDouble() < 0.50) { score -= 2500; + } else if (elementsVariance * 2 >= maxvalueElements.getAsDouble() && elementsVariance < 0.1) { + score -= elementsVariance * 86000; } - + //System.out.println("score post elementsVariance: " + score + "\n"); score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500; - DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter(); List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter); List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter); score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200; - + //System.out.println("score post classifyRaw: " + score + "\n"); int mainSentiment1 = 0; int longest1 = 0; int mainSentiment2 = 0; @@ -539,24 +624,25 @@ public class SentimentAnalyzerTest implements Callable { longest2 = partText.length(); } } + //System.out.println("score post pipelineAnnotation2Sentiment: " + score + "\n"); if (longest1 != longest2) { long deffLongest = longest1 > longest2 ? longest1 : longest2; long deffshorter = longest1 < longest2 ? 
longest1 : longest2; if (deffLongest > deffshorter * 5) { score -= 5500; } else if (deffLongest < (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) { - score += (deffLongest - deffshorter) * 120; + score += (deffLongest - deffshorter) * 20; } else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) { score += (deffLongest - deffshorter) * 20; } else if (deffLongest - deffshorter < 2) { - score += (deffLongest + deffshorter) * 40; + score += (deffLongest - deffshorter) * 20; } else if (deffshorter * 2 >= deffLongest && deffshorter * 2 < deffLongest + 5) { - score += deffLongest * 20; + score += (deffLongest - deffshorter) * 20; } else { score -= (deffLongest - deffshorter) * 50; } if (deffLongest - deffshorter <= 5) { - score += 2500; + score += 250; } } int tokensCounter1 = 0; @@ -570,7 +656,8 @@ public class SentimentAnalyzerTest implements Callable { int MarkedContinuousCounter2 = 0; Integer MarkedContiniousCounter1Entries = 0; Integer MarkedContiniousCounter2Entries = 0; - int UnmarkedPatternCounter = 0; + int UnmarkedPatternCounter1 = 0; + int UnmarkedPatternCounter2 = 0; ConcurrentMap ITokenMapTag1 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap ITokenMapTag2 = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap strTokenStems1 = new MapMaker().concurrencyLevel(2).makeMap(); @@ -599,7 +686,7 @@ public class SentimentAnalyzerTest implements Callable { Collection values = token.getPartMap().values(); IMWEDesc entry = token.getEntry(); MarkedContinuousCounter1 += entry.getMarkedContinuous(); - UnmarkedPatternCounter += entry.getUnmarkedPattern(); + UnmarkedPatternCounter1 += entry.getUnmarkedPattern(); for (IMWEDesc.IPart iPart : values) { strTokenGetiPart1.put(strTokenGetiPart1.size() + 1, iPart.getForm()); } @@ -633,7 +720,7 @@ public class SentimentAnalyzerTest implements Callable { Collection values = token.getPartMap().values(); IMWEDesc entry = token.getEntry(); MarkedContinuousCounter2 += entry.getMarkedContinuous(); - UnmarkedPatternCounter += entry.getUnmarkedPattern(); + UnmarkedPatternCounter2 += entry.getUnmarkedPattern(); for (IMWEDesc.IPart iPart : values) { strTokenGetiPart2.put(strTokenGetiPart2.size() + 1, iPart.getForm()); } @@ -655,26 +742,33 @@ public class SentimentAnalyzerTest implements Callable { anotatorcounter2++; } } catch (Exception ex) { - System.out.println("SENTIMENT stacktrace: " + ex.getMessage() + "\n"); + //System.out.println("SENTIMENT stacktrace: " + ex.getMessage() + "\n"); } + int entry1 = entryCounts1.values().size(); int entry2 = entryCounts2.values().size(); - if ((entry1 >= entry2 * 5 && entry2 > 0) || (entry2 >= entry1 * 5 && entry1 > 0)) { - score -= entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450; - } else if (entry1 >= entry2 * 50 || entry2 >= entry1 * 50) { - score -= entry1 > entry2 ? entry1 * 180 : entry2 * 180; - } else if (entry1 >= entry2 * 2 || entry2 >= entry1 * 2) { - score += entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450; - } else if (entry1 == 0 && entry2 == 0) { - score -= 4500; - } else if (entry1 == entry2) { - score += 5500; + //System.out.println("score post JMWEAnnotation: " + score + "\nentry1: " + entry1 + "\nentry2: " + entry2 + "\n"); + if (entry1 > 0 && entry2 > 0) { + if ((entry1 >= entry2 * 5) || (entry2 >= entry1 * 5)) { + score -= entry1 > entry2 ? 
(entry1 - entry2) * 450 : (entry2 - entry1) * 450; + //System.out.println("1"); + } else if ((entry1 >= entry2 * 50 || entry2 >= entry1 * 50)) { + score -= entry1 > entry2 ? entry1 * 180 : entry2 * 180; + //System.out.println("2"); + } else if (entry1 >= entry2 * 2 || entry2 >= entry1 * 2) { + score += entry1 > entry2 ? (entry1 - entry2) * 450 : (entry2 - entry1) * 450; + //System.out.println("3"); + } else if (entry1 > 10 && entry2 > 10 && entry1 * 2 > entry2 && entry2 * 2 > entry1) { + score += entry1 > entry2 ? entry2 * 600 : entry1 * 600; + //System.out.println("6"); + } } ConcurrentMap countsMap = new MapMaker().concurrencyLevel(2).makeMap(); for (int counts : entryCounts1.values()) { for (int counts1 : entryCounts2.values()) { if (counts == counts1 && counts > 0 && !countsMap.values().contains(counts)) { score += counts * 250; + //System.out.println("score post counts: " + score + "\nCounts: " + counts + "\n"); countsMap.put(countsMap.size() + 1, counts); } } @@ -684,23 +778,27 @@ public class SentimentAnalyzerTest implements Callable { for (String strTokenPos2 : strTokenEntryPOS2.values()) { if (strTokenPos1.equals(strTokenPos2)) { score += 500; - } else { score -= 650; - + //System.out.println("strTokenEntryPOS score: " + score + "\n"); } } } } - if (UnmarkedPatternCounter > 0 && UnmarkedPatternCounter < 5) { - score -= UnmarkedPatternCounter * 1600; - } else { - score -= UnmarkedPatternCounter * 10; + //System.out.println("score pre UnmarkedPatternCounter: " + score + "\nUnmarkedPatternCounter1: " + UnmarkedPatternCounter1 + // + "\nUnmarkedPatternCounter2: " + UnmarkedPatternCounter2 + "\n"); + if (UnmarkedPatternCounter1 > 0 && UnmarkedPatternCounter2 > 0) { + if (UnmarkedPatternCounter1 * 2 > UnmarkedPatternCounter2 && UnmarkedPatternCounter2 * 2 > UnmarkedPatternCounter1) { + score += 2500; + } else if (UnmarkedPatternCounter1 * 5 < UnmarkedPatternCounter2 || UnmarkedPatternCounter2 * 5 < UnmarkedPatternCounter1) { + score -= 4000; + } } - + //System.out.println("score post UnmarkedPatternCounter: " + score + "\n"); if (MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0) { if (MarkedContinuousCounter1 > MarkedContinuousCounter2 * 50 || MarkedContinuousCounter2 > MarkedContinuousCounter1 * 50) { score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? MarkedContinuousCounter1 * 120 : MarkedContinuousCounter2 * 120; + //System.out.println("score post MarkedContinuousCounter too big: " + score + "\n"); } else if (!Objects.equals(MarkedContiniousCounter1Entries, MarkedContiniousCounter2Entries) && (MarkedContinuousCounter1 * 2 >= MarkedContinuousCounter2 * MarkedContinuousCounter1) || (MarkedContinuousCounter2 * 2 >= MarkedContinuousCounter1 * MarkedContinuousCounter2)) { @@ -719,6 +817,7 @@ public class SentimentAnalyzerTest implements Callable { || MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter1 || MarkedContiniousCounter2Entries * 5 < MarkedContinuousCounter2) { score -= MarkedContinuousCounter1 > MarkedContinuousCounter2 ? MarkedContinuousCounter1 * 400 : MarkedContinuousCounter2 * 400; + //System.out.println("score post MarkedContinuousCounter: " + score + "\n"); } } } @@ -730,6 +829,7 @@ public class SentimentAnalyzerTest implements Callable { score += 400; } else { score -= 200; + //System.out.println("score minus strTokenGetiPart: " + score + "\n"); } } } @@ -747,7 +847,8 @@ public class SentimentAnalyzerTest implements Callable { if (strTokenEntry1.equals(strTokenEntry2)) { score += boundariyLeacks ? 
2500 : 2500 / 2; } else if (!boundariyLeacks) { - score -= 1250; + score -= 450; + //System.out.println("boundariyLeacks score: " + score + "\n"); } else { remnantCounter++; } @@ -755,7 +856,9 @@ public class SentimentAnalyzerTest implements Callable { entryTokenMap.put(entryTokenMap.size() + 1, strTokenEntry2); } } - score -= remnantCounter * 250; + //System.out.println("score pre remnantCounter: " + score + "\n"); + score += remnantCounter * 250; + //System.out.println("score post remnantCounter: " + score + "\n"); ConcurrentMap iTokenMapTagsMap = new MapMaker().concurrencyLevel(2).makeMap(); for (String strmapTag : ITokenMapTag1.values()) { for (String strmapTag1 : ITokenMapTag2.values()) { @@ -767,6 +870,7 @@ public class SentimentAnalyzerTest implements Callable { } } } + //System.out.println("score post strmapTag: " + score + "\n"); int tokenform1size = strTokenForm1.values().size(); int tokenform2size = strTokenForm2.values().size(); if (tokenform1size > 0 || tokenform2size > 0) { @@ -777,50 +881,81 @@ public class SentimentAnalyzerTest implements Callable { score -= 1600; } else { score += 500; + //System.out.println("tokenform1size score500: " + score + "\n"); } } } } else if (tokenform1size > 0 && tokenform2size > 0) { - score += tokenform1size > tokenform2size ? tokenform1size * 1600 : tokenform2size * 1600; + if (tokenform1size * 2 >= tokenform2size && tokenform2size * 2 >= tokenform1size) { + score += tokenform1size > tokenform2size ? tokenform1size * 600 : tokenform2size * 600; + } else if (tokenform1size * 4 <= tokenform2size || tokenform2size * 4 <= tokenform1size) { + score -= tokenform1size > tokenform2size ? (tokenform1size - tokenform2size) * 600 : (tokenform2size - tokenform1size) * 600; + } + //System.out.println("tokenform1size score: " + score + "\ntokenform1size: " + tokenform1size + "\ntokenform2size: " + // + tokenform2size + "\n"); } - } else { - tokenform1size = tokenform1size > 0 ? tokenform1size : 1; - tokenform2size = tokenform2size > 0 ? 
-        } else {
-            tokenform1size = tokenform1size > 0 ? tokenform1size : 1;
-            tokenform2size = tokenform2size > 0 ? tokenform2size : 1;
-            score -= (tokenform1size + tokenform2size) * 1200;
         }
+        //System.out.println("Score pre tokenStemmingMap: " + score + "\n");
         ConcurrentMap tokenStemmingMap = new MapMaker().concurrencyLevel(2).makeMap();
         for (String strTokenStem : strTokenStems1.values()) {
             for (String strTokenStem1 : strTokenStems2.values()) {
-                if (strTokenStem.equals(strTokenStem1)) {
+                if (strTokenStem.equals(strTokenStem1) && !tokenStemmingMap.values().contains(strTokenStem)) {
                     score += 1500;
-                } else if (!tokenStemmingMap.values().contains(strTokenStem)) {
-                    score -= 150;
                     tokenStemmingMap.put(tokenStemmingMap.size() + 1, strTokenStem);
                 }
+                //System.out.println("score strTokenStem: " + score + "\n");
             }
         }
+        //System.out.println("Score pre inflected: " + score + "\n");
+        //System.out.println("inflectedCounterPositive1: " + inflectedCounterPositive1 + "\ninflectedCounterPositive2: "
+        //        + inflectedCounterPositive2 + "\ninflectedCounterNegative: " + inflectedCounterNegative + "\n");
         if (inflectedCounterPositive1 + inflectedCounterPositive2 > inflectedCounterNegative && inflectedCounterNegative > 0) {
-            score += (inflectedCounterPositive1 - inflectedCounterNegative) * 650;
+            score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 650;
+            //System.out.println("score inflectedCounterPositive plus: " + score + "\n");
         }
         if (inflectedCounterPositive1 > 0 && inflectedCounterPositive2 > 0) {
-            score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550;
-        }
-        if (anotatorcounter1 > 1 && anotatorcounter2 > 1) {
-            score += (anotatorcounter1 - anotatorcounter2) * 400;
-        }
-        if ((tokensCounter1 > 0 && tokensCounter2 > 0) && tokensCounter1 < tokensCounter2 * 5 && tokensCounter2 < tokensCounter1 * 5) {
-            score += (tokensCounter1 + tokensCounter2) * 1400;
-        } else {
-            int elseint = tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
-            if ((tokensCounter1 > tokensCounter2 * 5 || tokensCounter2 > tokensCounter1 * 5)
-                    && tokensCounter1 > 0 && tokensCounter2 > 0) {
-                score -= (tokensCounter1 + tokensCounter2) * 1500;
-            } else if (elseint > 0 && tokensCounter1 > 0 && tokensCounter2 > 0) {
-                score += elseint * 2;
-            } else if (elseint == 0) {
-                score += 1500;
+            if (inflectedCounterPositive1 * 2 > inflectedCounterPositive2 && inflectedCounterPositive2 * 2 > inflectedCounterPositive1) {
+                score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550;
+                //System.out.println("score plus inflectedCounterPositive * 2: " + score + "\n");
+            } else if (inflectedCounterPositive1 * 5 < inflectedCounterPositive2 || inflectedCounterPositive2 * 5 < inflectedCounterPositive1) {
+                score -= inflectedCounterPositive1 > inflectedCounterPositive2 ? (inflectedCounterPositive1 - inflectedCounterPositive2) * 400
+                        : (inflectedCounterPositive2 - inflectedCounterPositive1) * 400;
+                //System.out.println("score minus inflectedCounterPositive * 2: " + score + "\n");
            }
         }
+        //System.out.println("anotatorcounter1: " + anotatorcounter1 + "\nanotatorcounter2: " + anotatorcounter2 + "\n");
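Editorial note: the stemming rewrite just above folds the dedupe test into the match condition, so each distinct stem shared by both sentences is rewarded exactly once and the old 150-point miss penalty disappears. Assuming both maps hold plain stem strings, the same effect reads naturally as a set intersection:

    // Illustrative only: reward each distinct shared stem once.
    Set<String> sharedStems = new HashSet<>(strTokenStems1.values());
    sharedStems.retainAll(strTokenStems2.values());
    score += sharedStems.size() * 1500;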
+        if (anotatorcounter1 > 1 && anotatorcounter2 > 1) {
+            if (anotatorcounter1 * 2 > anotatorcounter2 && anotatorcounter2 * 2 > anotatorcounter1) {
+                score += anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 700
+                        : (anotatorcounter2 - anotatorcounter1) * 700;
+                //System.out.println("score plus anotatorcounter: " + score + "\n");
+            } else if (anotatorcounter1 * 5 < anotatorcounter2 || anotatorcounter2 * 5 < anotatorcounter1) {
+                score -= anotatorcounter1 > anotatorcounter2 ? (anotatorcounter1 - anotatorcounter2) * 400 : (anotatorcounter2 - anotatorcounter1) * 400;
+                //System.out.println("score minus anotatorcounter: " + score + "\n");
+            }
+        }
+        //System.out.println("tokensCounter1: " + tokensCounter1 + "\ntokensCounter2: " + tokensCounter2 + "\n");
+        if ((tokensCounter1 > 1 && tokensCounter2 > 1) && tokensCounter1 < tokensCounter2 * 5 && tokensCounter2 < tokensCounter1 * 5) {
+            if (tokensCounter1 > tokensCounter2 / 2 && tokensCounter2 > tokensCounter1 / 2) {
+                score += (tokensCounter1 + tokensCounter2) * 1400;
+                //System.out.println("score plus tokensCounter: " + score + "\n");
+            } else {
+                score -= 3500;
+                //System.out.println("score minus tokensCounter: " + score + "\n");
+            }
+        } else {
+            int elseint = tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
+            //System.out.println("elseint: " + elseint + "\n");
+            if ((tokensCounter1 > tokensCounter2 * 5 || tokensCounter2 > tokensCounter1 * 5)
+                    && tokensCounter1 > 0 && tokensCounter2 > 0) {
+                score -= tokensCounter1 > tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
+                //System.out.println("score post tokensCounter: " + score + "\n");
+            } else if (elseint > 0 && tokensCounter1 > 0 && tokensCounter2 > 0) {
+                score -= elseint * 2;
+                //System.out.println("score post elseint: " + elseint + "\n");
+            }
+        }
+        //System.out.println("Score Pre levenhstein: " + score + "\n");
         LevenshteinDistance leven = new LevenshteinDistance(str, str1);
         double SentenceScoreDiff = leven.computeLevenshteinDistance();
         SentenceScoreDiff *= 15;
@@ -841,9 +976,7 @@ public class SentimentAnalyzerTest implements Callable {
                 List<CoreLabel> tokens = em.tokens();
                 for (CoreLabel token : tokens) {
                     if (!nerEntityTokenTags1.values().contains(token.tag())) {
-                        if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
-                            score -= 6000;
-                        } else {
+                        if (entityType.equals("PERSON") && EntityConfidences > 0.80) {
                             nerEntityTokenTags1.put(nerEntityTokenTags1.size() + 1, token.tag());
                         }
                     }
@@ -863,9 +996,7 @@ public class SentimentAnalyzerTest implements Callable {
                 List<CoreLabel> tokens = em.tokens();
                 for (CoreLabel token : tokens) {
                     if (!nerEntityTokenTags2.values().contains(token.tag())) {
-                        if (entityType.equals("PERSON") && EntityConfidences < 0.80) {
-                            score -= 6000;
-                        } else {
+                        if (entityType.equals("PERSON") && EntityConfidences > 0.80) {
                             nerEntityTokenTags2.put(nerEntityTokenTags2.size() + 1, token.tag());
                         }
                     }
@@ -875,26 +1006,115 @@ public class SentimentAnalyzerTest implements Callable {
                 nerEntities4.put(nerEntities4.size() + 1, em.entityType());
             }
         }
+        //System.out.println("score post PERSON trim: " + score + "\n");
         for (String strEnts1 : nerEntities1.values()) {
             Collection<String> values = nerEntities2.values();
             for (String strEnts2 : values) {
                 if (strEnts1.equalsIgnoreCase(strEnts2)) {
-                    score += 7500;
+                    score += 2500;
+                    //System.out.println("score strEnts1 plus: " + score + "\n");
+                } else {
+                    score -= 150;
                 }
             }
         }
         for (String strEnts1 : nerEntities3.values()) {
             if (nerEntities4.values().contains(strEnts1)) {
-                score += 3500;
+                score -= 1500;
+                //System.out.println("score nerEntities4 minus: " + score + "\n");
+            } else {
+                score -= 150;
             }
         }
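Editorial note: the NER hunks above change two things at once. Low-confidence PERSON mentions used to cost a flat 6000 points; now the branch simply declines to collect tags, and only confident PERSON mentions contribute tags at all. Entity-type overlap also dropped from large flat rewards (7500 and 3500) to small rewards with per-miss penalties. The new collection gate in isolation (the helper is ours; the names mirror the patch):

    // Illustrative only: under the new logic a mention's token tags are collected
    // only for PERSON mentions above the 0.80 confidence floor.
    static boolean collectTags(String entityType, double entityConfidence) {
        return entityType.equals("PERSON") && entityConfidence > 0.80;
    }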
         for (String strToken : nerEntityTokenTags1.values()) {
             if (nerEntityTokenTags2.values().contains(strToken)) {
-                score += 2500;
+                score += 2000;
+                //System.out.println("score nerEntities4 plus: " + score + "\n");
+            } else {
+                score -= 150;
             }
         }
+        //System.out.println("score pre stopwordTokens: " + score + "\n");
+        ConcurrentMap stopwordTokens = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap stopwordTokens1 = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap stopWordLemma = new MapMaker().concurrencyLevel(2).makeMap();
+        ConcurrentMap stopWordLemma1 = new MapMaker().concurrencyLevel(2).makeMap();
+        Integer pairCounter1 = 0;
+        Integer pairCounter2 = 0;
+        String customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with";
+        List<CoreLabel> tokensSentiment1 = pipelineAnnotation1Sentiment.get(CoreAnnotations.TokensAnnotation.class);
+        List<CoreLabel> tokensSentiment2 = pipelineAnnotation2Sentiment.get(CoreAnnotations.TokensAnnotation.class);
+        Set stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+        Set stopWordsCustom = StopwordAnnotator.getStopWordList(customStopWordList, true);
+        for (CoreLabel token : tokensSentiment1) {
+            Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
+            String word = token.word().toLowerCase();
+            if (stopWords.contains(word) || stopWordsCustom.contains(word)) {
+                stopwordTokens.put(stopwordTokens.size(), word);
+            }
+            String lemma = token.lemma().toLowerCase();
+            if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) {
+                stopWordLemma.put(stopWordLemma.size(), lemma);
+            }
+            if (stopword.first() && stopword.second()) {
+                pairCounter1++;
+            }
+            //System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: "
+            //        + word + "\nlemma: " + lemma + "\n");
+        }
+        for (CoreLabel token : tokensSentiment2) {
+            Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
+            String word = token.word().toLowerCase();
+            if (stopWords.contains(word) || stopWordsCustom.contains(word)) {
+                stopwordTokens1.put(stopwordTokens1.size(), word);
+            }
+            String lemma = token.lemma().toLowerCase();
+            if (stopWords.contains(lemma) || stopWordsCustom.contains(lemma)) {
+                stopWordLemma1.put(stopWordLemma1.size(), lemma);
+            }
+            if (stopword.first() && stopword.second()) {
+                pairCounter2++;
+            }
+            //System.out.println("stopword Pair: " + stopword.first() + " " + stopword.second() + "\nword: "
+            //        + word + "\nlemma: " + lemma + "\n");
+        }
+        for (String stopwords1 : stopwordTokens.values()) {
+            for (String stopwords2 : stopwordTokens1.values()) {
+                if (stopwords1.equals(stopwords2)) {
+                    score -= 500;
+                    //System.out.println("score stopwordsToken: " + score + "\n");
+                }
+            }
+        }
+        for (String stopwords1 : stopWordLemma.values()) {
+            for (String stopwords2 : stopWordLemma1.values()) {
+                if (stopwords1.equals(stopwords2)) {
+                    score -= 500;
+                    //System.out.println("score stopwords Lemma: " + score + "\n");
+                }
+            }
+        }
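Editorial note: token.get(StopwordAnnotator.class) in the two loops above only returns a pair if the custom annotator actually ran inside the sentiment pipeline. CoreNLP wires custom annotators up through properties; a sketch of the registration this patch presumably performs elsewhere (the property values shown are assumptions, not lines from this diff):

    // Illustrative only: register the annotator under the name "stopword"
    // and append it to the annotator chain of the sentiment pipeline.
    Properties props = new Properties();
    props.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator");
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,stopword");
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
    props.setProperty(StopwordAnnotator.IGNORE_STOPWORD_CASE, "true");
    StanfordCoreNLP pipelineWithStopwords = new StanfordCoreNLP(props);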
+        if (!stopwordTokens.values().isEmpty() && !stopwordTokens1.values().isEmpty()) {
+            int stopwordsize1 = stopwordTokens.values().size();
+            int stopwordsize2 = stopwordTokens1.values().size();
+            if (stopwordsize1 * 5 < stopwordsize2 || stopwordsize2 * 5 < stopwordsize1) {
+                score -= stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850;
+            } else {
+                score += stopwordsize1 > stopwordsize2 ? (stopwordsize1 - stopwordsize2) * 850 : (stopwordsize2 - stopwordsize1) * 850;
+            }
+            //System.out.println("score post stopwordsize: " + score + "\nstopwordsize1: " + stopwordsize1 + "\nstopwordsize2: "
+            //        + stopwordsize2 + "\n");
+        }
+        if (pairCounter1 > 0 && pairCounter2 > 0) {
+            if (pairCounter1 * 3 <= pairCounter2 || pairCounter2 * 3 <= pairCounter1) {
+                score -= pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 1500 : (pairCounter2 - pairCounter1) * 1500;
+            } else {
+                score += pairCounter1 > pairCounter2 ? (pairCounter1 - pairCounter2) * 700 : (pairCounter2 - pairCounter1) * 700;
+            }
+            //System.out.println("score post pairCounter: " + score + "\npairCounter1: " + pairCounter1 + "\npairCounter2: " + pairCounter2 + "\n");
+        }
     } catch (Exception ex) {
-        System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n");
+        //System.out.println("SENTIMENT stacktrace Overall catch: " + ex.getMessage() + "\n");
     }
     System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
     smxParam.setDistance(score);
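The new annotator class follows. One non-obvious detail: CoreNLP instantiates custom annotators reflectively and passes the annotator's name plus the pipeline Properties, which is exactly the (String, Properties) constructor shape defined below; the no-argument form falls back to Lucene's English stop set. Direct construction, for instance in a test, might look like this (a sketch, not code from the patch):

    // Illustrative only: build the annotator directly with a custom, case-insensitive list.
    Properties testProps = new Properties();
    testProps.setProperty(StopwordAnnotator.STOPWORDS_LIST, "start,period,the,of");
    testProps.setProperty(StopwordAnnotator.IGNORE_STOPWORD_CASE, "true");
    StopwordAnnotator annotator = new StopwordAnnotator(testProps);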
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StopwordAnnotator.java b/ArtificialAutism/src/main/java/FunctionLayer/StopwordAnnotator.java
new file mode 100644
index 00000000..b6df578c
--- /dev/null
+++ b/ArtificialAutism/src/main/java/FunctionLayer/StopwordAnnotator.java
@@ -0,0 +1,108 @@
+package FunctionLayer;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.Arrays;
+
+import edu.stanford.nlp.ling.CoreAnnotation;
+import edu.stanford.nlp.pipeline.Annotator;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+
+import edu.stanford.nlp.ling.*;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.util.Pair;
+import edu.stanford.nlp.util.ArraySet;
+
+/**
+ * CoreNlp Annotator that checks if an incoming token is a stopword
+ *
+ * @author John Conwell
+ * @author Paul Landes
+ */
+public class StopwordAnnotator implements Annotator, CoreAnnotation<Pair<Boolean, Boolean>> {
+
+    /**
+     * stopword annotator class name used in annotators property
+     */
+    public static final String ANNOTATOR_CLASS = "stopword";
+
+    /**
+     * Property key to specify the comma delimited list of custom stopwords
+     */
+    public static final String STOPWORDS_LIST = "stopword-list";
+
+    /**
+     * Property key to specify if stopword list is case insensitive
+     */
+    public static final String IGNORE_STOPWORD_CASE = "ignore-stopword-case";
+
+    private static Class boolPair = Pair.makePair(true, true).getClass();
+
+    private Properties props;
+    private CharArraySet stopwords;
+
+    public StopwordAnnotator() {
+        this(new Properties());
+    }
+
+    public StopwordAnnotator(String notUsed, Properties props) {
+        this(props);
+    }
+
+    public StopwordAnnotator(Properties props) {
+        this.props = props;
+        if (this.props.containsKey(STOPWORDS_LIST)) {
+            String stopwordList = props.getProperty(STOPWORDS_LIST);
+            boolean ignoreCase = Boolean.parseBoolean(props.getProperty(IGNORE_STOPWORD_CASE, "false"));
+            this.stopwords = getStopWordList(stopwordList, ignoreCase);
+        } else {
+            this.stopwords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+        }
+    }
+
+    @Override
+    public void annotate(Annotation annotation) {
+        if (stopwords != null && stopwords.size() > 0 && annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
+            List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
+            for (CoreLabel token : tokens) {
+                boolean isWordStopword = stopwords.contains(token.word().toLowerCase());
+                boolean isLemmaStopword = stopwords.contains(token.lemma().toLowerCase());
+                Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
+                token.set(StopwordAnnotator.class, pair);
+            }
+        }
+    }
+
+    @Override
+    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
+        return Collections.singleton(StopwordAnnotator.class);
+    }
+
+    @Override
+    public Set<Class<? extends CoreAnnotation>> requires() {
+        return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
+                CoreAnnotations.TextAnnotation.class,
+                CoreAnnotations.TokensAnnotation.class,
+                CoreAnnotations.LemmaAnnotation.class,
+                CoreAnnotations.PartOfSpeechAnnotation.class
+        )));
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public Class<Pair<Boolean, Boolean>> getType() {
+        return (Class<Pair<Boolean, Boolean>>) boolPair;
+    }
+
+    public static CharArraySet getStopWordList(String stopwordList, boolean ignoreCase) {
+        String[] terms = stopwordList.split(",");
+        CharArraySet stopwordSet = new CharArraySet(terms.length, ignoreCase);
+        for (String term : terms) {
+            stopwordSet.add(term);
+        }
+        return CharArraySet.unmodifiableSet(stopwordSet);
+    }
+}
diff --git a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
index be1292b8..288e3303 100644
--- a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
+++ b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
@@ -44,13 +44,12 @@ public class DiscordHandler {
             Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
         }
         PipelineJMWESingleton.getINSTANCE();
-        Datahandler.instance.instantiateExecutor();
         Datahandler.instance.instantiateAnnotationMapJMWE();
         Datahandler.instance.shiftReduceParserInitiate();
         Datahandler.instance.instantiateAnnotationMap();
         System.out.println("FINISHED ALL ANNOTATIONS");
-        //Datahandler.instance.addHLstatsMessages();
-        //Datahandler.instance.updateStringCache();
+        Datahandler.instance.addHLstatsMessages();
+        Datahandler.instance.updateStringCache();
         String token = "NTI5NzAxNTk5NjAyMjc4NDAx.Dw0vDg.7-aMjVWdQMYPl8qVNyvTCPS5F_A";
         DiscordApi api = new DiscordApiBuilder().setToken(token).login().join();
         api.addMessageCreateListener(event -> {
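A closing note on the utility at the end of the new StopwordAnnotator file: getStopWordList builds a Lucene CharArraySet, so when ignoreCase is true the case folding happens inside the set itself, and unmodifiableSet makes the returned view read-only. For example (illustrative only):

    // CharArraySet folds case internally; no manual toLowerCase() is needed.
    CharArraySet set = StopwordAnnotator.getStopWordList("Start,Period", true);
    boolean hit = set.contains("start");  // true, despite the capitalized source list
    // set.add("extra") would throw UnsupportedOperationException on the unmodifiable view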