diff --git a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java index f3ba4996..9b889240 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java @@ -27,6 +27,7 @@ import edu.stanford.nlp.trees.TreebankLanguagePack; import java.io.IOException; import java.io.StringReader; import java.sql.SQLException; +import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; @@ -38,16 +39,12 @@ import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.ForkJoinTask; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; /** * @@ -157,9 +154,9 @@ public class Datahandler { } public void instantiateExecutor() { - this.executor = new ForkJoinPool(Runtime.getRuntime().availableProcessors(), + this.executor = new ForkJoinPool(25, ForkJoinPool.defaultForkJoinWorkerThreadFactory, - null, false); //true + null, false); } public static GrammaticalStructureFactory getGsf() { @@ -208,26 +205,23 @@ public class Datahandler { public void addHLstatsMessages() { ConcurrentMap hlStatsMessages = DataMapper.getHLstatsMessages(); ConcurrentMap strCacheLocal = stringCache; - int hardcap = 7500; - int counter = 0; - for (String str : hlStatsMessages.values()) { + //might want a hardcap + hlStatsMessages.values().parallelStream().forEach(str -> { if (!str.startsWith("!")) { - boolean present = false; - for (String str1 : strCacheLocal.values()) { - if (str.equals(str1)) { - present = true; - break; - } - } - if (!present) { - //System.out.println("addHLstatsMessages adding str: " + str + "\n"); + String orElse = strCacheLocal.values().parallelStream().filter(e -> e.equals(str)).findAny().orElse(null); + if (orElse == null) { MessageResponseHandler.getMessage(str); } } - if (counter >= hardcap) { - break; + }); + } + + public void instantiateAnnotationMapJMWE() { + if (!stringCache.isEmpty()) { + ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(stringCache.values()); + for (Entry entries : jmweAnnotation.entrySet()) { + jmweAnnotationCache.put(entries.getKey(), entries.getValue()); } - counter++; } } @@ -238,41 +232,11 @@ public class Datahandler { Annotation strAnno = new Annotation(str); pipeline.annotate(strAnno); pipelineAnnotationCache.put(str, strAnno); - jmweAnnotationCache.put(str, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str)); Annotation strAnno2 = new Annotation(str); pipelineSentiment.annotate(strAnno2); pipelineSentimentAnnotationCache.put(str, strAnno2); }); } - System.out.println("FINISHED ALL ANNOTATIONS"); - System.out.println("FINISHED ALL ANNOTATIONS"); - System.out.println("FINISHED ALL ANNOTATIONS"); - - /* - int poolsize = Runtime.getRuntime().availableProcessors(); - CountDownLatch cdl = new CountDownLatch(poolsize + 1); - int rangeAdder = (stringCache.values().size() / poolsize); - for (int i = 0; i < poolsize; i++) { - final int ij = i; - new Thread(() -> { - int counter = rangeAdder * ij; - for (int j = 0; j < rangeAdder; j++) { - String str = stringCache.getOrDefault(counter + j, null); - if (str != null) { - System.out.println("adding str jmwe annotation: " + str + "\n"); - Annotation strAnno = new Annotation(str); - pipelineJMWE.annotate(strAnno); - jmweAnnotationCache.put(str, strAnno); - } - } - cdl.countDown(); - }).start(); - } - try { - cdl.await(); - } catch (InterruptedException ex) { - System.out.println("cdl await interrupted: " + ex.getLocalizedMessage() + "\n"); - }*/ } public synchronized void checkIfUpdateMatrixes() { @@ -388,8 +352,11 @@ public class Datahandler { if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) { ConcurrentMap str = MessageResponseHandler.getStr(); str = cutContent(str, hlStatsMsg); + System.out.println("finished cutcontent \nstr size: " + str.size() + "\n"); str = filterContent(str); + System.out.println("finished filterContent \nstr size: " + str.size() + "\n"); str = removeSlacks(str); + System.out.println("finished removeSlacks \n" + str.size() + "\n"); str = verifyCalculationFitness(str); System.out.println("Check if updateString str size POST: " + str.size() + "\n"); try { @@ -451,19 +418,28 @@ public class Datahandler { } } System.out.println("none within 8 range"); + Annotation strAnno = new Annotation(str); + pipeline.annotate(strAnno); + Annotation strAnno2 = new Annotation(str); + pipelineSentiment.annotate(strAnno2); + List notactualList = new ArrayList(); + notactualList.add(str); + ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList); + final Annotation strAnno3 = jmweAnnotation.values().iterator().next(); ConcurrentMap strCache = stringCache; ConcurrentMap> futureslocal = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap futurereturn = new MapMaker().concurrencyLevel(2).makeMap(); - for (String str1 : strCache.values()) { - if (!str.equals(str1)) { - SimilarityMatrix SMX = new SimilarityMatrix(str, str1); - Callable worker = new SentimentAnalyzerTest(str, str1, SMX, - jmweAnnotationCache.get(str), jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(str), - pipelineAnnotationCache.get(str1), pipelineSentimentAnnotationCache.get(str), + String strF = str; + strCache.values().parallelStream().forEach((str1) -> { + if (!strF.equals(str1)) { + SimilarityMatrix SMX = new SimilarityMatrix(strF, str1); + Callable worker = new SentimentAnalyzerTest(strF, str1, SMX, + strAnno3, jmweAnnotationCache.get(str1), strAnno, + pipelineAnnotationCache.get(str1), strAnno2, pipelineSentimentAnnotationCache.get(str1)); futureslocal.put(futureslocal.size() + 1, executor.submit(worker)); } - } + }); int index = 0; futureslocal.values().parallelStream().forEach((future) -> { SimilarityMatrix SMX = new SimilarityMatrix("", ""); @@ -476,8 +452,10 @@ public class Datahandler { }); for (SimilarityMatrix SMX : futurereturn.values()) { double distance = SMX.getDistance(); + /* System.out.println("index: " + index + "\nfutures size: " + futureslocal.values().size() + "\nScore: " + SMX.getDistance() + "\nSecondary: " + SMX.getSecondaryString() + "\nPrimary: " + SMX.getPrimaryString() + "\n"); + */ if (distance > Score) { Score = distance; SMXreturn = SMX; @@ -490,18 +468,20 @@ public class Datahandler { } public String mostSimilar(String toBeCompared, ConcurrentMap concurrentStrings) { - int minDistance = 8; + int minDistance = 7; String similar = ""; - ConcurrentMap>> futures = new MapMaker().concurrencyLevel(2).makeMap(); + List>> futures = new ArrayList(); ConcurrentMap futuresreturnvalues = new MapMaker().concurrencyLevel(2).makeMap(); concurrentStrings.values().parallelStream().forEach((str) -> { - Callable> worker = new LevenshteinDistance(toBeCompared, str); - futures.put(futures.size() + 1, executor.submit(worker)); + Callable> worker = new LevenshteinDistance(toBeCompared, str); + futures.add(executor.submit(worker)); }); - futures.values().parallelStream().forEach((future) -> { + futures.parallelStream().forEach((future) -> { try { - Entry get = future.get(); - futuresreturnvalues.put(get.getKey(), get.getValue()); + ConcurrentMap get = future.get(); + get.entrySet().forEach((str) -> { + futuresreturnvalues.put(str.getKey(), str.getValue()); + }); } catch (NullPointerException | InterruptedException | ExecutionException ex) { System.out.println("failed future\nex: " + ex.getMessage() + "\n"); } @@ -519,7 +499,7 @@ public class Datahandler { public static ConcurrentMap cutContent(ConcurrentMap str, boolean hlStatsMsg) { ConcurrentMap returnlist = new MapMaker().concurrencyLevel(2).makeMap(); - for (String str1 : str.values()) { + str.values().parallelStream().forEach(str1 -> { int iend = str1.indexOf("content: "); if (iend != -1) { String trs = str1.substring(iend + 9); @@ -527,182 +507,190 @@ public class Datahandler { } else if (hlStatsMsg) { returnlist.put(returnlist.size() + 1, str1); } - } + }); return returnlist; } public static ConcurrentMap filterContent(ConcurrentMap str) { ConcurrentMap strlistreturn = new MapMaker().concurrencyLevel(2).makeMap(); - for (String str1 : str.values()) { - if (str1.isEmpty() || str1.length() < 3) { - continue; - } - str1 = str1.trim(); - if (str1.contains("PM*")) { - str1 = str1.substring(str1.indexOf("PM*") + 5); - } - if (str1.contains("AM*")) { - str1 = str1.substring(str1.indexOf("AM*") + 5); - } - for (Character c : str1.toCharArray()) { - if (c == '?' || c == '°') { - str1 = str1.replace("?", " <:wlenny:514861023002624001> "); - str1 = str1.replace("°", " <:wlenny:514861023002624001> "); + str.values().parallelStream().forEach(str1 -> { + if (!str1.isEmpty() && str1.length() > 3) { + str1 = str1.trim(); + if (str1.contains("PM*")) { + str1 = str1.substring(str1.indexOf("PM*") + 5); + } + if (str1.contains("AM*")) { + str1 = str1.substring(str1.indexOf("AM*") + 5); + } + for (Character c : str1.toCharArray()) { + if (c == '?' || c == '°') { + str1 = str1.replace("?", " <:wlenny:514861023002624001> "); + str1 = str1.replace("°", " <:wlenny:514861023002624001> "); + } + } + if (str1.contains("(Counter-Terrorist)")) { + str1 = str1.replace("(Counter-Terrorist)", " "); + } + if (str1.contains("(Terrorist)")) { + str1 = str1.replace("(Terrorist)", " "); + } + if (str1.contains("(Spectator)")) { + str1 = str1.replace("(Spectator)", " "); + } + if (str1.contains("*DEAD*")) { + str1 = str1.replace("*DEAD*", " "); + } + if (str1.contains("{red}")) { + str1 = str1.replace("{red}", " "); + } + if (str1.contains("{orange}")) { + str1 = str1.replace("{orange}", " "); + } + if (str1.contains("{yellow}")) { + str1 = str1.replace("{yellow}", " "); + } + if (str1.contains("{green}")) { + str1 = str1.replace("{green}", " "); + } + if (str1.contains("{lightblue}")) { + str1 = str1.replace("{lightblue}", " "); + } + if (str1.contains("{blue}")) { + str1 = str1.replace("{blue}", " "); + } + if (str1.contains("{purple}")) { + str1 = str1.replace("{purple}", " "); + } + if (str1.contains("{white}")) { + str1 = str1.replace("{white}", " "); + } + if (str1.contains("{fullblue}")) { + str1 = str1.replace("{fullblue}", " "); + } + if (str1.contains("{cyan}")) { + str1 = str1.replace("{cyan}", " "); + } + if (str1.contains("{lime}")) { + str1 = str1.replace("{lime}", " "); + } + if (str1.contains("{deeppink}")) { + str1 = str1.replace("{deeppink}", " "); + } + if (str1.contains("{slategray}")) { + str1 = str1.replace("{slategray}", " "); + } + if (str1.contains("{dodgerblue}")) { + str1 = str1.replace("{dodgerblue}", " "); + } + if (str1.contains("{black}")) { + str1 = str1.replace("{black}", " "); + } + if (str1.contains("{orangered}")) { + str1 = str1.replace("{orangered}", " "); + } + if (str1.contains("{darkorchid}")) { + str1 = str1.replace("{darkorchid}", " "); + } + if (str1.contains("{pink}")) { + str1 = str1.replace("{pink}", " "); + } + if (str1.contains("{lightyellow}")) { + str1 = str1.replace("{lightyellow}", " "); + } + if (str1.contains("{chocolate}")) { + str1 = str1.replace("{chocolate}", " "); + } + if (str1.contains("{beige}")) { + str1 = str1.replace("{beige}", " "); + } + if (str1.contains("{azure}")) { + str1 = str1.replace("{azure}", " "); + } + if (str1.contains("{yellowgreen}")) { + str1 = str1.replace("{yellowgreen}", " "); + } + str1 = str1.trim(); + if (str1.length() > 2 && (!str1.startsWith("!"))) { + strlistreturn.put(strlistreturn.size() + 1, str1); } } - if (str1.contains("(Counter-Terrorist)")) { - str1 = str1.replace("(Counter-Terrorist)", " "); - } - if (str1.contains("(Terrorist)")) { - str1 = str1.replace("(Terrorist)", " "); - } - if (str1.contains("(Spectator)")) { - str1 = str1.replace("(Spectator)", " "); - } - if (str1.contains("*DEAD*")) { - str1 = str1.replace("*DEAD*", " "); - } - if (str1.contains("{red}")) { - str1 = str1.replace("{red}", " "); - } - if (str1.contains("{orange}")) { - str1 = str1.replace("{orange}", " "); - } - if (str1.contains("{yellow}")) { - str1 = str1.replace("{yellow}", " "); - } - if (str1.contains("{green}")) { - str1 = str1.replace("{green}", " "); - } - if (str1.contains("{lightblue}")) { - str1 = str1.replace("{lightblue}", " "); - } - if (str1.contains("{blue}")) { - str1 = str1.replace("{blue}", " "); - } - if (str1.contains("{purple}")) { - str1 = str1.replace("{purple}", " "); - } - if (str1.contains("{white}")) { - str1 = str1.replace("{white}", " "); - } - if (str1.contains("{fullblue}")) { - str1 = str1.replace("{fullblue}", " "); - } - if (str1.contains("{cyan}")) { - str1 = str1.replace("{cyan}", " "); - } - if (str1.contains("{lime}")) { - str1 = str1.replace("{lime}", " "); - } - if (str1.contains("{deeppink}")) { - str1 = str1.replace("{deeppink}", " "); - } - if (str1.contains("{slategray}")) { - str1 = str1.replace("{slategray}", " "); - } - if (str1.contains("{dodgerblue}")) { - str1 = str1.replace("{dodgerblue}", " "); - } - if (str1.contains("{black}")) { - str1 = str1.replace("{black}", " "); - } - if (str1.contains("{orangered}")) { - str1 = str1.replace("{orangered}", " "); - } - if (str1.contains("{darkorchid}")) { - str1 = str1.replace("{darkorchid}", " "); - } - if (str1.contains("{pink}")) { - str1 = str1.replace("{pink}", " "); - } - if (str1.contains("{lightyellow}")) { - str1 = str1.replace("{lightyellow}", " "); - } - if (str1.contains("{chocolate}")) { - str1 = str1.replace("{chocolate}", " "); - } - if (str1.contains("{beige}")) { - str1 = str1.replace("{beige}", " "); - } - if (str1.contains("{azure}")) { - str1 = str1.replace("{azure}", " "); - } - if (str1.contains("{yellowgreen}")) { - str1 = str1.replace("{yellowgreen}", " "); - } - str1 = str1.trim(); - if (str1.length() > 2 && (!str1.startsWith("!"))) { - strlistreturn.put(strlistreturn.size() + 1, str1); - } - } + }); return strlistreturn; } private ConcurrentMap removeSlacks(ConcurrentMap str) { ShiftReduceParser model = getModel(); MaxentTagger tagger = getTagger(); - List taggedWords; ConcurrentMap strreturn = new MapMaker().concurrencyLevel(2).makeMap(); - for (String str1 : str.values()) { - int counter = 0; + str.values().parallelStream().forEach(str1 -> { ConcurrentMap TGWList = new MapMaker().concurrencyLevel(2).makeMap(); - DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); - for (List sentence : tokenizer) { - List tagged1 = tagger.tagSentence(sentence); - Tree tree = model.apply(tagged1); - taggedWords = tree.taggedYield(); - for (TaggedWord TGW : taggedWords) { - if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) { - TGWList.put(TGWList.size() + 1, TGW.tag()); - counter++; - } - if (counter > 3) { - int addCounter = 0; - ConcurrentMap wordList = new MapMaker().concurrencyLevel(2).makeMap(); - for (Word lab : tree.yieldWords()) { - if (lab != null && lab.word() != null) { - //System.out.println("lab: " + lab + " \n"); - if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) { - wordList.put(wordList.size() + 1, lab); - addCounter++; - } - } + DocumentPreprocessor tokenizer = null; + try { + tokenizer = new DocumentPreprocessor(new StringReader(str1)); + } catch (Exception ex) { + System.out.println("failed tokenizer removeslacks: " + ex.getLocalizedMessage() + "\n"); + tokenizer = null; + } + if (tokenizer != null) { + for (List sentence : tokenizer) { + int counter = 0; + List taggedWords; + List tagged1 = tagger.tagSentence(sentence); + Tree tree = model.apply(tagged1); + taggedWords = tree.taggedYield(); + for (TaggedWord TGW : taggedWords) { + if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) { + TGWList.put(TGWList.size() + 1, TGW.tag()); + counter++; } - if (addCounter > 3) { - addCounter = 0; - ConcurrentMap HWlist = new MapMaker().concurrencyLevel(2).makeMap(); - for (HasWord HW : tree.yieldHasWord()) { - if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) { - addCounter++; - HWlist.put(HWlist.size() + 1, HW); + if (counter > 3) { + int addCounter = 0; + ConcurrentMap wordList = new MapMaker().concurrencyLevel(2).makeMap(); + for (Word lab : tree.yieldWords()) { + if (lab != null && lab.word() != null) { + //System.out.println("lab: " + lab + " \n"); + if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) { + wordList.put(wordList.size() + 1, lab); + addCounter++; + } } } if (addCounter > 3) { - boolean tooclosematch = false; - Collection values = stringCache.values(); - for (String strVals : values) { - LevenshteinDistance leven = new LevenshteinDistance(strVals, str1); - double Distance = leven.computeLevenshteinDistance(); - int maxpermittedDistance = 2; - if (Distance < maxpermittedDistance) { - tooclosematch = true; - break; + addCounter = 0; + ConcurrentMap HWlist = new MapMaker().concurrencyLevel(2).makeMap(); + for (HasWord HW : tree.yieldHasWord()) { + if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) { + addCounter++; + HWlist.put(HWlist.size() + 1, HW); } } - if (!tooclosematch) { - strreturn.put(strreturn.size() + 1, str1); + if (addCounter > 3) { + boolean tooclosematch = false; + Collection values = stringCache.values(); + for (String strVals : values) { + LevenshteinDistance leven = new LevenshteinDistance(strVals, str1); + double Distance = leven.computeLevenshteinDistance(); + int maxpermittedDistance = 2; + if (Distance < maxpermittedDistance) { + tooclosematch = true; + break; + } + } + if (!tooclosematch) { + strreturn.put(strreturn.size() + 1, str1); + System.out.println("adding strreturn str1: " + str1 + "\n"); + } } } + break; } + } + if (counter > 3) { break; } } - if (counter > 3) { - break; - } } - } + }); return strreturn; } @@ -711,11 +699,14 @@ public class Datahandler { ConcurrentMap pipelineAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap pipelineSentimentAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap(); ConcurrentMap jmweAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap(); + ConcurrentMap jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strmap.values()); + for (Entry jmweitr : jmweAnnotation.entrySet()) { + jmweAnnotateCachelcl.put(jmweitr.getKey(), jmweitr.getValue()); + } strmap.values().parallelStream().forEach(strCache -> { Annotation strAnno = new Annotation(strCache); pipeline.annotate(strAnno); pipelineAnnotateCachelcl.put(strCache, strAnno); - jmweAnnotateCachelcl.put(strCache, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strCache)); Annotation strAnno2 = new Annotation(strCache); pipelineSentiment.annotate(strAnno2); pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2); @@ -736,7 +727,6 @@ public class Datahandler { futures.put(futures.size() + 1, executor.submit(worker)); }); }); - System.out.println("verifycalc futures size: " + futures.size() + "\n"); futures.values().parallelStream().forEach((future) -> { SimilarityMatrix get; try { @@ -751,19 +741,21 @@ public class Datahandler { } if (!presentstr) { returnmap.put(returnmap.size() + 1, addStr); - System.out.println("adding addStr: " + addStr + "\n"); } } catch (InterruptedException | ExecutionException | TimeoutException ex) { System.out.println("failed verification: " + ex.getMessage() + "\n"); } }); + jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(returnmap.values()); + for (Entry jmweitr : jmweAnnotation.entrySet()) { + jmweAnnotationCache.put(jmweitr.getKey(), jmweitr.getValue()); + } returnmap.values().parallelStream().forEach(strCache -> { stringCache.put(stringCache.size() + 1, strCache); System.out.println("str annotation pipeline pipelinesentiment: " + strCache + "\n"); Annotation strAnno = new Annotation(strCache); pipeline.annotate(strAnno); pipelineAnnotationCache.put(strCache, strAnno); - jmweAnnotationCache.put(strCache, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strCache)); Annotation strAnno2 = new Annotation(strCache); pipelineSentiment.annotate(strAnno2); pipelineSentimentAnnotationCache.put(strCache, strAnno2); diff --git a/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java b/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java index a83d19f4..870b4d2e 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java @@ -5,19 +5,21 @@ */ package FunctionLayer; +import com.google.common.collect.MapMaker; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentMap; /** * * @author install1 */ -public class LevenshteinDistance implements Callable> { +public class LevenshteinDistance implements Callable> { private CharSequence lhs; private CharSequence rhs; - private Entry distanceEntry; + private ConcurrentMap distanceEntry = new MapMaker().concurrencyLevel(2).makeMap(); private static int minimum(int a, int b, int c) { return Math.min(Math.min(a, b), c); @@ -28,7 +30,7 @@ public class LevenshteinDistance implements Callable> this.rhs = rhs; } - public int computeLevenshteinDistance() { + public double computeLevenshteinDistance() { int[][] distance = new int[lhs.length() + 1][rhs.length() + 1]; for (int i = 0; i <= lhs.length(); i++) { distance[i][0] = i; @@ -48,29 +50,23 @@ public class LevenshteinDistance implements Callable> } @Override - public Entry call() { - try { - int[][] distance = new int[lhs.length() + 1][rhs.length() + 1]; - - for (int i = 0; i <= lhs.length(); i++) { - distance[i][0] = i; - } - for (int j = 1; j <= rhs.length(); j++) { - distance[0][j] = j; - } - for (int i = 1; i <= lhs.length(); i++) { - for (int j = 1; j <= rhs.length(); j++) { - distance[i][j] = minimum( - distance[i - 1][j] + 1, - distance[i][j - 1] + 1, - distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1)); - } - } - distanceEntry.setValue(distance[lhs.length()][rhs.length()]); - } catch (Exception ex) { - System.out.println("ex msg: " + ex.getMessage() + "\n"); - return null; + public ConcurrentMap call() { + int[][] distance = new int[lhs.length() + 1][rhs.length() + 1]; + for (int i = 0; i <= lhs.length(); i++) { + distance[i][0] = i; } + for (int j = 1; j <= rhs.length(); j++) { + distance[0][j] = j; + } + for (int i = 1; i <= lhs.length(); i++) { + for (int j = 1; j <= rhs.length(); j++) { + distance[i][j] = minimum( + distance[i - 1][j] + 1, + distance[i][j - 1] + 1, + distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1)); + } + } + distanceEntry.put(lhs.toString(), distance[lhs.length()][rhs.length()]); return distanceEntry; } } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java b/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java index bd7962a5..c3afd795 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java @@ -5,18 +5,35 @@ */ package FunctionLayer; -import edu.mit.jmwe.data.IMWEDescID; -import edu.mit.jmwe.data.IRootMWEDesc; +import com.google.common.collect.MapMaker; +import edu.mit.jmwe.data.IMWE; +import edu.mit.jmwe.data.IToken; +import edu.mit.jmwe.data.Token; +import edu.mit.jmwe.detect.CompositeDetector; +import edu.mit.jmwe.detect.Consecutive; +import edu.mit.jmwe.detect.Exhaustive; +import edu.mit.jmwe.detect.IMWEDetector; +import edu.mit.jmwe.detect.InflectionPattern; +import edu.mit.jmwe.detect.MoreFrequentAsMWE; +import edu.mit.jmwe.detect.ProperNouns; import edu.mit.jmwe.index.IMWEIndex; -import edu.mit.jmwe.index.InMemoryMWEIndex; +import edu.mit.jmwe.index.MWEIndex; +import edu.stanford.nlp.ling.CoreAnnotation; +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.CoreLabel.GenericAnnotation; +import edu.stanford.nlp.ling.JMWEAnnotation; import edu.stanford.nlp.pipeline.Annotation; -import edu.stanford.nlp.pipeline.JMWEAnnotator; import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.util.CoreMap; +import java.io.File; import java.io.IOException; -import java.util.Map; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import java.util.concurrent.ConcurrentMap; /** * @@ -25,35 +42,110 @@ import java.util.logging.Logger; //maybe not public? public class PipelineJMWESingleton { - public final static PipelineJMWESingleton INSTANCE = new PipelineJMWESingleton(); - private static Properties propsJMWE; - private volatile static StanfordCoreNLP pipelineJMWE = initializeJMWE(); - //super important synchronization lock - public synchronized final static Annotation getJMWEAnnotation(String str) { - Annotation annoStr = new Annotation(str); - pipelineJMWE.annotate(annoStr); - return annoStr; - } - - public final static StanfordCoreNLP initializeJMWE() { - String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data - String jmweIndexDataLocalTest = "E:/java8/Projects/mweindex_wordnet3.0_semcor1.6.data"; - propsJMWE = new Properties(); - propsJMWE.setProperty("customAnnotatorClass.jmwe", "edu.stanford.nlp.pipeline.JMWEAnnotator"); - propsJMWE.setProperty("customAnnotatorClass.jmwe.verbose", "false"); - propsJMWE.setProperty("customAnnotatorClass.jmwe.underscoreReplacement", "-"); - propsJMWE.setProperty("customAnnotatorClass.jmwe.indexData", jmweIndexData); //jmweIndexDataLocalTest jmweIndexData - propsJMWE.setProperty("customAnnotatorClass.jmwe.detector", "Exhaustive"); - //"Consecutive", "Exhaustive", "ProperNouns", "Complex" and "CompositeConsecutiveProperNouns" - propsJMWE.setProperty("annotators", "tokenize, ssplit, pos, lemma, jmwe"); - System.out.println("finished singleton constructor \n"); - return new StanfordCoreNLP(propsJMWE); - } + //if not needed to be volatile dont make it, increases time + // private volatile InMemoryMWEIndex mweMemoryIndex; + // private volatile static MWEIndex mweIndex; + // private volatile JMWEAnnotator jmweanno = initializeJMWE(); + public volatile static PipelineJMWESingleton INSTANCE; + private volatile static int incrementer = 0; + private static StanfordCoreNLP localNLP = initializeJMWE(); + private static String underscoreSpaceReplacement; private PipelineJMWESingleton() { } - public final static PipelineJMWESingleton getINSTANCE() { - return INSTANCE; + public static void getINSTANCE() { + INSTANCE = new PipelineJMWESingleton(); } + + public final ConcurrentMap getJMWEAnnotation(Collection strvalues) { + boolean verbose = false; + IMWEIndex index; + String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data + String jmweIndexDataLocalTest = "E:/java8/Projects/mweindex_wordnet3.0_semcor1.6.data"; + File indexFile = new File((String) jmweIndexData); + index = new MWEIndex(indexFile); + String detectorName = "Exhaustive"; + try { + index.open(); + } catch (IOException e) { + throw new RuntimeException("unable to open IMWEIndex index"); + } + IMWEDetector detector = getDetector(index, detectorName); + ConcurrentMap returnAnnotations = new MapMaker().concurrencyLevel(2).makeMap(); + strvalues.parallelStream().forEach(str -> { + Annotation annoStr = new Annotation(str); + localNLP.annotate(annoStr); + for (CoreMap sentence : annoStr.get(CoreAnnotations.SentencesAnnotation.class)) { + List> mwes = getjMWEInSentence(sentence, index, detector, verbose); + sentence.set(JMWEAnnotation.class, mwes); + } + returnAnnotations.put(str, annoStr); + System.out.println("incrementer: " + incrementer + "\n"); + incrementer++; + }); + index.close(); + return returnAnnotations; + } + + public final static StanfordCoreNLP initializeJMWE() { + Properties propsJMWE; + propsJMWE = new Properties(); + propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma"); + underscoreSpaceReplacement = "-"; + localNLP = new StanfordCoreNLP(propsJMWE); + System.out.println("finished singleton constructor \n"); + return localNLP; + } + + public IMWEDetector getDetector(IMWEIndex index, String detector) { + IMWEDetector iMWEdetector = null; + switch (detector) { + case "Consecutive": + iMWEdetector = new Consecutive(index); + break; + case "Exhaustive": + iMWEdetector = new Exhaustive(index); + break; + case "ProperNouns": + iMWEdetector = ProperNouns.getInstance(); + break; + case "Complex": + iMWEdetector = new CompositeDetector(ProperNouns.getInstance(), + new MoreFrequentAsMWE(new InflectionPattern(new Consecutive(index)))); + break; + case "CompositeConsecutiveProperNouns": + iMWEdetector = new CompositeDetector(new Consecutive(index), ProperNouns.getInstance()); + break; + default: + throw new IllegalArgumentException("Invalid detector argument " + detector + + ", only \"Consecutive\", \"Exhaustive\", \"ProperNouns\", \"Complex\" or \"CompositeConsecutiveProperNouns\" are supported."); + } + return iMWEdetector; + } + + public List> getjMWEInSentence(CoreMap sentence, IMWEIndex index, IMWEDetector detector, + boolean verbose) { + List tokens = getITokens(sentence.get(CoreAnnotations.TokensAnnotation.class)); + List> mwes = detector.detect(tokens); + if (verbose) { + for (IMWE token : mwes) { + System.out.println("IMWE: " + token); + } + } + return mwes; + } + + public List getITokens(List tokens) { + return getITokens(tokens, underscoreSpaceReplacement); + } + + public List getITokens(List tokens, String underscoreSpaceReplacement) { + List sentence = new ArrayList(); + for (CoreLabel token : tokens) { + sentence.add(new Token(token.originalText().replaceAll("_", underscoreSpaceReplacement).replaceAll(" ", underscoreSpaceReplacement), token.get(CoreAnnotations.PartOfSpeechAnnotation.class), token.lemma().replaceAll("_", underscoreSpaceReplacement).replaceAll(" ", underscoreSpaceReplacement))); + } + return sentence; + } + } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java index a67031e2..7d826b93 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java @@ -18,7 +18,10 @@ import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import edu.stanford.nlp.process.CoreLabelTokenFactory; import edu.stanford.nlp.process.DocumentPreprocessor; +import edu.stanford.nlp.process.PTBTokenizer; +import edu.stanford.nlp.process.TokenizerFactory; import edu.stanford.nlp.sentiment.SentimentCoreAnnotations; import edu.stanford.nlp.sequences.DocumentReaderAndWriter; import edu.stanford.nlp.tagger.maxent.MaxentTagger; @@ -94,10 +97,14 @@ public class SentimentAnalyzerTest implements Callable { List> taggedwordlist1 = new ArrayList(); List> taggedwordlist2 = new ArrayList(); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); + TokenizerFactory ptbTokenizerFactory + = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep"); + tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } tokenizer = new DocumentPreprocessor(new StringReader(str)); + tokenizer.setTokenizerFactory(ptbTokenizerFactory); for (List sentence : tokenizer) { taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } @@ -192,7 +199,7 @@ public class SentimentAnalyzerTest implements Callable { } } } catch (Exception ex) { - System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage()+ "\n"); + System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n"); } sentenceConstituencyParseList.clear(); ConcurrentMap simpleSMXlist = new MapMaker().concurrencyLevel(2).makeMap(); @@ -422,7 +429,7 @@ public class SentimentAnalyzerTest implements Callable { score -= tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500; } LevenshteinDistance leven = new LevenshteinDistance(str, str1); - int SentenceScoreDiff = leven.computeLevenshteinDistance(); + double SentenceScoreDiff = leven.computeLevenshteinDistance(); SentenceScoreDiff *= 15; score -= SentenceScoreDiff; } catch (Exception ex) { diff --git a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java index 9d0dc471..dba53629 100644 --- a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java +++ b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java @@ -21,6 +21,7 @@ import FunctionLayer.PipelineJMWESingleton; import java.io.IOException; import java.sql.SQLException; import java.util.List; +import java.util.concurrent.CountDownLatch; import java.util.logging.Level; import java.util.logging.Logger; import org.javacord.api.DiscordApi; @@ -34,18 +35,19 @@ import org.javacord.api.entity.user.User; public class DiscordHandler { public static void main(String[] args) { - new Thread(() -> { - try { - Datahandler.instance.initiateMYSQL(); - PipelineJMWESingleton.getINSTANCE(); - System.out.println("finished initiating MYSQL"); - } catch (SQLException | IOException ex) { - Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex); - } - }).start(); - Datahandler.instance.shiftReduceParserInitiate(); + System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "25"); + try { + Datahandler.instance.initiateMYSQL(); + System.out.println("finished initiating MYSQL"); + } catch (SQLException | IOException ex) { + Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex); + } + PipelineJMWESingleton.getINSTANCE(); Datahandler.instance.instantiateExecutor(); + Datahandler.instance.instantiateAnnotationMapJMWE(); + Datahandler.instance.shiftReduceParserInitiate(); Datahandler.instance.instantiateAnnotationMap(); + System.out.println("FINISHED ALL ANNOTATIONS"); Datahandler.instance.addHLstatsMessages(); Datahandler.instance.updateStringCache(); //order matters