From 17ef94ef07167277052cbefce7a540461f11a9d9 Mon Sep 17 00:00:00 2001
From: jenzur
Date: Mon, 25 Mar 2019 10:43:54 +0100
Subject: [PATCH] me gusta

---
 .../main/java/FunctionLayer/Datahandler.java  | 29 ++++++++-----------
 .../FunctionLayer/PipelineJMWESingleton.java  |  4 +--
 .../StanfordParser/SentimentAnalyzerTest.java |  3 +-
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java
index 9b889240..66f2f16d 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/Datahandler.java
@@ -114,6 +114,7 @@ public class Datahandler {
             propsSentiment.setProperty("sentiment.model", sentimentModel);
             propsSentiment.setProperty("parse.maxlen", "100");
             propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
+            propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
             pipelineSentiment = new StanfordCoreNLP(propsSentiment);
             tagger = new MaxentTagger(taggerPath);
             cdl.countDown();
@@ -123,6 +124,7 @@ public class Datahandler {
             props.setProperty("parse.maxlen", "100");
             props.setProperty("parse.binaryTrees", "true");
             props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+            props.setProperty("tokenize.options", "untokenizable=firstDelete");
             pipeline = new StanfordCoreNLP(props);
             cdl.countDown();
         }).start();
@@ -352,9 +354,7 @@ public class Datahandler {
         if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
             ConcurrentMap str = MessageResponseHandler.getStr();
             str = cutContent(str, hlStatsMsg);
-            System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
             str = filterContent(str);
-            System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
             str = removeSlacks(str);
             System.out.println("finished removeSlacks \n" + str.size() + "\n");
             str = verifyCalculationFitness(str);
@@ -517,10 +517,10 @@ public class Datahandler {
             if (!str1.isEmpty() && str1.length() > 3) {
                 str1 = str1.trim();
                 if (str1.contains("PM*")) {
-                    str1 = str1.substring(str1.indexOf("PM*") + 5);
+                    str1 = str1.substring(str1.indexOf("PM*") + 3);
                 }
                 if (str1.contains("AM*")) {
-                    str1 = str1.substring(str1.indexOf("AM*") + 5);
+                    str1 = str1.substring(str1.indexOf("AM*") + 3);
                 }
                 for (Character c : str1.toCharArray()) {
                     if (c == '?' || c == '°') {
@@ -710,6 +710,7 @@ public class Datahandler {
             Annotation strAnno2 = new Annotation(strCache);
             pipelineSentiment.annotate(strAnno2);
             pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
+            System.out.println("normal annotating strCache: " + strCache + "\n");
         });
         final ConcurrentMap allStrings;
         if (!stringCache.isEmpty()) {
@@ -719,29 +720,23 @@ public class Datahandler {
         }
         ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
         strmap.values().parallelStream().forEach((str) -> {
-            allStrings.values().parallelStream().forEach((str1) -> {
+            for (String str1 : allStrings.values()) {
                 Callable worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1),
                         jmweAnnotateCachelcl.get(str), jmweAnnotateCachelcl.get(str1), pipelineAnnotateCachelcl.get(str),
                         pipelineAnnotateCachelcl.get(str1), pipelineSentimentAnnotateCachelcl.get(str),
                         pipelineSentimentAnnotateCachelcl.get(str1));
                 futures.put(futures.size() + 1, executor.submit(worker));
-            });
+                System.out.println("futures size in verify calcs: " + futures.size() + "\n");
+            }
         });
         futures.values().parallelStream().forEach((future) -> {
             SimilarityMatrix get;
+            //turning from 20 to 5 might be risky?
             try {
-                get = future.get(20, TimeUnit.SECONDS);
+                get = future.get(5, TimeUnit.SECONDS);
                 String addStr = get.getPrimaryString();
-                boolean presentstr = false;
-                for (String returnstr : returnmap.values()) {
-                    if (returnstr.equals(addStr)) {
-                        presentstr = true;
-                        break;
-                    }
-                }
-                if (!presentstr) {
-                    returnmap.put(returnmap.size() + 1, addStr);
-                }
+                returnmap.put(returnmap.size() + 1, addStr);
+                System.out.println("returnmap adding: " + addStr + "\n");
             } catch (InterruptedException | ExecutionException | TimeoutException ex) {
                 System.out.println("failed verification: " + ex.getMessage() + "\n");
             }
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java b/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java
index c3afd795..e6e0cc52 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/PipelineJMWESingleton.java
@@ -43,9 +43,6 @@ import java.util.concurrent.ConcurrentMap;
 public class PipelineJMWESingleton {
 
     //if not needed to be volatile dont make it, increases time
-    // private volatile InMemoryMWEIndex mweMemoryIndex;
-    // private volatile static MWEIndex mweIndex;
-    // private volatile JMWEAnnotator jmweanno = initializeJMWE();
     public volatile static PipelineJMWESingleton INSTANCE;
     private volatile static int incrementer = 0;
     private static StanfordCoreNLP localNLP = initializeJMWE();
@@ -92,6 +89,7 @@ public class PipelineJMWESingleton {
         Properties propsJMWE;
         propsJMWE = new Properties();
         propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma");
+        propsJMWE.setProperty("tokenize.options", "untokenizable=firstDelete");
         underscoreSpaceReplacement = "-";
         localNLP = new StanfordCoreNLP(propsJMWE);
         System.out.println("finished singleton constructor \n");
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
index 7d826b93..648e7a2f 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java
@@ -97,8 +97,9 @@ public class SentimentAnalyzerTest implements Callable {
         List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
         List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
         DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
+        //noneDelete
         TokenizerFactory ptbTokenizerFactory
-                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
+                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
         tokenizer.setTokenizerFactory(ptbTokenizerFactory);
         for (List sentence : tokenizer) {
             taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
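
The recurring change in this patch is how untokenizable input is handled: the CoreNLP pipelines gain tokenize.options=untokenizable=firstDelete, and the direct PTBTokenizer.factory(...) call in SentimentAnalyzerTest switches from noneKeep to firstDelete. As a rough standalone reference (not part of the patch), the sketch below exercises the same factory call in isolation; the class name and sample string are illustrative only, and the behavior noted in the comments (warn once, drop the untokenizable characters) is the expected effect of firstDelete rather than something the patch itself documents.

// Minimal sketch, assuming stanford-corenlp is on the classpath.
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;

import java.io.StringReader;

public class UntokenizableOptionSketch {
    public static void main(String[] args) {
        // Illustrative input; U+FFFF is a noncharacter the PTB tokenizer
        // typically treats as untokenizable.
        String text = "hello \uFFFF world";
        // Same factory call as in SentimentAnalyzerTest: with firstDelete the
        // tokenizer warns about the first untokenizable character and drops
        // them, instead of keeping them as tokens (the old noneKeep setting).
        TokenizerFactory<CoreLabel> factory
                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
        Tokenizer<CoreLabel> tokenizer = factory.getTokenizer(new StringReader(text));
        while (tokenizer.hasNext()) {
            System.out.println(tokenizer.next().word());
        }
    }
}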