jenzur 2019-03-25 10:43:54 +01:00
parent e07a9d7601
commit 17ef94ef07
3 changed files with 15 additions and 21 deletions

View File

@@ -114,6 +114,7 @@ public class Datahandler {
     propsSentiment.setProperty("sentiment.model", sentimentModel);
     propsSentiment.setProperty("parse.maxlen", "100");
     propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
+    propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
     pipelineSentiment = new StanfordCoreNLP(propsSentiment);
     tagger = new MaxentTagger(taggerPath);
     cdl.countDown();
@@ -123,6 +124,7 @@ public class Datahandler {
     props.setProperty("parse.maxlen", "100");
     props.setProperty("parse.binaryTrees", "true");
     props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+    props.setProperty("tokenize.options", "untokenizable=firstDelete");
     pipeline = new StanfordCoreNLP(props);
     cdl.countDown();
 }).start();
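
All three pipelines touched by this commit (the two above and the JMWE pipeline in PipelineJMWESingleton below) gain the same tokenize.options setting. untokenizable=firstDelete makes PTBTokenizer delete characters it cannot tokenize and log a warning only for the first occurrence, rather than the default noneDelete, which deletes them silently. A minimal sketch of the option in isolation; the demo class and sample string are made up, not code from this repository:

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.Properties;

public class UntokenizableDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit");
        // firstDelete: drop untokenizable characters, warn only on the first one
        props.setProperty("tokenize.options", "untokenizable=firstDelete");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // the NUL character is untokenizable; with firstDelete it is removed
        // instead of leaking into the token stream
        Annotation doc = new Annotation("hello \u0000 world");
        pipeline.annotate(doc);
    }
}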
@@ -352,9 +354,7 @@ public class Datahandler {
 if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
     ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
     str = cutContent(str, hlStatsMsg);
-    System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
     str = filterContent(str);
-    System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
     str = removeSlacks(str);
     System.out.println("finished removeSlacks \n" + str.size() + "\n");
     str = verifyCalculationFitness(str);
@@ -517,10 +517,10 @@ public class Datahandler {
 if (!str1.isEmpty() && str1.length() > 3) {
     str1 = str1.trim();
     if (str1.contains("PM*")) {
-        str1 = str1.substring(str1.indexOf("PM*") + 5);
+        str1 = str1.substring(str1.indexOf("PM*") + 3);
     }
     if (str1.contains("AM*")) {
-        str1 = str1.substring(str1.indexOf("AM*") + 5);
+        str1 = str1.substring(str1.indexOf("AM*") + 3);
     }
     for (Character c : str1.toCharArray()) {
         if (c == '?' || c == '°') {
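
The marker "PM*" is three characters long, so indexOf("PM*") + 3 lands exactly on the first character after the marker; the old + 5 also discarded the two characters that followed it. A worked example with a made-up input:

public class MarkerOffsetDemo {
    public static void main(String[] args) {
        String s = "10:30 PM*hello"; // hypothetical input
        // indexOf("PM*") returns 6, so + 3 starts right after the marker
        System.out.println(s.substring(s.indexOf("PM*") + 3)); // prints "hello"
        // the old + 5 would print "llo", silently dropping "he"
    }
}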
@@ -710,6 +710,7 @@ public class Datahandler {
     Annotation strAnno2 = new Annotation(strCache);
     pipelineSentiment.annotate(strAnno2);
     pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
+    System.out.println("normal annotating strCache: " + strCache + "\n");
 });
 final ConcurrentMap<Integer, String> allStrings;
 if (!stringCache.isEmpty()) {
@@ -719,29 +720,23 @@ public class Datahandler {
 }
 ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
 strmap.values().parallelStream().forEach((str) -> {
-    allStrings.values().parallelStream().forEach((str1) -> {
+    for (String str1 : allStrings.values()) {
         Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1),
                 jmweAnnotateCachelcl.get(str), jmweAnnotateCachelcl.get(str1), pipelineAnnotateCachelcl.get(str),
                 pipelineAnnotateCachelcl.get(str1), pipelineSentimentAnnotateCachelcl.get(str),
                 pipelineSentimentAnnotateCachelcl.get(str1));
         futures.put(futures.size() + 1, executor.submit(worker));
-    });
+        System.out.println("futures size in verify calcs: " + futures.size() + "\n");
+    }
 });
 futures.values().parallelStream().forEach((future) -> {
     SimilarityMatrix get;
+    //turning from 20 to 5 might be risky?
     try {
-        get = future.get(20, TimeUnit.SECONDS);
+        get = future.get(5, TimeUnit.SECONDS);
         String addStr = get.getPrimaryString();
-        boolean presentstr = false;
-        for (String returnstr : returnmap.values()) {
-            if (returnstr.equals(addStr)) {
-                presentstr = true;
-                break;
-            }
-        }
-        if (!presentstr) {
-            returnmap.put(returnmap.size() + 1, addStr);
-        }
+        returnmap.put(returnmap.size() + 1, addStr);
+        System.out.println("returnmap adding: " + addStr + "\n");
     } catch (InterruptedException | ExecutionException | TimeoutException ex) {
         System.out.println("failed verification: " + ex.getMessage() + "\n");
     }
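
A few side notes on this hunk. Swapping the inner parallelStream for a plain for loop removes nested parallelism; the real work is already submitted to executor, so the inner stream only contended for the common fork-join pool. The future.get timeout also drops from 20 to 5 seconds, which the new comment itself flags as risky: slow workers that used to finish now surface as TimeoutException and are skipped. And futures.put(futures.size() + 1, ...) still runs inside a parallel stream, where two threads can read the same size and overwrite each other's entry; the dropped presentstr scan likewise means duplicate primary strings now reach returnmap unfiltered. A collision-free sketch using an atomic counter; the demo class, worker, and inputs are stand-ins, not code from this repository:

import java.util.Arrays;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

public class FutureKeyDemo {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        ConcurrentMap<Integer, Future<String>> futures = new ConcurrentHashMap<>();
        AtomicInteger nextKey = new AtomicInteger();

        Arrays.asList("a", "b", "c", "d").parallelStream().forEach(s -> {
            Callable<String> worker = () -> s.toUpperCase(); // stand-in for SentimentAnalyzerTest
            // incrementAndGet is atomic: parallel submitters never share a key,
            // unlike futures.size() + 1, which two threads can read identically
            futures.put(nextKey.incrementAndGet(), executor.submit(worker));
        });

        for (Future<String> f : futures.values()) {
            System.out.println(f.get(5, TimeUnit.SECONDS)); // same bounded wait as above
        }
        executor.shutdown();
    }
}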

View File

@@ -43,9 +43,6 @@ import java.util.concurrent.ConcurrentMap;
 public class PipelineJMWESingleton {

     //if not needed to be volatile dont make it, increases time
-    // private volatile InMemoryMWEIndex mweMemoryIndex;
-    // private volatile static MWEIndex mweIndex;
-    // private volatile JMWEAnnotator jmweanno = initializeJMWE();
     public volatile static PipelineJMWESingleton INSTANCE;
     private volatile static int incrementer = 0;
     private static StanfordCoreNLP localNLP = initializeJMWE();
@@ -92,6 +89,7 @@ public class PipelineJMWESingleton {
     Properties propsJMWE;
     propsJMWE = new Properties();
     propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma");
+    propsJMWE.setProperty("tokenize.options", "untokenizable=firstDelete");
     underscoreSpaceReplacement = "-";
     localNLP = new StanfordCoreNLP(propsJMWE);
     System.out.println("finished singleton constructor \n");

View File

@@ -97,8 +97,9 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
     List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
     List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
     DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
+    //noneDelete
     TokenizerFactory<CoreLabel> ptbTokenizerFactory
-            = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
+            = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
     tokenizer.setTokenizerFactory(ptbTokenizerFactory);
     for (List<HasWord> sentence : tokenizer) {
         taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
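
This applies the same untokenizable policy as the tokenize.options changes above, here at the factory level for the DocumentPreprocessor path; the old noneKeep kept the offending characters without any warning. A self-contained sketch of the pattern, with a made-up sample text:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.StringReader;
import java.util.List;

public class PreprocessorDemo {
    public static void main(String[] args) {
        // delete untokenizable characters, warning only on the first occurrence
        TokenizerFactory<CoreLabel> factory =
                PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
        DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader("Hello world. Bye."));
        dp.setTokenizerFactory(factory);
        for (List<HasWord> sentence : dp) {
            System.out.println(sentence); // one tokenized sentence per iteration
        }
    }
}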