This commit is contained in:
jenzur 2019-03-25 10:43:54 +01:00
parent e07a9d7601
commit 17ef94ef07
3 changed files with 15 additions and 21 deletions

View File

@ -114,6 +114,7 @@ public class Datahandler {
propsSentiment.setProperty("sentiment.model", sentimentModel);
propsSentiment.setProperty("parse.maxlen", "100");
propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
tagger = new MaxentTagger(taggerPath);
cdl.countDown();
@ -123,6 +124,7 @@ public class Datahandler {
props.setProperty("parse.maxlen", "100");
props.setProperty("parse.binaryTrees", "true");
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
props.setProperty("tokenize.options", "untokenizable=firstDelete");
pipeline = new StanfordCoreNLP(props);
cdl.countDown();
}).start();
@ -352,9 +354,7 @@ public class Datahandler {
if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
str = cutContent(str, hlStatsMsg);
System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
str = filterContent(str);
System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
str = removeSlacks(str);
System.out.println("finished removeSlacks \n" + str.size() + "\n");
str = verifyCalculationFitness(str);
@ -517,10 +517,10 @@ public class Datahandler {
if (!str1.isEmpty() && str1.length() > 3) {
str1 = str1.trim();
if (str1.contains("PM*")) {
str1 = str1.substring(str1.indexOf("PM*") + 5);
str1 = str1.substring(str1.indexOf("PM*") + 3);
}
if (str1.contains("AM*")) {
str1 = str1.substring(str1.indexOf("AM*") + 5);
str1 = str1.substring(str1.indexOf("AM*") + 3);
}
for (Character c : str1.toCharArray()) {
if (c == '?' || c == '°') {
@ -710,6 +710,7 @@ public class Datahandler {
Annotation strAnno2 = new Annotation(strCache);
pipelineSentiment.annotate(strAnno2);
pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
System.out.println("normal annotating strCache: " + strCache + "\n");
});
final ConcurrentMap<Integer, String> allStrings;
if (!stringCache.isEmpty()) {
@ -719,29 +720,23 @@ public class Datahandler {
}
ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
strmap.values().parallelStream().forEach((str) -> {
allStrings.values().parallelStream().forEach((str1) -> {
for (String str1 : allStrings.values()) {
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1),
jmweAnnotateCachelcl.get(str), jmweAnnotateCachelcl.get(str1), pipelineAnnotateCachelcl.get(str),
pipelineAnnotateCachelcl.get(str1), pipelineSentimentAnnotateCachelcl.get(str),
pipelineSentimentAnnotateCachelcl.get(str1));
futures.put(futures.size() + 1, executor.submit(worker));
});
System.out.println("futures size in verify calcs: " + futures.size() + "\n");
}
});
futures.values().parallelStream().forEach((future) -> {
SimilarityMatrix get;
// NOTE: reducing this timeout from 20 to 5 seconds might be risky.
try {
get = future.get(20, TimeUnit.SECONDS);
get = future.get(5, TimeUnit.SECONDS);
String addStr = get.getPrimaryString();
boolean presentstr = false;
for (String returnstr : returnmap.values()) {
if (returnstr.equals(addStr)) {
presentstr = true;
break;
}
}
if (!presentstr) {
returnmap.put(returnmap.size() + 1, addStr);
}
returnmap.put(returnmap.size() + 1, addStr);
System.out.println("returnmap adding: " + addStr + "\n");
} catch (InterruptedException | ExecutionException | TimeoutException ex) {
System.out.println("failed verification: " + ex.getMessage() + "\n");
}

View File

@ -43,9 +43,6 @@ import java.util.concurrent.ConcurrentMap;
public class PipelineJMWESingleton {
// Do not declare a field volatile unless it needs to be; volatile increases access time.
// private volatile InMemoryMWEIndex mweMemoryIndex;
// private volatile static MWEIndex mweIndex;
// private volatile JMWEAnnotator jmweanno = initializeJMWE();
public volatile static PipelineJMWESingleton INSTANCE;
private volatile static int incrementer = 0;
private static StanfordCoreNLP localNLP = initializeJMWE();
@ -92,6 +89,7 @@ public class PipelineJMWESingleton {
Properties propsJMWE;
propsJMWE = new Properties();
propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma");
propsJMWE.setProperty("tokenize.options", "untokenizable=firstDelete");
underscoreSpaceReplacement = "-";
localNLP = new StanfordCoreNLP(propsJMWE);
System.out.println("finished singleton constructor \n");

View File

@ -97,8 +97,9 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
//noneDelete
TokenizerFactory<CoreLabel> ptbTokenizerFactory
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());