me gusta
This commit is contained in:
parent
e07a9d7601
commit
17ef94ef07
@ -114,6 +114,7 @@ public class Datahandler {
|
||||
propsSentiment.setProperty("sentiment.model", sentimentModel);
|
||||
propsSentiment.setProperty("parse.maxlen", "100");
|
||||
propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
|
||||
propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
|
||||
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
|
||||
tagger = new MaxentTagger(taggerPath);
|
||||
cdl.countDown();
|
||||
@ -123,6 +124,7 @@ public class Datahandler {
|
||||
props.setProperty("parse.maxlen", "100");
|
||||
props.setProperty("parse.binaryTrees", "true");
|
||||
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
|
||||
props.setProperty("tokenize.options", "untokenizable=firstDelete");
|
||||
pipeline = new StanfordCoreNLP(props);
|
||||
cdl.countDown();
|
||||
}).start();
|
||||
@ -352,9 +354,7 @@ public class Datahandler {
|
||||
if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
|
||||
ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
|
||||
str = cutContent(str, hlStatsMsg);
|
||||
System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
|
||||
str = filterContent(str);
|
||||
System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
|
||||
str = removeSlacks(str);
|
||||
System.out.println("finished removeSlacks \n" + str.size() + "\n");
|
||||
str = verifyCalculationFitness(str);
|
||||
@ -517,10 +517,10 @@ public class Datahandler {
|
||||
if (!str1.isEmpty() && str1.length() > 3) {
|
||||
str1 = str1.trim();
|
||||
if (str1.contains("PM*")) {
|
||||
str1 = str1.substring(str1.indexOf("PM*") + 5);
|
||||
str1 = str1.substring(str1.indexOf("PM*") + 3);
|
||||
}
|
||||
if (str1.contains("AM*")) {
|
||||
str1 = str1.substring(str1.indexOf("AM*") + 5);
|
||||
str1 = str1.substring(str1.indexOf("AM*") + 3);
|
||||
}
|
||||
for (Character c : str1.toCharArray()) {
|
||||
if (c == '?' || c == '°') {
|
||||
@ -710,6 +710,7 @@ public class Datahandler {
|
||||
Annotation strAnno2 = new Annotation(strCache);
|
||||
pipelineSentiment.annotate(strAnno2);
|
||||
pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
|
||||
System.out.println("normal annotating strCache: " + strCache + "\n");
|
||||
});
|
||||
final ConcurrentMap<Integer, String> allStrings;
|
||||
if (!stringCache.isEmpty()) {
|
||||
@ -719,29 +720,23 @@ public class Datahandler {
|
||||
}
|
||||
ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
|
||||
strmap.values().parallelStream().forEach((str) -> {
|
||||
allStrings.values().parallelStream().forEach((str1) -> {
|
||||
for (String str1 : allStrings.values()) {
|
||||
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1),
|
||||
jmweAnnotateCachelcl.get(str), jmweAnnotateCachelcl.get(str1), pipelineAnnotateCachelcl.get(str),
|
||||
pipelineAnnotateCachelcl.get(str1), pipelineSentimentAnnotateCachelcl.get(str),
|
||||
pipelineSentimentAnnotateCachelcl.get(str1));
|
||||
futures.put(futures.size() + 1, executor.submit(worker));
|
||||
});
|
||||
System.out.println("futures size in verify calcs: " + futures.size() + "\n");
|
||||
}
|
||||
});
|
||||
futures.values().parallelStream().forEach((future) -> {
|
||||
SimilarityMatrix get;
|
||||
//turning from 20 to 5 might be risky?
|
||||
try {
|
||||
get = future.get(20, TimeUnit.SECONDS);
|
||||
get = future.get(5, TimeUnit.SECONDS);
|
||||
String addStr = get.getPrimaryString();
|
||||
boolean presentstr = false;
|
||||
for (String returnstr : returnmap.values()) {
|
||||
if (returnstr.equals(addStr)) {
|
||||
presentstr = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!presentstr) {
|
||||
returnmap.put(returnmap.size() + 1, addStr);
|
||||
}
|
||||
returnmap.put(returnmap.size() + 1, addStr);
|
||||
System.out.println("returnmap adding: " + addStr + "\n");
|
||||
} catch (InterruptedException | ExecutionException | TimeoutException ex) {
|
||||
System.out.println("failed verification: " + ex.getMessage() + "\n");
|
||||
}
|
||||
|
@ -43,9 +43,6 @@ import java.util.concurrent.ConcurrentMap;
|
||||
public class PipelineJMWESingleton {
|
||||
|
||||
//if not needed to be volatile dont make it, increases time
|
||||
// private volatile InMemoryMWEIndex mweMemoryIndex;
|
||||
// private volatile static MWEIndex mweIndex;
|
||||
// private volatile JMWEAnnotator jmweanno = initializeJMWE();
|
||||
public volatile static PipelineJMWESingleton INSTANCE;
|
||||
private volatile static int incrementer = 0;
|
||||
private static StanfordCoreNLP localNLP = initializeJMWE();
|
||||
@ -92,6 +89,7 @@ public class PipelineJMWESingleton {
|
||||
Properties propsJMWE;
|
||||
propsJMWE = new Properties();
|
||||
propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma");
|
||||
propsJMWE.setProperty("tokenize.options", "untokenizable=firstDelete");
|
||||
underscoreSpaceReplacement = "-";
|
||||
localNLP = new StanfordCoreNLP(propsJMWE);
|
||||
System.out.println("finished singleton constructor \n");
|
||||
|
@ -97,8 +97,9 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
|
||||
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
|
||||
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
|
||||
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
|
||||
//noneDelete
|
||||
TokenizerFactory<CoreLabel> ptbTokenizerFactory
|
||||
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
|
||||
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
|
||||
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
|
||||
for (List<HasWord> sentence : tokenizer) {
|
||||
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
|
||||
|
Loading…
Reference in New Issue
Block a user