me gusta

commit 17ef94ef07
parent e07a9d7601
Datahandler.java:

@@ -114,6 +114,7 @@ public class Datahandler {
         propsSentiment.setProperty("sentiment.model", sentimentModel);
         propsSentiment.setProperty("parse.maxlen", "100");
         propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise
+        propsSentiment.setProperty("tokenize.options", "untokenizable=firstDelete");
         pipelineSentiment = new StanfordCoreNLP(propsSentiment);
         tagger = new MaxentTagger(taggerPath);
         cdl.countDown();
@@ -123,6 +124,7 @@ public class Datahandler {
         props.setProperty("parse.maxlen", "100");
         props.setProperty("parse.binaryTrees", "true");
         props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+        props.setProperty("tokenize.options", "untokenizable=firstDelete");
         pipeline = new StanfordCoreNLP(props);
         cdl.countDown();
     }).start();
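Both pipelines above (and the JMWE pipeline later in this commit) gain the same tokenizer setting. For reference, tokenize.options is passed through to PTBTokenizer, whose untokenizable option takes one of noneDelete, firstDelete, allDelete, noneKeep, firstKeep, allKeep: the first word says how many of the offending characters get a logged warning, the second whether they are deleted or kept as single-character tokens. A minimal standalone sketch, assuming the stanford-corenlp jar and models are on the classpath (not project code):

    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import java.util.Properties;

    public class TokenizeOptionsDemo {
        public static void main(String[] args) {
            Properties props = new Properties();
            // Only tokenize/ssplit, so no parser or sentiment models are needed.
            props.setProperty("annotators", "tokenize,ssplit");
            // Delete characters the tokenizer cannot handle, warning only once;
            // noneKeep would keep them all as tokens, silently.
            props.setProperty("tokenize.options", "untokenizable=firstDelete");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            pipeline.process("text with an untokenizable char: \uFFFF");
        }
    }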
@@ -352,9 +354,7 @@ public class Datahandler {
         if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
             ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
             str = cutContent(str, hlStatsMsg);
-            System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
             str = filterContent(str);
-            System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
             str = removeSlacks(str);
             System.out.println("finished removeSlacks \n" + str.size() + "\n");
             str = verifyCalculationFitness(str);
@@ -517,10 +517,10 @@ public class Datahandler {
         if (!str1.isEmpty() && str1.length() > 3) {
             str1 = str1.trim();
             if (str1.contains("PM*")) {
-                str1 = str1.substring(str1.indexOf("PM*") + 5);
+                str1 = str1.substring(str1.indexOf("PM*") + 3);
             }
             if (str1.contains("AM*")) {
-                str1 = str1.substring(str1.indexOf("AM*") + 5);
+                str1 = str1.substring(str1.indexOf("AM*") + 3);
             }
             for (Character c : str1.toCharArray()) {
                 if (c == '?' || c == '°') {
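The two +3 fixes are easy to check in isolation: "PM*" and "AM*" are three characters long, so indexOf(...) + 3 points just past the marker, while the old + 5 silently dropped two extra characters and threw whenever the marker sat within two characters of the end of the string. A standalone sketch (not project code):

    public class MarkerOffsetDemo {
        public static void main(String[] args) {
            String s = "12:30 PM* hello";
            // indexOf returns the start of the marker; add its length (3) to skip it.
            System.out.println(s.substring(s.indexOf("PM*") + 3)); // prints " hello"
            // The old + 5 also swallowed the two characters after the marker:
            System.out.println(s.substring(s.indexOf("PM*") + 5)); // prints "ello"
            // and "12:30 PM*".substring(6 + 5) would throw StringIndexOutOfBoundsException,
            // since index 11 exceeds the string's length of 9.
        }
    }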
@@ -710,6 +710,7 @@ public class Datahandler {
             Annotation strAnno2 = new Annotation(strCache);
             pipelineSentiment.annotate(strAnno2);
             pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
+            System.out.println("normal annotating strCache: " + strCache + "\n");
         });
         final ConcurrentMap<Integer, String> allStrings;
         if (!stringCache.isEmpty()) {
@@ -719,29 +720,23 @@ public class Datahandler {
         }
         ConcurrentMap<Integer, Future<SimilarityMatrix>> futures = new MapMaker().concurrencyLevel(2).makeMap();
         strmap.values().parallelStream().forEach((str) -> {
-            allStrings.values().parallelStream().forEach((str1) -> {
+            for (String str1 : allStrings.values()) {
                 Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1),
                         jmweAnnotateCachelcl.get(str), jmweAnnotateCachelcl.get(str1), pipelineAnnotateCachelcl.get(str),
                         pipelineAnnotateCachelcl.get(str1), pipelineSentimentAnnotateCachelcl.get(str),
                         pipelineSentimentAnnotateCachelcl.get(str1));
                 futures.put(futures.size() + 1, executor.submit(worker));
-            });
+                System.out.println("futures size in verify calcs: " + futures.size() + "\n");
+            }
         });
         futures.values().parallelStream().forEach((future) -> {
             SimilarityMatrix get;
+            //turning from 20 to 5 might be risky?
             try {
-                get = future.get(20, TimeUnit.SECONDS);
+                get = future.get(5, TimeUnit.SECONDS);
                 String addStr = get.getPrimaryString();
-                boolean presentstr = false;
-                for (String returnstr : returnmap.values()) {
-                    if (returnstr.equals(addStr)) {
-                        presentstr = true;
-                        break;
-                    }
-                }
-                if (!presentstr) {
-                    returnmap.put(returnmap.size() + 1, addStr);
-                }
+                returnmap.put(returnmap.size() + 1, addStr);
+                System.out.println("returnmap adding: " + addStr + "\n");
             } catch (InterruptedException | ExecutionException | TimeoutException ex) {
                 System.out.println("failed verification: " + ex.getMessage() + "\n");
             }
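Beyond the extra logging, this hunk makes three behavioral changes: the inner parallelStream becomes a plain for loop (nested parallel streams contend for the same common ForkJoinPool, so this is a reasonable simplification); the per-future timeout drops from 20 to 5 seconds, which the new comment itself flags as risky; and the duplicate check on returnmap is gone, so the same primary string can now be added more than once. A self-contained sketch of the collect-with-timeout pattern, with the removed dedup restored via a Set and a hypothetical stand-in task (not project code):

    import java.util.ArrayList;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    public class TimeoutCollectDemo {
        public static void main(String[] args) {
            ExecutorService executor = Executors.newFixedThreadPool(2);
            List<Future<String>> futures = new ArrayList<>();
            for (String s : List.of("alpha", "beta", "alpha")) {
                futures.add(executor.submit(s::toUpperCase)); // stand-in for SentimentAnalyzerTest
            }
            Set<String> results = new LinkedHashSet<>(); // a Set gives the dedup the removed loop did
            for (Future<String> future : futures) {
                try {
                    // A short timeout bounds total latency but abandons slow workers.
                    results.add(future.get(5, TimeUnit.SECONDS));
                } catch (InterruptedException | ExecutionException | TimeoutException ex) {
                    System.out.println("failed verification: " + ex.getMessage());
                }
            }
            System.out.println(results); // [ALPHA, BETA] -- the duplicate collapsed
            executor.shutdown();
        }
    }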
PipelineJMWESingleton.java:

@@ -43,9 +43,6 @@ import java.util.concurrent.ConcurrentMap;
 public class PipelineJMWESingleton {
 
     //if not needed to be volatile dont make it, increases time
-    // private volatile InMemoryMWEIndex mweMemoryIndex;
-    // private volatile static MWEIndex mweIndex;
-    // private volatile JMWEAnnotator jmweanno = initializeJMWE();
     public volatile static PipelineJMWESingleton INSTANCE;
     private volatile static int incrementer = 0;
     private static StanfordCoreNLP localNLP = initializeJMWE();
@@ -92,6 +89,7 @@ public class PipelineJMWESingleton {
         Properties propsJMWE;
         propsJMWE = new Properties();
         propsJMWE.setProperty("annotators", "tokenize,ssplit,pos,lemma");
+        propsJMWE.setProperty("tokenize.options", "untokenizable=firstDelete");
         underscoreSpaceReplacement = "-";
         localNLP = new StanfordCoreNLP(propsJMWE);
         System.out.println("finished singleton constructor \n");
SentimentAnalyzerTest.java:

@@ -97,8 +97,9 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
         List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
         List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
         DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
+        //noneDelete
         TokenizerFactory<CoreLabel> ptbTokenizerFactory
-                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
+                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
         tokenizer.setTokenizerFactory(ptbTokenizerFactory);
         for (List<HasWord> sentence : tokenizer) {
             taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
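This last hunk aligns the DocumentPreprocessor's tokenizer with the untokenizable=firstDelete policy the commit sets on the CoreNLP pipelines above (the new //noneDelete comment presumably records an alternative that was considered). A minimal standalone sketch of the same wiring, assuming the stanford-corenlp jar on the classpath (not project code):

    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.ling.HasWord;
    import edu.stanford.nlp.process.CoreLabelTokenFactory;
    import edu.stanford.nlp.process.DocumentPreprocessor;
    import edu.stanford.nlp.process.PTBTokenizer;
    import edu.stanford.nlp.process.TokenizerFactory;
    import java.io.StringReader;
    import java.util.List;

    public class PreprocessorDemo {
        public static void main(String[] args) {
            DocumentPreprocessor tokenizer =
                    new DocumentPreprocessor(new StringReader("First sentence. Second one."));
            // firstDelete: drop untokenizable characters, logging one warning,
            // rather than keeping them as single-character tokens (noneKeep).
            TokenizerFactory<CoreLabel> ptbTokenizerFactory =
                    PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=firstDelete");
            tokenizer.setTokenizerFactory(ptbTokenizerFactory);
            for (List<HasWord> sentence : tokenizer) {
                System.out.println(sentence);
            }
        }
    }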