this should help some

This commit is contained in:
jenzur 2019-03-24 23:04:19 +01:00
parent c5c1a31a14
commit e07a9d7601
5 changed files with 377 additions and 288 deletions

View File

@ -27,6 +27,7 @@ import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
@ -38,16 +39,12 @@ import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
*
@ -157,9 +154,9 @@ public class Datahandler {
}
public void instantiateExecutor() {
this.executor = new ForkJoinPool(Runtime.getRuntime().availableProcessors(),
this.executor = new ForkJoinPool(25,
ForkJoinPool.defaultForkJoinWorkerThreadFactory,
null, false); //true
null, false);
}
public static GrammaticalStructureFactory getGsf() {
@ -208,26 +205,23 @@ public class Datahandler {
public void addHLstatsMessages() {
ConcurrentMap<Integer, String> hlStatsMessages = DataMapper.getHLstatsMessages();
ConcurrentMap<Integer, String> strCacheLocal = stringCache;
int hardcap = 7500;
int counter = 0;
for (String str : hlStatsMessages.values()) {
//might want a hardcap
hlStatsMessages.values().parallelStream().forEach(str -> {
if (!str.startsWith("!")) {
boolean present = false;
for (String str1 : strCacheLocal.values()) {
if (str.equals(str1)) {
present = true;
break;
}
}
if (!present) {
//System.out.println("addHLstatsMessages adding str: " + str + "\n");
String orElse = strCacheLocal.values().parallelStream().filter(e -> e.equals(str)).findAny().orElse(null);
if (orElse == null) {
MessageResponseHandler.getMessage(str);
}
}
if (counter >= hardcap) {
break;
});
}
public void instantiateAnnotationMapJMWE() {
if (!stringCache.isEmpty()) {
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(stringCache.values());
for (Entry<String, Annotation> entries : jmweAnnotation.entrySet()) {
jmweAnnotationCache.put(entries.getKey(), entries.getValue());
}
counter++;
}
}
@ -238,41 +232,11 @@ public class Datahandler {
Annotation strAnno = new Annotation(str);
pipeline.annotate(strAnno);
pipelineAnnotationCache.put(str, strAnno);
jmweAnnotationCache.put(str, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str));
Annotation strAnno2 = new Annotation(str);
pipelineSentiment.annotate(strAnno2);
pipelineSentimentAnnotationCache.put(str, strAnno2);
});
}
System.out.println("FINISHED ALL ANNOTATIONS");
System.out.println("FINISHED ALL ANNOTATIONS");
System.out.println("FINISHED ALL ANNOTATIONS");
/*
int poolsize = Runtime.getRuntime().availableProcessors();
CountDownLatch cdl = new CountDownLatch(poolsize + 1);
int rangeAdder = (stringCache.values().size() / poolsize);
for (int i = 0; i < poolsize; i++) {
final int ij = i;
new Thread(() -> {
int counter = rangeAdder * ij;
for (int j = 0; j < rangeAdder; j++) {
String str = stringCache.getOrDefault(counter + j, null);
if (str != null) {
System.out.println("adding str jmwe annotation: " + str + "\n");
Annotation strAnno = new Annotation(str);
pipelineJMWE.annotate(strAnno);
jmweAnnotationCache.put(str, strAnno);
}
}
cdl.countDown();
}).start();
}
try {
cdl.await();
} catch (InterruptedException ex) {
System.out.println("cdl await interrupted: " + ex.getLocalizedMessage() + "\n");
}*/
}
public synchronized void checkIfUpdateMatrixes() {
@ -388,8 +352,11 @@ public class Datahandler {
if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) {
ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr();
str = cutContent(str, hlStatsMsg);
System.out.println("finished cutcontent \nstr size: " + str.size() + "\n");
str = filterContent(str);
System.out.println("finished filterContent \nstr size: " + str.size() + "\n");
str = removeSlacks(str);
System.out.println("finished removeSlacks \n" + str.size() + "\n");
str = verifyCalculationFitness(str);
System.out.println("Check if updateString str size POST: " + str.size() + "\n");
try {
@ -451,19 +418,28 @@ public class Datahandler {
}
}
System.out.println("none within 8 range");
Annotation strAnno = new Annotation(str);
pipeline.annotate(strAnno);
Annotation strAnno2 = new Annotation(str);
pipelineSentiment.annotate(strAnno2);
List<String> notactualList = new ArrayList();
notactualList.add(str);
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList);
final Annotation strAnno3 = jmweAnnotation.values().iterator().next();
ConcurrentMap<Integer, String> strCache = stringCache;
ConcurrentMap<Integer, Future<SimilarityMatrix>> futureslocal = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<Integer, SimilarityMatrix> futurereturn = new MapMaker().concurrencyLevel(2).makeMap();
for (String str1 : strCache.values()) {
if (!str.equals(str1)) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX,
jmweAnnotationCache.get(str), jmweAnnotationCache.get(str1), pipelineAnnotationCache.get(str),
pipelineAnnotationCache.get(str1), pipelineSentimentAnnotationCache.get(str),
String strF = str;
strCache.values().parallelStream().forEach((str1) -> {
if (!strF.equals(str1)) {
SimilarityMatrix SMX = new SimilarityMatrix(strF, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(strF, str1, SMX,
strAnno3, jmweAnnotationCache.get(str1), strAnno,
pipelineAnnotationCache.get(str1), strAnno2,
pipelineSentimentAnnotationCache.get(str1));
futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
}
}
});
int index = 0;
futureslocal.values().parallelStream().forEach((future) -> {
SimilarityMatrix SMX = new SimilarityMatrix("", "");
@ -476,8 +452,10 @@ public class Datahandler {
});
for (SimilarityMatrix SMX : futurereturn.values()) {
double distance = SMX.getDistance();
/*
System.out.println("index: " + index + "\nfutures size: " + futureslocal.values().size() + "\nScore: " + SMX.getDistance() + "\nSecondary: "
+ SMX.getSecondaryString() + "\nPrimary: " + SMX.getPrimaryString() + "\n");
*/
if (distance > Score) {
Score = distance;
SMXreturn = SMX;
@ -490,18 +468,20 @@ public class Datahandler {
}
public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings) {
int minDistance = 8;
int minDistance = 7;
String similar = "";
ConcurrentMap<Integer, Future<Entry<String, Integer>>> futures = new MapMaker().concurrencyLevel(2).makeMap();
List<Future<ConcurrentMap<String, Integer>>> futures = new ArrayList();
ConcurrentMap<String, Integer> futuresreturnvalues = new MapMaker().concurrencyLevel(2).makeMap();
concurrentStrings.values().parallelStream().forEach((str) -> {
Callable<Entry<String, Integer>> worker = new LevenshteinDistance(toBeCompared, str);
futures.put(futures.size() + 1, executor.submit(worker));
Callable<ConcurrentMap<String, Integer>> worker = new LevenshteinDistance(toBeCompared, str);
futures.add(executor.submit(worker));
});
futures.values().parallelStream().forEach((future) -> {
futures.parallelStream().forEach((future) -> {
try {
Entry<String, Integer> get = future.get();
futuresreturnvalues.put(get.getKey(), get.getValue());
ConcurrentMap<String, Integer> get = future.get();
get.entrySet().forEach((str) -> {
futuresreturnvalues.put(str.getKey(), str.getValue());
});
} catch (NullPointerException | InterruptedException | ExecutionException ex) {
System.out.println("failed future\nex: " + ex.getMessage() + "\n");
}
@ -519,7 +499,7 @@ public class Datahandler {
public static ConcurrentMap<Integer, String> cutContent(ConcurrentMap<Integer, String> str, boolean hlStatsMsg) {
ConcurrentMap<Integer, String> returnlist = new MapMaker().concurrencyLevel(2).makeMap();
for (String str1 : str.values()) {
str.values().parallelStream().forEach(str1 -> {
int iend = str1.indexOf("content: ");
if (iend != -1) {
String trs = str1.substring(iend + 9);
@ -527,182 +507,190 @@ public class Datahandler {
} else if (hlStatsMsg) {
returnlist.put(returnlist.size() + 1, str1);
}
}
});
return returnlist;
}
public static ConcurrentMap<Integer, String> filterContent(ConcurrentMap<Integer, String> str) {
ConcurrentMap<Integer, String> strlistreturn = new MapMaker().concurrencyLevel(2).makeMap();
for (String str1 : str.values()) {
if (str1.isEmpty() || str1.length() < 3) {
continue;
}
str1 = str1.trim();
if (str1.contains("PM*")) {
str1 = str1.substring(str1.indexOf("PM*") + 5);
}
if (str1.contains("AM*")) {
str1 = str1.substring(str1.indexOf("AM*") + 5);
}
for (Character c : str1.toCharArray()) {
if (c == '?' || c == '°') {
str1 = str1.replace("?", " <:wlenny:514861023002624001> ");
str1 = str1.replace("°", " <:wlenny:514861023002624001> ");
str.values().parallelStream().forEach(str1 -> {
if (!str1.isEmpty() && str1.length() > 3) {
str1 = str1.trim();
if (str1.contains("PM*")) {
str1 = str1.substring(str1.indexOf("PM*") + 5);
}
if (str1.contains("AM*")) {
str1 = str1.substring(str1.indexOf("AM*") + 5);
}
for (Character c : str1.toCharArray()) {
if (c == '?' || c == '°') {
str1 = str1.replace("?", " <:wlenny:514861023002624001> ");
str1 = str1.replace("°", " <:wlenny:514861023002624001> ");
}
}
if (str1.contains("(Counter-Terrorist)")) {
str1 = str1.replace("(Counter-Terrorist)", " ");
}
if (str1.contains("(Terrorist)")) {
str1 = str1.replace("(Terrorist)", " ");
}
if (str1.contains("(Spectator)")) {
str1 = str1.replace("(Spectator)", " ");
}
if (str1.contains("*DEAD*")) {
str1 = str1.replace("*DEAD*", " ");
}
if (str1.contains("{red}")) {
str1 = str1.replace("{red}", " ");
}
if (str1.contains("{orange}")) {
str1 = str1.replace("{orange}", " ");
}
if (str1.contains("{yellow}")) {
str1 = str1.replace("{yellow}", " ");
}
if (str1.contains("{green}")) {
str1 = str1.replace("{green}", " ");
}
if (str1.contains("{lightblue}")) {
str1 = str1.replace("{lightblue}", " ");
}
if (str1.contains("{blue}")) {
str1 = str1.replace("{blue}", " ");
}
if (str1.contains("{purple}")) {
str1 = str1.replace("{purple}", " ");
}
if (str1.contains("{white}")) {
str1 = str1.replace("{white}", " ");
}
if (str1.contains("{fullblue}")) {
str1 = str1.replace("{fullblue}", " ");
}
if (str1.contains("{cyan}")) {
str1 = str1.replace("{cyan}", " ");
}
if (str1.contains("{lime}")) {
str1 = str1.replace("{lime}", " ");
}
if (str1.contains("{deeppink}")) {
str1 = str1.replace("{deeppink}", " ");
}
if (str1.contains("{slategray}")) {
str1 = str1.replace("{slategray}", " ");
}
if (str1.contains("{dodgerblue}")) {
str1 = str1.replace("{dodgerblue}", " ");
}
if (str1.contains("{black}")) {
str1 = str1.replace("{black}", " ");
}
if (str1.contains("{orangered}")) {
str1 = str1.replace("{orangered}", " ");
}
if (str1.contains("{darkorchid}")) {
str1 = str1.replace("{darkorchid}", " ");
}
if (str1.contains("{pink}")) {
str1 = str1.replace("{pink}", " ");
}
if (str1.contains("{lightyellow}")) {
str1 = str1.replace("{lightyellow}", " ");
}
if (str1.contains("{chocolate}")) {
str1 = str1.replace("{chocolate}", " ");
}
if (str1.contains("{beige}")) {
str1 = str1.replace("{beige}", " ");
}
if (str1.contains("{azure}")) {
str1 = str1.replace("{azure}", " ");
}
if (str1.contains("{yellowgreen}")) {
str1 = str1.replace("{yellowgreen}", " ");
}
str1 = str1.trim();
if (str1.length() > 2 && (!str1.startsWith("!"))) {
strlistreturn.put(strlistreturn.size() + 1, str1);
}
}
if (str1.contains("(Counter-Terrorist)")) {
str1 = str1.replace("(Counter-Terrorist)", " ");
}
if (str1.contains("(Terrorist)")) {
str1 = str1.replace("(Terrorist)", " ");
}
if (str1.contains("(Spectator)")) {
str1 = str1.replace("(Spectator)", " ");
}
if (str1.contains("*DEAD*")) {
str1 = str1.replace("*DEAD*", " ");
}
if (str1.contains("{red}")) {
str1 = str1.replace("{red}", " ");
}
if (str1.contains("{orange}")) {
str1 = str1.replace("{orange}", " ");
}
if (str1.contains("{yellow}")) {
str1 = str1.replace("{yellow}", " ");
}
if (str1.contains("{green}")) {
str1 = str1.replace("{green}", " ");
}
if (str1.contains("{lightblue}")) {
str1 = str1.replace("{lightblue}", " ");
}
if (str1.contains("{blue}")) {
str1 = str1.replace("{blue}", " ");
}
if (str1.contains("{purple}")) {
str1 = str1.replace("{purple}", " ");
}
if (str1.contains("{white}")) {
str1 = str1.replace("{white}", " ");
}
if (str1.contains("{fullblue}")) {
str1 = str1.replace("{fullblue}", " ");
}
if (str1.contains("{cyan}")) {
str1 = str1.replace("{cyan}", " ");
}
if (str1.contains("{lime}")) {
str1 = str1.replace("{lime}", " ");
}
if (str1.contains("{deeppink}")) {
str1 = str1.replace("{deeppink}", " ");
}
if (str1.contains("{slategray}")) {
str1 = str1.replace("{slategray}", " ");
}
if (str1.contains("{dodgerblue}")) {
str1 = str1.replace("{dodgerblue}", " ");
}
if (str1.contains("{black}")) {
str1 = str1.replace("{black}", " ");
}
if (str1.contains("{orangered}")) {
str1 = str1.replace("{orangered}", " ");
}
if (str1.contains("{darkorchid}")) {
str1 = str1.replace("{darkorchid}", " ");
}
if (str1.contains("{pink}")) {
str1 = str1.replace("{pink}", " ");
}
if (str1.contains("{lightyellow}")) {
str1 = str1.replace("{lightyellow}", " ");
}
if (str1.contains("{chocolate}")) {
str1 = str1.replace("{chocolate}", " ");
}
if (str1.contains("{beige}")) {
str1 = str1.replace("{beige}", " ");
}
if (str1.contains("{azure}")) {
str1 = str1.replace("{azure}", " ");
}
if (str1.contains("{yellowgreen}")) {
str1 = str1.replace("{yellowgreen}", " ");
}
str1 = str1.trim();
if (str1.length() > 2 && (!str1.startsWith("!"))) {
strlistreturn.put(strlistreturn.size() + 1, str1);
}
}
});
return strlistreturn;
}
private ConcurrentMap<Integer, String> removeSlacks(ConcurrentMap<Integer, String> str) {
ShiftReduceParser model = getModel();
MaxentTagger tagger = getTagger();
List<TaggedWord> taggedWords;
ConcurrentMap<Integer, String> strreturn = new MapMaker().concurrencyLevel(2).makeMap();
for (String str1 : str.values()) {
int counter = 0;
str.values().parallelStream().forEach(str1 -> {
ConcurrentMap<Integer, String> TGWList = new MapMaker().concurrencyLevel(2).makeMap();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
for (List<HasWord> sentence : tokenizer) {
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
Tree tree = model.apply(tagged1);
taggedWords = tree.taggedYield();
for (TaggedWord TGW : taggedWords) {
if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) {
TGWList.put(TGWList.size() + 1, TGW.tag());
counter++;
}
if (counter > 3) {
int addCounter = 0;
ConcurrentMap<Integer, Word> wordList = new MapMaker().concurrencyLevel(2).makeMap();
for (Word lab : tree.yieldWords()) {
if (lab != null && lab.word() != null) {
//System.out.println("lab: " + lab + " \n");
if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) {
wordList.put(wordList.size() + 1, lab);
addCounter++;
}
}
DocumentPreprocessor tokenizer = null;
try {
tokenizer = new DocumentPreprocessor(new StringReader(str1));
} catch (Exception ex) {
System.out.println("failed tokenizer removeslacks: " + ex.getLocalizedMessage() + "\n");
tokenizer = null;
}
if (tokenizer != null) {
for (List<HasWord> sentence : tokenizer) {
int counter = 0;
List<TaggedWord> taggedWords;
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
Tree tree = model.apply(tagged1);
taggedWords = tree.taggedYield();
for (TaggedWord TGW : taggedWords) {
if (!TGWList.values().contains(TGW.tag()) && !TGW.tag().equals(":") && !TGW.word().equals(TGW.tag())) {
TGWList.put(TGWList.size() + 1, TGW.tag());
counter++;
}
if (addCounter > 3) {
addCounter = 0;
ConcurrentMap<Integer, HasWord> HWlist = new MapMaker().concurrencyLevel(2).makeMap();
for (HasWord HW : tree.yieldHasWord()) {
if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) {
addCounter++;
HWlist.put(HWlist.size() + 1, HW);
if (counter > 3) {
int addCounter = 0;
ConcurrentMap<Integer, Word> wordList = new MapMaker().concurrencyLevel(2).makeMap();
for (Word lab : tree.yieldWords()) {
if (lab != null && lab.word() != null) {
//System.out.println("lab: " + lab + " \n");
if (!wordList.values().contains(lab) && lab.value() != null && !lab.value().equals(":")) {
wordList.put(wordList.size() + 1, lab);
addCounter++;
}
}
}
if (addCounter > 3) {
boolean tooclosematch = false;
Collection<String> values = stringCache.values();
for (String strVals : values) {
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
double Distance = leven.computeLevenshteinDistance();
int maxpermittedDistance = 2;
if (Distance < maxpermittedDistance) {
tooclosematch = true;
break;
addCounter = 0;
ConcurrentMap<Integer, HasWord> HWlist = new MapMaker().concurrencyLevel(2).makeMap();
for (HasWord HW : tree.yieldHasWord()) {
if (HW != null && HW.word() != null && !HWlist.values().contains(HW)) {
addCounter++;
HWlist.put(HWlist.size() + 1, HW);
}
}
if (!tooclosematch) {
strreturn.put(strreturn.size() + 1, str1);
if (addCounter > 3) {
boolean tooclosematch = false;
Collection<String> values = stringCache.values();
for (String strVals : values) {
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
double Distance = leven.computeLevenshteinDistance();
int maxpermittedDistance = 2;
if (Distance < maxpermittedDistance) {
tooclosematch = true;
break;
}
}
if (!tooclosematch) {
strreturn.put(strreturn.size() + 1, str1);
System.out.println("adding strreturn str1: " + str1 + "\n");
}
}
}
break;
}
}
if (counter > 3) {
break;
}
}
if (counter > 3) {
break;
}
}
}
});
return strreturn;
}
@ -711,11 +699,14 @@ public class Datahandler {
ConcurrentMap<String, Annotation> pipelineAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<String, Annotation> pipelineSentimentAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<String, Annotation> jmweAnnotateCachelcl = new MapMaker().concurrencyLevel(2).makeMap();
ConcurrentMap<String, Annotation> jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strmap.values());
for (Entry<String, Annotation> jmweitr : jmweAnnotation.entrySet()) {
jmweAnnotateCachelcl.put(jmweitr.getKey(), jmweitr.getValue());
}
strmap.values().parallelStream().forEach(strCache -> {
Annotation strAnno = new Annotation(strCache);
pipeline.annotate(strAnno);
pipelineAnnotateCachelcl.put(strCache, strAnno);
jmweAnnotateCachelcl.put(strCache, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strCache));
Annotation strAnno2 = new Annotation(strCache);
pipelineSentiment.annotate(strAnno2);
pipelineSentimentAnnotateCachelcl.put(strCache, strAnno2);
@ -736,7 +727,6 @@ public class Datahandler {
futures.put(futures.size() + 1, executor.submit(worker));
});
});
System.out.println("verifycalc futures size: " + futures.size() + "\n");
futures.values().parallelStream().forEach((future) -> {
SimilarityMatrix get;
try {
@ -751,19 +741,21 @@ public class Datahandler {
}
if (!presentstr) {
returnmap.put(returnmap.size() + 1, addStr);
System.out.println("adding addStr: " + addStr + "\n");
}
} catch (InterruptedException | ExecutionException | TimeoutException ex) {
System.out.println("failed verification: " + ex.getMessage() + "\n");
}
});
jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(returnmap.values());
for (Entry<String, Annotation> jmweitr : jmweAnnotation.entrySet()) {
jmweAnnotationCache.put(jmweitr.getKey(), jmweitr.getValue());
}
returnmap.values().parallelStream().forEach(strCache -> {
stringCache.put(stringCache.size() + 1, strCache);
System.out.println("str annotation pipeline pipelinesentiment: " + strCache + "\n");
Annotation strAnno = new Annotation(strCache);
pipeline.annotate(strAnno);
pipelineAnnotationCache.put(strCache, strAnno);
jmweAnnotationCache.put(strCache, PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strCache));
Annotation strAnno2 = new Annotation(strCache);
pipelineSentiment.annotate(strAnno2);
pipelineSentimentAnnotationCache.put(strCache, strAnno2);

View File

@ -5,19 +5,21 @@
*/
package FunctionLayer;
import com.google.common.collect.MapMaker;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
/**
*
* @author install1
*/
public class LevenshteinDistance implements Callable<Map.Entry<String, Integer>> {
public class LevenshteinDistance implements Callable<ConcurrentMap<String, Integer>> {
private CharSequence lhs;
private CharSequence rhs;
private Entry<String, Integer> distanceEntry;
private ConcurrentMap<String, Integer> distanceEntry = new MapMaker().concurrencyLevel(2).makeMap();
private static int minimum(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
@ -28,7 +30,7 @@ public class LevenshteinDistance implements Callable<Map.Entry<String, Integer>>
this.rhs = rhs;
}
public int computeLevenshteinDistance() {
public double computeLevenshteinDistance() {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
for (int i = 0; i <= lhs.length(); i++) {
distance[i][0] = i;
@ -48,29 +50,23 @@ public class LevenshteinDistance implements Callable<Map.Entry<String, Integer>>
}
@Override
public Entry<String, Integer> call() {
try {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
for (int i = 0; i <= lhs.length(); i++) {
distance[i][0] = i;
}
for (int j = 1; j <= rhs.length(); j++) {
distance[0][j] = j;
}
for (int i = 1; i <= lhs.length(); i++) {
for (int j = 1; j <= rhs.length(); j++) {
distance[i][j] = minimum(
distance[i - 1][j] + 1,
distance[i][j - 1] + 1,
distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1));
}
}
distanceEntry.setValue(distance[lhs.length()][rhs.length()]);
} catch (Exception ex) {
System.out.println("ex msg: " + ex.getMessage() + "\n");
return null;
public ConcurrentMap<String, Integer> call() {
    // Computes the Levenshtein edit distance between lhs and rhs with a full
    // (lhs.length() + 1) x (rhs.length() + 1) table and stores the result in
    // distanceEntry under the key lhs.toString(); the map is then returned.
    // NOTE(review): the key is the left-hand string, so tasks created with the
    // same lhs all report under the same key -- confirm callers expect that.
    final int rows = lhs.length();
    final int cols = rhs.length();
    final int[][] table = new int[rows + 1][cols + 1];
    // First column: cost of deleting the first r characters of lhs.
    for (int r = 0; r <= rows; r++) {
        table[r][0] = r;
    }
    // First row: cost of inserting the first c characters of rhs.
    for (int c = 1; c <= cols; c++) {
        table[0][c] = c;
    }
    for (int r = 1; r <= rows; r++) {
        for (int c = 1; c <= cols; c++) {
            final int deletion = table[r - 1][c] + 1;
            final int insertion = table[r][c - 1] + 1;
            final int substitution = table[r - 1][c - 1]
                    + (lhs.charAt(r - 1) == rhs.charAt(c - 1) ? 0 : 1);
            table[r][c] = Math.min(Math.min(deletion, insertion), substitution);
        }
    }
    distanceEntry.put(lhs.toString(), table[rows][cols]);
    return distanceEntry;
}
}

View File

@ -5,18 +5,35 @@
*/
package FunctionLayer;
import edu.mit.jmwe.data.IMWEDescID;
import edu.mit.jmwe.data.IRootMWEDesc;
import com.google.common.collect.MapMaker;
import edu.mit.jmwe.data.IMWE;
import edu.mit.jmwe.data.IToken;
import edu.mit.jmwe.data.Token;
import edu.mit.jmwe.detect.CompositeDetector;
import edu.mit.jmwe.detect.Consecutive;
import edu.mit.jmwe.detect.Exhaustive;
import edu.mit.jmwe.detect.IMWEDetector;
import edu.mit.jmwe.detect.InflectionPattern;
import edu.mit.jmwe.detect.MoreFrequentAsMWE;
import edu.mit.jmwe.detect.ProperNouns;
import edu.mit.jmwe.index.IMWEIndex;
import edu.mit.jmwe.index.InMemoryMWEIndex;
import edu.mit.jmwe.index.MWEIndex;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreLabel.GenericAnnotation;
import edu.stanford.nlp.ling.JMWEAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.JMWEAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.concurrent.ConcurrentMap;
/**
*
@ -25,35 +42,110 @@ import java.util.logging.Logger;
//maybe not public?
public class PipelineJMWESingleton {
public final static PipelineJMWESingleton INSTANCE = new PipelineJMWESingleton();
private static Properties propsJMWE;
private volatile static StanfordCoreNLP pipelineJMWE = initializeJMWE();
//super important synchronization lock
public synchronized final static Annotation getJMWEAnnotation(String str) {
Annotation annoStr = new Annotation(str);
pipelineJMWE.annotate(annoStr);
return annoStr;
}
public final static StanfordCoreNLP initializeJMWE() {
String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data
String jmweIndexDataLocalTest = "E:/java8/Projects/mweindex_wordnet3.0_semcor1.6.data";
propsJMWE = new Properties();
propsJMWE.setProperty("customAnnotatorClass.jmwe", "edu.stanford.nlp.pipeline.JMWEAnnotator");
propsJMWE.setProperty("customAnnotatorClass.jmwe.verbose", "false");
propsJMWE.setProperty("customAnnotatorClass.jmwe.underscoreReplacement", "-");
propsJMWE.setProperty("customAnnotatorClass.jmwe.indexData", jmweIndexData); //jmweIndexDataLocalTest jmweIndexData
propsJMWE.setProperty("customAnnotatorClass.jmwe.detector", "Exhaustive");
//"Consecutive", "Exhaustive", "ProperNouns", "Complex" and "CompositeConsecutiveProperNouns"
propsJMWE.setProperty("annotators", "tokenize, ssplit, pos, lemma, jmwe");
System.out.println("finished singleton constructor \n");
return new StanfordCoreNLP(propsJMWE);
}
//if not needed to be volatile dont make it, increases time
// private volatile InMemoryMWEIndex mweMemoryIndex;
// private volatile static MWEIndex mweIndex;
// private volatile JMWEAnnotator jmweanno = initializeJMWE();
public volatile static PipelineJMWESingleton INSTANCE;
private volatile static int incrementer = 0;
private static StanfordCoreNLP localNLP = initializeJMWE();
private static String underscoreSpaceReplacement;
private PipelineJMWESingleton() {
}
public final static PipelineJMWESingleton getINSTANCE() {
return INSTANCE;
public static void getINSTANCE() {
INSTANCE = new PipelineJMWESingleton();
}
/**
 * Annotates every string in {@code strvalues} with the shared {@code localNLP}
 * pipeline and attaches the multi-word expressions detected in each sentence
 * under {@link JMWEAnnotation}.
 *
 * <p>The MWE index is opened from a hard-coded file path for the duration of
 * the call and is always closed again, even when annotation fails part-way.
 * The strings are processed through a parallel stream.
 *
 * @param strvalues the strings to annotate
 * @return a map from each input string to its annotated document
 * @throws RuntimeException if the MWE index cannot be opened; the underlying
 *         {@link IOException} is attached as the cause
 */
public final ConcurrentMap<String, Annotation> getJMWEAnnotation(Collection<String> strvalues) {
    boolean verbose = false;
    // Local test alternative: E:/java8/Projects/mweindex_wordnet3.0_semcor1.6.data
    String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data
    String detectorName = "Exhaustive";
    IMWEIndex index = new MWEIndex(new File(jmweIndexData));
    try {
        index.open();
    } catch (IOException e) {
        // Keep the original I/O failure as the cause so the reason is not lost.
        throw new RuntimeException("unable to open IMWEIndex index", e);
    }
    ConcurrentMap<String, Annotation> returnAnnotations = new MapMaker().concurrencyLevel(2).makeMap();
    try {
        IMWEDetector detector = getDetector(index, detectorName);
        strvalues.parallelStream().forEach(str -> {
            Annotation annoStr = new Annotation(str);
            localNLP.annotate(annoStr);
            for (CoreMap sentence : annoStr.get(CoreAnnotations.SentencesAnnotation.class)) {
                List<IMWE<IToken>> mwes = getjMWEInSentence(sentence, index, detector, verbose);
                sentence.set(JMWEAnnotation.class, mwes);
            }
            returnAnnotations.put(str, annoStr);
            System.out.println("incrementer: " + incrementer + "\n");
            // NOTE: ++ on a volatile int is not atomic, so under the parallel
            // stream this progress counter can lose updates; it is only
            // used for the log line above.
            incrementer++;
        });
    } finally {
        // Release the index even if detector creation or any annotation throws.
        index.close();
    }
    return returnAnnotations;
}
/**
 * Creates the CoreNLP pipeline used for MWE detection (annotators
 * tokenize, ssplit, pos and lemma), stores it in {@code localNLP}, and sets
 * {@code underscoreSpaceReplacement} to "-".
 *
 * @return the newly created pipeline
 */
public final static StanfordCoreNLP initializeJMWE() {
    final Properties pipelineProps = new Properties();
    pipelineProps.setProperty("annotators", "tokenize,ssplit,pos,lemma");
    underscoreSpaceReplacement = "-";
    final StanfordCoreNLP created = new StanfordCoreNLP(pipelineProps);
    localNLP = created;
    System.out.println("finished singleton constructor \n");
    return created;
}
/**
 * Creates the MWE detector selected by name.
 *
 * @param index the MWE index handed to the index-backed detectors
 * @param detector one of "Consecutive", "Exhaustive", "ProperNouns",
 *        "Complex" or "CompositeConsecutiveProperNouns"
 * @return the detector matching {@code detector}
 * @throws IllegalArgumentException if {@code detector} is none of the
 *         supported names
 */
public IMWEDetector getDetector(IMWEIndex index, String detector) {
    switch (detector) {
        case "Consecutive":
            return new Consecutive(index);
        case "Exhaustive":
            return new Exhaustive(index);
        case "ProperNouns":
            return ProperNouns.getInstance();
        case "Complex": {
            // Proper nouns combined with a frequency-filtered, inflection-aware
            // consecutive detector.
            IMWEDetector properNouns = ProperNouns.getInstance();
            IMWEDetector filtered = new MoreFrequentAsMWE(new InflectionPattern(new Consecutive(index)));
            return new CompositeDetector(properNouns, filtered);
        }
        case "CompositeConsecutiveProperNouns": {
            IMWEDetector consecutive = new Consecutive(index);
            IMWEDetector properNouns = ProperNouns.getInstance();
            return new CompositeDetector(consecutive, properNouns);
        }
        default: {
            String message = "Invalid detector argument " + detector
                    + ", only \"Consecutive\", \"Exhaustive\", \"ProperNouns\", \"Complex\" or \"CompositeConsecutiveProperNouns\" are supported.";
            throw new IllegalArgumentException(message);
        }
    }
}
/**
 * Runs {@code detector} over the tokens of one sentence.
 *
 * @param sentence the sentence whose {@code TokensAnnotation} is read
 * @param index not used by this method
 * @param detector the detector applied to the converted tokens
 * @param verbose when true, each detected expression is printed to stdout
 * @return the expressions reported by {@code detector}
 */
public List<IMWE<IToken>> getjMWEInSentence(CoreMap sentence, IMWEIndex index, IMWEDetector detector,
        boolean verbose) {
    List<CoreLabel> labels = sentence.get(CoreAnnotations.TokensAnnotation.class);
    List<IToken> converted = getITokens(labels);
    List<IMWE<IToken>> detected = detector.detect(converted);
    if (!verbose) {
        return detected;
    }
    for (IMWE<IToken> expression : detected) {
        System.out.println("IMWE<IToken>: " + expression);
    }
    return detected;
}
/**
 * Converts CoreNLP tokens to jMWE tokens using the class-wide
 * {@code underscoreSpaceReplacement} string.
 *
 * @param tokens the CoreNLP tokens of one sentence
 * @return the converted tokens, in the same order
 */
public List<IToken> getITokens(List<CoreLabel> tokens) {
    final String replacement = underscoreSpaceReplacement;
    return getITokens(tokens, replacement);
}
/**
 * Converts CoreNLP tokens to jMWE tokens. In both the original text and the
 * lemma of every token, each underscore and each space is replaced by
 * {@code underscoreSpaceReplacement}.
 *
 * <p>The replacement is applied literally: characters such as {@code $} or
 * {@code \} in {@code underscoreSpaceReplacement} carry no special meaning.
 *
 * @param tokens the CoreNLP tokens of one sentence; each must have a lemma
 * @param underscoreSpaceReplacement the text substituted for "_" and " "
 * @return the converted tokens, in the same order as {@code tokens}
 */
public List<IToken> getITokens(List<CoreLabel> tokens, String underscoreSpaceReplacement) {
    List<IToken> sentence = new ArrayList<>(tokens.size());
    for (CoreLabel token : tokens) {
        // String.replace is literal; replaceAll would parse the target as a
        // regex and interpret '$' and '\' in the replacement.
        String form = token.originalText()
                .replace("_", underscoreSpaceReplacement)
                .replace(" ", underscoreSpaceReplacement);
        String partOfSpeech = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String lemma = token.lemma()
                .replace("_", underscoreSpaceReplacement)
                .replace(" ", underscoreSpaceReplacement);
        sentence.add(new Token(form, partOfSpeech, lemma));
    }
    return sentence;
}
}

View File

@ -18,7 +18,10 @@ import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
@ -94,10 +97,14 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
TokenizerFactory<CoreLabel> ptbTokenizerFactory
= PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
tokenizer = new DocumentPreprocessor(new StringReader(str));
tokenizer.setTokenizerFactory(ptbTokenizerFactory);
for (List<HasWord> sentence : tokenizer) {
taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
@ -192,7 +199,7 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
}
}
} catch (Exception ex) {
System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage()+ "\n");
System.out.println("pipelineAnnotation stacktrace: " + ex.getLocalizedMessage() + "\n");
}
sentenceConstituencyParseList.clear();
ConcurrentMap<Integer, SimpleMatrix> simpleSMXlist = new MapMaker().concurrencyLevel(2).makeMap();
@ -422,7 +429,7 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
score -= tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500;
}
LevenshteinDistance leven = new LevenshteinDistance(str, str1);
int SentenceScoreDiff = leven.computeLevenshteinDistance();
double SentenceScoreDiff = leven.computeLevenshteinDistance();
SentenceScoreDiff *= 15;
score -= SentenceScoreDiff;
} catch (Exception ex) {

View File

@ -21,6 +21,7 @@ import FunctionLayer.PipelineJMWESingleton;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.javacord.api.DiscordApi;
@ -34,18 +35,19 @@ import org.javacord.api.entity.user.User;
public class DiscordHandler {
public static void main(String[] args) {
new Thread(() -> {
try {
Datahandler.instance.initiateMYSQL();
PipelineJMWESingleton.getINSTANCE();
System.out.println("finished initiating MYSQL");
} catch (SQLException | IOException ex) {
Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
}
}).start();
Datahandler.instance.shiftReduceParserInitiate();
System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "25");
try {
Datahandler.instance.initiateMYSQL();
System.out.println("finished initiating MYSQL");
} catch (SQLException | IOException ex) {
Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
}
PipelineJMWESingleton.getINSTANCE();
Datahandler.instance.instantiateExecutor();
Datahandler.instance.instantiateAnnotationMapJMWE();
Datahandler.instance.shiftReduceParserInitiate();
Datahandler.instance.instantiateAnnotationMap();
System.out.println("FINISHED ALL ANNOTATIONS");
Datahandler.instance.addHLstatsMessages();
Datahandler.instance.updateStringCache();
//order matters