Updated calculations, calculation-storage handling, and DB retrieval; added a DistanceObject, made Levenshtein distance concurrent, and reworked almost everything in MYSQLDatahandler.

jenzur 2019-03-03 13:17:07 +01:00
parent aca3d9f9c8
commit f64ce5c5a0
8 changed files with 676 additions and 544 deletions

View File

@@ -14,6 +14,7 @@ import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
@@ -168,6 +169,47 @@ public class DataMapper {
        }
    }
public static LinkedHashMap<String, LinkedHashMap<String, Double>> getAllRelationScores() {
int count = getSementicsDBRows();
int counter2 = 0;
int hardCapRetrieveCount = 500000;
LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMX = new LinkedHashMap();
while (count > counter2) {
try (Connection l_cCon = DBCPDataSource.getConnection()) {
l_cCon.setAutoCommit(false);
String l_sSQL = "SELECT * FROM `WordMatrix` WHERE ID > " + counter2 + " AND ID < " + (counter2 + hardCapRetrieveCount);
try (PreparedStatement l_pStatement = l_cCon.prepareStatement(l_sSQL, java.sql.ResultSet.TYPE_FORWARD_ONLY,
java.sql.ResultSet.CONCUR_READ_ONLY)) {
l_pStatement.setFetchSize(Integer.MIN_VALUE);
try (ResultSet l_rsSearch = l_pStatement.executeQuery()) {
int i = 0;
LinkedHashMap<String, Double> LHMLocal = new LinkedHashMap();
while (l_rsSearch.next() && i < hardCapRetrieveCount) {
String str1 = l_rsSearch.getString(1);
String str2 = l_rsSearch.getString(2);
Double score = l_rsSearch.getDouble(3);
LHMLocal.put(str2, score);
while (l_rsSearch.next() && i < hardCapRetrieveCount && str1.equals(l_rsSearch.getString(1))) {
str2 = l_rsSearch.getString(2);
score = l_rsSearch.getDouble(3);
LHMLocal.put(str2, score);
i++;
counter2++;
}
LHMSMX.put(str1, LHMLocal);
System.out.println("i: " + i + "\n" + "free memory: " + Runtime.getRuntime().freeMemory() + "\ncounter2: " + counter2 + "\n");
i++;
counter2++;
}
}
}
} catch (SQLException ex) {
Logger.getLogger(DataMapper.class.getName()).log(Level.SEVERE, null, ex);
}
}
return LHMSMX;
}
public static void CloseConnections(PreparedStatement ps, ResultSet rs, Connection con) {
    if (rs != null) {
        try {
@@ -191,4 +233,5 @@ public class DataMapper {
            }
        }
    }
}
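Note on the retrieval above: getAllRelationScores() relies on MySQL Connector/J's streaming mode, which only kicks in for a forward-only, read-only statement whose fetch size is set to Integer.MIN_VALUE; without it the driver buffers the whole WordMatrix result set in memory. A minimal, self-contained sketch of that idiom, assuming a WordMatrix table with two string columns and a score column as in the commit:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class StreamingReadSketch {
    // Streams rows one at a time instead of materializing the result set,
    // which is what keeps large reads from exhausting the heap.
    public static void streamRows(Connection con) throws SQLException {
        try (PreparedStatement ps = con.prepareStatement(
                "SELECT * FROM WordMatrix",
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            ps.setFetchSize(Integer.MIN_VALUE); // MySQL streaming hint
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    System.out.println(rs.getString(1) + " / " + rs.getString(2) + ": " + rs.getDouble(3));
                }
            }
        }
    }
}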

View File

@@ -0,0 +1,37 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package FunctionLayer;
/**
*
* @author install1
*/
public class DistanceObject {
private Integer distance;
private String sentence;
public DistanceObject() {
}
public Integer getDistance() {
return distance;
}
public void setDistance(Integer distance) {
this.distance = distance;
}
public String getSentence() {
return sentence;
}
public DistanceObject(Integer distance, String sentence) {
this.distance = distance;
this.sentence = sentence;
}
}

View File

@@ -5,19 +5,35 @@
 */
package FunctionLayer;
+import java.util.concurrent.Callable;
/**
 *
 * @author install1
 */
-public class LevenshteinDistance {
+public class LevenshteinDistance implements Callable<DistanceObject> {
+    private CharSequence lhs;
+    private CharSequence rhs;
+    private DistanceObject dco;
    private static int minimum(int a, int b, int c) {
        return Math.min(Math.min(a, b), c);
    }
-    public static int computeLevenshteinDistance(CharSequence lhs, CharSequence rhs) {
-        int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
+    public LevenshteinDistance(CharSequence lhs, CharSequence rhs, DistanceObject dco) {
+        this.lhs = lhs;
+        this.rhs = rhs;
+        this.dco = dco;
+    }
+    public LevenshteinDistance(CharSequence lhs, CharSequence rhs) {
+        this.lhs = lhs;
+        this.rhs = rhs;
+    }
+    public int computeLevenshteinDistance() {
+        int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
        for (int i = 0; i <= lhs.length(); i++) {
            distance[i][0] = i;
        }
@@ -34,4 +50,32 @@ public class LevenshteinDistance {
        }
        return distance[lhs.length()][rhs.length()];
    }
@Override
public DistanceObject call() {
try {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
for (int i = 0; i <= lhs.length(); i++) {
distance[i][0] = i;
}
for (int j = 1; j <= rhs.length(); j++) {
distance[0][j] = j;
}
for (int i = 1; i <= lhs.length(); i++) {
for (int j = 1; j <= rhs.length(); j++) {
distance[i][j] = minimum(
distance[i - 1][j] + 1,
distance[i][j - 1] + 1,
distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1));
}
}
dco.setDistance(distance[lhs.length()][rhs.length()]);
} catch (Exception ex) {
System.out.println("ex msg: " + ex.getMessage() + "\n");
dco.setDistance(100);
return dco;
}
return dco;
}
}
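A minimal sketch of how the new Callable form is meant to be driven from a thread pool (the pool size and sample strings are assumptions, not part of this commit). Note that call() as committed only fills in the distance; the DistanceObject's sentence stays null unless it is supplied through DistanceObject's two-argument constructor elsewhere:

package FunctionLayer;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class LevenshteinPoolSketch {
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        List<Future<DistanceObject>> futures = new ArrayList<>();
        for (String candidate : new String[]{"hello there", "hello world"}) {
            // one task per comparison; each reports back through its own DistanceObject
            futures.add(executor.submit(new LevenshteinDistance(candidate, "hello word", new DistanceObject())));
        }
        executor.shutdown();
        for (Future<DistanceObject> future : futures) {
            System.out.println("distance: " + future.get().getDistance());
        }
    }
}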

View File

@@ -2,25 +2,25 @@
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
-//https://stackoverflow.com/questions/43935229/hashmap-with-8-million-entries-becomes-slow
-//http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6364/pdf/imm6364.pdf
 */
package FunctionLayer;
import DataLayer.DataMapper;
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
import com.google.common.base.Stopwatch;
-import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.MapMaker;
-import com.google.common.collect.Multimap;
+import edu.stanford.nlp.ie.AbstractSequenceClassifier;
+import edu.stanford.nlp.ie.crf.CRFClassifier;
+import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
-import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
+import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.IOException;
@@ -28,14 +28,23 @@ import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
-import java.util.function.Predicate;
+import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -48,19 +57,89 @@ public class MYSQLDatahandler {
    public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
    public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
    public static MYSQLDatahandler instance = new MYSQLDatahandler();
-    public static int SemeticsUpdateCount;
+    public static int semeticsUpdateCount;
    public static int threadCounter = 0;
-    private volatile boolean RefreshMatrixFromDB;
+    private volatile boolean refreshMatrixFromDB;
-    private final ConcurrentMap<Integer, String> StringCache;
+    private final ConcurrentMap<Integer, String> stringCache;
-    private List<SimilarityMatrix> SimilaritySMXList = new ArrayList();
+    private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
    private List<String> multiprocessCalculations = new ArrayList();
+    private List<Integer> updatedRows = new ArrayList();
    private final Stopwatch stopwatch;
    private final Stopwatch stopwatch1;
private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
private static String NERModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
private static MaxentTagger tagger;
private static ShiftReduceParser model;
private static String[] options = {"-maxLength", "100"};
private static Properties props = new Properties();
private static Properties propsSentiment = new Properties();
private static GrammaticalStructureFactory gsf;
private static LexicalizedParser lp;
private static TreebankLanguagePack tlp;
private static AbstractSequenceClassifier<CoreLabel> classifier;
private static StanfordCoreNLP pipeline;
private static StanfordCoreNLP pipelineSentiment;
public static AbstractSequenceClassifier<CoreLabel> getClassifier() {
return classifier;
}
public static void setClassifier(AbstractSequenceClassifier<CoreLabel> classifier) {
MYSQLDatahandler.classifier = classifier;
}
public MYSQLDatahandler() {
    this.stopwatch = Stopwatch.createUnstarted();
    this.stopwatch1 = Stopwatch.createStarted();
-    this.StringCache = new MapMaker().concurrencyLevel(2).makeMap();
+    this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
}
public static void shiftReduceParserInitiate() {
try {
classifier = CRFClassifier.getClassifierNoExceptions(NERModel);
} catch (ClassCastException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
model = ShiftReduceParser.loadModel(modelPath, options);
tagger = new MaxentTagger(taggerPath);
lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
tlp = lp.getOp().langpack();
gsf = tlp.grammaticalStructureFactory();
// set up pipeline properties
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
props.setProperty("parse.model", modelPath);
props.setProperty("parse.maxlen", "100");
props.setProperty("parse.binaryTrees", "true");
propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
propsSentiment.setProperty("sentiment.model", sentimentModel);
propsSentiment.setProperty("parse.maxlen", "100");
// set up pipeline
pipeline = new StanfordCoreNLP(props);
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
}
public static GrammaticalStructureFactory getGsf() {
return gsf;
}
public static StanfordCoreNLP getPipeline() {
return pipeline;
}
public static StanfordCoreNLP getPipelineSentiment() {
return pipelineSentiment;
}
public static MaxentTagger getTagger() {
return tagger;
}
public static ShiftReduceParser getModel() {
return model;
}
private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
@@ -78,8 +157,8 @@ public class MYSQLDatahandler {
public void initiateMYSQL() throws SQLException, IOException {
    try {
        DataMapper.createTables();
-        StringCache.putAll(getCache());
-        SimilaritySMXList = DataMapper.getAllSementicMatrixes();
+        stringCache.putAll(getCache());
+        lHMSMX = DataMapper.getAllRelationScores();
    } catch (CustomError ex) {
        Logger.getLogger(MYSQLDatahandler.class
                .getName()).log(Level.SEVERE, null, ex);
@@ -87,34 +166,50 @@
    }
}
public synchronized void checkIfUpdateMatrixes() {
-    RefreshMatrixFromDB = false;
+    refreshMatrixFromDB = false;
    int calculationBoundaries = 10;
    int updateBadgesInteger = 500;
    if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) {
-        RefreshMatrixFromDB = true;
+        refreshMatrixFromDB = true;
        if (threadCounter == 0) {
-            try {
-                SimilaritySMXList = DataMapper.getAllSementicMatrixes();
-                stopwatch1.reset();
-            } catch (CustomError ex) {
-                Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
-            }
+            lHMSMX = DataMapper.getAllRelationScores();
+            stopwatch1.reset();
        }
    }
-    if (StringCache.values().size() > 10) {
-        if (!RefreshMatrixFromDB && multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) {
+    if (stringCache.values().size() > 10 && !refreshMatrixFromDB) {
+        if (multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) {
            threadCounter++;
-            List<String> strList = new ArrayList(StringCache.values());
-            SemeticsUpdateCount = new Random().nextInt(strList.size() - 6);
-            int beginindex = SemeticsUpdateCount;
-            SemeticsUpdateCount += calculationBoundaries / 2;
-            int temp = SemeticsUpdateCount;
+            List<String> strList = new ArrayList(stringCache.values());
+            List<Integer> updateLocal = updatedRows;
+            int random = -1;
+            if (!updateLocal.contains(random)) {
+                updatedRows.add(random);
+            }
+            Collections.sort(updateLocal);
+            while (updateLocal.contains(random)) {
+                random = new Random().nextInt(strList.size() - 6);
+                int indexPrev = Collections.binarySearch(updateLocal, random);
+                int indexNext = Collections.binarySearch(updateLocal, random + 6);
+                //-1 will always be index 0
+                if (indexPrev > 0 && indexNext > 0) {
+                    indexPrev = updateLocal.get(indexPrev);
+                    indexNext = updateLocal.get(indexNext);
+                }
+                random = indexPrev < random - 5 && indexNext < random ? random : -1;
+            }
+            updatedRows.add(random);
+            semeticsUpdateCount = random;
+            int beginindex = semeticsUpdateCount;
+            semeticsUpdateCount += calculationBoundaries / 2;
+            int temp = semeticsUpdateCount;
+            System.out.println("beginindex: " + beginindex + "\ntemp: " + temp + "\n");
            List<String> strIndexNavigator = new ArrayList();
            strList.subList(beginindex, temp).forEach((str) -> {
                strIndexNavigator.add(str);
                multiprocessCalculations.add(str);
            });
            new Thread(() -> {
LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
                List<String> strIndexNavigatorL = new ArrayList(strIndexNavigator);
                List<String> strIndexAll = new ArrayList(strList);
                List<String> randomIndexesToUpdate = new ArrayList();
@@ -127,32 +222,49 @@
                    randomIndexesToUpdate.add(str);
                });
                List<SimilarityMatrix> matrixUpdateList = new ArrayList();
List<Future<SimilarityMatrix>> futures = new ArrayList();
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
                strIndexNavigatorL.forEach((str) -> {
                    randomIndexesToUpdate.stream().filter((str1) -> (!str.equals(str1))).forEachOrdered((str1) -> {
                        boolean present = false;
                        if (multiprocessCalculations.contains(str1)) {
                            present = true;
-                        } else {
-                            for (SimilarityMatrix SMX : SimilaritySMXList) {
-                                if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) {
-                                    present = true;
-                                    break;
-                                }
-                                if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) {
-                                    present = true;
-                                    break;
-                                }
-                            }
-                        }
+                        } else if (LHMSMXLocal.containsKey(str)) {
+                            LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str);
+                            if (orDefault.containsKey(str1)) {
+                                present = true;
+                            }
+                        } else if (LHMSMXLocal.containsKey(str1)) {
+                            LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str1);
+                            if (orDefault.containsKey(str)) {
+                                present = true;
+                            }
+                        }
                        if (!present) {
                            SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
-                            double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1);
-                            SMX.setDistance(score);
-                            matrixUpdateList.add(SMX);
-                            SimilaritySMXList.add(SMX);
+                            Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX);
+                            futures.add(executor.submit(worker));
                        }
                    });
                });
executor.shutdown();
try {
System.out.println("finished worker assignment, futures size: " + futures.size() + "\n");
for (Future<SimilarityMatrix> future : futures) {
SimilarityMatrix SMX = future.get();
System.out.println("SMX primary: " + SMX.getPrimaryString() + "\nSMX Secondary: " + SMX.getSecondaryString()
+ "\nScore: " + SMX.getDistance() + "\n");
LinkedHashMap<String, Double> get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null);
if (get == null) {
get = new LinkedHashMap();
}
get.put(SMX.getSecondaryString(), SMX.getDistance());
lHMSMX.put(SMX.getPrimaryString(), get);
matrixUpdateList.add(SMX);
}
} catch (InterruptedException | ExecutionException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
                new Thread(() -> {
                    try {
                        if (!matrixUpdateList.isEmpty()) {
@@ -160,6 +272,7 @@
                            System.out.println("finished datamapper semetic insert");
                        }
                        threadCounter--;
System.out.println("\nthreadCounter: " + threadCounter + "\n");
                    } catch (CustomError ex) {
                        Logger.getLogger(MYSQLDatahandler.class
                                .getName()).log(Level.SEVERE, null, ex);
@@ -167,56 +280,73 @@
                }).start();
            }).
                    start();
try {
wait(800);
} catch (InterruptedException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
        } else {
            if (threadCounter == 0) {
-                List<String> strList = new ArrayList(StringCache.values());
-                List<SimilarityMatrix> matrixUpdateList = new ArrayList();
-                List<String> randomStrList = new ArrayList();
-                int indexes = updateBadgesInteger;
-                if (indexes >= strList.size()) {
-                    indexes = strList.size() - 1;
-                }
-                int beginindexes = new Random().nextInt((strList.size()) - indexes);
-                strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> {
-                    randomStrList.add(str);
-                });
-                multiprocessCalculations.forEach((str) -> {
-                    randomStrList.forEach((str1) -> {
-                        boolean present = false;
-                        for (SimilarityMatrix SMX : SimilaritySMXList) {
-                            if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) {
-                                present = true;
-                                break;
-                            }
-                            if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) {
-                                present = true;
-                                break;
-                            }
-                        }
-                        if (!present) {
-                            SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
-                            double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1);
-                            SMX.setDistance(score);
-                            matrixUpdateList.add(SMX);
-                            SimilaritySMXList.add(SMX);
-                        }
-                    });
-                });
-                try {
-                    if (!matrixUpdateList.isEmpty()) {
-                        DataMapper.insertSementicMatrixes(matrixUpdateList);
-                        System.out.println("finished datamapper semetic insert");
-                    }
-                } catch (CustomError ex) {
-                    Logger.getLogger(MYSQLDatahandler.class
-                            .getName()).log(Level.SEVERE, null, ex);
-                }
-                multiprocessCalculations = new ArrayList();
+                threadCounter++;
+                new Thread(() -> {
+                    LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
+                    List<String> strList = new ArrayList(stringCache.values());
+                    List<SimilarityMatrix> matrixUpdateList = new ArrayList();
+                    List<String> randomStrList = new ArrayList();
+                    int indexes = updateBadgesInteger;
+                    if (indexes >= strList.size()) {
+                        indexes = strList.size() - 1;
+                    }
+                    int beginindexes = new Random().nextInt((strList.size()) - indexes);
+                    strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> {
+                        randomStrList.add(str);
+                    });
+                    List<Future<SimilarityMatrix>> futures = new ArrayList();
+                    ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+                    multiprocessCalculations.forEach((str) -> {
+                        randomStrList.forEach((str1) -> {
+                            boolean present = false;
+                            if (LHMSMXLocal.containsKey(str)) {
+                                LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str);
+                                if (orDefault.containsKey(str1)) {
+                                    present = true;
+                                }
+                            } else if (LHMSMXLocal.containsKey(str1)) {
+                                LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str1);
+                                if (orDefault.containsKey(str)) {
+                                    present = true;
+                                }
+                            }
+                            if (!present) {
+                                SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
+                                Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX);
+                                futures.add(executor.submit(worker));
+                            }
+                        });
+                    });
+                    executor.shutdown();
+                    try {
+                        for (Future<SimilarityMatrix> future : futures) {
+                            SimilarityMatrix SMX = future.get();
+                            LinkedHashMap<String, Double> get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null);
+                            if (get == null) {
+                                get = new LinkedHashMap();
+                            }
+                            get.put(SMX.getSecondaryString(), SMX.getDistance());
+                            lHMSMX.put(SMX.getPrimaryString(), get);
+                            matrixUpdateList.add(SMX);
+                        }
+                    } catch (InterruptedException | ExecutionException ex) {
+                        Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
+                    }
+                    try {
+                        if (!matrixUpdateList.isEmpty()) {
+                            DataMapper.insertSementicMatrixes(matrixUpdateList);
+                            System.out.println("finished datamapper semetic insert");
+                        }
+                    } catch (CustomError ex) {
+                        Logger.getLogger(MYSQLDatahandler.class
+                                .getName()).log(Level.SEVERE, null, ex);
+                    }
+                    multiprocessCalculations = new ArrayList();
+                    updatedRows = new ArrayList();
+                    threadCounter--;
+                }).start();
            }
        }
    }
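Both update paths above follow the same scatter/gather shape: submit one SentimentAnalyzerTest per uncalculated pair, shut the pool down, then drain the futures into the nested score map and the DB batch. A stripped-down sketch of that shape, with a stand-in lambda where the commit submits the real analyzer:

package FunctionLayer;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ScatterGatherSketch {
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        List<Future<SimilarityMatrix>> futures = new ArrayList<>();
        String[][] pairs = {{"good morning", "good evening"}, {"good morning", "bad evening"}};
        for (String[] pair : pairs) {
            // stand-in worker; the real code submits new SentimentAnalyzerTest(str, str1, SMX)
            futures.add(executor.submit(() -> new SimilarityMatrix(pair[0], pair[1], Math.random())));
        }
        executor.shutdown();
        LinkedHashMap<String, LinkedHashMap<String, Double>> scores = new LinkedHashMap<>();
        for (Future<SimilarityMatrix> future : futures) {
            SimilarityMatrix SMX = future.get(); // blocks until that worker finishes
            LinkedHashMap<String, Double> inner = scores.getOrDefault(SMX.getPrimaryString(), new LinkedHashMap<>());
            inner.put(SMX.getSecondaryString(), SMX.getDistance());
            scores.put(SMX.getPrimaryString(), inner);
        }
        System.out.println(scores);
    }
}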
@@ -233,14 +363,15 @@
        strUpdate.addAll(str);
        try {
            DataMapper.InsertMYSQLStrings(strUpdate);
        } catch (CustomError ex) {
            Logger.getLogger(MYSQLDatahandler.class
                    .getName()).log(Level.SEVERE, null, ex);
        }
        MessageResponseHandler.setStr(new ArrayList());
-        int j = StringCache.size() + 1;
+        int j = stringCache.size() + 1;
        for (String str1 : strUpdate) {
-            StringCache.put(j, str1);
+            stringCache.put(j, str1);
            j++;
        }
    }).start();
@@ -252,85 +383,106 @@
    }
}
-    public String getResponseMsg(String str) throws CustomError {
-        double Score = -10000;
-        SimilarityMatrix SMXreturn = null;
-        List<String> strLocal = new ArrayList(StringCache.values());
-        for (String str1 : strLocal) {
-            if (str.equals(str1)) {
-                Iterator<SimilarityMatrix> SMXITR = SimilaritySMXList.iterator();
-                while (SMXITR.hasNext()) {
-                    SimilarityMatrix SMX = SMXITR.next();
-                    if (SMX.getPrimaryString().equals(str) || SMX.getSecondaryString().equals(str)) {
-                        double smxDistance = SMX.getDistance();
-                        if (smxDistance > Score) {
-                            Score = smxDistance;
-                            SMXreturn = SMX;
-                        }
-                    }
-                }
-                break;
-            }
-        }
-        if (SMXreturn != null) {
-            if (SMXreturn.getPrimaryString().equals(str)) {
-                return SMXreturn.getSecondaryString();
-            } else {
-                return SMXreturn.getPrimaryString();
-            }
-        }
-        String[] toArray = strLocal.toArray(new String[strLocal.size()]);
-        String mostSimilarSTR = mostSimilar(str, toArray);
-        Iterator<SimilarityMatrix> SMXITR = SimilaritySMXList.iterator();
-        while (SMXITR.hasNext()) {
-            System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n");
-            mostSimilarSTR = mostSimilarSTR.trim();
-            SimilarityMatrix SMX = SMXITR.next();
-            if (SMX.getPrimaryString().trim().equals(mostSimilarSTR) || SMX.getSecondaryString().trim().equals(mostSimilarSTR)) {
-                double smxDistance = SMX.getDistance();
-                if (smxDistance > Score) {
-                    Score = smxDistance;
-                    SMXreturn = SMX;
-                }
-            }
-        }
-        if (SMXreturn != null) {
-            if (SMXreturn.getPrimaryString().equals(str)) {
-                return SMXreturn.getSecondaryString();
-            } else {
-                return SMXreturn.getPrimaryString();
-            }
-        }
-        SMXITR = SimilaritySMXList.iterator();
-        while (SMXITR.hasNext()) {
-            SimilarityMatrix SMX = SMXITR.next();
-            if (SMX.getPrimaryString().contains(mostSimilarSTR) || SMX.getSecondaryString().contains(mostSimilarSTR)) {
-                double smxDistance = SMX.getDistance();
-                if (smxDistance > Score) {
-                    Score = smxDistance;
-                    SMXreturn = SMX;
-                }
-            }
-        }
-        if (SMXreturn != null) {
-            if (SMXreturn.getPrimaryString().equals(str)) {
-                return SMXreturn.getSecondaryString();
-            } else {
-                return SMXreturn.getPrimaryString();
-            }
-        }
-        return "how did you manage to reach this, AAAAAAAAAAAA end my suffering";
-    }
+    public synchronized String getResponseMsg(String str) throws CustomError {
+        str = str.trim();
+        if (str.startsWith("<@")) {
+            str = str.substring(str.indexOf("> ") + 2);
+        }
+        final LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
+        ConcurrentMap<Integer, String> strArrs = stringCache;
+        double Score = -10000;
+        SimilarityMatrix SMXreturn = new SimilarityMatrix("", "");
+        System.out.println("pre mostSimilarSTR \n");
+        String mostSimilarSTR = mostSimilar(str, strArrs);
+        if (!mostSimilarSTR.isEmpty()) {
+            System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n");
+            LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null);
+            if (orDefault != null) {
+                for (Entry<String, Double> entrySet : orDefault.entrySet()) {
+                    double smxDistance = entrySet.getValue();
+                    if (smxDistance > Score) {
+                        Score = smxDistance;
+                        SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance);
+                    }
+                }
+            }
+            for (Entry<String, LinkedHashMap<String, Double>> values1 : LHMSMXLocal.entrySet()) {
+                LinkedHashMap<String, Double> value = values1.getValue();
+                for (Entry<String, Double> keystr : value.entrySet()) {
+                    if (keystr.getKey().equals(mostSimilarSTR)) {
+                        double smxDistance = keystr.getValue();
+                        if (smxDistance > Score) {
+                            Score = smxDistance;
+                            SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance);
+                        }
+                    }
+                }
+            }
+            if (!SMXreturn.getPrimaryString().isEmpty()) {
+                if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) {
+                    return SMXreturn.getSecondaryString();
+                } else {
+                    return SMXreturn.getPrimaryString();
+                }
+            }
+        }
+        System.out.println("none within 8 range");
+        ConcurrentMap<Integer, String> strCache = stringCache;
+        ConcurrentMap<Integer, Future<SimilarityMatrix>> futureslocal = new MapMaker().concurrencyLevel(2).makeMap();
+        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+        for (String str1 : strCache.values()) {
+            if (!str.equals(str1)) {
+                Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1));
+                futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
+            }
+        }
+        executor.shutdown();
+        int index = 0;
+        for (Future<SimilarityMatrix> future : futureslocal.values()) {
+            try {
+                SimilarityMatrix SMX = future.get();
+                double distance = SMX.getDistance();
+                System.out.println("index: " + index + "\nfutures size: " + futureslocal.values().size() + "\nScore: " + SMX.getDistance() + "\nSecondary: "
+                        + SMX.getSecondaryString() + "\nPrimary: " + SMX.getPrimaryString() + "\n");
+                if (distance > Score) {
+                    Score = distance;
+                    SMXreturn = SMX;
+                }
+            } catch (InterruptedException | ExecutionException ex) {
+                System.out.println("ex: " + ex.getMessage() + "\n");
+            }
+            index++;
+        }
+        System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString()
+                + "\nScore: " + SMXreturn.getDistance());
+        return SMXreturn.getSecondaryString();
+    }
-    public String mostSimilar(String toBeCompared, String[] strings) {
-        int minDistance = Integer.MAX_VALUE;
-        String similar = "";
-        for (String str : strings) {
-            int d = LevenshteinDistance.computeLevenshteinDistance(str, toBeCompared);
-            if (d < minDistance) {
-                minDistance = d;
-                similar = str;
-            }
-        }
-        return similar;
-    }
+    public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings) {
+        int minDistance = 8;
+        String similar = "";
+        List<Future<DistanceObject>> futures = new ArrayList();
+        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+        concurrentStrings.values().stream().map((str) -> new LevenshteinDistance(str, toBeCompared, new DistanceObject())).forEachOrdered((worker) -> {
+            futures.add(executor.submit(worker));
+        });
+        executor.shutdown();
+        try {
+            for (Future<DistanceObject> future : futures) {
+                DistanceObject d = future.get();
+                try {
+                    int distance = d.getDistance();
+                    System.out.println("distance: " + distance + "\n");
+                    if (distance < minDistance) {
+                        minDistance = distance;
+                        similar = d.getSentence();
+                    }
+                } catch (NullPointerException ex) {
+                    System.out.println("failed future\n");
+                }
+            }
+        } catch (InterruptedException | ExecutionException ex) {
+            Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        return similar;
+    }
@@ -402,6 +554,30 @@
        if (str1.contains("{white}")) {
            str1 = str1.replace("{white}", " ");
        }
if (str1.contains("{fullblue}")) {
str1 = str1.replace("{fullblue}", " ");
}
if (str1.contains("{cyan}")) {
str1 = str1.replace("{cyan}", " ");
}
if (str1.contains("{lime}")) {
str1 = str1.replace("{lime}", " ");
}
if (str1.contains("{deeppink}")) {
str1 = str1.replace("{deeppink}", " ");
}
if (str1.contains("{slategray}")) {
str1 = str1.replace("{slategray}", " ");
}
if (str1.contains("{dodgerblue}")) {
str1 = str1.replace("{dodgerblue}", " ");
}
if (str1.contains("{black}")) {
str1 = str1.replace("{black}", " ");
}
if (str1.contains("{orangered}")) {
str1 = str1.replace("{orangered}", " ");
}
        str1 = str1.trim();
        if (str1.length() > 2 && (!str1.startsWith("!"))) {
            strlistreturn.add(str1);
@@ -410,38 +586,9 @@
    return strlistreturn;
}
/*
public static List<String> cutLongsFromEmotes(List<String> str) {
List<String> strreturn = new ArrayList();
int emotesTraceLong = 17;
for (String str1 : str) {
StringBuilder SB = new StringBuilder();
int counter = 0;
int i = 0;
for (Character c : str1.toCharArray()) {
i++;
if (Character.isDigit(c)) {
counter++;
if (counter > emotesTraceLong && str1.length() > i + 2) {
SB.append(str1.substring(0, i - emotesTraceLong));
SB.append(str1.substring(i + 1, str1.length()));
}
} else {
counter = 0;
}
}
if (SB.toString().isEmpty()) {
strreturn.add(str1);
} else {
strreturn.add(SB.toString());
}
}
return strreturn;
}
*/
private List<String> removeSlacks(List<String> str) {
-    ShiftReduceParser model = SentimentAnalyzerTest.getModel();
-    MaxentTagger tagger = SentimentAnalyzerTest.getTagger();
+    ShiftReduceParser model = getModel();
+    MaxentTagger tagger = getTagger();
    List<TaggedWord> taggedWords;
    List<String> strreturn = new ArrayList();
    for (String str1 : str) {
@@ -481,9 +628,10 @@
        }
        if (addCounter > 3) {
            boolean tooclosematch = false;
-            for (String strVals : StringCache.values()) {
-                double Distance = LevenshteinDistance.computeLevenshteinDistance(strVals, str1);
-                int maxpermittedDistance = 5;
+            for (String strVals : stringCache.values()) {
+                LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
+                double Distance = leven.computeLevenshteinDistance();
+                int maxpermittedDistance = 2;
                if (Distance < maxpermittedDistance) {
                    tooclosematch = true;
                    break;
@@ -504,153 +652,4 @@
        }
        return strreturn;
    }
/**
*
* @throws CustomError
* @deprecated
*/
public synchronized void calculateStrings() throws CustomError {
//linkedhashmap?
int initiallimit = 5;
int listindex = 0;
List<SimilarityMatrix> WS4JList = DataMapper.getAllSementicMatrixes();
List<SimilarityMatrix> WS4JListUpdate = new ArrayList();
List<String> sentencesList = DataMapper.getAllStrings();
for (int i = 1; i < initiallimit; i++) {
if (!sentencesList.get(i).isEmpty()) {
//System.out.print("i: " + i + "\n sentencesList i: " + sentencesList.get(i) + "\n");
String[] words1 = sentencesList.get(i).split(" ");
for (String words11 : words1) {
if (!words11.isEmpty() && words11.length() > 3) {
String str = words11;
if (!str.isEmpty() && str.length() > 3) {
//SecondaryPredicate, no secondary key present with word
Predicate<SimilarityMatrix> SecondaryPredicate = e -> str.equals(e.getSecondaryString());
//no primary key present with the word
if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) {
WS4JListUpdate = createWS4JWordMatrix(str, sentencesList, WS4JListUpdate, WS4JList, SecondaryPredicate);
for (; listindex < WS4JListUpdate.size(); listindex++) {
WS4JList.add(WS4JListUpdate.get(listindex));
}
}
}
}
}
}
}
//System.out.println("\nWS4JListUpdate size: " + WS4JListUpdate.size());
DataMapper.insertSementicMatrixes(WS4JListUpdate);
}
/**
*
* @param str
* @param strlist
* @param ws4jlist
* @param oldlist
* @param SecondaryPredicate
* @return
* @deprecated
*/
public List<SimilarityMatrix> createWS4JWordMatrix(String str, List<String> strlist, List<SimilarityMatrix> ws4jlist,
List<SimilarityMatrix> oldlist, Predicate<SimilarityMatrix> SecondaryPredicate) {
for (String str1 : strlist) {
if (!str1.isEmpty()) {
String[] words1 = str1.split(" ");
for (int j = 0; j < words1.length; j++) {
if (!words1[j].isEmpty() && words1[j].length() > 3) {
String strword = words1[j];
if (!strword.isEmpty() && strword.length() > 3 && !strword.equals(str)) {
Predicate<SimilarityMatrix> PrimaryPredicate = e -> strword.equals(e.getPrimaryString());
if (!oldlist.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) {
//System.out.println("creating SimilarityMatrix with STR: " + str + "\n strword: " + strword + "\n");
SimilarityMatrix ws4j = new SimilarityMatrix(str, strword);
/*
double addingDistance = ws4j.getDistanceCalculations();
if (addingDistance > 0.00) {
//System.out.println("added! \n");
ws4j.setDistance(addingDistance);
ws4jlist.add(ws4j);
}
*/
}
}
}
}
}
}
return ws4jlist;
}
/**
* Stanford Parser method to update calculations to the DB
*
* @deprecated
* @throws FunctionLayer.CustomError
*/
public void sentimentAnalyzingStringsToDB() throws CustomError {
List<String> sentencesList = DataMapper.getAllStrings();
List<SimilarityMatrix> WS4JList = DataMapper.getAllSementicMatrixes();
List<SimilarityMatrix> WS4JListUpdate = new ArrayList();
int listindex = 0;
for (int i = 0; i < sentencesList.size() - 3000; i++) {
String str = sentencesList.get(i);
if (!str.isEmpty() && str.length() > 3) {
//System.out.println("i: " + i + "\n");
Predicate<SimilarityMatrix> SecondaryPredicate = e -> str.equals(e.getSecondaryString());
if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) {
//WS4JListUpdate = addStringMatrixes(str, sentencesList, SecondaryPredicate, WS4JListUpdate, WS4JList);
for (; listindex < WS4JListUpdate.size(); listindex++) {
WS4JList.add(WS4JListUpdate.get(listindex));
}
}
}
}
// System.out.println("\n WS4JListUpdate size: " + WS4JListUpdate.size());
DataMapper.insertSementicMatrixes(WS4JListUpdate);
}
/**
* @deprecated @param str Primary string which is checked, the filter
* ensures primary string has not been calculated yet
* @param sentencesList the full string list where each unique primary has
* to calculate its value to the rest of the DB
* @param SecondaryPredicate ensures primary string is not already
* calculated somewhere with another string
* @param WS4JListUpdate Matrix list to update in DB with new Sentences
* @param OldWS4JList Check if str1 already has primary entry with
* co-responding SecondaryPredicate
* @return Updated List which has to be inserted to the DB
*/
private List<SimilarityMatrix> addStringMatrixes(String str, List<String> sentencesList,
Predicate<SimilarityMatrix> SecondaryPredicate, List<SimilarityMatrix> WS4JListUpdate,
List<SimilarityMatrix> OldWS4JList, LexicalizedParser lp, TreebankLanguagePack tlp) {
double valuerange = -100.0;
for (int i = 0; i < sentencesList.size(); i++) {
String str1 = sentencesList.get(i);
if (!str1.isEmpty() && str1.length() > 3) {
Predicate<SimilarityMatrix> PrimaryPredicate = e -> str1.equals(e.getPrimaryString());
if (!OldWS4JList.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) {
double s = -100.0;
if (s > valuerange) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
SMX.setDistance(s);
/*
System.out.println("SMX added: \n Primary: " + SMX.getPrimaryString() + "\n Secondary: " + SMX.getSecondaryString()
+ "\n Score: " + SMX.getDistance() + "\n");
*/
WS4JListUpdate.add(SMX);
}
}
}
}
/*
str parameter is primary and not used as primary if reaching here
secondary predicate ensures primary does not already exist as secondary with co-responding strlist primary
*/
return WS4JListUpdate;
}
}
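Since a pair is stored only once, under whichever string happened to be primary, every membership test against lHMSMX has to probe the nested map in both directions, as the two containsKey branches in checkIfUpdateMatrixes do. A small self-contained sketch of that symmetric lookup (the map contents here are made up):

import java.util.LinkedHashMap;

public class PairLookupSketch {
    // true if a score for (a, b) is stored under either orientation
    static boolean hasPair(LinkedHashMap<String, LinkedHashMap<String, Double>> scores, String a, String b) {
        LinkedHashMap<String, Double> forward = scores.get(a);
        if (forward != null && forward.containsKey(b)) {
            return true;
        }
        LinkedHashMap<String, Double> backward = scores.get(b);
        return backward != null && backward.containsKey(a);
    }

    public static void main(String[] args) {
        LinkedHashMap<String, LinkedHashMap<String, Double>> scores = new LinkedHashMap<>();
        LinkedHashMap<String, Double> inner = new LinkedHashMap<>();
        inner.put("good evening", 1200.0);
        scores.put("good morning", inner);
        System.out.println(hasPair(scores, "good evening", "good morning")); // true
        System.out.println(hasPair(scores, "good morning", "bad evening")); // false
    }
}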

View File

@@ -7,8 +7,6 @@ package FunctionLayer;
import java.util.ArrayList;
import java.util.List;
-//https://www.programcreek.com/java-api-examples/index.php?source_dir=simmetrics-master/simmetrics-core/src/main/java/org/simmetrics/metrics/JaroWinkler.java#
-//https://stackoverflow.com/questions/36032958/semantic-matching-in-ws4j-at-sentence-level
/**
 *
@@ -29,6 +27,9 @@ public class MessageResponseHandler {
public static void getMessage(String message) {
    if (message != null && !message.isEmpty()) {
        message = message.replace("@", "");
+        if (message.startsWith("<>")) {
+            message = message.substring(message.indexOf(">"));
+        }
        if (message.startsWith("[ *")) {
            message = message.substring(message.indexOf("]"));
        }

View File

@@ -1,20 +1,21 @@
package FunctionLayer.StanfordParser;
import FunctionLayer.LevenshteinDistance;
+import FunctionLayer.MYSQLDatahandler;
+import FunctionLayer.SimilarityMatrix;
+import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
-import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
-import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
-import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
+import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.GrammaticalRelation;
@@ -22,7 +23,6 @@ import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
-import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
import edu.stanford.nlp.util.CoreMap;
@@ -30,8 +30,9 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import java.util.Properties;
import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
import org.ejml.simple.SimpleMatrix;
/*
@@ -43,217 +44,227 @@ import org.ejml.simple.SimpleMatrix;
 *
 * @author install1
 */
-public class SentimentAnalyzerTest {
-    private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
-    private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
-    private static String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz";
-    private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
-    private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
-    private static MaxentTagger tagger;
-    private static ShiftReduceParser model;
-    private static String[] options = {"-maxLength", "100"};
-    private static LexicalizedParser lp;
-    private static TreebankLanguagePack tlp;
-    private static Properties props = new Properties();
-    private static Properties propsSentiment = new Properties();
-    private static GrammaticalStructureFactory gsf;
-    private static StanfordCoreNLP pipeline;
-    private static StanfordCoreNLP pipelineSentiment;
-    public static void shiftReduceParserInitiate() {
-        model = ShiftReduceParser.loadModel(modelPath, options);
-        tagger = new MaxentTagger(taggerPath);
-        lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
-        tlp = lp.getOp().langpack();
-        gsf = tlp.grammaticalStructureFactory();
-        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
-        // set up pipeline properties
-        props.setProperty("parse.model", modelPath);
-        // use faster shift reduce parser
-        props.setProperty("parse.maxlen", "100");
-        props.setProperty("parse.binaryTrees", "true");
-        propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
-        propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
-        propsSentiment.setProperty("parse.maxlen", "100");
-        pipeline = new StanfordCoreNLP(props);
-        pipelineSentiment = new StanfordCoreNLP(propsSentiment);
-    }
-    public static ShiftReduceParser getModel() {
-        return model;
-    }
-    public static MaxentTagger getTagger() {
-        return tagger;
-    }
-    public static double sentimentanalyzing(String str, String str1) {
-        double score = -100.0;
-        List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
-        List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
-        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
-        for (List<HasWord> sentence : tokenizer) {
-            List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
-            Tree tree = model.apply(tagged1);
-            ArrayList<TaggedWord> taggedYield = tree.taggedYield();
-            taggedwordlist1.add(taggedYield);
-        }
-        tokenizer = new DocumentPreprocessor(new StringReader(str));
-        for (List<HasWord> sentence : tokenizer) {
-            List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
-            Tree tree = model.apply(tagged1);
-            ArrayList<TaggedWord> taggedYield = tree.taggedYield();
-            taggedwordlist2.add(taggedYield);
-        }
-        int counter = 0;
-        int counter1 = 0;
-        for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
-            counter += taggedlist2.size();
-        }
-        for (List<TaggedWord> taggedlist1 : taggedwordlist1) {
-            counter1 += taggedlist1.size();
-        }
-        int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
-        overValue *= 16;
-        while (overValue > 0) {
-            overValue--;
-            score--;
-        }
-        System.out.println("Score Post overValue: " + score + "\n");
-        for (List<TaggedWord> TGWList : taggedwordlist1) {
-            for (TaggedWord TGW : TGWList) {
-                List<String> tgwlist1 = new ArrayList();
-                for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
-                    for (TaggedWord TGW1 : taggedlist2) {
-                        if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":") && !tgwlist1.contains(TGW1.tag())) {
-                            score += 64;
-                            tgwlist1.add(TGW.tag());
-                        }
-                    }
-                }
-            }
-        }
-        // System.out.println("Score: " + score);
-        Annotation annotation = new Annotation(str1);
-        pipeline.annotate(annotation);
-        List<Tree> sentenceConstituencyParseList = new ArrayList();
-        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-            Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
-            sentenceConstituencyParseList.add(sentenceConstituencyParse);
-        }
-        Annotation annotation1 = new Annotation(str);
-        pipeline.annotate(annotation1);
-        for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
-            Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
-            GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
-            Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
-            List<String> filerTreeContent = new ArrayList();
-            for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
-                Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
-                Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
-                List<String> constiLabels = new ArrayList();
-                for (Constituent consti : inT1notT2) {
-                    for (Constituent consti1 : inT2notT1) {
-                        if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
-                            //System.out.println("consti value: " + consti.value() + "\n");
-                            score += 64; //256
-                            constiLabels.add(consti.value());
-                        }
-                    }
-                }
-                GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
-                Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
-                for (TypedDependency TDY1 : allTypedDependencies1) {
-                    IndexedWord dep = TDY1.dep();
-                    IndexedWord gov = TDY1.gov();
-                    GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
-                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
-                        score += 900;
-                        //System.out.println("grammaticalRelation applicable: " + score + "\n");
-                    }
-                    GrammaticalRelation reln = TDY1.reln();
-                    if (reln.isApplicable(sentenceConstituencyParse)) {
-                        score += 256;
-                    }
-                }
-                for (TypedDependency TDY : allTypedDependencies) {
-                    IndexedWord dep = TDY.dep();
-                    IndexedWord gov = TDY.gov();
-                    GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
-                    if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
-                        score += 900;
-                        //System.out.println("grammaticalRelation applicable: " + score + "\n");
-                    }
-                    GrammaticalRelation reln = TDY.reln();
-                    if (reln.isApplicable(sentenceConstituencyParse1)) {
-                        score += 256;
-                    }
-                }
-                for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) {
-                    for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) {
-                        if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) {
-                            filerTreeContent.add(LBW.lemma());
-                            score += 1500;
-                            //System.out.println("lemma: " + LBW.lemma() + "\n");
-                        }
-                    }
-                }
-            }
-        }
-        Annotation annotationSentiment1 = pipelineSentiment.process(str);
-        List<SimpleMatrix> simpleSMXlist = new ArrayList();
-        List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
-        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
-            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
-            SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
-            SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
-            simpleSMXlist.add(predictions);
-            simpleSMXlistVector.add(nodeVector);
-        }
-        annotationSentiment1 = pipelineSentiment.process(str1);
-        for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
-            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
-            SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
-            SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
-            for (SimpleMatrix simpleSMX : simpleSMXlist) {
-                double dot = predictions.dot(simpleSMX) * 100;
-                //System.out.println("dot value: " + dot + "\n");
-                double subtracter = dot > 50 ? dot - 50 : 50 - dot;
-                System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n");
-                subtracter *= 25;
-                while (subtracter > 0) {
-                    subtracter--;
-                    score--;
-                }
-                System.out.println("score post dot: " + score + "\n");
-            }
-            for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
-                double dot = nodeVector.dot(simpleSMX);
-                double elementSum = nodeVector.kron(simpleSMX).elementSum();
-                elementSum = Math.round(elementSum * 100.0) / 100.0;
-                System.out.println("kron SMX elementSum: " + elementSum + "\n");
-                if (dot < 0.1) {
-                    // System.out.println("\ndot VECTOR: " + dot + "\n");
-                    score += 256;
-                }
-                if (elementSum < 0.1 && elementSum > 0.0) {
-                    score += 1300;
-                } else if (elementSum > 0.1 && elementSum < 1.0) {
-                    score -= 1100;
+public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
+    private String str;
+    private String str1;
+    private SimilarityMatrix smxParam;
+    private ShiftReduceParser model;
+    private MaxentTagger tagger;
+    private GrammaticalStructureFactory gsf;
+    private StanfordCoreNLP pipeline;
+    private StanfordCoreNLP pipelineSentiment;
+    private AbstractSequenceClassifier classifier;
+    public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) {
+        this.str = str;
+        this.str1 = str1;
+        this.smxParam = smxParam;
+        model = MYSQLDatahandler.getModel();
+        tagger = MYSQLDatahandler.getTagger();
+        pipeline = MYSQLDatahandler.getPipeline();
+        pipelineSentiment = MYSQLDatahandler.getPipelineSentiment();
+        gsf = MYSQLDatahandler.getGsf();
+        classifier = MYSQLDatahandler.getClassifier();
+    }
+    @Override
+    public SimilarityMatrix call() {
+        try {
+            Double score = -100.0;
+            List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
+            List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
+            DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
+            for (List<HasWord> sentence : tokenizer) {
+                taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
+            }
+            tokenizer = new DocumentPreprocessor(new StringReader(str));
+            for (List<HasWord> sentence : tokenizer) {
+                taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
+            }
+            int counter = 0;
+            int counter1 = 0;
+            counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum);
+            counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum);
+            int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
+            overValue *= 16;
+            score -= overValue;
+            List<String> tgwlistIndex = new ArrayList();
+            taggedwordlist1.forEach((TGWList) -> {
+                TGWList.forEach((TaggedWord) -> {
+                    if (!tgwlistIndex.contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) {
+                        tgwlistIndex.add(TaggedWord.tag());
+                    }
+                });
+            });
+            AtomicInteger runCount = new AtomicInteger(0);
+            taggedwordlist2.forEach((TGWList) -> {
+                TGWList.forEach((TaggedWord) -> {
+                    if (tgwlistIndex.contains(TaggedWord.tag())) {
+                        tgwlistIndex.remove(TaggedWord.tag());
+                        runCount.getAndIncrement();
+                    }
+                });
+            });
+            score += runCount.get() * 64;
+            Annotation annotation = new Annotation(str1);
+            pipeline.annotate(annotation);
+            List<Tree> sentenceConstituencyParseList = new ArrayList();
+            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
+                sentenceConstituencyParseList.add(sentenceConstituencyParse);
+            }
+            Annotation annotation1 = new Annotation(str);
+            pipeline.annotate(annotation1);
+            List<String> nerList = new ArrayList();
+            for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
+                GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
+                Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
+                List<String> filerTreeContent = new ArrayList();
+                for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
+                    Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
+                    Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
+                    List<String> constiLabels = new ArrayList();
+                    for (Constituent consti : inT1notT2) {
+                        for (Constituent consti1 : inT2notT1) {
+                            if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
+                                score += 64; //256
+                                constiLabels.add(consti.value());
+                            }
+                        }
+                    }
+                    GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
+                    Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
+                    for (TypedDependency TDY1 : allTypedDependencies1) {
+                        IndexedWord dep = TDY1.dep();
+                        IndexedWord gov = TDY1.gov();
+                        GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
+                        if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
+                            score += 900;
+                        }
+                        GrammaticalRelation reln = TDY1.reln();
+                        if (reln.isApplicable(sentenceConstituencyParse)) {
+                            score += 256;
+                        }
+                    }
+                    for (TypedDependency TDY : allTypedDependencies) {
+                        IndexedWord dep = TDY.dep();
+                        IndexedWord gov = TDY.gov();
+                        GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
+                        if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
+                            score += 900;
+                        }
+                        GrammaticalRelation reln = TDY.reln();
+                        if (reln.isApplicable(sentenceConstituencyParse1)) {
+                            score += 256;
+                        }
+                    }
+                    AtomicInteger runCount1 = new AtomicInteger(0);
+                    sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> {
+                        sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma())
+                                && !filerTreeContent.contains(LBW.lemma()))).map((_item) -> {
+                            filerTreeContent.add(LBW.lemma());
+                            return _item;
+                        }).forEachOrdered((_item) -> {
+                            runCount1.getAndIncrement();
+                        });
+                    });
+                    score += runCount1.get() * 1500;
+                }
+            }
+            Annotation annotationSentiment1 = pipelineSentiment.process(str);
+            List<SimpleMatrix> simpleSMXlist = new ArrayList();
+            List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
+            List<Integer> sentiment1 = new ArrayList();
+            List<Integer> sentiment2 = new ArrayList();
+            for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
+                sentiment1.add(RNNCoreAnnotations.getPredictedClass(tree));
+                SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
+                SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
+                simpleSMXlist.add(predictions);
+                simpleSMXlistVector.add(nodeVector);
+            }
+            annotationSentiment1 = pipelineSentiment.process(str1);
+            for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
+                sentiment2.add(RNNCoreAnnotations.getPredictedClass(tree));
+                SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
+                SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
+                score = simpleSMXlist.stream().map((simpleSMX) -> predictions.dot(simpleSMX) * 100).map((dot) -> dot > 50 ? dot - 50 : 50 - dot).map((subtracter) -> {
+                    subtracter *= 25;
+                    return subtracter;
+                }).map((subtracter) -> subtracter).reduce(score, (accumulator, _item) -> accumulator - _item);
+                for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
+                    double dot = nodeVector.dot(simpleSMX);
+                    double elementSum = nodeVector.kron(simpleSMX).elementSum();
+                    elementSum = Math.round(elementSum * 100.0) / 100.0;
+                    if (dot < 0.1) {
+                        score += 256;
+                    }
+                    if (elementSum < 0.1 && elementSum > 0.0) {
+                        score += 1300;
+                    } else if (elementSum > 0.1 && elementSum < 1.0) {
+                        score -= 1100;
+                    } else {
+                        score -= 1424;
+                    }
+                }
+            }
+            score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500;
+            DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
+            List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter);
+            List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter);
+            score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200;
+            Annotation annotationSentiment = pipelineSentiment.process(str);
+            int mainSentiment1 = 0;
+            int longest1 = 0;
+            int mainSentiment2 = 0;
+            int longest2 = 0;
+            for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
+                int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
+                String partText = sentence.toString();
+                if (partText.length() > longest1) {
+                    mainSentiment1 = sentiment;
+                    longest1 = partText.length();
+                }
+            }
+            annotationSentiment = pipelineSentiment.process(str1);
+            for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
+                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
+                int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
+                SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
+                String partText = sentence.toString();
+                if (partText.length() > longest2) {
+                    mainSentiment2 = sentiment;
+                    longest2 = partText.length();
+                }
+            }
+            if (longest1 != longest2) {
+                long deffLongest = longest1 > longest2 ? longest1 : longest2;
+                long deffshorter = longest1 < longest2 ? longest1 : longest2;
+                if (deffLongest >= (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) {
+                    score += (deffLongest - deffshorter) * 200;
+                } else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) {
score += (deffLongest - deffshorter) * 200;
} else { } else {
score -= 1424; score -= (deffLongest - deffshorter) * 50;
} }
} }
LevenshteinDistance leven = new LevenshteinDistance(str, str1);
int SentenceScoreDiff = leven.computeLevenshteinDistance();
SentenceScoreDiff *= 15;
score -= SentenceScoreDiff;
System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
smxParam.setDistance(score);
} catch (Exception ex) {
System.out.println("ex: " + ex.getMessage() + "\n");
smxParam.setDistance(-1000);
return smxParam;
} }
int SentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1); return smxParam;
SentenceScoreDiff *= 15;
// System.out.println("score pre levenhstein substract: " + score + "\nSentenceScoreDiff: " + SentenceScoreDiff + "\n");
while (SentenceScoreDiff > 0) {
SentenceScoreDiff--;
score--;
}
System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
return score;
} }
} }
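Note on the updated dot-product penalty above: the removed while-loop subtracted one point per pass, effectively rounding each scaled distance up to a whole number, while the new stream reduce subtracts the exact scaled distance. A minimal, self-contained sketch of the two forms (class name and sample values are illustrative, not part of the commit):

import java.util.Arrays;
import java.util.List;

public class DotPenaltySketch {
    public static void main(String[] args) {
        // Stand-ins for predictions.dot(simpleSMX) * 100 results.
        List<Double> dots = Arrays.asList(72.3, 41.0, 55.5);
        double scoreOld = 10000.0;
        double scoreNew = 10000.0;

        // Old form: distance from 50, scaled by 25, subtracted one point per loop pass.
        for (double dotValue : dots) {
            double subtracter = dotValue > 50 ? dotValue - 50 : 50 - dotValue;
            subtracter *= 25;
            while (subtracter > 0) {
                subtracter--;
                scoreOld--;
            }
        }

        // New form: the same arithmetic as a reduce; it differs from the loop only by
        // the fractional remainder that the loop's final partial decrement rounds up.
        scoreNew = dots.stream()
                .map((dot) -> dot > 50 ? dot - 50 : 50 - dot)
                .map((subtracter) -> subtracter * 25)
                .reduce(scoreNew, (accumulator, item) -> accumulator - item);

        System.out.println("old: " + scoreOld + ", new: " + scoreNew);
    }
}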
View File

@@ -6,10 +6,9 @@
 ps ax | grep EventNotfierDiscordBot-1.0
 kill $pid (number)
-nohup screen -d -m -S nonRoot java -Xmx5048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
-nohup screen -d -m -S nonRoot java -Xmx4048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
-nohup screen -d -m -S gameservers java -Xmx2450M -jar /home/gameservers/ArtificialAutism/ArtificialAutism-1.0.jar
+nohup screen -d -m -S nonroot java -Xmx6048M -jar /home/javatests/ArtificialAutism-1.0.jar
+nohup screen -d -m -S nonroot java -Xmx4048M -jar /home/javatests/ArtificialAutism-1.0.jar
 screen -ls (number1)
 screen -X -S (number1) quit
 */
@@ -20,7 +19,6 @@ package PresentationLayer;
 import FunctionLayer.CustomError;
 import FunctionLayer.MYSQLDatahandler;
 import FunctionLayer.MessageResponseHandler;
-import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
 import java.io.IOException;
 import java.sql.SQLException;
 import java.util.List;
@@ -37,7 +35,7 @@ import org.javacord.api.entity.user.User;
 public class DiscordHandler {
     public static void main(String[] args) {
-        SentimentAnalyzerTest.shiftReduceParserInitiate();
+        MYSQLDatahandler.shiftReduceParserInitiate();
         new Thread(() -> {
             try {
                 MYSQLDatahandler.instance.initiateMYSQL();
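The parser bootstrap moves from SentimentAnalyzerTest to MYSQLDatahandler here, and it still runs before the bot begins listening, while the MySQL work stays on a background thread. A rough sketch of that startup ordering (hypothetical class name and stand-in method bodies, not the commit's code):

public class StartupOrderSketch {

    // Stand-in for MYSQLDatahandler.shiftReduceParserInitiate(): blocking, since
    // scoring needs the parser models as soon as the first message arrives.
    static void shiftReduceParserInitiate() {
        System.out.println("parser models loaded");
    }

    // Stand-in for MYSQLDatahandler.instance.initiateMYSQL(): safe to defer,
    // the sentence cache can fill while the bot is already running.
    static void initiateMYSQL() {
        System.out.println("sentence cache loaded");
    }

    public static void main(String[] args) {
        shiftReduceParserInitiate();
        new Thread(() -> {
            initiateMYSQL();
        }).start();
        System.out.println("ready to accept events");
    }
}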
@@ -76,8 +74,8 @@ public class DiscordHandler {
                 List<User> userlist = event.getMessage().getMentionedUsers();
                 String strresult = event.getMessage().toString();
                 if (userlist != null) {
-                    for (int i = 0; i < userlist.size(); i++) {
-                        strresult = strresult.replace(userlist.get(i).getIdAsString(), "");
+                    for (User user : userlist) {
+                        strresult = strresult.replace(user.getIdAsString(), "");
                     }
                 }
                 MessageResponseHandler.getMessage(strresult);
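One subtlety in the mention stripping (old indexed loop and new enhanced for alike): only the numeric ID is replaced, so if the raw message text carries Discord's <@id> mention markup, the angle-bracket shell is left behind. A small illustrative sketch (hypothetical message string and ID, not the commit's code):

import java.util.Arrays;
import java.util.List;

public class MentionStripSketch {
    public static void main(String[] args) {
        String strresult = "<@123456789> are you around?";
        // Stand-in for userlist / user.getIdAsString().
        List<String> mentionedIds = Arrays.asList("123456789");
        for (String id : mentionedIds) {
            strresult = strresult.replace(id, "");
        }
        System.out.println(strresult); // prints "<@> are you around?"
    }
}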
@@ -88,14 +86,13 @@ public class DiscordHandler {
                     Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
                 }
             }
-            //contains to specify one channel where bot may always type
             if (event.getMessage().getMentionedUsers().contains(api.getYourself())
                     || event.getServerTextChannel().get().toString().contains("minor-test")) {
                 String ResponseStr;
                 try {
                     ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString());
                     if (!ResponseStr.isEmpty()) {
-                        System.out.print("\nResponseStr3: " + ResponseStr);
+                        System.out.print("\nResponseStr3: " + ResponseStr + "\n");
                         event.getChannel().sendMessage(ResponseStr);
                     }
                 } catch (CustomError ex) {