updated calculations, updated handling of calculation storage, updated DB retrieval, added DistanceObject, added Levenshtein distance as a Callable worker, reworked almost everything in MYSQLDatahandler

This commit is contained in:
jenzur 2019-03-03 13:17:07 +01:00
parent aca3d9f9c8
commit f64ce5c5a0
8 changed files with 676 additions and 544 deletions

View File

@ -14,6 +14,7 @@ import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
@ -168,6 +169,47 @@ public class DataMapper {
}
}
public static LinkedHashMap<String, LinkedHashMap<String, Double>> getAllRelationScores() {
int count = getSementicsDBRows();
int counter2 = 0;
int hardCapRetrieveCount = 500000;
LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMX = new LinkedHashMap();
while (count > counter2) {
try (Connection l_cCon = DBCPDataSource.getConnection()) {
l_cCon.setAutoCommit(false);
String l_sSQL = "SELECT * FROM `WordMatrix` WHERE ID > " + counter2 + " AND ID < " + (counter2 + hardCapRetrieveCount);
try (PreparedStatement l_pStatement = l_cCon.prepareStatement(l_sSQL, java.sql.ResultSet.TYPE_FORWARD_ONLY,
java.sql.ResultSet.CONCUR_READ_ONLY)) {
l_pStatement.setFetchSize(Integer.MIN_VALUE);
try (ResultSet l_rsSearch = l_pStatement.executeQuery()) {
int i = 0;
LinkedHashMap<String, Double> LHMLocal = new LinkedHashMap();
while (l_rsSearch.next() && i < hardCapRetrieveCount) {
String str1 = l_rsSearch.getString(1);
String str2 = l_rsSearch.getString(2);
Double score = l_rsSearch.getDouble(3);
LHMLocal.put(str2, score);
while (l_rsSearch.next() && i < hardCapRetrieveCount && str1.equals(l_rsSearch.getString(1))) {
str2 = l_rsSearch.getString(2);
score = l_rsSearch.getDouble(3);
LHMLocal.put(str2, score);
i++;
counter2++;
}
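// note: the row whose first column ended this inner loop has already been consumed
// by next(), so one word pair is silently dropped at every str1 group boundary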
LHMSMX.put(str1, LHMLocal);
System.out.println("i: " + i + "\n" + "free memory: " + Runtime.getRuntime().freeMemory() + "\ncounter2: " + counter2 + "\n");
i++;
counter2++;
}
}
}
} catch (SQLException ex) {
Logger.getLogger(DataMapper.class.getName()).log(Level.SEVERE, null, ex);
}
}
return LHMSMX;
}
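The retrieval above leans on the MySQL Connector/J streaming idiom: a forward-only, read-only statement with setFetchSize(Integer.MIN_VALUE) makes the driver hand rows over one at a time instead of buffering the whole WordMatrix table in memory. A minimal sketch of just that idiom, reusing the project's DBCPDataSource pool (handleRow is a hypothetical consumer; the real query above uses SELECT * with positional getters):

try (Connection con = DBCPDataSource.getConnection();
        PreparedStatement ps = con.prepareStatement("SELECT * FROM `WordMatrix`",
                java.sql.ResultSet.TYPE_FORWARD_ONLY, java.sql.ResultSet.CONCUR_READ_ONLY)) {
    ps.setFetchSize(Integer.MIN_VALUE); // MySQL-specific signal: stream rows, do not cache the result set
    try (ResultSet rs = ps.executeQuery()) {
        while (rs.next()) {
            handleRow(rs.getString(1), rs.getString(2), rs.getDouble(3)); // consume each row immediately
        }
    }
} catch (SQLException ex) {
    Logger.getLogger(DataMapper.class.getName()).log(Level.SEVERE, null, ex);
}

Streaming keeps memory flat, but the connection cannot run other statements until the result set is fully read or closed.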
public static void CloseConnections(PreparedStatement ps, ResultSet rs, Connection con) {
if (rs != null) {
try {
@ -191,4 +233,5 @@ public class DataMapper {
}
}
}
}

View File

@ -0,0 +1,37 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package FunctionLayer;
/**
*
* @author install1
*/
public class DistanceObject {
private Integer distance;
private String sentence;
public DistanceObject() {
}
public Integer getDistance() {
return distance;
}
public void setDistance(Integer distance) {
this.distance = distance;
}
public String getSentence() {
return sentence;
}
public void setSentence(String sentence) {
this.sentence = sentence;
}
public DistanceObject(Integer distance, String sentence) {
this.distance = distance;
this.sentence = sentence;
}
}
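The class is just a result carrier for the Levenshtein Callable below; a trivial usage sketch:

DistanceObject d = new DistanceObject(3, "hello there");
System.out.println(d.getSentence() + " -> " + d.getDistance()); // prints: hello there -> 3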

View File

@ -5,19 +5,35 @@
*/
package FunctionLayer;
import java.util.concurrent.Callable;
/**
*
* @author install1
*/
public class LevenshteinDistance {
public class LevenshteinDistance implements Callable<DistanceObject> {
private CharSequence lhs;
private CharSequence rhs;
private DistanceObject dco;
private static int minimum(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}
public static int computeLevenshteinDistance(CharSequence lhs, CharSequence rhs) {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
public LevenshteinDistance(CharSequence lhs, CharSequence rhs, DistanceObject dco) {
this.lhs = lhs;
this.rhs = rhs;
this.dco = dco;
}
public LevenshteinDistance(CharSequence lhs, CharSequence rhs) {
this.lhs = lhs;
this.rhs = rhs;
}
public int computeLevenshteinDistance() {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
for (int i = 0; i <= lhs.length(); i++) {
distance[i][0] = i;
}
@ -34,4 +50,32 @@ public class LevenshteinDistance {
}
return distance[lhs.length()][rhs.length()];
}
@Override
public DistanceObject call() {
try {
int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
for (int i = 0; i <= lhs.length(); i++) {
distance[i][0] = i;
}
for (int j = 1; j <= rhs.length(); j++) {
distance[0][j] = j;
}
for (int i = 1; i <= lhs.length(); i++) {
for (int j = 1; j <= rhs.length(); j++) {
distance[i][j] = minimum(
distance[i - 1][j] + 1,
distance[i][j - 1] + 1,
distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1));
}
}
dco.setDistance(distance[lhs.length()][rhs.length()]);
dco.setSentence(lhs.toString()); // record the candidate string; without this mostSimilar() reads null sentences
} catch (Exception ex) {
System.out.println("ex msg: " + ex.getMessage() + "\n");
dco.setDistance(100);
return dco;
}
return dco;
}
}
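With the class now implementing Callable<DistanceObject>, distances can be computed off the calling thread. A sketch of the submit-and-collect pattern (candidate strings are illustrative; mostSimilar() further down does the same against stringCache):

ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
List<Future<DistanceObject>> futures = new ArrayList<>();
for (String candidate : Arrays.asList("hello there", "hi friend")) {
    futures.add(pool.submit(new LevenshteinDistance(candidate, "hello ther", new DistanceObject())));
}
pool.shutdown(); // stop intake; the get() calls below do the actual waiting
try {
    for (Future<DistanceObject> f : futures) {
        DistanceObject d = f.get(); // blocks until that worker is done
        System.out.println(d.getSentence() + " -> " + d.getDistance());
    }
} catch (InterruptedException | ExecutionException ex) {
    ex.printStackTrace();
}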

View File

@ -2,25 +2,25 @@
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
//https://stackoverflow.com/questions/43935229/hashmap-with-8-million-entries-becomes-slow
//http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6364/pdf/imm6364.pdf
*/
package FunctionLayer;
import DataLayer.DataMapper;
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.MapMaker;
import com.google.common.collect.Multimap;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.IOException;
@ -28,14 +28,23 @@ import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
@ -48,19 +57,89 @@ public class MYSQLDatahandler {
public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
public static MYSQLDatahandler instance = new MYSQLDatahandler();
public static int SemeticsUpdateCount;
public static int semeticsUpdateCount;
public static int threadCounter = 0;
private volatile boolean RefreshMatrixFromDB;
private final ConcurrentMap<Integer, String> StringCache;
private List<SimilarityMatrix> SimilaritySMXList = new ArrayList();
private volatile boolean refreshMatrixFromDB;
private final ConcurrentMap<Integer, String> stringCache;
private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap();
private List<String> multiprocessCalculations = new ArrayList();
private List<Integer> updatedRows = new ArrayList();
private final Stopwatch stopwatch;
private final Stopwatch stopwatch1;
private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
private static String NERModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
private static MaxentTagger tagger;
private static ShiftReduceParser model;
private static String[] options = {"-maxLength", "100"};
private static Properties props = new Properties();
private static Properties propsSentiment = new Properties();
private static GrammaticalStructureFactory gsf;
private static LexicalizedParser lp;
private static TreebankLanguagePack tlp;
private static AbstractSequenceClassifier<CoreLabel> classifier;
private static StanfordCoreNLP pipeline;
private static StanfordCoreNLP pipelineSentiment;
public static AbstractSequenceClassifier<CoreLabel> getClassifier() {
return classifier;
}
public static void setClassifier(AbstractSequenceClassifier<CoreLabel> classifier) {
MYSQLDatahandler.classifier = classifier;
}
public MYSQLDatahandler() {
this.stopwatch = Stopwatch.createUnstarted();
this.stopwatch1 = Stopwatch.createStarted();
this.StringCache = new MapMaker().concurrencyLevel(2).makeMap();
this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
}
public static void shiftReduceParserInitiate() {
try {
classifier = CRFClassifier.getClassifierNoExceptions(NERModel);
} catch (ClassCastException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
model = ShiftReduceParser.loadModel(modelPath, options);
tagger = new MaxentTagger(taggerPath);
lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
tlp = lp.getOp().langpack();
gsf = tlp.grammaticalStructureFactory();
// set up pipeline properties
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
props.setProperty("parse.model", modelPath);
props.setProperty("parse.maxlen", "100");
props.setProperty("parse.binaryTrees", "true");
propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
propsSentiment.setProperty("sentiment.model", sentimentModel);
propsSentiment.setProperty("parse.maxlen", "100");
// set up pipeline
pipeline = new StanfordCoreNLP(props);
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
}
public static GrammaticalStructureFactory getGsf() {
return gsf;
}
public static StanfordCoreNLP getPipeline() {
return pipeline;
}
public static StanfordCoreNLP getPipelineSentiment() {
return pipelineSentiment;
}
public static MaxentTagger getTagger() {
return tagger;
}
public static ShiftReduceParser getModel() {
return model;
}
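Every Stanford model is now loaded once in shiftReduceParserInitiate() and shared through these getters, so worker Callables reuse the same pipelines instead of re-loading them per task. A minimal consumer sketch, using only calls that appear elsewhere in this commit:

Annotation ann = new Annotation("good morning everyone");
MYSQLDatahandler.getPipeline().annotate(ann);
for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); // shift-reduce constituency parse
    System.out.println(tree.taggedYield());
}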
private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
@ -78,8 +157,8 @@ public class MYSQLDatahandler {
public void initiateMYSQL() throws SQLException, IOException {
try {
DataMapper.createTables();
StringCache.putAll(getCache());
SimilaritySMXList = DataMapper.getAllSementicMatrixes();
stringCache.putAll(getCache());
lHMSMX = DataMapper.getAllRelationScores();
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class
.getName()).log(Level.SEVERE, null, ex);
@ -87,34 +166,50 @@ public class MYSQLDatahandler {
}
public synchronized void checkIfUpdateMatrixes() {
RefreshMatrixFromDB = false;
refreshMatrixFromDB = false;
int calculationBoundaries = 10;
int updateBadgesInteger = 500;
if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) {
RefreshMatrixFromDB = true;
refreshMatrixFromDB = true;
if (threadCounter == 0) {
try {
SimilaritySMXList = DataMapper.getAllSementicMatrixes();
stopwatch1.reset();
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
lHMSMX = DataMapper.getAllRelationScores();
stopwatch1.reset();
}
}
if (StringCache.values().size() > 10) {
if (!RefreshMatrixFromDB && multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) {
if (stringCache.values().size() > 10 && !refreshMatrixFromDB) {
if (multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) {
threadCounter++;
List<String> strList = new ArrayList(StringCache.values());
SemeticsUpdateCount = new Random().nextInt(strList.size() - 6);
int beginindex = SemeticsUpdateCount;
SemeticsUpdateCount += calculationBoundaries / 2;
int temp = SemeticsUpdateCount;
List<String> strList = new ArrayList(stringCache.values());
List<Integer> updateLocal = updatedRows;
int random = -1;
if (!updateLocal.contains(random)) {
updatedRows.add(random);
}
Collections.sort(updateLocal);
while (updateLocal.contains(random)) {
random = new Random().nextInt(strList.size() - 6);
int indexPrev = Collections.binarySearch(updateLocal, random);
int indexNext = Collections.binarySearch(updateLocal, random + 6);
//-1 will always be index 0
if (indexPrev > 0 && indexNext > 0) {
indexPrev = updateLocal.get(indexPrev);
indexNext = updateLocal.get(indexNext);
}
random = indexPrev < random - 5 && indexNext < random ? random : -1;
}
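// Collections.binarySearch returns -(insertionPoint) - 1 when the key is absent, so the
// results are only dereferenced when both probes actually hit (> 0; the -1 sentinel owns
// index 0). Any draw whose 6-row window collides with an already-updated row is reset to
// -1, which is always contained in the list and therefore forces another draw.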
updatedRows.add(random);
semeticsUpdateCount = random;
int beginindex = semeticsUpdateCount;
semeticsUpdateCount += calculationBoundaries / 2;
int temp = semeticsUpdateCount;
System.out.println("beginindex: " + beginindex + "\ntemp: " + temp + "\n");
List<String> strIndexNavigator = new ArrayList();
strList.subList(beginindex, temp).forEach((str) -> {
strIndexNavigator.add(str);
multiprocessCalculations.add(str);
});
new Thread(() -> {
LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
List<String> strIndexNavigatorL = new ArrayList(strIndexNavigator);
List<String> strIndexAll = new ArrayList(strList);
List<String> randomIndexesToUpdate = new ArrayList();
@ -127,32 +222,49 @@ public class MYSQLDatahandler {
randomIndexesToUpdate.add(str);
});
List<SimilarityMatrix> matrixUpdateList = new ArrayList();
List<Future<SimilarityMatrix>> futures = new ArrayList();
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
strIndexNavigatorL.forEach((str) -> {
randomIndexesToUpdate.stream().filter((str1) -> (!str.equals(str1))).forEachOrdered((str1) -> {
boolean present = false;
if (multiprocessCalculations.contains(str1)) {
present = true;
} else {
for (SimilarityMatrix SMX : SimilaritySMXList) {
if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) {
present = true;
break;
}
if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) {
present = true;
break;
}
} else if (LHMSMXLocal.containsKey(str)) {
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str);
if (orDefault.containsKey(str1)) {
present = true;
}
} else if (LHMSMXLocal.containsKey(str1)) {
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str1);
if (orDefault.containsKey(str)) {
present = true;
}
}
if (!present) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1);
SMX.setDistance(score);
matrixUpdateList.add(SMX);
SimilaritySMXList.add(SMX);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX);
futures.add(executor.submit(worker));
}
});
});
executor.shutdown();
try {
System.out.println("finished worker assignment, futures size: " + futures.size() + "\n");
for (Future<SimilarityMatrix> future : futures) {
SimilarityMatrix SMX = future.get();
System.out.println("SMX primary: " + SMX.getPrimaryString() + "\nSMX Secondary: " + SMX.getSecondaryString()
+ "\nScore: " + SMX.getDistance() + "\n");
LinkedHashMap<String, Double> get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null);
if (get == null) {
get = new LinkedHashMap();
}
get.put(SMX.getSecondaryString(), SMX.getDistance());
lHMSMX.put(SMX.getPrimaryString(), get);
matrixUpdateList.add(SMX);
}
} catch (InterruptedException | ExecutionException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
new Thread(() -> {
try {
if (!matrixUpdateList.isEmpty()) {
@ -160,6 +272,7 @@ public class MYSQLDatahandler {
System.out.println("finished datamapper semetic insert");
}
threadCounter--;
System.out.println("\nthreadCounter: " + threadCounter + "\n");
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class
.getName()).log(Level.SEVERE, null, ex);
@ -167,56 +280,73 @@ public class MYSQLDatahandler {
}).start();
}).
start();
try {
wait(800);
} catch (InterruptedException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
} else {
if (threadCounter == 0) {
List<String> strList = new ArrayList(StringCache.values());
List<SimilarityMatrix> matrixUpdateList = new ArrayList();
List<String> randomStrList = new ArrayList();
int indexes = updateBadgesInteger;
if (indexes >= strList.size()) {
indexes = strList.size() - 1;
}
int beginindexes = new Random().nextInt((strList.size()) - indexes);
strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> {
randomStrList.add(str);
});
multiprocessCalculations.forEach((str) -> {
randomStrList.forEach((str1) -> {
boolean present = false;
for (SimilarityMatrix SMX : SimilaritySMXList) {
if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) {
present = true;
break;
}
if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) {
present = true;
break;
}
}
if (!present) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1);
SMX.setDistance(score);
matrixUpdateList.add(SMX);
SimilaritySMXList.add(SMX);
}
});
});
try {
if (!matrixUpdateList.isEmpty()) {
DataMapper.insertSementicMatrixes(matrixUpdateList);
System.out.println("finished datamapper semetic insert");
threadCounter++;
new Thread(() -> {
LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
List<String> strList = new ArrayList(stringCache.values());
List<SimilarityMatrix> matrixUpdateList = new ArrayList();
List<String> randomStrList = new ArrayList();
int indexes = updateBadgesInteger;
if (indexes >= strList.size()) {
indexes = strList.size() - 1;
}
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class
.getName()).log(Level.SEVERE, null, ex);
}
multiprocessCalculations = new ArrayList();
int beginindexes = new Random().nextInt((strList.size()) - indexes);
strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> {
randomStrList.add(str);
});
List<Future<SimilarityMatrix>> futures = new ArrayList();
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
multiprocessCalculations.forEach((str) -> {
randomStrList.forEach((str1) -> {
boolean present = false;
if (LHMSMXLocal.containsKey(str)) {
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str);
if (orDefault.containsKey(str1)) {
present = true;
}
} else if (LHMSMXLocal.containsKey(str1)) {
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.get(str1);
if (orDefault.containsKey(str)) {
present = true;
}
}
if (!present) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX);
futures.add(executor.submit(worker));
}
});
});
executor.shutdown();
try {
for (Future<SimilarityMatrix> future : futures) {
SimilarityMatrix SMX = future.get();
LinkedHashMap<String, Double> get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null);
if (get == null) {
get = new LinkedHashMap();
}
get.put(SMX.getSecondaryString(), SMX.getDistance());
lHMSMX.put(SMX.getPrimaryString(), get);
matrixUpdateList.add(SMX);
}
} catch (InterruptedException | ExecutionException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
try {
if (!matrixUpdateList.isEmpty()) {
DataMapper.insertSementicMatrixes(matrixUpdateList);
System.out.println("finished datamapper semetic insert");
}
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class
.getName()).log(Level.SEVERE, null, ex);
}
multiprocessCalculations = new ArrayList();
updatedRows = new ArrayList();
threadCounter--;
}).start();
}
}
}
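Both branches above share one shape: submit a SentimentAnalyzerTest per missing pair, then drain the futures into lHMSMX and the DB batch list. The skeleton with the pair-selection logic stripped out (cached/incoming values are illustrative):

List<String> cached = Arrays.asList("good morning", "good evening", "good night");
String incoming = "morning all";
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
List<Future<SimilarityMatrix>> futures = new ArrayList<>();
for (String candidate : cached) {
    futures.add(executor.submit(new SentimentAnalyzerTest(incoming, candidate, new SimilarityMatrix(incoming, candidate))));
}
executor.shutdown(); // stops intake only; each get() below blocks until its worker finishes
try {
    for (Future<SimilarityMatrix> future : futures) {
        SimilarityMatrix SMX = future.get();
        // computeIfAbsent condenses the getOrDefault-null-check-put dance used above
        lHMSMX.computeIfAbsent(SMX.getPrimaryString(), k -> new LinkedHashMap<>())
                .put(SMX.getSecondaryString(), SMX.getDistance());
    }
} catch (InterruptedException | ExecutionException ex) {
    Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}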
@ -233,14 +363,15 @@ public class MYSQLDatahandler {
strUpdate.addAll(str);
try {
DataMapper.InsertMYSQLStrings(strUpdate);
} catch (CustomError ex) {
Logger.getLogger(MYSQLDatahandler.class
.getName()).log(Level.SEVERE, null, ex);
}
MessageResponseHandler.setStr(new ArrayList());
int j = StringCache.size() + 1;
int j = stringCache.size() + 1;
for (String str1 : strUpdate) {
StringCache.put(j, str1);
stringCache.put(j, str1);
j++;
}
}).start();
@ -252,85 +383,106 @@ public class MYSQLDatahandler {
}
}
public String getResponseMsg(String str) throws CustomError {
public synchronized String getResponseMsg(String str) throws CustomError {
str = str.trim();
if (str.startsWith("<@")) {
str = str.substring(str.indexOf("> ") + 2);
}
final LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMXLocal = lHMSMX;
ConcurrentMap<Integer, String> strArrs = stringCache;
double Score = -10000;
SimilarityMatrix SMXreturn = null;
List<String> strLocal = new ArrayList(StringCache.values());
for (String str1 : strLocal) {
if (str.equals(str1)) {
Iterator<SimilarityMatrix> SMXITR = SimilaritySMXList.iterator();
while (SMXITR.hasNext()) {
SimilarityMatrix SMX = SMXITR.next();
if (SMX.getPrimaryString().equals(str) || SMX.getSecondaryString().equals(str)) {
double smxDistance = SMX.getDistance();
SimilarityMatrix SMXreturn = new SimilarityMatrix("", "");
System.out.println("pre mostSimilarSTR \n");
String mostSimilarSTR = mostSimilar(str, strArrs);
if (!mostSimilarSTR.isEmpty()) {
System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n");
LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null);
if (orDefault != null) {
for (Entry<String, Double> entrySet : orDefault.entrySet()) {
double smxDistance = entrySet.getValue();
if (smxDistance > Score) {
Score = smxDistance;
SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance);
}
}
}
for (Entry<String, LinkedHashMap<String, Double>> values1 : LHMSMXLocal.entrySet()) {
LinkedHashMap<String, Double> value = values1.getValue();
for (Entry<String, Double> keystr : value.entrySet()) {
if (keystr.getKey().equals(mostSimilarSTR)) {
double smxDistance = keystr.getValue();
if (smxDistance > Score) {
Score = smxDistance;
SMXreturn = SMX;
SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance);
}
}
}
break;
}
}
if (SMXreturn != null) {
if (SMXreturn.getPrimaryString().equals(str)) {
return SMXreturn.getSecondaryString();
} else {
return SMXreturn.getPrimaryString();
}
}
String[] toArray = strLocal.toArray(new String[strLocal.size()]);
String mostSimilarSTR = mostSimilar(str, toArray);
Iterator<SimilarityMatrix> SMXITR = SimilaritySMXList.iterator();
while (SMXITR.hasNext()) {
System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n");
mostSimilarSTR = mostSimilarSTR.trim();
SimilarityMatrix SMX = SMXITR.next();
if (SMX.getPrimaryString().trim().equals(mostSimilarSTR) || SMX.getSecondaryString().trim().equals(mostSimilarSTR)) {
double smxDistance = SMX.getDistance();
if (smxDistance > Score) {
Score = smxDistance;
SMXreturn = SMX;
if (!SMXreturn.getPrimaryString().isEmpty()) {
if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) {
return SMXreturn.getSecondaryString();
} else {
return SMXreturn.getPrimaryString();
}
}
}
if (SMXreturn != null) {
if (SMXreturn.getPrimaryString().equals(str)) {
return SMXreturn.getSecondaryString();
} else {
return SMXreturn.getPrimaryString();
System.out.println("none within 8 range");
ConcurrentMap<Integer, String> strCache = stringCache;
ConcurrentMap<Integer, Future<SimilarityMatrix>> futureslocal = new MapMaker().concurrencyLevel(2).makeMap();
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
for (String str1 : strCache.values()) {
if (!str.equals(str1)) {
Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1));
futureslocal.put(futureslocal.size() + 1, executor.submit(worker));
}
}
SMXITR = SimilaritySMXList.iterator();
while (SMXITR.hasNext()) {
SimilarityMatrix SMX = SMXITR.next();
if (SMX.getPrimaryString().contains(mostSimilarSTR) || SMX.getSecondaryString().contains(mostSimilarSTR)) {
double smxDistance = SMX.getDistance();
if (smxDistance > Score) {
Score = smxDistance;
executor.shutdown();
int index = 0;
for (Future<SimilarityMatrix> future : futureslocal.values()) {
try {
SimilarityMatrix SMX = future.get();
double distance = SMX.getDistance();
System.out.println("index: " + index + "\nfutures size: " + futureslocal.values().size() + "\nScore: " + SMX.getDistance() + "\nSecondary: "
+ SMX.getSecondaryString() + "\nPrimary: " + SMX.getPrimaryString() + "\n");
if (distance > Score) {
Score = distance;
SMXreturn = SMX;
}
} catch (InterruptedException | ExecutionException ex) {
System.out.println("ex: " + ex.getMessage() + "\n");
}
index++;
}
if (SMXreturn != null) {
if (SMXreturn.getPrimaryString().equals(str)) {
return SMXreturn.getSecondaryString();
} else {
return SMXreturn.getPrimaryString();
}
}
return "how did you manage to reach this, AAAAAAAAAAAA end my suffering";
System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString()
+ "\nScore: " + SMXreturn.getDistance());
return SMXreturn.getSecondaryString();
}
public String mostSimilar(String toBeCompared, String[] strings) {
int minDistance = Integer.MAX_VALUE;
public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings) {
int minDistance = 8;
String similar = "";
for (String str : strings) {
int d = LevenshteinDistance.computeLevenshteinDistance(str, toBeCompared);
if (d < minDistance) {
minDistance = d;
similar = str;
List<Future<DistanceObject>> futures = new ArrayList();
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
concurrentStrings.values().stream().map((str) -> new LevenshteinDistance(str, toBeCompared, new DistanceObject())).forEachOrdered((worker) -> {
futures.add(executor.submit(worker));
});
executor.shutdown();
try {
for (Future<DistanceObject> future : futures) {
DistanceObject d = future.get();
try {
int distance = d.getDistance();
System.out.println("distance: " + distance + "\n");
if (distance < minDistance) {
minDistance = distance;
similar = d.getSentence();
}
} catch (NullPointerException ex) {
System.out.println("failed future\n");
}
}
} catch (InterruptedException | ExecutionException ex) {
Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
}
return similar;
}
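Note the cutoff: minDistance starts at 8, so any candidate more than 8 edits away is rejected and the empty return sends getResponseMsg() into the score-everything fallback above. For example (inside the class, against the live cache):

String similar = mostSimilar("hello ther", stringCache); // "hello there" would win at distance 1
if (similar.isEmpty()) {
    // nothing within 8 edits; every cached string gets scored by the executor instead
}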
@ -402,6 +554,30 @@ public class MYSQLDatahandler {
if (str1.contains("{white}")) {
str1 = str1.replace("{white}", " ");
}
if (str1.contains("{fullblue}")) {
str1 = str1.replace("{fullblue}", " ");
}
if (str1.contains("{cyan}")) {
str1 = str1.replace("{cyan}", " ");
}
if (str1.contains("{lime}")) {
str1 = str1.replace("{lime}", " ");
}
if (str1.contains("{deeppink}")) {
str1 = str1.replace("{deeppink}", " ");
}
if (str1.contains("{slategray}")) {
str1 = str1.replace("{slategray}", " ");
}
if (str1.contains("{dodgerblue}")) {
str1 = str1.replace("{dodgerblue}", " ");
}
if (str1.contains("{black}")) {
str1 = str1.replace("{black}", " ");
}
if (str1.contains("{orangered}")) {
str1 = str1.replace("{orangered}", " ");
}
str1 = str1.trim();
if (str1.length() > 2 && (!str1.startsWith("!"))) {
strlistreturn.add(str1);
@ -410,38 +586,9 @@ public class MYSQLDatahandler {
return strlistreturn;
}
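The run of {color} replacements above could be collapsed into a single pass; a sketch, assuming every lowercase brace tag is a color marker that should become a space, exactly as each branch does:

str1 = str1.replaceAll("\\{[a-z]+\\}", " ");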
/*
public static List<String> cutLongsFromEmotes(List<String> str) {
List<String> strreturn = new ArrayList();
int emotesTraceLong = 17;
for (String str1 : str) {
StringBuilder SB = new StringBuilder();
int counter = 0;
int i = 0;
for (Character c : str1.toCharArray()) {
i++;
if (Character.isDigit(c)) {
counter++;
if (counter > emotesTraceLong && str1.length() > i + 2) {
SB.append(str1.substring(0, i - emotesTraceLong));
SB.append(str1.substring(i + 1, str1.length()));
}
} else {
counter = 0;
}
}
if (SB.toString().isEmpty()) {
strreturn.add(str1);
} else {
strreturn.add(SB.toString());
}
}
return strreturn;
}
*/
private List<String> removeSlacks(List<String> str) {
ShiftReduceParser model = SentimentAnalyzerTest.getModel();
MaxentTagger tagger = SentimentAnalyzerTest.getTagger();
ShiftReduceParser model = getModel();
MaxentTagger tagger = getTagger();
List<TaggedWord> taggedWords;
List<String> strreturn = new ArrayList();
for (String str1 : str) {
@ -481,9 +628,10 @@ public class MYSQLDatahandler {
}
if (addCounter > 3) {
boolean tooclosematch = false;
for (String strVals : StringCache.values()) {
double Distance = LevenshteinDistance.computeLevenshteinDistance(strVals, str1);
int maxpermittedDistance = 5;
for (String strVals : stringCache.values()) {
LevenshteinDistance leven = new LevenshteinDistance(strVals, str1);
double Distance = leven.computeLevenshteinDistance();
int maxpermittedDistance = 2;
if (Distance < maxpermittedDistance) {
tooclosematch = true;
break;
@ -504,153 +652,4 @@ public class MYSQLDatahandler {
}
return strreturn;
}
/**
*
* @throws CustomError
* @deprecated
*/
public synchronized void calculateStrings() throws CustomError {
//linkedhashmap?
int initiallimit = 5;
int listindex = 0;
List<SimilarityMatrix> WS4JList = DataMapper.getAllSementicMatrixes();
List<SimilarityMatrix> WS4JListUpdate = new ArrayList();
List<String> sentencesList = DataMapper.getAllStrings();
for (int i = 1; i < initiallimit; i++) {
if (!sentencesList.get(i).isEmpty()) {
//System.out.print("i: " + i + "\n sentencesList i: " + sentencesList.get(i) + "\n");
String[] words1 = sentencesList.get(i).split(" ");
for (String words11 : words1) {
if (!words11.isEmpty() && words11.length() > 3) {
String str = words11;
if (!str.isEmpty() && str.length() > 3) {
//SecondaryPredicate, no secondary key present with word
Predicate<SimilarityMatrix> SecondaryPredicate = e -> str.equals(e.getSecondaryString());
//no primary key present with the word
if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) {
WS4JListUpdate = createWS4JWordMatrix(str, sentencesList, WS4JListUpdate, WS4JList, SecondaryPredicate);
for (; listindex < WS4JListUpdate.size(); listindex++) {
WS4JList.add(WS4JListUpdate.get(listindex));
}
}
}
}
}
}
}
//System.out.println("\nWS4JListUpdate size: " + WS4JListUpdate.size());
DataMapper.insertSementicMatrixes(WS4JListUpdate);
}
/**
*
* @param str
* @param strlist
* @param ws4jlist
* @param oldlist
* @param SecondaryPredicate
* @return
* @deprecated
*/
public List<SimilarityMatrix> createWS4JWordMatrix(String str, List<String> strlist, List<SimilarityMatrix> ws4jlist,
List<SimilarityMatrix> oldlist, Predicate<SimilarityMatrix> SecondaryPredicate) {
for (String str1 : strlist) {
if (!str1.isEmpty()) {
String[] words1 = str1.split(" ");
for (int j = 0; j < words1.length; j++) {
if (!words1[j].isEmpty() && words1[j].length() > 3) {
String strword = words1[j];
if (!strword.isEmpty() && strword.length() > 3 && !strword.equals(str)) {
Predicate<SimilarityMatrix> PrimaryPredicate = e -> strword.equals(e.getPrimaryString());
if (!oldlist.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) {
//System.out.println("creating SimilarityMatrix with STR: " + str + "\n strword: " + strword + "\n");
SimilarityMatrix ws4j = new SimilarityMatrix(str, strword);
/*
double addingDistance = ws4j.getDistanceCalculations();
if (addingDistance > 0.00) {
//System.out.println("added! \n");
ws4j.setDistance(addingDistance);
ws4jlist.add(ws4j);
}
*/
}
}
}
}
}
}
return ws4jlist;
}
/**
* Stanford Parser method to update calculations to the DB
*
* @deprecated
* @throws FunctionLayer.CustomError
*/
public void sentimentAnalyzingStringsToDB() throws CustomError {
List<String> sentencesList = DataMapper.getAllStrings();
List<SimilarityMatrix> WS4JList = DataMapper.getAllSementicMatrixes();
List<SimilarityMatrix> WS4JListUpdate = new ArrayList();
int listindex = 0;
for (int i = 0; i < sentencesList.size() - 3000; i++) {
String str = sentencesList.get(i);
if (!str.isEmpty() && str.length() > 3) {
//System.out.println("i: " + i + "\n");
Predicate<SimilarityMatrix> SecondaryPredicate = e -> str.equals(e.getSecondaryString());
if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) {
//WS4JListUpdate = addStringMatrixes(str, sentencesList, SecondaryPredicate, WS4JListUpdate, WS4JList);
for (; listindex < WS4JListUpdate.size(); listindex++) {
WS4JList.add(WS4JListUpdate.get(listindex));
}
}
}
}
// System.out.println("\n WS4JListUpdate size: " + WS4JListUpdate.size());
DataMapper.insertSementicMatrixes(WS4JListUpdate);
}
/**
* @deprecated @param str Primary string which is checked, the filter
* ensures primary string has not been calculated yet
* @param sentencesList the full string list where each unique primary has
* to calculate its value to the rest of the DB
* @param SecondaryPredicate ensures primary string is not already
* calculated somewhere with another string
* @param WS4JListUpdate Matrix list to update in DB with new Sentences
* @param OldWS4JList Check if str1 already has primary entry with
* co-responding SecondaryPredicate
* @return Updated List which has to be inserted to the DB
*/
private List<SimilarityMatrix> addStringMatrixes(String str, List<String> sentencesList,
Predicate<SimilarityMatrix> SecondaryPredicate, List<SimilarityMatrix> WS4JListUpdate,
List<SimilarityMatrix> OldWS4JList, LexicalizedParser lp, TreebankLanguagePack tlp) {
double valuerange = -100.0;
for (int i = 0; i < sentencesList.size(); i++) {
String str1 = sentencesList.get(i);
if (!str1.isEmpty() && str1.length() > 3) {
Predicate<SimilarityMatrix> PrimaryPredicate = e -> str1.equals(e.getPrimaryString());
if (!OldWS4JList.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) {
double s = -100.0;
if (s > valuerange) {
SimilarityMatrix SMX = new SimilarityMatrix(str, str1);
SMX.setDistance(s);
/*
System.out.println("SMX added: \n Primary: " + SMX.getPrimaryString() + "\n Secondary: " + SMX.getSecondaryString()
+ "\n Score: " + SMX.getDistance() + "\n");
*/
WS4JListUpdate.add(SMX);
}
}
}
}
/*
str parameter is primary and not used as primary if reaching here
secondary predicate ensures primary does not already exist as secondary with co-responding strlist primary
*/
return WS4JListUpdate;
}
}

View File

@ -7,8 +7,6 @@ package FunctionLayer;
import java.util.ArrayList;
import java.util.List;
//https://www.programcreek.com/java-api-examples/index.php?source_dir=simmetrics-master/simmetrics-core/src/main/java/org/simmetrics/metrics/JaroWinkler.java#
//https://stackoverflow.com/questions/36032958/semantic-matching-in-ws4j-at-sentence-level
/**
*
@ -29,6 +27,9 @@ public class MessageResponseHandler {
public static void getMessage(String message) {
if (message != null && !message.isEmpty()) {
message = message.replace("@", "");
if (message.startsWith("<>")) {
message = message.substring(message.indexOf(">") + 1);
}
if (message.startsWith("[ *")) {
message = message.substring(message.indexOf("]"));
}
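A quick trace of the prefix stripping with the corrected offset:

// "<@123> hi"  -> "@" is stripped first, so the "<>" branch never matches: "<123> hi"
// "<>sys hi"   -> substring past ">": "sys hi"
// "[ *sys] hi" -> substring from indexOf("]"): "] hi" (no +1 here, so the bracket survives)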

View File

@ -1,20 +1,21 @@
package FunctionLayer.StanfordParser;
import FunctionLayer.LevenshteinDistance;
import FunctionLayer.MYSQLDatahandler;
import FunctionLayer.SimilarityMatrix;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.GrammaticalRelation;
@ -22,7 +23,6 @@ import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.tregex.gui.Tdiff;
import edu.stanford.nlp.util.CoreMap;
@ -30,8 +30,9 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import org.ejml.simple.SimpleMatrix;
/*
@ -43,217 +44,227 @@ import org.ejml.simple.SimpleMatrix;
*
* @author install1
*/
public class SentimentAnalyzerTest {
public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> {
private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
private static String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz";
private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
private static MaxentTagger tagger;
private static ShiftReduceParser model;
private static String[] options = {"-maxLength", "100"};
private static LexicalizedParser lp;
private static TreebankLanguagePack tlp;
private static Properties props = new Properties();
private static Properties propsSentiment = new Properties();
private static GrammaticalStructureFactory gsf;
private static StanfordCoreNLP pipeline;
private static StanfordCoreNLP pipelineSentiment;
private String str;
private String str1;
private SimilarityMatrix smxParam;
private ShiftReduceParser model;
private MaxentTagger tagger;
private GrammaticalStructureFactory gsf;
private StanfordCoreNLP pipeline;
private StanfordCoreNLP pipelineSentiment;
private AbstractSequenceClassifier classifier;
public static void shiftReduceParserInitiate() {
model = ShiftReduceParser.loadModel(modelPath, options);
tagger = new MaxentTagger(taggerPath);
lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
tlp = lp.getOp().langpack();
gsf = tlp.grammaticalStructureFactory();
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
// set up pipeline properties
props.setProperty("parse.model", modelPath);
// use faster shift reduce parser
props.setProperty("parse.maxlen", "100");
props.setProperty("parse.binaryTrees", "true");
propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
propsSentiment.setProperty("parse.maxlen", "100");
pipeline = new StanfordCoreNLP(props);
pipelineSentiment = new StanfordCoreNLP(propsSentiment);
public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) {
this.str = str;
this.str1 = str1;
this.smxParam = smxParam;
model = MYSQLDatahandler.getModel();
tagger = MYSQLDatahandler.getTagger();
pipeline = MYSQLDatahandler.getPipeline();
pipelineSentiment = MYSQLDatahandler.getPipelineSentiment();
gsf = MYSQLDatahandler.getGsf();
classifier = MYSQLDatahandler.getClassifier();
}
public static ShiftReduceParser getModel() {
return model;
}
public static MaxentTagger getTagger() {
return tagger;
}
public static double sentimentanalyzing(String str, String str1) {
double score = -100.0;
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
for (List<HasWord> sentence : tokenizer) {
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
Tree tree = model.apply(tagged1);
ArrayList<TaggedWord> taggedYield = tree.taggedYield();
taggedwordlist1.add(taggedYield);
}
tokenizer = new DocumentPreprocessor(new StringReader(str));
for (List<HasWord> sentence : tokenizer) {
List<TaggedWord> tagged1 = tagger.tagSentence(sentence);
Tree tree = model.apply(tagged1);
ArrayList<TaggedWord> taggedYield = tree.taggedYield();
taggedwordlist2.add(taggedYield);
}
int counter = 0;
int counter1 = 0;
for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
counter += taggedlist2.size();
}
for (List<TaggedWord> taggedlist1 : taggedwordlist1) {
counter1 += taggedlist1.size();
}
int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
overValue *= 16;
while (overValue > 0) {
overValue--;
score--;
}
System.out.println("Score Post overValue: " + score + "\n");
for (List<TaggedWord> TGWList : taggedwordlist1) {
for (TaggedWord TGW : TGWList) {
List<String> tgwlist1 = new ArrayList();
for (List<TaggedWord> taggedlist2 : taggedwordlist2) {
for (TaggedWord TGW1 : taggedlist2) {
if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":") && !tgwlist1.contains(TGW1.tag())) {
score += 64;
tgwlist1.add(TGW.tag());
}
}
}
@Override
public SimilarityMatrix call() {
try {
Double score = -100.0;
List<List<TaggedWord>> taggedwordlist1 = new ArrayList();
List<List<TaggedWord>> taggedwordlist2 = new ArrayList();
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1));
for (List<HasWord> sentence : tokenizer) {
taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
}
// System.out.println("Score: " + score);
Annotation annotation = new Annotation(str1);
pipeline.annotate(annotation);
List<Tree> sentenceConstituencyParseList = new ArrayList();
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
sentenceConstituencyParseList.add(sentenceConstituencyParse);
}
Annotation annotation1 = new Annotation(str);
pipeline.annotate(annotation1);
for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
List<String> filerTreeContent = new ArrayList();
for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
List<String> constiLabels = new ArrayList();
for (Constituent consti : inT1notT2) {
for (Constituent consti1 : inT2notT1) {
if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
//System.out.println("consti value: " + consti.value() + "\n");
score += 64; //256
constiLabels.add(consti.value());
tokenizer = new DocumentPreprocessor(new StringReader(str));
for (List<HasWord> sentence : tokenizer) {
taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield());
}
int counter = 0;
int counter1 = 0;
counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum);
counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum);
int overValue = counter >= counter1 ? counter - counter1 : counter1 - counter;
overValue *= 16;
score -= overValue;
List<String> tgwlistIndex = new ArrayList();
taggedwordlist1.forEach((TGWList) -> {
TGWList.forEach((TaggedWord) -> {
if (!tgwlistIndex.contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) {
tgwlistIndex.add(TaggedWord.tag());
}
});
});
AtomicInteger runCount = new AtomicInteger(0);
taggedwordlist2.forEach((TGWList) -> {
TGWList.forEach((TaggedWord) -> {
if (tgwlistIndex.contains(TaggedWord.tag())) {
tgwlistIndex.remove(TaggedWord.tag());
runCount.getAndIncrement();
}
});
});
score += runCount.get() * 64;
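// +64 once per POS tag the two sentences share: matching tags are removed from
// tgwlistIndex as they are counted, so repeats in the second sentence cannot double-count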
Annotation annotation = new Annotation(str1);
pipeline.annotate(annotation);
List<Tree> sentenceConstituencyParseList = new ArrayList();
for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
sentenceConstituencyParseList.add(sentenceConstituencyParse);
}
Annotation annotation1 = new Annotation(str);
pipeline.annotate(annotation1);
List<String> nerList = new ArrayList();
for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse);
Collection<TypedDependency> allTypedDependencies = gs.allTypedDependencies();
List<String> filerTreeContent = new ArrayList();
for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) {
Set<Constituent> inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1);
Set<Constituent> inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse);
List<String> constiLabels = new ArrayList();
for (Constituent consti : inT1notT2) {
for (Constituent consti1 : inT2notT1) {
if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) {
score += 64; //256
constiLabels.add(consti.value());
}
}
}
}
GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
for (TypedDependency TDY1 : allTypedDependencies1) {
IndexedWord dep = TDY1.dep();
IndexedWord gov = TDY1.gov();
GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 900;
//System.out.println("grammaticalRelation applicable: " + score + "\n");
GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1);
Collection<TypedDependency> allTypedDependencies1 = gs1.allTypedDependencies();
for (TypedDependency TDY1 : allTypedDependencies1) {
IndexedWord dep = TDY1.dep();
IndexedWord gov = TDY1.gov();
GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 900;
}
GrammaticalRelation reln = TDY1.reln();
if (reln.isApplicable(sentenceConstituencyParse)) {
score += 256;
}
}
GrammaticalRelation reln = TDY1.reln();
if (reln.isApplicable(sentenceConstituencyParse)) {
score += 256;
for (TypedDependency TDY : allTypedDependencies) {
IndexedWord dep = TDY.dep();
IndexedWord gov = TDY.gov();
GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 900;
}
GrammaticalRelation reln = TDY.reln();
if (reln.isApplicable(sentenceConstituencyParse1)) {
score += 256;
}
}
}
for (TypedDependency TDY : allTypedDependencies) {
IndexedWord dep = TDY.dep();
IndexedWord gov = TDY.gov();
GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep);
if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) {
score += 900;
//System.out.println("grammaticalRelation applicable: " + score + "\n");
}
GrammaticalRelation reln = TDY.reln();
if (reln.isApplicable(sentenceConstituencyParse1)) {
score += 256;
}
}
for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) {
for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) {
if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) {
AtomicInteger runCount1 = new AtomicInteger(0);
sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> {
sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma())
&& !filerTreeContent.contains(LBW.lemma()))).map((_item) -> {
filerTreeContent.add(LBW.lemma());
score += 1500;
//System.out.println("lemma: " + LBW.lemma() + "\n");
}
return _item;
}).forEachOrdered((_item) -> {
runCount1.getAndIncrement();
});
});
score += runCount1.get() * 1500;
}
}
Annotation annotationSentiment1 = pipelineSentiment.process(str);
List<SimpleMatrix> simpleSMXlist = new ArrayList();
List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
List<Integer> sentiment1 = new ArrayList();
List<Integer> sentiment2 = new ArrayList();
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
sentiment1.add(RNNCoreAnnotations.getPredictedClass(tree));
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
simpleSMXlist.add(predictions);
simpleSMXlistVector.add(nodeVector);
}
annotationSentiment1 = pipelineSentiment.process(str1);
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
sentiment2.add(RNNCoreAnnotations.getPredictedClass(tree));
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
score = simpleSMXlist.stream().map((simpleSMX) -> predictions.dot(simpleSMX) * 100).map((dot) -> dot > 50 ? dot - 50 : 50 - dot).map((subtracter) -> {
subtracter *= 25;
return subtracter;
}).map((subtracter) -> subtracter).reduce(score, (accumulator, _item) -> accumulator - _item);
for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
double dot = nodeVector.dot(simpleSMX);
double elementSum = nodeVector.kron(simpleSMX).elementSum();
elementSum = Math.round(elementSum * 100.0) / 100.0;
if (dot < 0.1) {
score += 256;
}
if (elementSum < 0.1 && elementSum > 0.0) {
score += 1300;
} else if (elementSum > 0.1 && elementSum < 1.0) {
score -= 1100;
} else {
score -= 1424;
}
}
}
}
Annotation annotationSentiment1 = pipelineSentiment.process(str);
List<SimpleMatrix> simpleSMXlist = new ArrayList();
List<SimpleMatrix> simpleSMXlistVector = new ArrayList();
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
simpleSMXlist.add(predictions);
simpleSMXlistVector.add(nodeVector);
}
annotationSentiment1 = pipelineSentiment.process(str1);
for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
for (SimpleMatrix simpleSMX : simpleSMXlist) {
double dot = predictions.dot(simpleSMX) * 100;
//System.out.println("dot value: " + dot + "\n");
double subtracter = dot > 50 ? dot - 50 : 50 - dot;
System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n");
subtracter *= 25;
while (subtracter > 0) {
subtracter--;
score--;
score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500;
DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter);
List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter);
score -= (classifyRaw1.size() > classifyRaw2.size() ? classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200;
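// coarse NER comparison: classifyRaw returns one token list per chunk the plain-text
// reader splits off, and each unit of difference in chunk counts costs 200 points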
Annotation annotationSentiment = pipelineSentiment.process(str);
int mainSentiment1 = 0;
int longest1 = 0;
int mainSentiment2 = 0;
int longest2 = 0;
for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
String partText = sentence.toString();
if (partText.length() > longest1) {
mainSentiment1 = sentiment;
longest1 = partText.length();
}
System.out.println("score post dot: " + score + "\n");
}
for (SimpleMatrix simpleSMX : simpleSMXlistVector) {
double dot = nodeVector.dot(simpleSMX);
double elementSum = nodeVector.kron(simpleSMX).elementSum();
elementSum = Math.round(elementSum * 100.0) / 100.0;
System.out.println("kron SMX elementSum: " + elementSum + "\n");
if (dot < 0.1) {
// System.out.println("\ndot VECTOR: " + dot + "\n");
score += 256;
annotationSentiment = pipelineSentiment.process(str1);
for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) {
Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
String partText = sentence.toString();
if (partText.length() > longest2) {
mainSentiment2 = sentiment;
longest2 = partText.length();
}
if (elementSum < 0.1 && elementSum > 0.0) {
score += 1300;
} else if (elementSum > 0.1 && elementSum < 1.0) {
score -= 1100;
}
if (longest1 != longest2) {
long deffLongest = longest1 > longest2 ? longest1 : longest2;
long deffshorter = longest1 < longest2 ? longest1 : longest2;
if (deffLongest >= (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) {
score += (deffLongest - deffshorter) * 200;
} else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) {
score += (deffLongest - deffshorter) * 200;
} else {
score -= 1424;
score -= (deffLongest - deffshorter) * 50;
}
}
LevenshteinDistance leven = new LevenshteinDistance(str, str1);
int SentenceScoreDiff = leven.computeLevenshteinDistance();
SentenceScoreDiff *= 15;
score -= SentenceScoreDiff;
System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
smxParam.setDistance(score);
} catch (Exception ex) {
System.out.println("ex: " + ex.getMessage() + "\n");
smxParam.setDistance(-1000);
return smxParam;
}
int SentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1);
SentenceScoreDiff *= 15;
// System.out.println("score pre levenhstein substract: " + score + "\nSentenceScoreDiff: " + SentenceScoreDiff + "\n");
while (SentenceScoreDiff > 0) {
SentenceScoreDiff--;
score--;
}
System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
return score;
return smxParam;
}
}
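From the caller's side the class is now just a scored task; a usage sketch consistent with checkIfUpdateMatrixes():

ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
SimilarityMatrix smx = new SimilarityMatrix("how are you", "where are you");
Future<SimilarityMatrix> f = executor.submit(new SentimentAnalyzerTest("how are you", "where are you", smx));
try {
    double score = f.get().getDistance(); // higher = more related; -1000 flags a failed analysis
    System.out.println("score: " + score);
} catch (InterruptedException | ExecutionException ex) {
    ex.printStackTrace();
}
executor.shutdown();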

View File

@ -6,10 +6,9 @@
ps ax | grep EventNotfierDiscordBot-1.0
kill $pid (number)
nohup screen -d -m -S nonRoot java -Xmx5048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
nohup screen -d -m -S nonRoot java -Xmx4048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
nohup screen -d -m -S nonroot java -Xmx6048M -jar /home/javatests/ArtificialAutism-1.0.jar
nohup screen -d -m -S nonroot java -Xmx4048M -jar /home/javatests/ArtificialAutism-1.0.jar
nohup screen -d -m -S gameservers java -Xmx2450M -jar /home/gameservers/ArtificialAutism/ArtificialAutism-1.0.jar
screen -ls (number1)
screen -X -S (number1) quit
*/
@ -20,7 +19,6 @@ package PresentationLayer;
import FunctionLayer.CustomError;
import FunctionLayer.MYSQLDatahandler;
import FunctionLayer.MessageResponseHandler;
import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
@ -37,7 +35,7 @@ import org.javacord.api.entity.user.User;
public class DiscordHandler {
public static void main(String[] args) {
SentimentAnalyzerTest.shiftReduceParserInitiate();
MYSQLDatahandler.shiftReduceParserInitiate();
new Thread(() -> {
try {
MYSQLDatahandler.instance.initiateMYSQL();
@ -76,8 +74,8 @@ public class DiscordHandler {
List<User> userlist = event.getMessage().getMentionedUsers();
String strresult = event.getMessage().toString();
if (userlist != null) {
for (int i = 0; i < userlist.size(); i++) {
strresult = strresult.replace(userlist.get(i).getIdAsString(), "");
for (User user : userlist) {
strresult = strresult.replace(user.getIdAsString(), "");
}
}
MessageResponseHandler.getMessage(strresult);
@ -88,14 +86,13 @@ public class DiscordHandler {
Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
}
}
// mention check, plus one named channel ("minor-test") where the bot may always reply
if (event.getMessage().getMentionedUsers().contains(api.getYourself())
|| event.getServerTextChannel().get().toString().contains("minor-test")) {
String ResponseStr;
try {
ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString());
if (!ResponseStr.isEmpty()) {
System.out.print("\nResponseStr3: " + ResponseStr);
System.out.print("\nResponseStr3: " + ResponseStr + "\n");
event.getChannel().sendMessage(ResponseStr);
}
} catch (CustomError ex) {