diff --git a/ArtificialAutism/src/main/java/DataLayer/DataMapper.java b/ArtificialAutism/src/main/java/DataLayer/DataMapper.java
index 4f41e8da..f1a0281f 100644
--- a/ArtificialAutism/src/main/java/DataLayer/DataMapper.java
+++ b/ArtificialAutism/src/main/java/DataLayer/DataMapper.java
@@ -14,6 +14,7 @@ import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.logging.Level;
@@ -168,6 +169,47 @@ public class DataMapper {
         }
     }
 
+    public static LinkedHashMap<String, LinkedHashMap<String, Double>> getAllRelationScores() {
+        int count = getSementicsDBRows();
+        int counter2 = 0;
+        int hardCapRetrieveCount = 500000;
+        LinkedHashMap<String, LinkedHashMap<String, Double>> LHMSMX = new LinkedHashMap<>();
+        while (count > counter2) {
+            try (Connection l_cCon = DBCPDataSource.getConnection()) {
+                l_cCon.setAutoCommit(false);
+                String l_sSQL = "SELECT * FROM `WordMatrix` WHERE ID > " + counter2 + " AND ID < " + (counter2 + hardCapRetrieveCount);
+                try (PreparedStatement l_pStatement = l_cCon.prepareStatement(l_sSQL, java.sql.ResultSet.TYPE_FORWARD_ONLY,
+                        java.sql.ResultSet.CONCUR_READ_ONLY)) {
+                    // Integer.MIN_VALUE switches MySQL Connector/J into row-by-row streaming
+                    l_pStatement.setFetchSize(Integer.MIN_VALUE);
+                    try (ResultSet l_rsSearch = l_pStatement.executeQuery()) {
+                        int i = 0;
+                        boolean hasRow = l_rsSearch.next();
+                        while (hasRow && i < hardCapRetrieveCount) {
+                            // group consecutive rows by their first (primary) string; a fresh
+                            // map per group so entries are not shared across keys, and the
+                            // row that starts the next group is kept, not skipped
+                            String str1 = l_rsSearch.getString(1);
+                            LinkedHashMap<String, Double> LHMLocal = new LinkedHashMap<>();
+                            do {
+                                LHMLocal.put(l_rsSearch.getString(2), l_rsSearch.getDouble(3));
+                                i++;
+                                counter2++;
+                                hasRow = l_rsSearch.next();
+                            } while (hasRow && i < hardCapRetrieveCount && str1.equals(l_rsSearch.getString(1)));
+                            LHMSMX.put(str1, LHMLocal);
+                            System.out.println("i: " + i + "\n" + "free memory: " + Runtime.getRuntime().freeMemory() + "\ncounter2: " + counter2 + "\n");
+                        }
+                    }
+                }
+            } catch (SQLException ex) {
+                Logger.getLogger(DataMapper.class.getName()).log(Level.SEVERE, null, ex);
+            }
+        }
+        return LHMSMX;
+    }
+
     public static void CloseConnections(PreparedStatement ps, ResultSet rs, Connection con) {
         if (rs != null) {
             try {
@@ -191,4 +233,5 @@ public class DataMapper {
             }
         }
     }
+
 }
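[Editor's note] getAllRelationScores() pages through WordMatrix with open-ended ID ranges and, more importantly, asks Connector/J for streaming: a TYPE_FORWARD_ONLY, CONCUR_READ_ONLY statement with setFetchSize(Integer.MIN_VALUE) hands rows over one at a time instead of buffering the whole table in heap. A minimal sketch of just that pattern, assuming a javax.sql.DataSource and the three WordMatrix columns that the method reads positionally:

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    public final class StreamingReadSketch {

        // Streams a large table row by row; Integer.MIN_VALUE as the fetch size
        // is the MySQL Connector/J convention for enabling streaming mode.
        public static void streamRows(javax.sql.DataSource ds) throws SQLException {
            try (Connection con = ds.getConnection();
                 PreparedStatement ps = con.prepareStatement(
                         "SELECT * FROM `WordMatrix`",
                         ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
                ps.setFetchSize(Integer.MIN_VALUE);
                try (ResultSet rs = ps.executeQuery()) {
                    while (rs.next()) {
                        // columns 1..3: primary string, secondary string, score
                        System.out.println(rs.getString(1) + " ~ " + rs.getString(2)
                                + " = " + rs.getDouble(3));
                    }
                }
            }
        }
    }

Streaming keeps memory flat for multi-million-row tables, with the usual caveat that the connection is tied up until the ResultSet is fully drained or closed.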
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/DistanceObject.java b/ArtificialAutism/src/main/java/FunctionLayer/DistanceObject.java
new file mode 100644
index 00000000..a21282bc
--- /dev/null
+++ b/ArtificialAutism/src/main/java/FunctionLayer/DistanceObject.java
@@ -0,0 +1,40 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package FunctionLayer;
+
+/**
+ *
+ * @author install1
+ */
+public class DistanceObject {
+
+    private Integer distance;
+    private String sentence;
+
+    public DistanceObject() {
+    }
+
+    public Integer getDistance() {
+        return distance;
+    }
+
+    public void setDistance(Integer distance) {
+        this.distance = distance;
+    }
+
+    public String getSentence() {
+        return sentence;
+    }
+
+    public void setSentence(String sentence) {
+        this.sentence = sentence;
+    }
+
+    public DistanceObject(Integer distance, String sentence) {
+        this.distance = distance;
+        this.sentence = sentence;
+    }
+}
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java b/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java
index 7632a4fe..4134e710 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/LevenshteinDistance.java
@@ -5,19 +5,38 @@
  */
 package FunctionLayer;
 
+import java.util.concurrent.Callable;
+
 /**
  *
  * @author install1
 */
-public class LevenshteinDistance {
+public class LevenshteinDistance implements Callable<DistanceObject> {
+
+    private CharSequence lhs;
+    private CharSequence rhs;
+    private DistanceObject dco;
 
     private static int minimum(int a, int b, int c) {
         return Math.min(Math.min(a, b), c);
     }
 
-    public static int computeLevenshteinDistance(CharSequence lhs, CharSequence rhs) {
-        int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
+    public LevenshteinDistance(CharSequence lhs, CharSequence rhs, DistanceObject dco) {
+        this.lhs = lhs;
+        this.rhs = rhs;
+        this.dco = dco;
+        // remember which candidate sentence this task scores, so callers can
+        // read it back from the returned DistanceObject
+        dco.setSentence(lhs.toString());
+    }
+
+    public LevenshteinDistance(CharSequence lhs, CharSequence rhs) {
+        this.lhs = lhs;
+        this.rhs = rhs;
+    }
+
+    public int computeLevenshteinDistance() {
+        int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];
         for (int i = 0; i <= lhs.length(); i++) {
             distance[i][0] = i;
         }
@@ -34,4 +53,18 @@ public class LevenshteinDistance {
         }
         return distance[lhs.length()][rhs.length()];
     }
+
+    @Override
+    public DistanceObject call() {
+        try {
+            // reuse the DP from computeLevenshteinDistance() instead of
+            // duplicating the matrix code
+            dco.setDistance(computeLevenshteinDistance());
+        } catch (Exception ex) {
+            System.out.println("ex msg: " + ex.getMessage() + "\n");
+            // report a large distance so a failed comparison is never chosen
+            dco.setDistance(100);
+        }
+        return dco;
+    }
 }
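[Editor's note] With LevenshteinDistance now a Callable<DistanceObject>, closest-match lookups can fan out across a thread pool. A sketch of the way mostSimilar(...) in MYSQLDatahandler (further down in this patch) consumes it — the cutoff of 8 mirrors the committed code, the helper class name is illustrative:

    import FunctionLayer.DistanceObject;
    import FunctionLayer.LevenshteinDistance;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public final class MostSimilarSketch {

        public static String mostSimilar(String input, List<String> candidates)
                throws InterruptedException, ExecutionException {
            ExecutorService pool = Executors.newFixedThreadPool(
                    Runtime.getRuntime().availableProcessors());
            List<Future<DistanceObject>> futures = new ArrayList<>();
            for (String candidate : candidates) {
                // one task per candidate sentence
                futures.add(pool.submit(new LevenshteinDistance(candidate, input, new DistanceObject())));
            }
            pool.shutdown();
            int best = 8; // same cutoff the handler uses: ignore anything 8+ edits away
            String similar = "";
            for (Future<DistanceObject> f : futures) {
                DistanceObject d = f.get();
                if (d.getDistance() != null && d.getDistance() < best) {
                    best = d.getDistance();
                    similar = d.getSentence();
                }
            }
            return similar;
        }
    }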
diff --git a/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java b/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
index 8d7fe52f..c8bbc2b5 100644
--- a/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
+++ b/ArtificialAutism/src/main/java/FunctionLayer/MYSQLDatahandler.java
@@ -2,25 +2,25 @@
  * To change this license header, choose License Headers in Project Properties.
  * To change this template file, choose Tools | Templates
  * and open the template in the editor.
-//https://stackoverflow.com/questions/43935229/hashmap-with-8-million-entries-becomes-slow
-//http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6364/pdf/imm6364.pdf
  */
 package FunctionLayer;
 
 import DataLayer.DataMapper;
 import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
 import com.google.common.base.Stopwatch;
-import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.MapMaker;
-import com.google.common.collect.Multimap;
+import edu.stanford.nlp.ie.AbstractSequenceClassifier;
+import edu.stanford.nlp.ie.crf.CRFClassifier;
+import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.ling.HasWord;
-import edu.stanford.nlp.ling.Label;
 import edu.stanford.nlp.ling.TaggedWord;
 import edu.stanford.nlp.ling.Word;
 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
 import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.process.DocumentPreprocessor;
 import edu.stanford.nlp.tagger.maxent.MaxentTagger;
+import edu.stanford.nlp.trees.GrammaticalStructureFactory;
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreebankLanguagePack;
 import java.io.IOException;
@@ -28,14 +28,23 @@ import java.io.StringReader;
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
 import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Predicate;
+import java.util.function.Consumer;
 import java.util.logging.Level;
 import java.util.logging.Logger;
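[Editor's note] The added java.util.concurrent imports are the core of this commit: pairwise similarity scoring moves from a synchronous loop to a submit-then-join pattern over a fixed thread pool, and the same shape recurs in checkIfUpdateMatrixes(), getResponseMsg() and mostSimilar(). Reduced to a sketch (names and the Callable payload type are illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public final class FanOutSketch {

        public static List<Double> scoreAll(List<Callable<Double>> jobs)
                throws InterruptedException, ExecutionException {
            ExecutorService pool = Executors.newFixedThreadPool(
                    Runtime.getRuntime().availableProcessors());
            List<Future<Double>> futures = new ArrayList<>();
            for (Callable<Double> job : jobs) {
                futures.add(pool.submit(job));
            }
            pool.shutdown(); // stop accepting new work; queued tasks still run
            List<Double> results = new ArrayList<>();
            for (Future<Double> f : futures) {
                results.add(f.get()); // blocks until each task finishes
            }
            return results;
        }
    }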
@@ -48,19 +57,89 @@ public class MYSQLDatahandler {
 
     public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES);
     public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS);
     public static MYSQLDatahandler instance = new MYSQLDatahandler();
-    public static int SemeticsUpdateCount;
+    public static int semeticsUpdateCount;
     public static int threadCounter = 0;
-    private volatile boolean RefreshMatrixFromDB;
-    private final ConcurrentMap<Integer, String> StringCache;
-    private List<SimilarityMatrix> SimilaritySMXList = new ArrayList<>();
+    private volatile boolean refreshMatrixFromDB;
+    private final ConcurrentMap<Integer, String> stringCache;
+    private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap<>();
     private List<String> multiprocessCalculations = new ArrayList<>();
+    private List<Integer> updatedRows = new ArrayList<>();
     private final Stopwatch stopwatch;
     private final Stopwatch stopwatch1;
+    private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz";
+    private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
+    private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
+    private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
+    private static String NERModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
+    private static MaxentTagger tagger;
+    private static ShiftReduceParser model;
+    private static String[] options = {"-maxLength", "100"};
+    private static Properties props = new Properties();
+    private static Properties propsSentiment = new Properties();
+    private static GrammaticalStructureFactory gsf;
+    private static LexicalizedParser lp;
+    private static TreebankLanguagePack tlp;
+    private static AbstractSequenceClassifier<CoreLabel> classifier;
+    private static StanfordCoreNLP pipeline;
+    private static StanfordCoreNLP pipelineSentiment;
+
+    public static AbstractSequenceClassifier<CoreLabel> getClassifier() {
+        return classifier;
+    }
+
+    public static void setClassifier(AbstractSequenceClassifier<CoreLabel> classifier) {
+        MYSQLDatahandler.classifier = classifier;
+    }
 
     public MYSQLDatahandler() {
         this.stopwatch = Stopwatch.createUnstarted();
         this.stopwatch1 = Stopwatch.createStarted();
-        this.StringCache = new MapMaker().concurrencyLevel(2).makeMap();
+        this.stringCache = new MapMaker().concurrencyLevel(2).makeMap();
+    }
+
+    public static void shiftReduceParserInitiate() {
+        try {
+            classifier = CRFClassifier.getClassifierNoExceptions(NERModel);
+        } catch (ClassCastException ex) {
+            Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        model = ShiftReduceParser.loadModel(modelPath, options);
+        tagger = new MaxentTagger(taggerPath);
+        lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options);
+        tlp = lp.getOp().langpack();
+        gsf = tlp.grammaticalStructureFactory();
+        // set up pipeline properties
+        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+        props.setProperty("parse.model", modelPath);
+        props.setProperty("parse.maxlen", "100");
+        props.setProperty("parse.binaryTrees", "true");
+        propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
+        propsSentiment.setProperty("parse.model", lexParserEnglishRNN);
+        propsSentiment.setProperty("sentiment.model", sentimentModel);
+        propsSentiment.setProperty("parse.maxlen", "100");
+        // set up pipeline
+        pipeline = new StanfordCoreNLP(props);
+        pipelineSentiment = new StanfordCoreNLP(propsSentiment);
+    }
+
+    public static GrammaticalStructureFactory getGsf() {
+        return gsf;
+    }
+
+    public static StanfordCoreNLP getPipeline() {
+        return pipeline;
+    }
+
+    public static StanfordCoreNLP getPipelineSentiment() {
+        return pipelineSentiment;
+    }
+
+    public static MaxentTagger getTagger() {
+        return tagger;
+    }
+
+    public static ShiftReduceParser getModel() {
+        return model;
     }
 
     private Map<Integer, String> getCache() throws SQLException, IOException, CustomError {
@@ -78,8 +157,8 @@ public class MYSQLDatahandler {
     public void initiateMYSQL() throws SQLException, IOException {
         try {
             DataMapper.createTables();
-            StringCache.putAll(getCache());
-            SimilaritySMXList = DataMapper.getAllSementicMatrixes();
+            stringCache.putAll(getCache());
+            lHMSMX = DataMapper.getAllRelationScores();
        } catch (CustomError ex) {
             Logger.getLogger(MYSQLDatahandler.class
                     .getName()).log(Level.SEVERE, null, ex);
@@ -87,34 +166,50 @@ public class MYSQLDatahandler {
     }
 
     public synchronized void checkIfUpdateMatrixes() {
-        RefreshMatrixFromDB = false;
+        refreshMatrixFromDB = false;
         int calculationBoundaries = 10;
         int updateBadgesInteger = 500;
         if (stopwatch1.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS1) {
-            RefreshMatrixFromDB = true;
+            refreshMatrixFromDB = true;
             if (threadCounter == 0) {
-                try {
-                    SimilaritySMXList = DataMapper.getAllSementicMatrixes();
-                    stopwatch1.reset();
-                } catch (CustomError ex) {
-                    Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex);
-                }
+                lHMSMX = DataMapper.getAllRelationScores();
+                stopwatch1.reset();
             }
         }
-        
if (StringCache.values().size() > 10) { - if (!RefreshMatrixFromDB && multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) { + if (stringCache.values().size() > 10 && !refreshMatrixFromDB) { + if (multiprocessCalculations.size() <= (calculationBoundaries * calculationBoundaries)) { threadCounter++; - List strList = new ArrayList(StringCache.values()); - SemeticsUpdateCount = new Random().nextInt(strList.size() - 6); - int beginindex = SemeticsUpdateCount; - SemeticsUpdateCount += calculationBoundaries / 2; - int temp = SemeticsUpdateCount; + List strList = new ArrayList(stringCache.values()); + List updateLocal = updatedRows; + int random = -1; + if (!updateLocal.contains(random)) { + updatedRows.add(random); + } + Collections.sort(updateLocal); + while (updateLocal.contains(random)) { + random = new Random().nextInt(strList.size() - 6); + int indexPrev = Collections.binarySearch(updateLocal, random); + int indexNext = Collections.binarySearch(updateLocal, random + 6); + //-1 will always be index 0 + if (indexPrev > 0 && indexNext > 0) { + indexPrev = updateLocal.get(indexPrev); + indexNext = updateLocal.get(indexNext); + } + random = indexPrev < random - 5 && indexNext < random ? random : -1; + } + updatedRows.add(random); + semeticsUpdateCount = random; + int beginindex = semeticsUpdateCount; + semeticsUpdateCount += calculationBoundaries / 2; + int temp = semeticsUpdateCount; + System.out.println("beginindex: " + beginindex + "\ntemp: " + temp + "\n"); List strIndexNavigator = new ArrayList(); strList.subList(beginindex, temp).forEach((str) -> { strIndexNavigator.add(str); multiprocessCalculations.add(str); }); new Thread(() -> { + LinkedHashMap> LHMSMXLocal = lHMSMX; List strIndexNavigatorL = new ArrayList(strIndexNavigator); List strIndexAll = new ArrayList(strList); List randomIndexesToUpdate = new ArrayList(); @@ -127,32 +222,49 @@ public class MYSQLDatahandler { randomIndexesToUpdate.add(str); }); List matrixUpdateList = new ArrayList(); + List> futures = new ArrayList(); + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); strIndexNavigatorL.forEach((str) -> { randomIndexesToUpdate.stream().filter((str1) -> (!str.equals(str1))).forEachOrdered((str1) -> { boolean present = false; if (multiprocessCalculations.contains(str1)) { present = true; - } else { - for (SimilarityMatrix SMX : SimilaritySMXList) { - if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) { - present = true; - break; - } - if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) { - present = true; - break; - } + } else if (LHMSMXLocal.containsKey(str)) { + LinkedHashMap orDefault = LHMSMXLocal.get(str); + if (orDefault.containsKey(str1)) { + present = true; + } + } else if (LHMSMXLocal.containsKey(str1)) { + LinkedHashMap orDefault = LHMSMXLocal.get(str1); + if (orDefault.containsKey(str)) { + present = true; } } if (!present) { SimilarityMatrix SMX = new SimilarityMatrix(str, str1); - double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1); - SMX.setDistance(score); - matrixUpdateList.add(SMX); - SimilaritySMXList.add(SMX); + Callable worker = new SentimentAnalyzerTest(str, str1, SMX); + futures.add(executor.submit(worker)); } }); }); + executor.shutdown(); + try { + System.out.println("finished worker assignment, futures size: " + futures.size() + "\n"); + for (Future future : futures) { + SimilarityMatrix SMX = future.get(); + System.out.println("SMX primary: " + 
SMX.getPrimaryString() + "\nSMX Secondary: " + SMX.getSecondaryString() + + "\nScore: " + SMX.getDistance() + "\n"); + LinkedHashMap get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null); + if (get == null) { + get = new LinkedHashMap(); + } + get.put(SMX.getSecondaryString(), SMX.getDistance()); + lHMSMX.put(SMX.getPrimaryString(), get); + matrixUpdateList.add(SMX); + } + } catch (InterruptedException | ExecutionException ex) { + Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); + } new Thread(() -> { try { if (!matrixUpdateList.isEmpty()) { @@ -160,6 +272,7 @@ public class MYSQLDatahandler { System.out.println("finished datamapper semetic insert"); } threadCounter--; + System.out.println("\nthreadCounter: " + threadCounter + "\n"); } catch (CustomError ex) { Logger.getLogger(MYSQLDatahandler.class .getName()).log(Level.SEVERE, null, ex); @@ -167,56 +280,73 @@ public class MYSQLDatahandler { }).start(); }). start(); - try { - wait(800); - } catch (InterruptedException ex) { - Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); - } } else { if (threadCounter == 0) { - List strList = new ArrayList(StringCache.values()); - List matrixUpdateList = new ArrayList(); - List randomStrList = new ArrayList(); - int indexes = updateBadgesInteger; - if (indexes >= strList.size()) { - indexes = strList.size() - 1; - } - int beginindexes = new Random().nextInt((strList.size()) - indexes); - strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> { - randomStrList.add(str); - }); - multiprocessCalculations.forEach((str) -> { - randomStrList.forEach((str1) -> { - boolean present = false; - for (SimilarityMatrix SMX : SimilaritySMXList) { - if (SMX.getPrimaryString().equals(str) && SMX.getSecondaryString().equals(str1)) { - present = true; - break; - } - if (SMX.getPrimaryString().equals(str1) && SMX.getSecondaryString().equals(str)) { - present = true; - break; - } - } - if (!present) { - SimilarityMatrix SMX = new SimilarityMatrix(str, str1); - double score = SentimentAnalyzerTest.sentimentanalyzing(str, str1); - SMX.setDistance(score); - matrixUpdateList.add(SMX); - SimilaritySMXList.add(SMX); - } - }); - }); - try { - if (!matrixUpdateList.isEmpty()) { - DataMapper.insertSementicMatrixes(matrixUpdateList); - System.out.println("finished datamapper semetic insert"); + threadCounter++; + new Thread(() -> { + LinkedHashMap> LHMSMXLocal = lHMSMX; + List strList = new ArrayList(stringCache.values()); + List matrixUpdateList = new ArrayList(); + List randomStrList = new ArrayList(); + int indexes = updateBadgesInteger; + if (indexes >= strList.size()) { + indexes = strList.size() - 1; } - } catch (CustomError ex) { - Logger.getLogger(MYSQLDatahandler.class - .getName()).log(Level.SEVERE, null, ex); - } - multiprocessCalculations = new ArrayList(); + int beginindexes = new Random().nextInt((strList.size()) - indexes); + strList.subList(beginindexes, beginindexes + indexes).forEach((str) -> { + randomStrList.add(str); + }); + List> futures = new ArrayList(); + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + multiprocessCalculations.forEach((str) -> { + randomStrList.forEach((str1) -> { + boolean present = false; + if (LHMSMXLocal.containsKey(str)) { + LinkedHashMap orDefault = LHMSMXLocal.get(str); + if (orDefault.containsKey(str1)) { + present = true; + } + } else if (LHMSMXLocal.containsKey(str1)) { + LinkedHashMap orDefault = LHMSMXLocal.get(str1); + if 
(orDefault.containsKey(str)) { + present = true; + } + } + if (!present) { + SimilarityMatrix SMX = new SimilarityMatrix(str, str1); + Callable worker = new SentimentAnalyzerTest(str, str1, SMX); + futures.add(executor.submit(worker)); + } + }); + }); + executor.shutdown(); + try { + for (Future future : futures) { + SimilarityMatrix SMX = future.get(); + LinkedHashMap get = lHMSMX.getOrDefault(SMX.getPrimaryString(), null); + if (get == null) { + get = new LinkedHashMap(); + } + get.put(SMX.getSecondaryString(), SMX.getDistance()); + lHMSMX.put(SMX.getPrimaryString(), get); + matrixUpdateList.add(SMX); + } + } catch (InterruptedException | ExecutionException ex) { + Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); + } + try { + if (!matrixUpdateList.isEmpty()) { + DataMapper.insertSementicMatrixes(matrixUpdateList); + System.out.println("finished datamapper semetic insert"); + } + } catch (CustomError ex) { + Logger.getLogger(MYSQLDatahandler.class + .getName()).log(Level.SEVERE, null, ex); + } + multiprocessCalculations = new ArrayList(); + updatedRows = new ArrayList(); + threadCounter--; + }).start(); } } } @@ -233,14 +363,15 @@ public class MYSQLDatahandler { strUpdate.addAll(str); try { DataMapper.InsertMYSQLStrings(strUpdate); + } catch (CustomError ex) { Logger.getLogger(MYSQLDatahandler.class .getName()).log(Level.SEVERE, null, ex); } MessageResponseHandler.setStr(new ArrayList()); - int j = StringCache.size() + 1; + int j = stringCache.size() + 1; for (String str1 : strUpdate) { - StringCache.put(j, str1); + stringCache.put(j, str1); j++; } }).start(); @@ -252,85 +383,106 @@ public class MYSQLDatahandler { } } - public String getResponseMsg(String str) throws CustomError { + public synchronized String getResponseMsg(String str) throws CustomError { + str = str.trim(); + if (str.startsWith("<@")) { + str = str.substring(str.indexOf("> ") + 2); + } + final LinkedHashMap> LHMSMXLocal = lHMSMX; + ConcurrentMap strArrs = stringCache; double Score = -10000; - SimilarityMatrix SMXreturn = null; - List strLocal = new ArrayList(StringCache.values()); - for (String str1 : strLocal) { - if (str.equals(str1)) { - Iterator SMXITR = SimilaritySMXList.iterator(); - while (SMXITR.hasNext()) { - SimilarityMatrix SMX = SMXITR.next(); - if (SMX.getPrimaryString().equals(str) || SMX.getSecondaryString().equals(str)) { - double smxDistance = SMX.getDistance(); + SimilarityMatrix SMXreturn = new SimilarityMatrix("", ""); + System.out.println("pre mostSimilarSTR \n"); + String mostSimilarSTR = mostSimilar(str, strArrs); + if (!mostSimilarSTR.isEmpty()) { + System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n"); + LinkedHashMap orDefault = LHMSMXLocal.getOrDefault(mostSimilarSTR, null); + if (orDefault != null) { + for (Entry entrySet : orDefault.entrySet()) { + double smxDistance = entrySet.getValue(); + if (smxDistance > Score) { + Score = smxDistance; + SMXreturn = new SimilarityMatrix(mostSimilarSTR, entrySet.getKey(), smxDistance); + } + } + } + for (Entry> values1 : LHMSMXLocal.entrySet()) { + LinkedHashMap value = values1.getValue(); + for (Entry keystr : value.entrySet()) { + if (keystr.getKey().equals(mostSimilarSTR)) { + double smxDistance = keystr.getValue(); if (smxDistance > Score) { Score = smxDistance; - SMXreturn = SMX; + SMXreturn = new SimilarityMatrix(values1.getKey(), keystr.getKey(), smxDistance); } } } - break; } - } - if (SMXreturn != null) { - if (SMXreturn.getPrimaryString().equals(str)) { - return SMXreturn.getSecondaryString(); - } else { 
- return SMXreturn.getPrimaryString(); - } - } - String[] toArray = strLocal.toArray(new String[strLocal.size()]); - String mostSimilarSTR = mostSimilar(str, toArray); - Iterator SMXITR = SimilaritySMXList.iterator(); - while (SMXITR.hasNext()) { - System.out.println("mostSimilarSTR; " + mostSimilarSTR + "\n"); - mostSimilarSTR = mostSimilarSTR.trim(); - SimilarityMatrix SMX = SMXITR.next(); - if (SMX.getPrimaryString().trim().equals(mostSimilarSTR) || SMX.getSecondaryString().trim().equals(mostSimilarSTR)) { - double smxDistance = SMX.getDistance(); - if (smxDistance > Score) { - Score = smxDistance; - SMXreturn = SMX; + if (!SMXreturn.getPrimaryString().isEmpty()) { + if (SMXreturn.getPrimaryString().equals(mostSimilarSTR)) { + return SMXreturn.getSecondaryString(); + } else { + return SMXreturn.getPrimaryString(); } } } - if (SMXreturn != null) { - if (SMXreturn.getPrimaryString().equals(str)) { - return SMXreturn.getSecondaryString(); - } else { - return SMXreturn.getPrimaryString(); + System.out.println("none within 8 range"); + ConcurrentMap strCache = stringCache; + ConcurrentMap> futureslocal = new MapMaker().concurrencyLevel(2).makeMap(); + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + for (String str1 : strCache.values()) { + if (!str.equals(str1)) { + Callable worker = new SentimentAnalyzerTest(str, str1, new SimilarityMatrix(str, str1)); + futureslocal.put(futureslocal.size() + 1, executor.submit(worker)); } } - SMXITR = SimilaritySMXList.iterator(); - while (SMXITR.hasNext()) { - SimilarityMatrix SMX = SMXITR.next(); - if (SMX.getPrimaryString().contains(mostSimilarSTR) || SMX.getSecondaryString().contains(mostSimilarSTR)) { - double smxDistance = SMX.getDistance(); - if (smxDistance > Score) { - Score = smxDistance; + executor.shutdown(); + int index = 0; + for (Future future : futureslocal.values()) { + try { + SimilarityMatrix SMX = future.get(); + double distance = SMX.getDistance(); + System.out.println("index: " + index + "\nfutures size: " + futureslocal.values().size() + "\nScore: " + SMX.getDistance() + "\nSecondary: " + + SMX.getSecondaryString() + "\nPrimary: " + SMX.getPrimaryString() + "\n"); + if (distance > Score) { + Score = distance; SMXreturn = SMX; } + } catch (InterruptedException | ExecutionException ex) { + System.out.println("ex: " + ex.getMessage() + "\n"); } + index++; } - if (SMXreturn != null) { - if (SMXreturn.getPrimaryString().equals(str)) { - return SMXreturn.getSecondaryString(); - } else { - return SMXreturn.getPrimaryString(); - } - } - return "how did you manage to reach this, AAAAAAAAAAAA end my suffering"; + System.out.println("Reached end: secondary: " + SMXreturn.getSecondaryString() + "\nPrimarY: " + SMXreturn.getPrimaryString() + + "\nScore: " + SMXreturn.getDistance()); + return SMXreturn.getSecondaryString(); } - public String mostSimilar(String toBeCompared, String[] strings) { - int minDistance = Integer.MAX_VALUE; + public String mostSimilar(String toBeCompared, ConcurrentMap concurrentStrings) { + int minDistance = 8; String similar = ""; - for (String str : strings) { - int d = LevenshteinDistance.computeLevenshteinDistance(str, toBeCompared); - if (d < minDistance) { - minDistance = d; - similar = str; + List> futures = new ArrayList(); + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + concurrentStrings.values().stream().map((str) -> new LevenshteinDistance(str, toBeCompared, new 
DistanceObject())).forEachOrdered((worker) -> { + futures.add(executor.submit(worker)); + }); + executor.shutdown(); + try { + for (Future future : futures) { + DistanceObject d = future.get(); + try { + int distance = d.getDistance(); + System.out.println("distance: " + distance + "\n"); + if (distance < minDistance) { + minDistance = distance; + similar = d.getSentence(); + } + } catch (NullPointerException ex) { + System.out.println("failed future\n"); + } } + } catch (InterruptedException | ExecutionException ex) { + Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); } return similar; } @@ -402,6 +554,30 @@ public class MYSQLDatahandler { if (str1.contains("{white}")) { str1 = str1.replace("{white}", " "); } + if (str1.contains("{fullblue}")) { + str1 = str1.replace("{fullblue}", " "); + } + if (str1.contains("{cyan}")) { + str1 = str1.replace("{cyan}", " "); + } + if (str1.contains("{lime}")) { + str1 = str1.replace("{lime}", " "); + } + if (str1.contains("{deeppink}")) { + str1 = str1.replace("{deeppink}", " "); + } + if (str1.contains("{slategray}")) { + str1 = str1.replace("{slategray}", " "); + } + if (str1.contains("{dodgerblue}")) { + str1 = str1.replace("{dodgerblue}", " "); + } + if (str1.contains("{black}")) { + str1 = str1.replace("{black}", " "); + } + if (str1.contains("{orangered}")) { + str1 = str1.replace("{orangered}", " "); + } str1 = str1.trim(); if (str1.length() > 2 && (!str1.startsWith("!"))) { strlistreturn.add(str1); @@ -410,38 +586,9 @@ public class MYSQLDatahandler { return strlistreturn; } - /* - public static List cutLongsFromEmotes(List str) { - List strreturn = new ArrayList(); - int emotesTraceLong = 17; - for (String str1 : str) { - StringBuilder SB = new StringBuilder(); - int counter = 0; - int i = 0; - for (Character c : str1.toCharArray()) { - i++; - if (Character.isDigit(c)) { - counter++; - if (counter > emotesTraceLong && str1.length() > i + 2) { - SB.append(str1.substring(0, i - emotesTraceLong)); - SB.append(str1.substring(i + 1, str1.length())); - } - } else { - counter = 0; - } - } - if (SB.toString().isEmpty()) { - strreturn.add(str1); - } else { - strreturn.add(SB.toString()); - } - } - return strreturn; - } - */ private List removeSlacks(List str) { - ShiftReduceParser model = SentimentAnalyzerTest.getModel(); - MaxentTagger tagger = SentimentAnalyzerTest.getTagger(); + ShiftReduceParser model = getModel(); + MaxentTagger tagger = getTagger(); List taggedWords; List strreturn = new ArrayList(); for (String str1 : str) { @@ -481,9 +628,10 @@ public class MYSQLDatahandler { } if (addCounter > 3) { boolean tooclosematch = false; - for (String strVals : StringCache.values()) { - double Distance = LevenshteinDistance.computeLevenshteinDistance(strVals, str1); - int maxpermittedDistance = 5; + for (String strVals : stringCache.values()) { + LevenshteinDistance leven = new LevenshteinDistance(strVals, str1); + double Distance = leven.computeLevenshteinDistance(); + int maxpermittedDistance = 2; if (Distance < maxpermittedDistance) { tooclosematch = true; break; @@ -504,153 +652,4 @@ public class MYSQLDatahandler { } return strreturn; } - - /** - * - * @throws CustomError - * @deprecated - */ - public synchronized void calculateStrings() throws CustomError { - //linkedhashmap? 
- int initiallimit = 5; - int listindex = 0; - List WS4JList = DataMapper.getAllSementicMatrixes(); - List WS4JListUpdate = new ArrayList(); - List sentencesList = DataMapper.getAllStrings(); - for (int i = 1; i < initiallimit; i++) { - if (!sentencesList.get(i).isEmpty()) { - //System.out.print("i: " + i + "\n sentencesList i: " + sentencesList.get(i) + "\n"); - String[] words1 = sentencesList.get(i).split(" "); - for (String words11 : words1) { - if (!words11.isEmpty() && words11.length() > 3) { - String str = words11; - if (!str.isEmpty() && str.length() > 3) { - //SecondaryPredicate, no secondary key present with word - Predicate SecondaryPredicate = e -> str.equals(e.getSecondaryString()); - //no primary key present with the word - if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) { - WS4JListUpdate = createWS4JWordMatrix(str, sentencesList, WS4JListUpdate, WS4JList, SecondaryPredicate); - for (; listindex < WS4JListUpdate.size(); listindex++) { - WS4JList.add(WS4JListUpdate.get(listindex)); - } - } - } - } - } - } - } - //System.out.println("\nWS4JListUpdate size: " + WS4JListUpdate.size()); - DataMapper.insertSementicMatrixes(WS4JListUpdate); - } - - /** - * - * @param str - * @param strlist - * @param ws4jlist - * @param oldlist - * @param SecondaryPredicate - * @return - * @deprecated - */ - public List createWS4JWordMatrix(String str, List strlist, List ws4jlist, - List oldlist, Predicate SecondaryPredicate) { - for (String str1 : strlist) { - if (!str1.isEmpty()) { - String[] words1 = str1.split(" "); - for (int j = 0; j < words1.length; j++) { - if (!words1[j].isEmpty() && words1[j].length() > 3) { - String strword = words1[j]; - if (!strword.isEmpty() && strword.length() > 3 && !strword.equals(str)) { - Predicate PrimaryPredicate = e -> strword.equals(e.getPrimaryString()); - if (!oldlist.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) { - //System.out.println("creating SimilarityMatrix with STR: " + str + "\n strword: " + strword + "\n"); - SimilarityMatrix ws4j = new SimilarityMatrix(str, strword); - /* - double addingDistance = ws4j.getDistanceCalculations(); - if (addingDistance > 0.00) { - //System.out.println("added! 
\n"); - ws4j.setDistance(addingDistance); - ws4jlist.add(ws4j); - } - */ - } - } - } - } - } - } - return ws4jlist; - } - - /** - * Stanford Parser method to update calculations to the DB - * - * @deprecated - * @throws FunctionLayer.CustomError - */ - public void sentimentAnalyzingStringsToDB() throws CustomError { - - List sentencesList = DataMapper.getAllStrings(); - List WS4JList = DataMapper.getAllSementicMatrixes(); - List WS4JListUpdate = new ArrayList(); - int listindex = 0; - for (int i = 0; i < sentencesList.size() - 3000; i++) { - String str = sentencesList.get(i); - if (!str.isEmpty() && str.length() > 3) { - //System.out.println("i: " + i + "\n"); - Predicate SecondaryPredicate = e -> str.equals(e.getSecondaryString()); - if (!WS4JList.stream().filter(e -> str.equals(e.getPrimaryString())).findAny().isPresent()) { - //WS4JListUpdate = addStringMatrixes(str, sentencesList, SecondaryPredicate, WS4JListUpdate, WS4JList); - for (; listindex < WS4JListUpdate.size(); listindex++) { - WS4JList.add(WS4JListUpdate.get(listindex)); - } - } - } - } - // System.out.println("\n WS4JListUpdate size: " + WS4JListUpdate.size()); - DataMapper.insertSementicMatrixes(WS4JListUpdate); - } - - /** - * @deprecated @param str Primary string which is checked, the filter - * ensures primary string has not been calculated yet - * @param sentencesList the full string list where each unique primary has - * to calculate its value to the rest of the DB - * @param SecondaryPredicate ensures primary string is not already - * calculated somewhere with another string - * @param WS4JListUpdate Matrix list to update in DB with new Sentences - * @param OldWS4JList Check if str1 already has primary entry with - * co-responding SecondaryPredicate - * @return Updated List which has to be inserted to the DB - */ - private List addStringMatrixes(String str, List sentencesList, - Predicate SecondaryPredicate, List WS4JListUpdate, - List OldWS4JList, LexicalizedParser lp, TreebankLanguagePack tlp) { - double valuerange = -100.0; - for (int i = 0; i < sentencesList.size(); i++) { - String str1 = sentencesList.get(i); - if (!str1.isEmpty() && str1.length() > 3) { - Predicate PrimaryPredicate = e -> str1.equals(e.getPrimaryString()); - if (!OldWS4JList.stream().filter(PrimaryPredicate.and(SecondaryPredicate)).findAny().isPresent()) { - double s = -100.0; - if (s > valuerange) { - SimilarityMatrix SMX = new SimilarityMatrix(str, str1); - SMX.setDistance(s); - /* - System.out.println("SMX added: \n Primary: " + SMX.getPrimaryString() + "\n Secondary: " + SMX.getSecondaryString() - + "\n Score: " + SMX.getDistance() + "\n"); - */ - WS4JListUpdate.add(SMX); - } - } - } - } - /* - str parameter is primary and not used as primary if reaching here - secondary predicate ensures primary does not already exist as secondary with co-responding strlist primary - */ - return WS4JListUpdate; - } - } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java b/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java index 29dfcdf9..d8c2aa1f 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/MessageResponseHandler.java @@ -7,8 +7,6 @@ package FunctionLayer; import java.util.ArrayList; import java.util.List; -//https://www.programcreek.com/java-api-examples/index.php?source_dir=simmetrics-master/simmetrics-core/src/main/java/org/simmetrics/metrics/JaroWinkler.java# 
-//https://stackoverflow.com/questions/36032958/semantic-matching-in-ws4j-at-sentence-level /** * @@ -29,6 +27,9 @@ public class MessageResponseHandler { public static void getMessage(String message) { if (message != null && !message.isEmpty()) { message = message.replace("@", ""); + if (message.startsWith("<>")) { + message = message.substring(message.indexOf(">")); + } if (message.startsWith("[ *")) { message = message.substring(message.indexOf("]")); } diff --git a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java index 20c7b944..8e369c35 100644 --- a/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java +++ b/ArtificialAutism/src/main/java/FunctionLayer/StanfordParser/SentimentAnalyzerTest.java @@ -1,20 +1,21 @@ package FunctionLayer.StanfordParser; import FunctionLayer.LevenshteinDistance; +import FunctionLayer.MYSQLDatahandler; +import FunctionLayer.SimilarityMatrix; +import edu.stanford.nlp.ie.AbstractSequenceClassifier; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.IndexedWord; -import edu.stanford.nlp.ling.Label; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; -import edu.stanford.nlp.parser.lexparser.LexicalizedParser; import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.process.DocumentPreprocessor; -import edu.stanford.nlp.process.Tokenizer; import edu.stanford.nlp.sentiment.SentimentCoreAnnotations; +import edu.stanford.nlp.sequences.DocumentReaderAndWriter; import edu.stanford.nlp.tagger.maxent.MaxentTagger; import edu.stanford.nlp.trees.Constituent; import edu.stanford.nlp.trees.GrammaticalRelation; @@ -22,7 +23,6 @@ import edu.stanford.nlp.trees.GrammaticalStructure; import edu.stanford.nlp.trees.GrammaticalStructureFactory; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; -import edu.stanford.nlp.trees.TreebankLanguagePack; import edu.stanford.nlp.trees.TypedDependency; import edu.stanford.nlp.trees.tregex.gui.Tdiff; import edu.stanford.nlp.util.CoreMap; @@ -30,8 +30,9 @@ import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.Properties; import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; import org.ejml.simple.SimpleMatrix; /* @@ -43,217 +44,227 @@ import org.ejml.simple.SimpleMatrix; * * @author install1 */ -public class SentimentAnalyzerTest { +public class SentimentAnalyzerTest implements Callable { - private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"; - private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; - private static String parserModelPathUD = "edu/stanford/nlp/models/parser/nndep/english_UD.gz"; - private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; - private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; - private static MaxentTagger tagger; - private static ShiftReduceParser model; - private static String[] options = {"-maxLength", "100"}; - private static 
LexicalizedParser lp; - private static TreebankLanguagePack tlp; - private static Properties props = new Properties(); - private static Properties propsSentiment = new Properties(); - private static GrammaticalStructureFactory gsf; - private static StanfordCoreNLP pipeline; - private static StanfordCoreNLP pipelineSentiment; + private String str; + private String str1; + private SimilarityMatrix smxParam; + private ShiftReduceParser model; + private MaxentTagger tagger; + private GrammaticalStructureFactory gsf; + private StanfordCoreNLP pipeline; + private StanfordCoreNLP pipelineSentiment; + private AbstractSequenceClassifier classifier; - public static void shiftReduceParserInitiate() { - model = ShiftReduceParser.loadModel(modelPath, options); - tagger = new MaxentTagger(taggerPath); - lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options); - tlp = lp.getOp().langpack(); - gsf = tlp.grammaticalStructureFactory(); - props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse"); - // set up pipeline properties - props.setProperty("parse.model", modelPath); - // use faster shift reduce parser - props.setProperty("parse.maxlen", "100"); - props.setProperty("parse.binaryTrees", "true"); - propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment"); - propsSentiment.setProperty("parse.model", lexParserEnglishRNN); - propsSentiment.setProperty("parse.maxlen", "100"); - pipeline = new StanfordCoreNLP(props); - pipelineSentiment = new StanfordCoreNLP(propsSentiment); + public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) { + this.str = str; + this.str1 = str1; + this.smxParam = smxParam; + model = MYSQLDatahandler.getModel(); + tagger = MYSQLDatahandler.getTagger(); + pipeline = MYSQLDatahandler.getPipeline(); + pipelineSentiment = MYSQLDatahandler.getPipelineSentiment(); + gsf = MYSQLDatahandler.getGsf(); + classifier = MYSQLDatahandler.getClassifier(); } - public static ShiftReduceParser getModel() { - return model; - } - - public static MaxentTagger getTagger() { - return tagger; - } - - public static double sentimentanalyzing(String str, String str1) { - double score = -100.0; - List> taggedwordlist1 = new ArrayList(); - List> taggedwordlist2 = new ArrayList(); - DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); - for (List sentence : tokenizer) { - List tagged1 = tagger.tagSentence(sentence); - Tree tree = model.apply(tagged1); - ArrayList taggedYield = tree.taggedYield(); - taggedwordlist1.add(taggedYield); - } - tokenizer = new DocumentPreprocessor(new StringReader(str)); - for (List sentence : tokenizer) { - List tagged1 = tagger.tagSentence(sentence); - Tree tree = model.apply(tagged1); - ArrayList taggedYield = tree.taggedYield(); - taggedwordlist2.add(taggedYield); - } - int counter = 0; - int counter1 = 0; - for (List taggedlist2 : taggedwordlist2) { - counter += taggedlist2.size(); - } - for (List taggedlist1 : taggedwordlist1) { - counter1 += taggedlist1.size(); - } - int overValue = counter >= counter1 ? 
counter - counter1 : counter1 - counter; - overValue *= 16; - while (overValue > 0) { - overValue--; - score--; - } - System.out.println("Score Post overValue: " + score + "\n"); - for (List TGWList : taggedwordlist1) { - for (TaggedWord TGW : TGWList) { - List tgwlist1 = new ArrayList(); - for (List taggedlist2 : taggedwordlist2) { - for (TaggedWord TGW1 : taggedlist2) { - if (TGW.tag().equals(TGW1.tag()) && !TGW.tag().equals(":") && !tgwlist1.contains(TGW1.tag())) { - score += 64; - tgwlist1.add(TGW.tag()); - } - } - } + @Override + public SimilarityMatrix call() { + try { + Double score = -100.0; + List> taggedwordlist1 = new ArrayList(); + List> taggedwordlist2 = new ArrayList(); + DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(str1)); + for (List sentence : tokenizer) { + taggedwordlist1.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); } - } - // System.out.println("Score: " + score); - Annotation annotation = new Annotation(str1); - pipeline.annotate(annotation); - List sentenceConstituencyParseList = new ArrayList(); - for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { - Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); - sentenceConstituencyParseList.add(sentenceConstituencyParse); - } - Annotation annotation1 = new Annotation(str); - pipeline.annotate(annotation1); - for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) { - Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); - GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse); - Collection allTypedDependencies = gs.allTypedDependencies(); - List filerTreeContent = new ArrayList(); - for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) { - Set inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1); - Set inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse); - List constiLabels = new ArrayList(); - for (Constituent consti : inT1notT2) { - for (Constituent consti1 : inT2notT1) { - if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) { - //System.out.println("consti value: " + consti.value() + "\n"); - score += 64; //256 - constiLabels.add(consti.value()); + tokenizer = new DocumentPreprocessor(new StringReader(str)); + for (List sentence : tokenizer) { + taggedwordlist2.add(model.apply(tagger.tagSentence(sentence)).taggedYield()); + } + int counter = 0; + int counter1 = 0; + counter = taggedwordlist2.stream().map((taggedlist2) -> taggedlist2.size()).reduce(counter, Integer::sum); + counter1 = taggedwordlist1.stream().map((taggedlist1) -> taggedlist1.size()).reduce(counter1, Integer::sum); + int overValue = counter >= counter1 ? 
counter - counter1 : counter1 - counter; + overValue *= 16; + score -= overValue; + List tgwlistIndex = new ArrayList(); + taggedwordlist1.forEach((TGWList) -> { + TGWList.forEach((TaggedWord) -> { + if (!tgwlistIndex.contains(TaggedWord.tag()) && !TaggedWord.tag().equals(":")) { + tgwlistIndex.add(TaggedWord.tag()); + } + }); + }); + AtomicInteger runCount = new AtomicInteger(0); + taggedwordlist2.forEach((TGWList) -> { + TGWList.forEach((TaggedWord) -> { + if (tgwlistIndex.contains(TaggedWord.tag())) { + tgwlistIndex.remove(TaggedWord.tag()); + runCount.getAndIncrement(); + } + }); + }); + score += runCount.get() * 64; + Annotation annotation = new Annotation(str1); + pipeline.annotate(annotation); + List sentenceConstituencyParseList = new ArrayList(); + for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); + sentenceConstituencyParseList.add(sentenceConstituencyParse); + } + Annotation annotation1 = new Annotation(str); + pipeline.annotate(annotation1); + List nerList = new ArrayList(); + for (CoreMap sentence : annotation1.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree sentenceConstituencyParse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); + GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceConstituencyParse); + Collection allTypedDependencies = gs.allTypedDependencies(); + List filerTreeContent = new ArrayList(); + for (Tree sentenceConstituencyParse1 : sentenceConstituencyParseList) { + Set inT1notT2 = Tdiff.markDiff(sentenceConstituencyParse, sentenceConstituencyParse1); + Set inT2notT1 = Tdiff.markDiff(sentenceConstituencyParse1, sentenceConstituencyParse); + List constiLabels = new ArrayList(); + for (Constituent consti : inT1notT2) { + for (Constituent consti1 : inT2notT1) { + if (consti.value().equals(consti1.value()) && !constiLabels.contains(consti.value())) { + score += 64; //256 + constiLabels.add(consti.value()); + } } } - } - GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1); - Collection allTypedDependencies1 = gs1.allTypedDependencies(); - for (TypedDependency TDY1 : allTypedDependencies1) { - IndexedWord dep = TDY1.dep(); - IndexedWord gov = TDY1.gov(); - GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep); - if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { - score += 900; - //System.out.println("grammaticalRelation applicable: " + score + "\n"); + GrammaticalStructure gs1 = gsf.newGrammaticalStructure(sentenceConstituencyParse1); + Collection allTypedDependencies1 = gs1.allTypedDependencies(); + for (TypedDependency TDY1 : allTypedDependencies1) { + IndexedWord dep = TDY1.dep(); + IndexedWord gov = TDY1.gov(); + GrammaticalRelation grammaticalRelation = gs.getGrammaticalRelation(gov, dep); + if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { + score += 900; + } + GrammaticalRelation reln = TDY1.reln(); + if (reln.isApplicable(sentenceConstituencyParse)) { + score += 256; + } } - GrammaticalRelation reln = TDY1.reln(); - if (reln.isApplicable(sentenceConstituencyParse)) { - score += 256; + for (TypedDependency TDY : allTypedDependencies) { + IndexedWord dep = TDY.dep(); + IndexedWord gov = TDY.gov(); + GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep); + if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { + score += 900; + } + GrammaticalRelation reln = TDY.reln(); + if 
(reln.isApplicable(sentenceConstituencyParse1)) { + score += 256; + } } - } - for (TypedDependency TDY : allTypedDependencies) { - IndexedWord dep = TDY.dep(); - IndexedWord gov = TDY.gov(); - GrammaticalRelation grammaticalRelation = gs1.getGrammaticalRelation(gov, dep); - if (grammaticalRelation.isApplicable(sentenceConstituencyParse)) { - score += 900; - //System.out.println("grammaticalRelation applicable: " + score + "\n"); - } - GrammaticalRelation reln = TDY.reln(); - if (reln.isApplicable(sentenceConstituencyParse1)) { - score += 256; - } - } - for (CoreLabel LBW : sentenceConstituencyParse.taggedLabeledYield()) { - for (CoreLabel LBW1 : sentenceConstituencyParse1.taggedLabeledYield()) { - if (LBW.lemma().equals(LBW1.lemma()) && !filerTreeContent.contains(LBW.lemma())) { + AtomicInteger runCount1 = new AtomicInteger(0); + sentenceConstituencyParse.taggedLabeledYield().forEach((LBW) -> { + sentenceConstituencyParse1.taggedLabeledYield().stream().filter((LBW1) -> (LBW.lemma().equals(LBW1.lemma()) + && !filerTreeContent.contains(LBW.lemma()))).map((_item) -> { filerTreeContent.add(LBW.lemma()); - score += 1500; - //System.out.println("lemma: " + LBW.lemma() + "\n"); - } + return _item; + }).forEachOrdered((_item) -> { + runCount1.getAndIncrement(); + }); + }); + score += runCount1.get() * 1500; + } + } + Annotation annotationSentiment1 = pipelineSentiment.process(str); + List simpleSMXlist = new ArrayList(); + List simpleSMXlistVector = new ArrayList(); + List sentiment1 = new ArrayList(); + List sentiment2 = new ArrayList(); + for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); + sentiment1.add(RNNCoreAnnotations.getPredictedClass(tree)); + SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); + SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); + simpleSMXlist.add(predictions); + simpleSMXlistVector.add(nodeVector); + } + annotationSentiment1 = pipelineSentiment.process(str1); + for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); + sentiment2.add(RNNCoreAnnotations.getPredictedClass(tree)); + SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); + SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); + score = simpleSMXlist.stream().map((simpleSMX) -> predictions.dot(simpleSMX) * 100).map((dot) -> dot > 50 ? 
dot - 50 : 50 - dot).map((subtracter) -> { + subtracter *= 25; + return subtracter; + }).map((subtracter) -> subtracter).reduce(score, (accumulator, _item) -> accumulator - _item); + for (SimpleMatrix simpleSMX : simpleSMXlistVector) { + double dot = nodeVector.dot(simpleSMX); + double elementSum = nodeVector.kron(simpleSMX).elementSum(); + elementSum = Math.round(elementSum * 100.0) / 100.0; + if (dot < 0.1) { + score += 256; + } + if (elementSum < 0.1 && elementSum > 0.0) { + score += 1300; + } else if (elementSum > 0.1 && elementSum < 1.0) { + score -= 1100; + } else { + score -= 1424; } } } - } - Annotation annotationSentiment1 = pipelineSentiment.process(str); - List simpleSMXlist = new ArrayList(); - List simpleSMXlistVector = new ArrayList(); - for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { - Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); - SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); - SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); - simpleSMXlist.add(predictions); - simpleSMXlistVector.add(nodeVector); - } - annotationSentiment1 = pipelineSentiment.process(str1); - for (CoreMap sentence : annotationSentiment1.get(CoreAnnotations.SentencesAnnotation.class)) { - Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); - SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); - SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree); - for (SimpleMatrix simpleSMX : simpleSMXlist) { - double dot = predictions.dot(simpleSMX) * 100; - //System.out.println("dot value: " + dot + "\n"); - double subtracter = dot > 50 ? dot - 50 : 50 - dot; - System.out.println("score pre dot: " + score + "\nsubtracter: " + subtracter + "\n"); - subtracter *= 25; - while (subtracter > 0) { - subtracter--; - score--; + score -= (sentiment1.size() > sentiment2.size() ? sentiment1.size() - sentiment2.size() : sentiment2.size() - sentiment1.size()) * 500; + DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter(); + List classifyRaw1 = classifier.classifyRaw(str, readerAndWriter); + List classifyRaw2 = classifier.classifyRaw(str1, readerAndWriter); + score -= (classifyRaw1.size() > classifyRaw2.size() ? 
classifyRaw1.size() - classifyRaw2.size() : classifyRaw2.size() - classifyRaw1.size()) * 200; + Annotation annotationSentiment = pipelineSentiment.process(str); + int mainSentiment1 = 0; + int longest1 = 0; + int mainSentiment2 = 0; + int longest2 = 0; + for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); + int sentiment = RNNCoreAnnotations.getPredictedClass(tree); + String partText = sentence.toString(); + if (partText.length() > longest1) { + mainSentiment1 = sentiment; + longest1 = partText.length(); } - System.out.println("score post dot: " + score + "\n"); } - for (SimpleMatrix simpleSMX : simpleSMXlistVector) { - double dot = nodeVector.dot(simpleSMX); - double elementSum = nodeVector.kron(simpleSMX).elementSum(); - elementSum = Math.round(elementSum * 100.0) / 100.0; - System.out.println("kron SMX elementSum: " + elementSum + "\n"); - if (dot < 0.1) { - // System.out.println("\ndot VECTOR: " + dot + "\n"); - score += 256; + annotationSentiment = pipelineSentiment.process(str1); + for (CoreMap sentence : annotationSentiment.get(CoreAnnotations.SentencesAnnotation.class)) { + Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); + int sentiment = RNNCoreAnnotations.getPredictedClass(tree); + SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); + String partText = sentence.toString(); + if (partText.length() > longest2) { + mainSentiment2 = sentiment; + longest2 = partText.length(); } - if (elementSum < 0.1 && elementSum > 0.0) { - score += 1300; - } else if (elementSum > 0.1 && elementSum < 1.0) { - score -= 1100; + } + if (longest1 != longest2) { + long deffLongest = longest1 > longest2 ? longest1 : longest2; + long deffshorter = longest1 < longest2 ? 
longest1 : longest2;
+                if (deffLongest >= (deffshorter * 2) - 1 && deffLongest - deffshorter <= 45) {
+                    score += (deffLongest - deffshorter) * 200;
+                } else if (mainSentiment1 != mainSentiment2 && deffLongest - deffshorter > 20 && deffLongest - deffshorter < 45) {
+                    score += (deffLongest - deffshorter) * 200;
                 } else {
-                    score -= 1424;
+                    score -= (deffLongest - deffshorter) * 50;
                 }
             }
+            LevenshteinDistance leven = new LevenshteinDistance(str, str1);
+            int SentenceScoreDiff = leven.computeLevenshteinDistance();
+            SentenceScoreDiff *= 15;
+            score -= SentenceScoreDiff;
+            System.out.println("Final current score: " + score + "\nSentence 1: " + str + "\nSentence 2: " + str1 + "\n");
+            smxParam.setDistance(score);
+        } catch (Exception ex) {
+            System.out.println("ex: " + ex.getMessage() + "\n");
+            smxParam.setDistance(-1000);
+            return smxParam;
         }
-        int SentenceScoreDiff = LevenshteinDistance.computeLevenshteinDistance(str, str1);
-        SentenceScoreDiff *= 15;
-        // System.out.println("score pre levenhstein substract: " + score + "\nSentenceScoreDiff: " + SentenceScoreDiff + "\n");
-        while (SentenceScoreDiff > 0) {
-            SentenceScoreDiff--;
-            score--;
-        }
-        System.out.println("Final current score: " + score + "\nSentences: " + str + "\n" + str1 + "\n\n\n");
-        return score;
+        return smxParam;
     }
+
 }
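[Editor's note] The sentiment-vector arithmetic in call() above is dense; restated as a standalone helper it reads as follows. This is a condensed restatement of the committed scoring rules, not a drop-in replacement — predictionsA/B stand for RNNCoreAnnotations.getPredictions(...) class-probability vectors and nodeVectorA/B for the corresponding node vectors:

    import org.ejml.simple.SimpleMatrix;

    public final class VectorScoreSketch {

        // Closer prediction vectors cost less; near-zero kron element sums are
        // rewarded, mirroring the constants used in SentimentAnalyzerTest.call().
        public static double vectorScore(SimpleMatrix predictionsA, SimpleMatrix predictionsB,
                                         SimpleMatrix nodeVectorA, SimpleMatrix nodeVectorB) {
            double score = 0.0;
            double dot = predictionsA.dot(predictionsB) * 100;
            score -= (dot > 50 ? dot - 50 : 50 - dot) * 25; // distance from a 50/50 overlap
            double elementSum = Math.round(nodeVectorA.kron(nodeVectorB).elementSum() * 100.0) / 100.0;
            if (nodeVectorA.dot(nodeVectorB) < 0.1) {
                score += 256;
            }
            if (elementSum < 0.1 && elementSum > 0.0) {
                score += 1300;
            } else if (elementSum > 0.1 && elementSum < 1.0) {
                score -= 1100;
            } else {
                score -= 1424;
            }
            return score;
        }
    }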
diff --git a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
index 7900fadc..67851943 100644
--- a/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
+++ b/ArtificialAutism/src/main/java/PresentationLayer/DiscordHandler.java
@@ -6,10 +6,9 @@
 ps ax | grep EventNotfierDiscordBot-1.0
 kill $pid (number)
-nohup screen -d -m -S nonRoot java -Xmx5048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
-nohup screen -d -m -S nonRoot java -Xmx4048M -jar /home/Artificial_Autism/ArtificialAutism-1.0.jar
+nohup screen -d -m -S nonroot java -Xmx6048M -jar /home/javatests/ArtificialAutism-1.0.jar
+nohup screen -d -m -S nonroot java -Xmx4048M -jar /home/javatests/ArtificialAutism-1.0.jar
-nohup screen -d -m -S gameservers java -Xmx2450M -jar /home/gameservers/ArtificialAutism/ArtificialAutism-1.0.jar
 screen -ls (number1)
 screen -X -S (number1) quit
 */
@@ -20,7 +19,6 @@ package PresentationLayer;
 import FunctionLayer.CustomError;
 import FunctionLayer.MYSQLDatahandler;
 import FunctionLayer.MessageResponseHandler;
-import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
 import java.io.IOException;
 import java.sql.SQLException;
 import java.util.List;
@@ -37,7 +35,7 @@ import org.javacord.api.entity.user.User;
 public class DiscordHandler {
 
     public static void main(String[] args) {
-        SentimentAnalyzerTest.shiftReduceParserInitiate();
+        MYSQLDatahandler.shiftReduceParserInitiate();
         new Thread(() -> {
             try {
                 MYSQLDatahandler.instance.initiateMYSQL();
@@ -76,8 +74,8 @@ public class DiscordHandler {
             List<User> userlist = event.getMessage().getMentionedUsers();
             String strresult = event.getMessage().toString();
             if (userlist != null) {
-                for (int i = 0; i < userlist.size(); i++) {
-                    strresult = strresult.replace(userlist.get(i).getIdAsString(), "");
+                for (User user : userlist) {
+                    strresult = strresult.replace(user.getIdAsString(), "");
                 }
             }
             MessageResponseHandler.getMessage(strresult);
@@ -88,14 +86,13 @@ public class DiscordHandler {
                 Logger.getLogger(DiscordHandler.class.getName()).log(Level.SEVERE, null, ex);
             }
         }
-        //contains to specify one channel where bot may always type
         if (event.getMessage().getMentionedUsers().contains(api.getYourself())
                 || event.getServerTextChannel().get().toString().contains("minor-test")) {
             String ResponseStr;
             try {
                 ResponseStr = MessageResponseHandler.selectReponseMessage(event.getMessage().toString());
                 if (!ResponseStr.isEmpty()) {
-                    System.out.print("\nResponseStr3: " + ResponseStr);
+                    System.out.print("\nResponseStr3: " + ResponseStr + "\n");
                     event.getChannel().sendMessage(ResponseStr);
                 }
             } catch (CustomError ex) {
diff --git a/ArtificialAutism/target/ArtificialAutism-1.0.jar b/ArtificialAutism/target/ArtificialAutism-1.0.jar
index 2438a3c4..4fb774d3 100644
Binary files a/ArtificialAutism/target/ArtificialAutism-1.0.jar and b/ArtificialAutism/target/ArtificialAutism-1.0.jar differ
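[Editor's note] End to end, the committed scoring path is: load the Stanford models once via MYSQLDatahandler.shiftReduceParserInitiate(), then submit sentence pairs as Callables and read the score off the returned SimilarityMatrix. A minimal driver under those assumptions (the model jars must be on the classpath; the two sentences are illustrative):

    import FunctionLayer.MYSQLDatahandler;
    import FunctionLayer.SimilarityMatrix;
    import FunctionLayer.StanfordParser.SentimentAnalyzerTest;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public final class ScorePairSketch {

        public static void main(String[] args) throws Exception {
            // load tagger, parsers, NER and sentiment models once, up front
            MYSQLDatahandler.shiftReduceParserInitiate();
            String a = "how are you today";
            String b = "how are you doing today";
            ExecutorService pool = Executors.newSingleThreadExecutor();
            Future<SimilarityMatrix> f = pool.submit(
                    new SentimentAnalyzerTest(a, b, new SimilarityMatrix(a, b)));
            System.out.println("score: " + f.get().getDistance());
            pool.shutdown();
        }
    }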