fix pre-Discord-launch calculations, implement the JMWE CoreNLP extension, hopefully fix repetitive data in multithreading
parent 43da2dd5d5
commit 232190d076
| @@ -50,7 +50,7 @@ import java.util.logging.Logger; | ||||
|  * @author install1 | ||||
|  */ | ||||
| public class MYSQLDatahandler { | ||||
|      | ||||
| 
 | ||||
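|     // cache-refresh windows, expressed in seconds: 6 minutes and 10 hours respectively | ||||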
|     public static final long EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(6, TimeUnit.MINUTES); | ||||
|     public static final long EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS); | ||||
|     public static MYSQLDatahandler instance = new MYSQLDatahandler(); | ||||
| @@ -60,107 +60,119 @@ public class MYSQLDatahandler { | ||||
|     private LinkedHashMap<String, LinkedHashMap<String, Double>> lHMSMX = new LinkedHashMap(); | ||||
|     private final Stopwatch stopwatch; | ||||
|     private final Stopwatch stopwatch1; | ||||
|     private static String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"; | ||||
|     private ExecutorService executor; | ||||
| 
 | ||||
|     private static String shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"; | ||||
|     private static String sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; | ||||
|     private static String lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; | ||||
|     private static String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; | ||||
|     private static String NERModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz"; | ||||
|     private static String nerModel = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz"; | ||||
|     private static String jmweIndexData = "/home/javatests/lib/mweindex_wordnet3.0_semcor1.6.data"; // ./lib/mweindex_wordnet3.0_semcor1.6.data | ||||
|     private static MaxentTagger tagger; | ||||
|     private static ShiftReduceParser model; | ||||
|     private static String[] options = {"-maxLength", "100"}; | ||||
|     private static Properties props = new Properties(); | ||||
|     private static Properties propsSentiment = new Properties(); | ||||
|     private static Properties propsJMWE = new Properties(); | ||||
|     private static GrammaticalStructureFactory gsf; | ||||
|     private static LexicalizedParser lp; | ||||
|     private static TreebankLanguagePack tlp; | ||||
|     private static AbstractSequenceClassifier<CoreLabel> classifier; | ||||
|     private ExecutorService executor; | ||||
|     // set up Stanford CoreNLP pipeline | ||||
|     private static StanfordCoreNLP pipeline; | ||||
|     private static StanfordCoreNLP pipelineSentiment; | ||||
|      | ||||
|     private static StanfordCoreNLP pipelineJMWE; | ||||
| 
 | ||||
|     public static AbstractSequenceClassifier<CoreLabel> getClassifier() { | ||||
|         return classifier; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static void setClassifier(AbstractSequenceClassifier<CoreLabel> classifier) { | ||||
|         MYSQLDatahandler.classifier = classifier; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public void instantiateExecutor() { | ||||
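|         // work-stealing pool sized to the available cores; the trailing 'true' enables asyncMode (FIFO scheduling of submitted tasks) | ||||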
|         this.executor = new ForkJoinPool(Runtime.getRuntime().availableProcessors(), | ||||
|                 ForkJoinPool.defaultForkJoinWorkerThreadFactory, | ||||
|                 null, true); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public MYSQLDatahandler() { | ||||
|         this.stopwatch = Stopwatch.createUnstarted(); | ||||
|         this.stopwatch1 = Stopwatch.createStarted(); | ||||
|         this.stringCache = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static void shiftReduceParserInitiate() { | ||||
|         try { | ||||
|             classifier = CRFClassifier.getClassifierNoExceptions(NERModel); | ||||
|             classifier = CRFClassifier.getClassifierNoExceptions(nerModel); | ||||
|         } catch (ClassCastException ex) { | ||||
|             Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); | ||||
|         } | ||||
|         model = ShiftReduceParser.loadModel(modelPath, options); | ||||
|         model = ShiftReduceParser.loadModel(shiftReduceParserPath, options); | ||||
|         tagger = new MaxentTagger(taggerPath); | ||||
|         lp = LexicalizedParser.loadModel(lexParserEnglishRNN, options); | ||||
|         tlp = lp.getOp().langpack(); | ||||
|         gsf = tlp.grammaticalStructureFactory(); | ||||
|         // set up pipeline properties | ||||
|         props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse"); | ||||
|         props.setProperty("parse.model", modelPath); | ||||
|         props.setProperty("parse.model", shiftReduceParserPath); | ||||
|         props.setProperty("parse.maxlen", "100"); | ||||
|         props.setProperty("tokenize.maxlen", "100"); | ||||
|         props.setProperty("ssplit.maxlen", "100"); | ||||
|         props.setProperty("lemma.maxlen", "100"); | ||||
|         props.setProperty("parse.binaryTrees", "true"); | ||||
|         propsSentiment.setProperty("annotators", "tokenize, ssplit, parse, sentiment"); | ||||
|         props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse"); | ||||
|         propsSentiment.setProperty("parse.model", lexParserEnglishRNN); | ||||
|         propsSentiment.setProperty("ner.model", nerModel); | ||||
|         propsSentiment.setProperty("sentiment.model", sentimentModel); | ||||
|         propsSentiment.setProperty("sentiment.maxlen", "100"); | ||||
|         propsSentiment.setProperty("parse.maxlen", "100"); | ||||
|         propsSentiment.setProperty("tokenize.maxlen", "100"); | ||||
|         propsSentiment.setProperty("ssplit.maxlen", "100"); | ||||
|         propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,depparse,sentiment"); //coref too expensive memorywise | ||||
|         propsJMWE.setProperty("customAnnotatorClass.jmwe", "edu.stanford.nlp.pipeline.JMWEAnnotator"); | ||||
|         propsJMWE.setProperty("customAnnotatorClass.jmwe.verbose", "false"); | ||||
|         propsJMWE.setProperty("customAnnotatorClass.jmwe.underscoreReplacement", "-"); | ||||
|         propsJMWE.setProperty("customAnnotatorClass.jmwe.indexData", jmweIndexData); | ||||
|         propsJMWE.setProperty("customAnnotatorClass.jmwe.detector", "Exhaustive"); | ||||
|         //"Consecutive", "Exhaustive", "ProperNouns", "Complex" and "CompositeConsecutiveProperNouns" | ||||
|         propsJMWE.setProperty("annotators", "tokenize, ssplit, pos, lemma, jmwe"); | ||||
|         // set up pipeline | ||||
|         pipeline = new StanfordCoreNLP(props); | ||||
|         pipelineSentiment = new StanfordCoreNLP(propsSentiment); | ||||
|         pipelineJMWE = new StanfordCoreNLP(propsJMWE); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static StanfordCoreNLP getPipelineJMWE() { | ||||
|         return pipelineJMWE; | ||||
|     } | ||||
| 
 | ||||
|     public static GrammaticalStructureFactory getGsf() { | ||||
|         return gsf; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static StanfordCoreNLP getPipeline() { | ||||
|         return pipeline; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static StanfordCoreNLP getPipelineSentiment() { | ||||
|         return pipelineSentiment; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static MaxentTagger getTagger() { | ||||
|         return tagger; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static ShiftReduceParser getModel() { | ||||
|         return model; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     private Map<Integer, String> getCache() throws SQLException, IOException, CustomError { | ||||
|         return DataMapper.getAllStrings(); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public int getlHMSMXSize() { | ||||
|         return lHMSMX.size(); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public int getstringCacheSize() { | ||||
|         return stringCache.size(); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public void initiateMYSQL() throws SQLException, IOException { | ||||
|         try { | ||||
|             DataMapper.createTables(); | ||||
| @@ -171,7 +183,7 @@ public class MYSQLDatahandler { | ||||
|                     .getName()).log(Level.SEVERE, null, ex); | ||||
|         } | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public synchronized void checkIfUpdateMatrixes() { | ||||
|         refreshMatrixFromDB = false; | ||||
|         int counter = 0; | ||||
| @@ -208,9 +220,8 @@ public class MYSQLDatahandler { | ||||
|                 selectUpdate = secondaryIterator; | ||||
|                 secondaryIterator++; | ||||
|             } | ||||
|             int beginindex = selectUpdate; | ||||
|             ConcurrentMap<Integer, String> strIndexNavigator = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             String get = stringCachelocal.getOrDefault(beginindex, null); | ||||
|             String get = stringCachelocal.getOrDefault(selectUpdate, null); | ||||
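|             // fall back to a random cached string when the selected index has no cache entry | ||||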
|             if (get == null) { | ||||
|                 get = stringCachelocal.get(new Random().nextInt(stringCachelocal.size() - 1)); | ||||
|             } | ||||
| @@ -220,7 +231,7 @@ public class MYSQLDatahandler { | ||||
|             strIndexNavigator.values().forEach((str) -> { | ||||
|                 stringCachelocal.values().stream().filter((str1) -> (!str.equals(str1))).forEachOrdered((str1) -> { | ||||
|                     boolean present = false; | ||||
|                     LinkedHashMap<String, Double> orDefault = LHMSMXLocal.getOrDefault(str, null); | ||||
|                     LinkedHashMap<String, Double> orDefault = lHMSMX.getOrDefault(str, null); | ||||
|                     if (orDefault != null) { | ||||
|                         Double orDefault1 = orDefault.getOrDefault(str1, null); | ||||
|                         if (orDefault1 != null) { | ||||
| @@ -229,7 +240,7 @@ public class MYSQLDatahandler { | ||||
|                         } | ||||
|                     } | ||||
|                     if (!present) { | ||||
|                         orDefault = LHMSMXLocal.getOrDefault(str1, null); | ||||
|                         orDefault = lHMSMX.getOrDefault(str1, null); | ||||
|                         if (orDefault != null) { | ||||
|                             Double orDefault1 = orDefault.getOrDefault(str, null); | ||||
|                             if (orDefault1 != null) { | ||||
| @@ -242,12 +253,12 @@ public class MYSQLDatahandler { | ||||
|                         SimilarityMatrix SMX = new SimilarityMatrix(str, str1); | ||||
|                         Callable<SimilarityMatrix> worker = new SentimentAnalyzerTest(str, str1, SMX); | ||||
|                         futures.put(futures.size() + 1, executor.submit(worker)); | ||||
|                         LinkedHashMap<String, Double> orDefault1 = LHMSMXLocal.getOrDefault(str, null); | ||||
|                         LinkedHashMap<String, Double> orDefault1 = lHMSMX.getOrDefault(str, null); | ||||
|                         if (orDefault1 == null) { | ||||
|                             orDefault1 = new LinkedHashMap<String, Double>(); | ||||
|                         } | ||||
|                         orDefault1.put(str1, 0.0); | ||||
|                         LHMSMXLocal.put(str, orDefault1); | ||||
|                         lHMSMX.put(str, orDefault1); | ||||
|                     } | ||||
|                 }); | ||||
|             }); | ||||
| @@ -262,12 +273,11 @@ public class MYSQLDatahandler { | ||||
|                     Logger.getLogger(MYSQLDatahandler.class.getName()).log(Level.SEVERE, null, ex); | ||||
|                 } | ||||
|                 LinkedHashMap<String, Double> getFuture = lHMSMX.getOrDefault(SMX.getPrimaryString(), null); | ||||
|                 if (getFuture == null) { | ||||
|                     getFuture = new LinkedHashMap<String, Double>(); | ||||
|                 if (getFuture != null) { | ||||
|                     getFuture.put(SMX.getSecondaryString(), SMX.getDistance()); | ||||
|                     lHMSMX.put(SMX.getPrimaryString(), getFuture); | ||||
|                     matrixUpdateList.put(matrixUpdateList.size() + 1, SMX); | ||||
|                 } | ||||
|                 getFuture.put(SMX.getSecondaryString(), SMX.getDistance()); | ||||
|                 lHMSMX.put(SMX.getPrimaryString(), getFuture); | ||||
|                 matrixUpdateList.put(matrixUpdateList.size() + 1, SMX); | ||||
|             } | ||||
|             try { | ||||
|                 if (!matrixUpdateList.isEmpty()) { | ||||
| @@ -280,7 +290,7 @@ public class MYSQLDatahandler { | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public synchronized void checkIfUpdateStrings() throws CustomError { | ||||
|         if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning()) { | ||||
|             ConcurrentMap<Integer, String> str = MessageResponseHandler.getStr(); | ||||
| @@ -307,7 +317,7 @@ public class MYSQLDatahandler { | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public synchronized String getResponseMsg(String str) throws CustomError { | ||||
|         str = str.trim(); | ||||
|         if (str.startsWith("<@")) { | ||||
| @@ -380,7 +390,7 @@ public class MYSQLDatahandler { | ||||
|                 + "\nScore: " + SMXreturn.getDistance()); | ||||
|         return SMXreturn.getSecondaryString(); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public String mostSimilar(String toBeCompared, ConcurrentMap<Integer, String> concurrentStrings) { | ||||
|         int minDistance = 8; | ||||
|         String similar = ""; | ||||
| @@ -403,7 +413,7 @@ public class MYSQLDatahandler { | ||||
|         } | ||||
|         return similar; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static ConcurrentMap<Integer, String> cutContent(ConcurrentMap<Integer, String> str) { | ||||
|         ConcurrentMap<Integer, String> returnlist = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|         for (String str1 : str.values()) { | ||||
| @@ -415,7 +425,7 @@ public class MYSQLDatahandler { | ||||
|         } | ||||
|         return returnlist; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public static ConcurrentMap<Integer, String> filterContent(ConcurrentMap<Integer, String> str) { | ||||
|         ConcurrentMap<Integer, String> strlistreturn = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|         for (String str1 : str.values()) { | ||||
| @@ -523,7 +533,7 @@ public class MYSQLDatahandler { | ||||
|         } | ||||
|         return strlistreturn; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     private ConcurrentMap<Integer, String> removeSlacks(ConcurrentMap<Integer, String> str) { | ||||
|         ShiftReduceParser model = getModel(); | ||||
|         MaxentTagger tagger = getTagger(); | ||||
| @@ -590,7 +600,7 @@ public class MYSQLDatahandler { | ||||
|         } | ||||
|         return strreturn; | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     private ConcurrentMap<Integer, String> verifyCalculationFitness(ConcurrentMap<Integer, String> strmap) { | ||||
|         ConcurrentMap<Integer, String> returnmap = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|         ConcurrentMap<Integer, String> allStrings = stringCache; | ||||
|  | ||||
| @@ -4,11 +4,15 @@ import FunctionLayer.LevenshteinDistance; | ||||
| import FunctionLayer.MYSQLDatahandler; | ||||
| import FunctionLayer.SimilarityMatrix; | ||||
| import com.google.common.collect.MapMaker; | ||||
| import edu.mit.jmwe.data.IMWE; | ||||
| import edu.mit.jmwe.data.IMWEDesc; | ||||
| import edu.mit.jmwe.data.IToken; | ||||
| import edu.stanford.nlp.ie.AbstractSequenceClassifier; | ||||
| import edu.stanford.nlp.ling.CoreAnnotations; | ||||
| import edu.stanford.nlp.ling.CoreLabel; | ||||
| import edu.stanford.nlp.ling.HasWord; | ||||
| import edu.stanford.nlp.ling.IndexedWord; | ||||
| import edu.stanford.nlp.ling.JMWEAnnotation; | ||||
| import edu.stanford.nlp.ling.TaggedWord; | ||||
| import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; | ||||
| import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; | ||||
| @@ -48,14 +52,16 @@ import org.ejml.simple.SimpleMatrix; | ||||
|  */ | ||||
| public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> { | ||||
| 
 | ||||
|     private SimilarityMatrix smxParam; | ||||
| 
 | ||||
|     private String str; | ||||
|     private String str1; | ||||
|     private SimilarityMatrix smxParam; | ||||
|     private ShiftReduceParser model; | ||||
|     private MaxentTagger tagger; | ||||
|     private GrammaticalStructureFactory gsf; | ||||
|     private StanfordCoreNLP pipeline; | ||||
|     private StanfordCoreNLP pipelineSentiment; | ||||
|     private StanfordCoreNLP pipelineJMWE; | ||||
|     private AbstractSequenceClassifier classifier; | ||||
| 
 | ||||
|     public SentimentAnalyzerTest(String str, String str1, SimilarityMatrix smxParam) { | ||||
| @@ -66,6 +72,7 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> { | ||||
|         tagger = MYSQLDatahandler.getTagger(); | ||||
|         pipeline = MYSQLDatahandler.getPipeline(); | ||||
|         pipelineSentiment = MYSQLDatahandler.getPipelineSentiment(); | ||||
|         pipelineJMWE = MYSQLDatahandler.getPipelineJMWE(); | ||||
|         gsf = MYSQLDatahandler.getGsf(); | ||||
|         classifier = MYSQLDatahandler.getClassifier(); | ||||
|     } | ||||
| @@ -231,6 +238,7 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> { | ||||
|                 Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); | ||||
|                 int sentiment = RNNCoreAnnotations.getPredictedClass(tree); | ||||
|                 String partText = sentence.toString(); | ||||
|                 SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree); | ||||
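|                 // predictions holds the per-class probability vector produced by the sentiment RNN for this sentence | ||||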
|                 if (partText.length() > longest1) { | ||||
|                     mainSentiment1 = sentiment; | ||||
|                     longest1 = partText.length(); | ||||
| @@ -258,6 +266,155 @@ public class SentimentAnalyzerTest implements Callable<SimilarityMatrix> { | ||||
|                     score -= (deffLongest - deffshorter) * 50; | ||||
|                 } | ||||
|             } | ||||
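|             // JMWE pass: annotate both strings and compare their multi-word-expression features | ||||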
|             Annotation jmweStrAnnotation = new Annotation(str); | ||||
|             pipelineJMWE.annotate(jmweStrAnnotation); | ||||
|             List<CoreMap> sentences = jmweStrAnnotation.get(CoreAnnotations.SentencesAnnotation.class); | ||||
|             int tokensCounter1 = 0; | ||||
|             int tokensCounter2 = 0; | ||||
|             int anotatorcounter1 = 0; | ||||
|             int anotatorcounter2 = 0; | ||||
|             int inflectedCounterPositive1 = 0; | ||||
|             int inflectedCounterPositive2 = 0; | ||||
|             int inflectedCounterNegative = 0; | ||||
|             int MarkedContinuousCounter1 = 0; | ||||
|             int MarkedContinuousCounter2 = 0; | ||||
|             int UnmarkedPatternCounter = 0; | ||||
|             ConcurrentMap<Integer, String> ITokenMapTag1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> ITokenMapTag2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenStems1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenStems2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenForm1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenForm2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenGetEntry1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenGetEntry2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenGetiPart1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenGetiPart2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenEntryPOS1 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             ConcurrentMap<Integer, String> strTokenEntryPOS2 = new MapMaker().concurrencyLevel(2).makeMap(); | ||||
|             for (CoreMap sentence : sentences) { | ||||
|                 for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) { | ||||
|                     if (token.isInflected()) { | ||||
|                         inflectedCounterPositive1++; | ||||
|                     } else { | ||||
|                         inflectedCounterNegative++; | ||||
|                     } | ||||
|                     strTokenForm1.put(strTokenForm1.size() + 1, token.getForm()); | ||||
|                     strTokenGetEntry1.put(strTokenGetEntry1.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1)); | ||||
|                     Collection<IMWEDesc.IPart> values = token.getPartMap().values(); | ||||
|                     IMWEDesc entry = token.getEntry(); | ||||
|                     MarkedContinuousCounter1 += entry.getMarkedContinuous(); | ||||
|                     UnmarkedPatternCounter += entry.getUnmarkedPattern(); | ||||
|                     for (IMWEDesc.IPart iPart : values) { | ||||
|                         strTokenGetiPart1.put(strTokenGetiPart1.size() + 1, iPart.getForm()); | ||||
|                     } | ||||
|                     for (String strPostPrefix : entry.getPOS().getPrefixes()) { | ||||
|                         strTokenEntryPOS1.put(strTokenEntryPOS1.size() + 1, strPostPrefix); | ||||
|                     } | ||||
|                     for (IToken tokens : token.getTokens()) { | ||||
|                         ITokenMapTag1.put(ITokenMapTag1.size() + 1, tokens.getTag()); | ||||
|                         for (String strtoken : tokens.getStems()) { | ||||
|                             strTokenStems1.put(strTokenStems1.size() + 1, strtoken); | ||||
|                         } | ||||
|                     } | ||||
|                     tokensCounter1++; | ||||
|                 } | ||||
|                 anotatorcounter1++; | ||||
|             } | ||||
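|             // second pass: extract the same MWE features from str1 | ||||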
|             jmweStrAnnotation = new Annotation(str1); | ||||
|             pipelineJMWE.annotate(jmweStrAnnotation); | ||||
|             sentences = jmweStrAnnotation.get(CoreAnnotations.SentencesAnnotation.class); | ||||
|             for (CoreMap sentence : sentences) { | ||||
|                 for (IMWE<IToken> token : sentence.get(JMWEAnnotation.class)) { | ||||
|                     if (token.isInflected()) { | ||||
|                         inflectedCounterPositive2++; | ||||
|                     } else { | ||||
|                         inflectedCounterNegative--; | ||||
|                     } | ||||
|                     strTokenForm2.put(strTokenForm2.size() + 1, token.getForm()); | ||||
|                     strTokenGetEntry2.put(strTokenGetEntry2.size() + 1, token.getEntry().toString().substring(token.getEntry().toString().length() - 1)); | ||||
|                     Collection<IMWEDesc.IPart> values = token.getPartMap().values(); | ||||
|                     IMWEDesc entry = token.getEntry(); | ||||
|                     MarkedContinuousCounter2 += entry.getMarkedContinuous(); | ||||
|                     UnmarkedPatternCounter += entry.getUnmarkedPattern(); | ||||
|                     for (IMWEDesc.IPart iPart : values) { | ||||
|                         strTokenGetiPart2.put(strTokenGetiPart2.size() + 1, iPart.getForm()); | ||||
|                     } | ||||
|                     for (String strPostPrefix : entry.getPOS().getPrefixes()) { | ||||
|                         strTokenEntryPOS2.put(strTokenEntryPOS2.size() + 1, strPostPrefix); | ||||
|                     } | ||||
|                     for (IToken tokens : token.getTokens()) { | ||||
|                         ITokenMapTag2.put(ITokenMapTag2.size() + 1, tokens.getTag()); | ||||
|                         for (String strtoken : tokens.getStems()) { | ||||
|                             strTokenStems2.put(strTokenStems2.size() + 1, strtoken); | ||||
|                         } | ||||
|                     } | ||||
|                     tokensCounter2++; | ||||
|                 } | ||||
|                 anotatorcounter2++; | ||||
|             } | ||||
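|             // reward overlapping MWE features between the two strings with fixed weight bumps | ||||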
|             for (String strTokenPos1 : strTokenEntryPOS1.values()) { | ||||
|                 for (String strTokenPos2 : strTokenEntryPOS2.values()) { | ||||
|                     if (strTokenPos1.equals(strTokenPos2)) { | ||||
|                         score += 500; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             score += UnmarkedPatternCounter * 1600; | ||||
|             if (MarkedContinuousCounter1 > 0 && MarkedContinuousCounter2 > 0) { | ||||
|                 score += MarkedContinuousCounter1 > MarkedContinuousCounter2 ? (MarkedContinuousCounter1 - MarkedContinuousCounter2) * 500 | ||||
|                         : (MarkedContinuousCounter2 - MarkedContinuousCounter1) * 500; | ||||
|             } | ||||
|             for (String strTokeniPart1 : strTokenGetiPart1.values()) { | ||||
|                 for (String strTokeniPart2 : strTokenGetiPart2.values()) { | ||||
|                     if (strTokeniPart1.equals(strTokeniPart2)) { | ||||
|                         score += 400; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             for (String strTokenEntry1 : strTokenGetEntry1.values()) { | ||||
|                 for (String strTokenEntry2 : strTokenGetEntry2.values()) { | ||||
|                     if (strTokenEntry1.equals(strTokenEntry2)) { | ||||
|                         score += 2500; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             for (String strmapTag : ITokenMapTag1.values()) { | ||||
|                 for (String strmapTag1 : ITokenMapTag2.values()) { | ||||
|                     if (strmapTag.equals(strmapTag1)) { | ||||
|                         score += 1450; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             for (String strTokenForm1itr1 : strTokenForm1.values()) { | ||||
|                 for (String strTokenForm1itr2 : strTokenForm2.values()) { | ||||
|                     if (strTokenForm1itr1.equals(strTokenForm1itr2)) { | ||||
|                         score += 2600; | ||||
|                     } else if (strTokenForm1itr1.contains(strTokenForm1itr2)) { | ||||
|                         score += 500; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             for (String strTokenStem : strTokenStems1.values()) { | ||||
|                 for (String strTokenStem1 : strTokenStems2.values()) { | ||||
|                     if (strTokenStem.equals(strTokenStem1)) { | ||||
|                         score += 1500; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             if (inflectedCounterPositive1 + inflectedCounterPositive2 > inflectedCounterNegative && inflectedCounterNegative > 0) { | ||||
|                 score += (inflectedCounterPositive1 - inflectedCounterNegative) * 650; | ||||
|             } | ||||
|             if (inflectedCounterPositive1 > 0 && inflectedCounterPositive2 > 0) { | ||||
|                 score += ((inflectedCounterPositive1 + inflectedCounterPositive2) - inflectedCounterNegative) * 550; | ||||
|             } | ||||
|             if (anotatorcounter1 > 1 && anotatorcounter2 > 1) { | ||||
|                 score += (anotatorcounter1 + anotatorcounter2) * 400; | ||||
|             } | ||||
|             if (tokensCounter1 > 0 && tokensCounter2 > 0) { | ||||
|                 score += (tokensCounter1 + tokensCounter2) * 400; | ||||
|             } else { | ||||
|                 score -= tokensCounter1 >= tokensCounter2 ? (tokensCounter1 - tokensCounter2) * 500 : (tokensCounter2 - tokensCounter1) * 500; | ||||
|             } | ||||
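|             // raw edit distance between the two strings, scaled by 15 for the scoring that follows | ||||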
|             LevenshteinDistance leven = new LevenshteinDistance(str, str1); | ||||
|             int SentenceScoreDiff = leven.computeLevenshteinDistance(); | ||||
|             SentenceScoreDiff *= 15; | ||||
|  | ||||
| @@ -46,7 +46,7 @@ public class DiscordHandler { | ||||
|         MYSQLDatahandler.shiftReduceParserInitiate(); | ||||
|         MYSQLDatahandler.instance.instantiateExecutor(); | ||||
|         if (MYSQLDatahandler.instance.getstringCacheSize() != 0) { | ||||
|             while (MYSQLDatahandler.instance.getlHMSMXSize() * MYSQLDatahandler.instance.getlHMSMXSize() * 1.5 | ||||
|             while (MYSQLDatahandler.instance.getlHMSMXSize() * MYSQLDatahandler.instance.getlHMSMXSize() * 2 | ||||
|                     < (MYSQLDatahandler.instance.getstringCacheSize()  | ||||
|                     * MYSQLDatahandler.instance.getstringCacheSize()) | ||||
|                     - MYSQLDatahandler.instance.getstringCacheSize()) { | ||||
|  | ||||