/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package FunctionLayer

import DataLayer.DataMapper
import FunctionLayer.StanfordParser.SentimentAnalyzerTest
import FunctionLayer.StanfordParser.SentimentValueCache
import com.google.common.base.Stopwatch
import com.google.common.collect.MapMaker
import edu.stanford.nlp.ie.AbstractSequenceClassifier
import edu.stanford.nlp.ie.crf.CRFClassifier
import edu.stanford.nlp.ling.CoreLabel
import edu.stanford.nlp.parser.lexparser.LexicalizedParser
import edu.stanford.nlp.pipeline.Annotation
import edu.stanford.nlp.pipeline.CoreDocument
import edu.stanford.nlp.pipeline.StanfordCoreNLP
import edu.stanford.nlp.tagger.maxent.MaxentTagger
import edu.stanford.nlp.trees.GrammaticalStructureFactory
import edu.stanford.nlp.trees.TreebankLanguagePack
import kotlinx.coroutines.*
import java.io.IOException
import java.io.UnsupportedEncodingException
import java.net.*
import java.sql.SQLException
import java.util.*
import java.util.concurrent.ConcurrentMap
import java.util.concurrent.CountDownLatch
import java.util.concurrent.TimeUnit
import java.util.function.Consumer
import java.util.logging.Level
import java.util.logging.Logger
import kotlin.collections.ArrayList

/**
 * Central handler that boots the Stanford CoreNLP pipelines, keeps the
 * per-sentence annotation caches in sync with the MySQL string store, and
 * assembles chat responses by scoring sentence similarity.
 *
 * @author install1
 */
class Datahandler {

    private val stopwatch: Stopwatch

    /**
     * Loads the sentiment pipeline, POS tagger, lexicalized parser and NER
     * classifier concurrently; returns once both loader jobs have counted
     * down the latch, so callers may assume every model is ready.
     */
    fun shiftReduceParserInitiate() = runBlocking {
        val cdl = CountDownLatch(2)
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                propsSentiment.setProperty("parse.model", lexParserEnglishRNN)
                propsSentiment.setProperty("sentiment.model", sentimentModel)
                propsSentiment.setProperty("parse.maxlen", "90")
                propsSentiment.setProperty("threads", "5")
                propsSentiment.setProperty("pos.maxlen", "90")
                propsSentiment.setProperty("tokenize.maxlen", "90")
                propsSentiment.setProperty("ssplit.maxlen", "90")
                propsSentiment.setProperty("annotators", "tokenize,ssplit,pos,parse,sentiment,lemma,stopword") // coref is excluded; too expensive memory-wise
                propsSentiment.setProperty("customAnnotatorClass.stopword", "FunctionLayer.StopwordAnnotator")
                propsSentiment.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList)
                propsSentiment.setProperty("tokenize.options", "untokenizable=firstKeep")
                pipelineSentiment = StanfordCoreNLP(propsSentiment)
                tagger = MaxentTagger(taggerPath)
                lp = LexicalizedParser.loadModel(lexParserEnglishRNN, *options)
                tlp = lp.getOp().langpack()
                gsf = tlp.grammaticalStructureFactory()
                cdl.countDown()
                yield()
            }
            job.join()
        }
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                try {
                    classifier = CRFClassifier.getClassifierNoExceptions(nerModel)
                } catch (ex: ClassCastException) {
                    Logger.getLogger(Datahandler::class.java.name).log(Level.SEVERE, null, ex)
                }
                cdl.countDown()
                yield()
            }
            job.join()
        }
        try {
            cdl.await()
        } catch (ex: InterruptedException) {
            // interruption while waiting is non-fatal; both jobs were already joined above
        }
        println("finished shiftReduceParserInitiate\n")
    }
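
    // A minimal start-up sketch (hypothetical caller; assumes PipelineJMWESingleton
    // has been initialised elsewhere, since this class only consumes it):
    //
    //   val handler = Datahandler.instance
    //   handler.initiateMYSQL()                 // create tables, load cached strings
    //   handler.shiftReduceParserInitiate()     // load all CoreNLP models
    //   handler.instantiateAnnotationMapJMWE()
    //   handler.instantiateAnnotationMap()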

    fun updateStringCache() {
        try {
            checkIfUpdateStrings()
        } catch (ex: CustomError) {
            Logger.getLogger(Datahandler::class.java.name).log(Level.SEVERE, null, ex)
        }
    }

    @get:Throws(SQLException::class, IOException::class, CustomError::class)
    private val cache: Map<Int, String?>
        get() = DataMapper.getAllStrings()

    @Throws(SQLException::class, IOException::class)
    fun initiateMYSQL() {
        try {
            DataMapper.createTables()
            stringCache.putAll(cache)
        } catch (ex: CustomError) {
            Logger.getLogger(Datahandler::class.java.name).log(Level.SEVERE, null, ex)
        }
    }

    fun instantiateAnnotationMapJMWE() {
        if (stringCache.isNotEmpty()) {
            val jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(stringCache.values)
            for ((key, value) in jmweAnnotation) {
                jmweAnnotationCache[key] = value
            }
        }
    }

    fun instantiateAnnotationMap() = runBlocking {
        if (stringCache.isNotEmpty()) {
            val Annotationspipeline = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
            val AnnotationspipelineSentiment = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
            coroutineScope {
                for (str in stringCache.values) {
                    val job = launch(Dispatchers.Default) {
                        val strAnno = Annotation(str)
                        strAnno.compact()
                        Annotationspipeline[str] = strAnno
                        val strAnno2 = Annotation(str)
                        strAnno2.compact()
                        AnnotationspipelineSentiment[str] = strAnno2
                        yield()
                    }
                    job.join()
                }
            }
            println("PRE getMultipleCoreDocumentsWaySuggestion lag")
            val coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(stringCache.values, pipeline)
            pipeline.annotate(Annotationspipeline.values, 4)
            pipelineSentiment!!.annotate(AnnotationspipelineSentiment.values, 4)
            coroutineScope {
                for (i in Annotationspipeline.entries) {
                    val job = launch(Dispatchers.Default) {
                        i.value.compact()
                        pipelineAnnotationCache[i.key] = i.value
                        yield()
                    }
                    job.join()
                }
                for (i in AnnotationspipelineSentiment.entries) {
                    val job = launch(Dispatchers.Default) {
                        i.value.compact()
                        pipelineSentimentAnnotationCache[i.key] = i.value
                        yield()
                    }
                    job.join()
                }
            }
            println("post Annotationspipeline lag")
            for (i in coreDocumentpipelineMap.entries) {
                coreDocumentAnnotationCache[i.key] = i.value
            }
        }
    }
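
    // Cache layout after instantiation, for orientation: every cache is keyed by the
    // raw sentence text, e.g. (illustrative values only)
    //
    //   jmweAnnotationCache["how are you"]              -> Annotation (multi-word expressions)
    //   pipelineAnnotationCache["how are you"]          -> Annotation (tokenize ... parse)
    //   pipelineSentimentAnnotationCache["how are you"] -> Annotation (sentiment pipeline)
    //   coreDocumentAnnotationCache["how are you"]      -> CoreDocument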

    private fun futuresReturnOverallEvaluation(similarityMatrixes: List<SimilarityMatrix?>): ConcurrentMap<Int?, String?> {
        var strmapreturn = MapMaker().concurrencyLevel(6).makeMap<Int?, String?>()
        if (similarityMatrixes.isNotEmpty()) {
            for (SMX in similarityMatrixes) {
                strmapreturn = addSMXToMapReturn(strmapreturn, SMX)
            }
        }
        return strmapreturn
    }

    private fun addSMXToMapReturn(strmapreturn: ConcurrentMap<Int?, String?>, SMX: SimilarityMatrix?): ConcurrentMap<Int?, String?> {
        if (!strmapreturn.containsValue(SMX!!.primaryString)) {
            strmapreturn[strmapreturn.size] = SMX.primaryString
            val transmittedStr = SMX.secondaryString
            val cacheValue1 = SMX.cacheValue1
            val cacheValue2 = SMX.cacheValue2
            // cacheValue1 belongs to the primary string, cacheValue2 to the secondary
            if (cacheValue1 != null && !sentimentCachingMap.keys.contains(SMX.primaryString)) {
                sentimentCachingMap[SMX.primaryString] = cacheValue1
            }
            if (cacheValue2 != null && !sentimentCachingMap.keys.contains(transmittedStr)) {
                sentimentCachingMap[transmittedStr] = cacheValue2
            }
        }
        return strmapreturn
    }

    private fun checkForNullValues(index: String?): Boolean {
        return jmweAnnotationCache[index] != null && pipelineAnnotationCache[index] != null
                && pipelineSentimentAnnotationCache[index] != null
                && coreDocumentAnnotationCache[index] != null
    }

    private suspend fun StrComparringNoSentenceRelationMap(
            strCacheLocal: ConcurrentMap<Int, String?>, strCollection: Collection<String?>,
            localJMWEMap: ConcurrentMap<String, Annotation>,
            localPipelineAnnotation: ConcurrentMap<String?, Annotation>,
            localPipelineSentimentAnnotation: ConcurrentMap<String?, Annotation>,
            localCoreDocumentMap: ConcurrentMap<String, CoreDocument>): List<SimilarityMatrix?> {
        val distance_requirement = 10500.0
        val prefix_size = 150
        val smxReturnList = ArrayList<SimilarityMatrix>()
        coroutineScope {
            for (j in strCollection) {
                val job = launch(Dispatchers.Default) {
                    for (i in strCollection) {
                        if (j != i) {
                            val SMXInit = SimilarityMatrix(j, i)
                            val sentimentCacheStr1 = sentimentCachingMap.getOrDefault(i, null)
                            val sentimentCacheStr = sentimentCachingMap.getOrDefault(j, null)
                            val checkedVal: Boolean = checkForNullValues(i)
                            // fall back to the locally built annotation maps while the global
                            // caches are still small or are missing entries for i
                            val sentimentAnalyzerTest = if (stringCache.size < prefix_size || !checkedVal) {
                                SentimentAnalyzerTest(j, i, SMXInit,
                                        localJMWEMap[j], localJMWEMap[i], localPipelineAnnotation[j],
                                        localPipelineAnnotation[i], localPipelineSentimentAnnotation[j],
                                        localPipelineSentimentAnnotation[i], localCoreDocumentMap[j], localCoreDocumentMap[i],
                                        sentimentCacheStr, sentimentCacheStr1)
                            } else {
                                SentimentAnalyzerTest(j, i, SMXInit,
                                        localJMWEMap[j], jmweAnnotationCache[i], localPipelineAnnotation[j],
                                        pipelineAnnotationCache[i], localPipelineSentimentAnnotation[j],
                                        pipelineSentimentAnnotationCache[i], localCoreDocumentMap[j],
                                        coreDocumentAnnotationCache[i], sentimentCacheStr, sentimentCacheStr1)
                            }
                            val call = sentimentAnalyzerTest.callSMX()
                            if (call != null && call.distance > distance_requirement) {
                                smxReturnList.add(call)
                            }
                        }
                    }
                    yield()
                }
                job.join()
            }
        }
        return smxReturnList
    }
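
    // Pair scoring in the method above, illustrated (the numbers are hypothetical,
    // since SimilarityMatrix.distance comes from SentimentAnalyzerTest):
    //
    //   callSMX() on ("good morning", "good evening") -> distance 12000 > 10500 -> kept
    //   callSMX() on ("good morning", "!command")     -> distance  3000 <= 10500 -> dropped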

    private suspend fun stringIteratorComparator(strmap: ConcurrentMap<Int?, String?>,
            strCacheLocal: ConcurrentMap<Int, String?>, localJMWEMap: ConcurrentMap<String, Annotation>,
            localPipelineAnnotation: ConcurrentMap<String?, Annotation>,
            localPipelineSentimentAnnotation: ConcurrentMap<String?, Annotation>,
            localCoreDocumentMap: ConcurrentMap<String, CoreDocument>): ConcurrentMap<Int?, String?> {
        val comparringNoSentenceRelationMap = StrComparringNoSentenceRelationMap(strCacheLocal, strmap.values,
                localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap)
                .filterNotNull()
                .sortedBy { it.primaryString }
        println("ComparringNoSentenceRelationMap size: " + comparringNoSentenceRelationMap.size)
        return futuresReturnOverallEvaluation(comparringNoSentenceRelationMap)
    }

    private suspend fun removeNonSensicalStrings(strmap: ConcurrentMap<Int?, String?>): ConcurrentMap<Int?, String?> {
        val strCacheLocal = stringCache
        val localJMWEMap = getMultipleJMWEAnnotation(strmap.values)
        val localPipelineAnnotation = getMultiplePipelineAnnotation(strmap.values)
        println("str size post getMultiplePipelineAnnotation: " + strmap.size)
        val localPipelineSentimentAnnotation = getMultiplePipelineSentimentAnnotation(strmap.values)
        val localCoreDocumentMap = getMultipleCoreDocumentsWaySuggestion(strmap.values, pipeline)
        println("strmap size pre stringIteratorComparator: " + strmap.size)
        return stringIteratorComparator(strmap, strCacheLocal, localJMWEMap, localPipelineAnnotation, localPipelineSentimentAnnotation, localCoreDocumentMap)
    }

    fun checkIfUpdateStrings() = runBlocking {
        if (stopwatch.elapsed(TimeUnit.SECONDS) >= EXPIRE_TIME_IN_SECONDS || !stopwatch.isRunning) {
            var str = MessageResponseHandler.getStr()
            println("str size: " + str.size)
            str = filterContent(str)
            println("str size post filterContent: " + str.size)
            str = removeNonSensicalStrings(str)
            println("removeNonSensicalStrings str size POST: " + str.size + "\n")
            str = annotationCacheUpdate(str)
            println("annotationCacheUpdate str size POST: " + str.size + "\n")
            val strf = str
            if (stringCache.isNotEmpty()) {
                coroutineScope {
                    val job = launch(Dispatchers.IO) {
                        try {
                            DataMapper.InsertMYSQLStrings(strf)
                        } catch (ex: CustomError) {
                            Logger.getLogger(Datahandler::class.java.name).log(Level.SEVERE, null, ex)
                        }
                        MessageResponseHandler.setStr(MapMaker().concurrencyLevel(6).makeMap())
                        yield()
                    }
                    job.join()
                }
            } else {
                try {
                    DataMapper.InsertMYSQLStrings(strf)
                } catch (ex: CustomError) {
                    Logger.getLogger(Datahandler::class.java.name).log(Level.SEVERE, null, ex)
                }
                MessageResponseHandler.setStr(MapMaker().concurrencyLevel(6).makeMap())
            }
            // NB: Guava's Stopwatch.reset() also stops the watch, so after a reset the
            // next call sees !isRunning and refreshes immediately
            if (!stopwatch.isRunning) {
                stopwatch.start()
            } else {
                stopwatch.reset()
            }
        }
    }

    private fun trimString(str: String): String {
        var trimmed = str.trim { it <= ' ' }
        if (trimmed.startsWith("<@")) {
            // strip a leading Discord/Slack-style mention such as "<@123456789> "
            trimmed = trimmed.substring(trimmed.indexOf("> ") + 2)
        }
        return trimmed
    }
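
    // For example (hypothetical input):
    //   trimString("  <@123456789> how are you") == "how are you"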

    private suspend fun getResponseFutures(strF: String): String {
        val values_copy: MutableList<String?> = ArrayList(stringCache.values)
        // longest cached sentences first
        values_copy.sortWith(compareByDescending { it?.length ?: 0 })
        var preRelationUserCounters = -155000.0
        val concurrentRelations: MutableList<String?> = arrayListOf()
        val SB = StringBuilder()
        coroutineScope {
            for (str1 in values_copy) {
                if (strF != str1) {
                    val job = launch(Dispatchers.Default) {
                        val sentimentCacheStr1 = sentimentCachingMap.getOrDefault(str1, null)
                        val sentimentAnalyzerTest = SentimentAnalyzerTest(strF, str1, SimilarityMatrix(strF, str1),
                                strAnnoJMWE, jmweAnnotationCache[str1], strAnno,
                                pipelineAnnotationCache[str1], strAnnoSentiment,
                                pipelineSentimentAnnotationCache[str1], coreDoc, coreDocumentAnnotationCache[str1],
                                null, sentimentCacheStr1)
                        val getSMX: SimilarityMatrix? = sentimentAnalyzerTest.callSMX()
                        if (getSMX != null) {
                            val scoreRelationLastUserMsg = getSMX.distance
                            if (scoreRelationLastUserMsg > preRelationUserCounters) {
                                preRelationUserCounters = scoreRelationLastUserMsg
                                concurrentRelations.add(getSMX.secondaryString)
                            }
                        }
                        yield()
                    }
                    job.join()
                }
            }
            val randomLengthPermit = strF.length * (Math.random() * Math.random() * Math.random() * (Math.random() * 10))
            Collections.reverse(concurrentRelations)
            if (concurrentRelations.isNotEmpty()) {
                val firstRelation = concurrentRelations[0]
                val job1 = launch(Dispatchers.Default) {
                    for (secondaryRelation in concurrentRelations) {
                        if (SB.length > randomLengthPermit && SB.isNotEmpty()) {
                            break
                        }
                        val append = appendToString(firstRelation, secondaryRelation)
                        if (append) {
                            SB.append(secondaryRelation).append(" ")
                        }
                    }
                    yield()
                }
                job1.join()
            }
        }
        if (SB.isEmpty()) {
            return "failure, preventing stuckness"
        }
        return SB.toString()
    }
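
    // The response-length cap above is stochastic: with four independent uniform draws,
    // E[U1 * U2 * U3 * (U4 * 10)] = 0.5 * 0.5 * 0.5 * 5 = 0.625, so on average the reply
    // may grow to roughly 0.6x the length of the incoming message before the loop breaks.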

    private fun appendToString(firstRelation: String?, secondaryRelation: String?): Boolean {
        if (firstRelation == secondaryRelation) {
            return true
        }
        val scoreRelationStrF = getScoreRelationStrF(firstRelation, secondaryRelation)
        return scoreRelationStrF > 1900
    }

    fun getResponseMsg(str: String): String {
        val responseFutures: String
        runBlocking {
            val strF = trimString(str)
            getSingularAnnotation(strF)
            responseFutures = getResponseFutures(strF)
        }
        return responseFutures
    }
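
    // Usage sketch (hypothetical message; assumes the models and caches were
    // initialised as in the start-up sketch above):
    //
    //   val reply = Datahandler.instance.getResponseMsg("<@123456789> hello there")
    //   // reply is the best-scoring cached sentences joined with spaces, or
    //   // "failure, preventing stuckness" when nothing scored above threshold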

    suspend fun getSingularAnnotation(str: String?) {
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                strAnno = Annotation(str)
                strAnno!!.compact()
                pipeline.annotate(strAnno)
                yield()
            }
            job.join()
            val job1 = launch(Dispatchers.Default) {
                strAnnoSentiment = Annotation(str)
                strAnnoSentiment!!.compact()
                pipelineSentiment!!.annotate(strAnnoSentiment)
                val notactualList: MutableList<String?> = arrayListOf()
                notactualList.add(str)
                val jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(notactualList)
                strAnnoJMWE = jmweAnnotation.values.iterator().next()
                strAnnoJMWE.compact()
                yield()
            }
            job1.join()
            val job3 = launch(Dispatchers.Default) {
                val coreDocument = CoreDocument(str)
                pipeline.annotate(coreDocument)
                coreDoc = coreDocument
                yield()
            }
            job3.join()
        }
    }

    private fun getScoreRelationStrF(str: String?, mostRecentMsg: String?): Double {
        val SMX = SimilarityMatrix(str, mostRecentMsg)
        val cacheSentiment1 = sentimentCachingMap.getOrDefault(str, null)
        val cacheSentiment2 = sentimentCachingMap.getOrDefault(mostRecentMsg, null)
        val sentimentAnalyzerTest = SentimentAnalyzerTest(str, mostRecentMsg, SMX,
                strAnnoJMWE, jmweAnnotationCache[mostRecentMsg], strAnno,
                pipelineAnnotationCache[mostRecentMsg], strAnnoSentiment,
                pipelineSentimentAnnotationCache[mostRecentMsg], coreDoc,
                coreDocumentAnnotationCache[mostRecentMsg],
                cacheSentiment1, cacheSentiment2)
        val callSMX = sentimentAnalyzerTest.callSMX()
        return callSMX?.distance ?: 0.0
    }

    private suspend fun annotationCacheUpdate(strmap: ConcurrentMap<Int?, String?>): ConcurrentMap<Int?, String?> {
        val jmweAnnotation = PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(strmap.values)
        for ((key, value) in jmweAnnotation) {
            jmweAnnotationCache[key] = value
        }
        val Annotationspipeline = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
        val AnnotationspipelineSentiment = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
        val coreDocumentpipelineMap = getMultipleCoreDocumentsWaySuggestion(strmap.values, pipeline)
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                for (str in strmap.values) {
                    val strAnno1 = Annotation(str)
                    Annotationspipeline[str] = strAnno1
                    val strAnno2 = Annotation(str)
                    AnnotationspipelineSentiment[str] = strAnno2
                    stringCache[stringCache.size + 1] = str
                }
                yield()
            }
            pipeline.annotate(Annotationspipeline.values, 5)
            pipelineSentiment!!.annotate(AnnotationspipelineSentiment.values, 5)
            job.join()
        }
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                for (pipelineEntry in Annotationspipeline.entries) {
                    pipelineAnnotationCache[pipelineEntry.key] = pipelineEntry.value
                }
                yield()
            }
            job.join()
        }
        coroutineScope {
            val job = launch(Dispatchers.Default) {
                for (coreDocumentEntry in coreDocumentpipelineMap.entries) {
                    coreDocumentAnnotationCache[coreDocumentEntry.key] = coreDocumentEntry.value
                }
                yield()
            }
            job.join()
        }
        coroutineScope {
            val job1 = launch(Dispatchers.Default) {
                for (pipelineEntry in AnnotationspipelineSentiment.entries) {
                    pipelineSentimentAnnotationCache[pipelineEntry.key] = pipelineEntry.value
                }
                yield()
            }
            job1.join()
        }
        return strmap
    }

    /**
     * Collects the Annotation instances handed back by the asynchronous
     * StanfordCoreNLP.annotate(annotation, callback) overload.
     */
    private class AnnotationCollector<T> : Consumer<T> {
        val annotationsT: MutableList<T?> = arrayListOf()

        override fun accept(ann: T) {
            annotationsT.add(ann)
        }

        companion object {
            // diagnostic counter, incremented once per submitted annotation
            var i = 0
        }
    }
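
    // The collector above backs the callback-style annotate call used in
    // getMultipleCoreDocumentsWaySuggestion, roughly:
    //
    //   val collector = AnnotationCollector<Annotation?>()
    //   pipeline.annotate(Annotation("some text"), collector)  // returns immediately
    //   // ... later, collector.annotationsT holds the finished annotations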

    companion object {

        val EXPIRE_TIME_IN_SECONDS = TimeUnit.SECONDS.convert(10, TimeUnit.MINUTES)
        val EXPIRE_TIME_IN_SECONDS1 = TimeUnit.SECONDS.convert(10, TimeUnit.HOURS)

        @JvmField
        var instance = Datahandler()
        private var strAnno: Annotation? = null
        private var strAnnoSentiment: Annotation? = null
        private lateinit var strAnnoJMWE: Annotation
        private var coreDoc: CoreDocument? = null
        private val stringCache = MapMaker().concurrencyLevel(6).makeMap<Int, String?>()
        private lateinit var pipelineAnnotationCache: ConcurrentMap<String?, Annotation>
        private lateinit var pipelineSentimentAnnotationCache: ConcurrentMap<String?, Annotation>
        private lateinit var jmweAnnotationCache: ConcurrentMap<String, Annotation>
        private lateinit var coreDocumentAnnotationCache: ConcurrentMap<String, CoreDocument>
        private val sentimentCachingMap = MapMaker().concurrencyLevel(6).makeMap<String, SentimentValueCache>()
        private const val similar = ""
        private const val shiftReduceParserPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"
        private const val sentimentModel = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"
        private const val lexParserEnglishRNN = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"
        private const val taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"
        private const val nerModel = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz"
        private const val nerModel2 = "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz"
        private const val nerModel3 = "edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz"
        private const val customStopWordList = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with"

        @JvmStatic
        var tagger: MaxentTagger? = null
            private set

        private val options = arrayOf("-maxLength", "100")
        private val props = Properties()
        private val propsSentiment = Properties()

        @JvmStatic
        var gsf: GrammaticalStructureFactory? = null
            private set

        private lateinit var lp: LexicalizedParser
        private lateinit var tlp: TreebankLanguagePack
        private lateinit var classifier: AbstractSequenceClassifier<CoreLabel>

        fun getPipeLine(): StanfordCoreNLP {
            return pipeline
        }

        // set up Stanford CoreNLP pipeline
        @JvmStatic
        val pipeline = pipeLineSetUp

        private var pipelineSentiment: StanfordCoreNLP? = null

        private val pipeLineSetUp: StanfordCoreNLP
            get() {
                props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse")
                props.setProperty("parse.model", shiftReduceParserPath)
                props.setProperty("parse.maxlen", "90")
                props.setProperty("parse.binaryTrees", "true")
                props.setProperty("threads", "5")
                props.setProperty("pos.maxlen", "90")
                props.setProperty("tokenize.maxlen", "90")
                props.setProperty("ssplit.maxlen", "90")
                props.setProperty("lemma.maxlen", "90")
                props.setProperty("ner.model", "$nerModel,$nerModel2,$nerModel3")
                props.setProperty("ner.combinationMode", "HIGH_RECALL")
                props.setProperty("regexner.ignorecase", "true")
                props.setProperty("ner.fine.regexner.ignorecase", "true")
                props.setProperty("tokenize.options", "untokenizable=firstKeep")
                return StanfordCoreNLP(props)
            }
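
        // Note: `pipeline` is initialised eagerly when the companion object is first
        // touched, so merely referencing Datahandler triggers loading of the models
        // configured above.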

        @JvmStatic
        fun getClassifier(): AbstractSequenceClassifier<CoreLabel> {
            return classifier
        }

        private fun getMultipleJMWEAnnotation(str: Collection<String?>): ConcurrentMap<String, Annotation> {
            return PipelineJMWESingleton.INSTANCE.getJMWEAnnotation(str)
        }

        private fun getMultiplePipelineAnnotation(str: Collection<String?>): ConcurrentMap<String?, Annotation> {
            val pipelineAnnotationMap = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
            for (str1 in str) {
                val strAnno1 = Annotation(str1)
                pipelineAnnotationMap[str1] = strAnno1
            }
            pipeline.annotate(pipelineAnnotationMap.values, 5)
            return pipelineAnnotationMap
        }

        private fun getMultiplePipelineSentimentAnnotation(str: Collection<String?>): ConcurrentMap<String?, Annotation> {
            val pipelineAnnotationMap = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
            for (str1 in str) {
                val strAnno1 = Annotation(str1)
                pipelineAnnotationMap[str1] = strAnno1
            }
            pipelineSentiment?.annotate(pipelineAnnotationMap.values, 5)
            return pipelineAnnotationMap
        }

        fun filterContent(str: ConcurrentMap<Int?, String?>): ConcurrentMap<Int?, String?> {
            val strlistreturn = MapMaker().concurrencyLevel(5).makeMap<Int?, String?>()
            for (str1 in str.values) {
                if (!str1.isNullOrEmpty() && str1.length > 3) {
                    val str1Local = str1.trim()
                    if (str1Local.length > 2 && !str1Local.startsWith("!")) {
                        strlistreturn[strlistreturn.size] = str1Local
                    }
                }
            }
            return strlistreturn
        }
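
        // For example (hypothetical inputs): "hi", "!mute", and " ok " are dropped,
        // while " good morning " survives as "good morning".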

        suspend fun getMultipleCoreDocumentsWaySuggestion(str: Collection<String?>, localNLP: StanfordCoreNLP): ConcurrentMap<String, CoreDocument> {
            val annCollector = AnnotationCollector<Annotation?>()
            val annotationreturnMap = MapMaker().concurrencyLevel(6).makeMap<String, CoreDocument>()
            coroutineScope {
                val job = launch(Dispatchers.Default) {
                    for (exampleString in str) {
                        // asynchronous annotate overload; the collector receives each
                        // Annotation when its pipeline pass finishes
                        localNLP.annotate(Annotation(exampleString), annCollector)
                        AnnotationCollector.i++
                    }
                    yield()
                }
                job.join()
            }
            // crude grace period for the asynchronous annotations to complete;
            // delay() suspends instead of blocking the thread
            delay(1500)
            coroutineScope {
                val job1 = launch(Dispatchers.Default) {
                    for (ann in annCollector.annotationsT) {
                        if (ann != null) {
                            ann.compact()
                            val CD = CoreDocument(ann)
                            annotationreturnMap[CD.text()] = CD
                        }
                    }
                    yield()
                }
                job1.join()
            }
            delay(1500)
            return annotationreturnMap
        }
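
        // The fixed 1500 ms grace periods are a heuristic, not a completion signal;
        // any annotation still in flight after the delay is simply missing from the
        // returned map.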
    }

    init {
        stopwatch = Stopwatch.createUnstarted()
        jmweAnnotationCache = MapMaker().concurrencyLevel(5).makeMap<String, Annotation>()
        pipelineAnnotationCache = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
        pipelineSentimentAnnotationCache = MapMaker().concurrencyLevel(5).makeMap<String?, Annotation>()
        coreDocumentAnnotationCache = MapMaker().concurrencyLevel(5).makeMap<String, CoreDocument>()
    }
}