需求
首先,有一段档案文章在去掉换行符的情况下发送给了算法,算法识别出了敏感词与敏感句以及它们的坐标(此时的坐标是基于不含换行符 \n 的文本计算的)。
但是我们的原始档案文章是带换行符的,因为前端需要根据换行符来解析,于是问题就出现了:
前端展示界面需要换行,所以给前端的数据必须携带 \n;但现在还要把敏感词和敏感句高亮,如果我们给出的坐标仍然是不含 \n 的文本中的坐标,前端就没办法正确匹配高亮位置了。
有些人可能会说:直接用 index 查找不就好了?但是如果词或句子被 \n 隔开(跨行)呢,那该怎么计算?所以我写了一个算法来计算敏感词句的 start 和 end。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
// Collects, for every '\n' in the text, its position minus the number of line breaks
// found before it, i.e. the position the break would occupy once all earlier breaks
// have been removed from the text.
Function<String, int[]> findNewLineIndices = text -> {
    ArrayList<Integer> positions = new ArrayList<>();
    for (int cursor = 0; cursor < text.length(); cursor++) {
        if (text.charAt(cursor) == '\n') {
            // positions.size() is the count of line breaks seen so far.
            positions.add(cursor - positions.size());
        }
    }
    int[] result = new int[positions.size()];
    int slot = 0;
    for (Integer position : positions) {
        result[slot++] = position;
    }
    return result;
};
// Locates a sentence inside the text and returns {start, end}.
// While the sentence is not found and is longer than 4 characters, its last character
// is dropped and the search is retried. The returned end is always start plus the
// ORIGINAL sentence length, even if a shortened prefix produced the match.
// Returns {0, 0} when nothing matches or the sentence is empty.
BiFunction<String, String, int[]> findSentence = (sentence, text) -> {
    final int originalLength = sentence.length();
    String candidate = sentence;
    int position = text.indexOf(candidate);
    while (position < 0 && candidate.length() > 4) {
        candidate = candidate.substring(0, candidate.length() - 1);
        position = text.indexOf(candidate);
    }
    if (candidate.isEmpty() || position < 0) {
        return new int[]{0, 0};
    }
    return new int[]{position, position + originalLength};
};
// Counts how many entries of indices are less than or equal to startNode.
//   indices   - positions to compare against; the early exit below assumes they are in
//               ascending order (as findNewLineIndices produces them) - TODO confirm for
//               any other caller.
//   startNode - the position being probed.
// Returns {count, 0}. The second element is a placeholder that is always 0: this function
// only receives the start position, so it cannot count entries inside a range.
BiFunction<int[], Integer, int[]> findCountIndices = (indices, startNode) -> {
    // Default to "every entry is at or before startNode". The previous default of -1
    // shifted the caller's result back by one whenever no entry lay after startNode,
    // including the case of an empty indices array.
    int indexBeforeStart = indices.length;
    int indicesBetween = 0;
    for (int i = 0; i < indices.length; i++) {
        if (indices[i] > startNode) {
            // i entries precede this one, and all of them are <= startNode.
            indexBeforeStart = i;
            break;
        }
    }
    return new int[]{indexBeforeStart, indicesBetween};
};
// Map the sentence coordinates from the newline-free text back onto the original text.
// Strip the line breaks so the search runs on a text without '\n'.
String replace = artificialText.replaceAll("\\n", "");
int[] newLineIndices = findNewLineIndices.apply(artificialText);
int[] sentenceIndices = findSentence.apply(cleanSentenceText, replace);
int[] countIndices = findCountIndices.apply(newLineIndices, sentenceIndices[0]);
// findCountIndices may report -1 when no line break lies after the probed position;
// in that case every line break precedes it, so the offset is the total break count.
int breaksBeforeStart = countIndices[0] < 0 ? newLineIndices.length : countIndices[0];
// The end index is exclusive, so probe the last character of the sentence. This also
// counts the line breaks that fall inside the sentence, which were previously ignored.
int[] endCountIndices = findCountIndices.apply(newLineIndices, sentenceIndices[1] - 1);
int breaksBeforeEnd = endCountIndices[0] < 0 ? newLineIndices.length : endCountIndices[0];
// Never let end fall behind start (possible only for the {0, 0} "not found" result).
breaksBeforeEnd = Math.max(breaksBeforeStart, breaksBeforeEnd);
jsonObject.add("start", new JsonPrimitive(sentenceIndices[0] + breaksBeforeStart));
jsonObject.add("end", new JsonPrimitive(sentenceIndices[1] + breaksBeforeEnd));
|