永发信息网

文本算法是什么意思

答案:2  悬赏:10  手机版
解决时间 2021-01-24 19:11
  • 提问者网友:呐年旧曙光
  • 2021-01-24 15:08
文本算法是什么意思
最佳答案
  • 五星知识达人网友:枭雄戏美人
  • 2021-01-24 15:47
在文本信息空间内寻找任何两个最相关的文本信息,并将之简并成一个文本信息,从而实现信息数量的收缩。
简并算法的实现方式是:比较整个信息空间内所有文本的相关性(相似性),得到相互之间的相关性后,两两(注)进行配对。配对的要求是这两个文本信息的相关性最大,例如 A 找到的最相关文档是 B,那么 B 找到的最相关文档也一定是 A。

注:某些情况下,A 最相近的文档是 C,而 B 最相关的文档也是 C;此时存在一种情况,A、B、C 三者之间自洽,即构成信息空间中距离最近的一个三角形。

得到最相似的文档后,将之进行平均化,或者简单地叠加。

信息空间中独立信息的数量会减少到原来的一半以下,然后重复第 1 步的过程,再进行简并。

信息最后简并到唯一的一个信息,就是整个信息文本的平均值。

画出信息树的结构后,就能够根据所需聚类的规模大小进行自动聚类了。
全部回答
  • 1楼网友:妄饮晩冬酒
  • 2021-01-24 16:00
用java比较两个文本文件的不同 rangedifferencer public class rangedifferencer { private static final rangedifference[] empty_result= new rangedifference[0]; private rangedifferencer() { // nothing to do } public static rangedifference[] finddifferences(irangecomparator left, irangecomparator right) { int rightsize= right.getrangecount(); int leftsize= left.getrangecount(); // // differences matrix: // only the last d of each diagonal is stored, i.e., lastdiagonal[k] = row of d // int diaglen= 2 * math.max(rightsize, leftsize); // bound on the size of edit script int maxdiagonal= diaglen; int lastdiagonal[]= new int[diaglen + 1]; // the row containing the last d // on diagonal k (lastdiagonal[k] = row) int origin= diaglen / 2; // origin of diagonal 0 // script corresponding to d[k] linkedrangedifference script[]= new linkedrangedifference[diaglen + 1]; int row, col; // find common prefix for (row= 0; row < rightsize && row < leftsize && rangesequal(right, row, left, row) == true;) row++; lastdiagonal[origin]= row; script[origin]= null; int lower= (row == rightsize) ? origin + 1 : origin - 1; int upper= (row == leftsize) ? 
origin - 1 : origin + 1; if (lower > upper) return empty_result; //system.out.println("finddifferences: " + maxdiagonal + " " + lower + " " + upper); // for each value of the edit distance for (int d= 1; d <= maxdiagonal; ++d) { // d is the current edit distance if (right.skiprangecomparison(d, maxdiagonal, left)) return empty_result; // should be something we already found // for each relevant diagonal (-d, -d+2 ..., d-2, d) for (int k= lower; k <= upper; k += 2) { // k is the current diagonal linkedrangedifference edit; if (k == origin - d || k != origin + d && lastdiagonal[k + 1] >= lastdiagonal[k - 1]) { // // move down // row= lastdiagonal[k + 1] + 1; edit= new linkedrangedifference(script[k + 1], linkedrangedifference.delete); } else { // // move right // row= lastdiagonal[k - 1]; edit= new linkedrangedifference(script[k - 1], linkedrangedifference.insert); } col= row + k - origin; edit.frightstart= row; edit.fleftstart= col; //assert.istrue(k >= 0 && k <= maxdiagonal); script[k]= edit; // slide down the diagonal as far as possible while (row < rightsize && col < leftsize && rangesequal(right, row, left, col) == true) { ++row; ++col; } //assert.istrue(k >= 0 && k <= maxdiagonal); // unreasonable value for diagonal index lastdiagonal[k]= row; if (row == rightsize && col == leftsize) { //showscript(script[k], right, left); return createdifferencesranges(script[k]); } if (row == rightsize) lower= k + 2; if (col == leftsize) upper= k - 2; } --lower; ++upper; } // too many differences //assert.istrue(false); return null; } public static rangedifference[] findranges(irangecomparator left, irangecomparator right) { rangedifference[] in= finddifferences(left, right); list out= new arraylist(); rangedifference rd; int mstart= 0; int ystart= 0; for (int i= 0; i < in.length; i++) { rangedifference es= in[i]; rd= new rangedifference(rangedifference.nochange, mstart, es.rightstart() - mstart, ystart, es.leftstart() - ystart); if (rd.maxlength() != 0) out.add(rd); 
out.add(es); mstart= es.rightend(); ystart= es.leftend(); } rd= new rangedifference(rangedifference.nochange, mstart, right.getrangecount() - mstart, ystart, left.getrangecount() - ystart); if (rd.maxlength() > 0) out.add(rd); return (rangedifference[]) out.toarray(empty_result); } //---- private methods private static rangedifference[] createdifferencesranges(linkedrangedifference start) { linkedrangedifference ep= reversedifferences(start); arraylist result= new arraylist(); rangedifference es= null; while (ep != null) { es= new rangedifference(rangedifference.change); if (ep.isinsert()) { es.frightstart= ep.frightstart + 1; es.fleftstart= ep.fleftstart; rangedifference b= ep; do { ep= ep.getnext(); es.fleftlength++; } while (ep != null && ep.isinsert() && ep.frightstart == b.frightstart); } else { es.frightstart= ep.frightstart; es.fleftstart= ep.fleftstart; rangedifference a= ep; // // deleted lines // do { a= ep; ep= ep.getnext(); es.frightlength++; } while (ep != null && ep.isdelete() && ep.frightstart == a.frightstart + 1); boolean change= (ep != null && ep.isinsert() && ep.frightstart == a.frightstart); if (change) { rangedifference b= ep; // // replacement lines // do { ep= ep.getnext(); es.fleftlength++; } while (ep != null && ep.isinsert() && ep.frightstart == b.frightstart); } else { es.fleftlength= 0; } es.fleftstart++; // meaning of range changes from "insert after", to "replace with" } // // the script commands are 1 based, subtract one to make them zero based // es.frightstart--; es.fleftstart--; result.add(es); } return (rangedifference[]) result.toarray(empty_result); } private static boolean rangesequal(irangecomparator a, int ai, irangecomparator b, int bi) { return a.rangesequal(ai, b, bi); } private static boolean rangespansequal(irangecomparator right, int rightstart, int rightlen, irangecomparator left, int leftstart, int leftlen) { if (rightlen == leftlen) { int i= 0; for (i= 0; i < rightlen; i++) { if (!rangesequal(right, rightstart + i, 
left, leftstart + i)) break; } if (i == rightlen) return true; } return false; } private static linkedrangedifference reversedifferences(linkedrangedifference start) { linkedrangedifference ep, behind, ahead; ahead= start; ep= null; while (ahead != null) { behind= ep; ep= ahead; ahead= ahead.getnext(); ep.setnext(behind); } return ep; } } 下面是一段关于如何使用这些类的简单的测试代码 public class rangedifferencertest extends testcase { inputstream left = null; inputstream right = null; protected void setup() throws exception { string file1 = "d:/temp/1.txt"; string file2 = "d:/temp/2.txt"; left = new fileinputstream(new file(file1)); right = new fileinputstream(new file(file2)); super.setup(); } protected void teardown() throws exception { left.close(); right.close(); super.teardown(); } public static void main(string[] args) { } public void testfinddifferences() { try { rangedifference[] rds = rangedifferencer.findranges(new linecomparator(left,"gbk"),new linecomparator(right,"gbk")); if(rds != null ){ for(int i=0; i
我要举报
如以上回答内容为低俗、色情、不良、暴力、侵权、涉及违法等信息,可以点下面链接进行举报!
点此我要举报以上问答信息
大家都在看
推荐资讯