{"id":5592,"date":"2021-11-23T06:55:32","date_gmt":"2021-11-23T06:55:32","guid":{"rendered":"https:\/\/support.divominer.com\/zh-tw\/articles\/docs\/divominer-%e4%bd%bf%e7%94%a8%e6%89%8b%e5%86%8a\/%e6%bc%94%e7%ae%97%e6%b3%95%e6%8c%96%e6%8e%98\/%e7%9b%b8%e4%bc%bc%e5%ba%a6%e5%88%86%e6%9e%90"},"modified":"2024-08-30T11:46:49","modified_gmt":"2024-08-30T03:46:49","slug":"%e7%9b%b8%e4%bc%bc%e5%ba%a6%e5%88%86%e6%9e%90","status":"publish","type":"docs","link":"https:\/\/support.divominer.com\/zh-tw\/articles\/docs\/divominer-%e4%bd%bf%e7%94%a8%e6%89%8b%e5%86%8a\/%e6%bc%94%e7%ae%97%e6%b3%95%e6%8e%a2%e5%8b%98\/%e7%9b%b8%e4%bc%bc%e5%ba%a6%e5%88%86%e6%9e%90","title":{"rendered":"\u76f8\u4f3c\u5ea6\u5206\u6790"},"content":{"rendered":"\n<p>\u76f8\u4f3c\u5ea6\u5206\u6790\u8a08\u7b97\uff0c\u7576\u524d\u5e73\u53f0\u652f\u6301MinHashLSH\u65b9\u6cd5\uff0c\u5728\u9810\u8a2d\u689d\u4ef6\u4e0b\uff08\u5305\u62ec\uff1a\u7be9\u9078\u6578\u64da\u3001\u6311\u9078\u76f8\u4f3c\u5ea6\u8a08\u7b97\u65b9\u6cd5\u3001\u76f8\u4f3c\u95be\u503c\u7b49\uff09\u5c0d\u6587\u672c\u5167\u5bb9\u505a\u805a\u5408\u5efa\u6a21\uff0c\u5176\u805a\u5408\u7d50\u679c\u6709\u52a9\u65bc\u767c\u73fe\u76f8\u4f3c\u6587\u672c\uff0c\u65b9\u4fbf\u6279\u91cf\u7de8\u78bc\u64cd\u4f5c\uff0c\u70ba\u5f8c\u7e8c\u6df1\u5165\u7684\u7814\u7a76\u5206\u6790\u63d0\u4f9b\u6578\u64da\u9810\u5148\u8655\u7406\u57fa\u790e\u3002<\/p>\n\n\n\n<p>\u4e00\u3001MinHashLSH\u6a21\u578b\u4ecb\u7d39<\/p>\n\n\n\n<ol><li>Jaccard\u76f8\u4f3c\u5ea6<\/li><\/ol>\n\n\n\n<p>\u7d66\u5b9a\u96c6\u5408A\u3001B\uff0cJaccard\u4fc2\u6578\u5b9a\u7fa9\u70baA\u548cB\u4ea4\u96c6\u5143\u7d20\u500b\u6578\u8207A\u548cB\u806f\u96c6\u7684\u5143\u7d20\u500b\u6578\u7684\u6bd4\u503c\uff0c\u5373\uff1aJ(A, B) = |A\u2229B| \/ |A\u222aB| = |A\u2229B| \/ (|A| + |B| &#8211; |A\u2229B|) 
\uff0c\u7528\u65bc\u6bd4\u8f03\u6709\u9650\u6a23\u672c\u96c6\u4e4b\u9593\u7684\u76f8\u4f3c\u6027\u8207\u5dee\u7570\u6027\uff0c\u4fc2\u6578\u503c\u8d8a\u5927\u76f8\u4f3c\u5ea6\u8d8a\u9ad8\u3002\u6bd4\u5982A = {a,b,c,d},B = {c,d,e,f}\uff0c\u90a3\u9ebcJaccard\u76f8\u4f3c\u6027\u4fc2\u6578\u70ba2\/6=0.33\u3002<\/p>\n\n\n\n<p>2. MinHash<\/p>\n\n\n\n<p>MinHash\u6709\u5225\u65bc\u50b3\u7d71\u7684hash\u7b97\u6cd5\uff0c\u4e0d\u518d\u662f\u5c07\u6587\u672c\u5167\u5bb9\u96a8\u6a5f\u6620\u5c04\u70ba\u7c3d\u540d\u503c\uff0c\u800c\u662f\u80fd\u5920\u6eff\u8db3\u76f8\u4f3c\u6587\u672c\u7684\u7c3d\u540d\u4e5f\u76f8\u8fd1\u3002\u5176\u57fa\u672c\u539f\u7406\u662fA\u222aB\u7684\u96a8\u6a5f\u57df\u88e1\uff0c\u9078\u4e2d\u7684\u5143\u7d20\u843d\u5728A\u2229B\u9019\u500b\u5340\u57df\u7684\u6a5f\u7387\u7b49\u65bcJaccard\u76f8\u4f3c\u5ea6\uff0c\u53ef\u4ee5\u7528\u65bc\u5927\u898f\u6a21\u805a\u985e\u554f\u984c\u3002\u5177\u9ad4\u64cd\u4f5c\u662f\u5148\u5c0dA\u3001B\u7684n\u500b\u7dad\u5ea6\u505a\u96a8\u6a5f\u6392\u5217\uff08\u5373\u7d22\u5f15\u96a8\u6a5f\u6253\u4e82\uff09\uff0c\u5206\u5225\u53d6\u5411\u91cfA\u3001B\u7684\u7b2c\u4e00\u500b\u975e0\u7d22\u5f15\u503c\u5373\u70baMinHash\u503c\uff0c\u90a3\u9ebc\u6a5f\u7387P(minHash(A) = minHash(B)) = Jaccard(A,B)\u3002<\/p>\n\n\n\n<p>3. 
LSH\uff08Locality Sensitive Hashing\uff0c\u5340\u57df\u654f\u611fHash\u51fd\u6578\uff09<\/p>\n\n\n\n<p>Minhash\u89e3\u6c7a\u4e86\u9ad8\u7dad\u7a00\u758f\u5411\u91cf\u7684\u904b\u7b97\uff0c\u4f46\u662f\u5c0d\u96c6\u5408\u5169\u5169\u6bd4\u8f03\u7684\u6642\u9593\u8907\u96dc\u5ea6\u4f9d\u7136\u662fO(n<sup>2<\/sup>)\u3002\u5047\u5982\u96c6\u5408\u6578\u91cf\u6975\u5176\u9f90\u5927\uff0c\u6211\u5011\u5e0c\u671b\u50c5\u50c5\u6bd4\u8f03\u90a3\u4e9b\u76f8\u4f3c\u5ea6\u53ef\u80fd\u5f88\u9ad8\u7684\u96c6\u5408\uff0c\u800c\u76f4\u63a5\u5ffd\u7565\u6389\u76f8\u4f3c\u5ea6\u5f88\u4f4e\u7684\u96c6\u5408\uff0cLSH\u5c31\u662f\u89e3\u6c7a\u9019\u500b\u554f\u984c\u3002\u5176\u57fa\u672c\u601d\u60f3\u662f\u539f\u7a7a\u9593\u76f8\u8fd1\u7684\u9ede\uff0c\u7d93\u904eLSH\u51fd\u6578\u6620\u5c04\u5f8c\uff0c\u5f88\u5927\u6a5f\u7387Hash\u503c\u76f8\u540c\uff1b\u800c\u8ddd\u96e2\u9060\u7684\u5169\u500b\u9ede\uff0c\u6620\u5c04\u5f8cHash\u503c\u76f8\u7b49\u7684\u6a5f\u7387\u5f88\u5c0f\u3002<\/p>\n\n\n\n<p>\u4e8c\u3001\u7b97\u6cd5\u8aaa\u660e<\/p>\n\n\n\n<p>\u6587\u672c\u76f8\u4f3c\u5ea6\u8a08\u7b97\u6642\uff0c\u9996\u5148\u5f97\u5230document-term\u77e9\u9663\uff0c\u4f8b\u5982\uff1a<\/p>\n\n\n\n<figure 
class=\"wp-block-table\"><table><tbody><tr><td>&nbsp;<\/td><td>S1<\/td><td>S2<\/td><td>S3<\/td><\/tr><tr><td>0<\/td><td>1<\/td><td>0<\/td><td>0<\/td><\/tr><tr><td>1<\/td><td>0<\/td><td>0<\/td><td>1<\/td><\/tr><tr><td>2<\/td><td>0<\/td><td>1<\/td><td>0<\/td><\/tr><tr><td>3<\/td><td>1<\/td><td>0<\/td><td>1<\/td><\/tr><tr><td>4<\/td><td>0<\/td><td>0<\/td><td>1<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u5176\u4e2d\uff0cS1\u3001S2\u3001S3\u8868\u793a\u6587\u4ef6\uff0c\u7b2c\u4e00\u5217\u5e8f\u865f0-4\u8868\u793a\u884c\u5e8f\u865f\uff0c\u4e5f\u5c31\u662f\u55ae\u8a5e\uff1b\u5176\u4ed6\u90e8\u5206\u4e2d1\u8868\u793a\u6587\u4ef6S\u6709\u9019\u500b\u55ae\u8a5e\uff0c0\u8868\u793a\u6587\u4ef6S\u6c92\u6709\u9019\u500b\u55ae\u8a5e\u3002\u63a5\u4e0b\u4f86\uff0c\u4f7f\u7528hash\u51fd\u6578\u7522\u751f\u884c\u865f\u9806\u5e8f\uff0c\u6bd4\u5982(x+1)mod5\uff0c(3x+1)mod5\uff0c\u5247\u5169\u500bhash\u51fd\u6578\u7522\u751f\u884c\u865f\u9806\u5e8f\u70ba\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table><tbody><tr><td>&nbsp;<\/td><td>Hash1<\/td><td>Hash2<\/td><\/tr><tr><td>0<\/td><td>1<\/td><td>1<\/td><\/tr><tr><td>1<\/td><td>2<\/td><td>4<\/td><\/tr><tr><td>2<\/td><td>3<\/td><td>2<\/td><\/tr><tr><td>3<\/td><td>4<\/td><td>0<\/td><\/tr><tr><td>4<\/td><td>0<\/td><td>3<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u901a\u904e\u904d\u6b77\u77e9\u9663\u4e2d\u7684\u503c\uff0c\u5c0d\u65bc0\u8df3\u904e\uff0c\u5c0d\u65bc1\uff0c\u770bhash\u51fd\u6578\u7522\u751f\u7684\u884c\u865f\uff0c\u627e\u5230\u884c\u865f\u6700\u5c0f\u7684\u503c\u4f5c\u70bahash\u8f38\u51fa\u7684\u503c\uff0c\u6700\u5f8c\u5f97\u5230\u5982\u4e0b\u77e9\u9663\uff1a<\/p>\n\n\n\n<figure 
class=\"wp-block-table\"><table><tbody><tr><td>&nbsp;<\/td><td>S1<\/td><td>S2<\/td><td>S3<\/td><\/tr><tr><td>Hash1<\/td><td>1<\/td><td>3<\/td><td>0<\/td><\/tr><tr><td>Hash2<\/td><td>0<\/td><td>2<\/td><td>0<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u6b64\u6642S1\u3001S2\u7684\u76f8\u4f3c\u5ea6\uff0cJ(S1,S2)=0\/3=0\u3002Minhash\u8a08\u7b97\u7684\u5408\u7406\u6027\u5728\u65bc\uff0c\u5169\u500b\u96c6\u5408\u7684\u96a8\u6a5f\u884c\u6392\u5217\u7684minhash\u503c\u76f8\u7b49\u7684\u6a5f\u7387\uff0c\u7b49\u65bc\u5169\u500b\u96c6\u5408\u7684Jaccard\u76f8\u4f3c\u5ea6\u3002\u5c0d\u65bc\u5169\u500b\u96c6\u5408A\u3001B\uff0c\u6bcf\u4e00\u884c\u7684\u72c0\u614b\u6709\u4e09\u7a2e\uff1a(1) A\u3001B\u96c6\u5408\u90fd\u6709\u9019\u500b\u55ae\u8a5e\uff1b(2) A\u3001B\u96c6\u5408\u90fd\u6c92\u6709\u9019\u500b\u55ae\u8a5e\uff1b(3) A\u3001B\u96c6\u5408\u50c5\u6709\u4e00\u500b\u96c6\u5408\u6709\u9019\u500b\u55ae\u8a5e\u3002\u82e5\u5206\u5225\u5c6c\u65bc\uff081\uff09\u3001\uff082\uff09\u3001\uff083\uff09\u72c0\u614b\u7684\u884c\u6578\u4f9d\u6b21\u6709n1\u3001n2\u3001n3\u884c\uff0c\u5247Jaccard(A,B)=n1\/(n1+n3)\uff1b\u7531\u65bc\u6392\u5217\u662f\u96a8\u6a5f\u7684\uff0c\u5728\u9047\u5230\uff083\uff09\u4e4b\u524d\u9047\u5230\uff081\uff09\u7684\u6a5f\u7387\u662fn1\/(n1+n3)\uff0c\u6070\u597d\u7b49\u65bcJaccard\u4fc2\u6578\u503c\u3002<\/p>\n","protected":false},"featured_media":0,"parent":5584,"menu_order":8,"comment_status":"open","ping_status":"closed","template":"","doc_tag":[],"_links":{"self":[{"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5592"}],"collection":[{"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs"}],"about":[{"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/types\/docs"}],"replies":[{"embeddable":tru
e,"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/comments?post=5592"}],"version-history":[{"count":3,"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5592\/revisions"}],"predecessor-version":[{"id":5794,"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5592\/revisions\/5794"}],"up":[{"embeddable":true,"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5584"}],"next":[{"title":"\u65b0\u5efa\u6587\u672c\u76f8\u4f3c\u5ea6\u4efb\u52d9","link":"https:\/\/support.divominer.com\/zh-tw\/articles\/docs\/divominer-%e4%bd%bf%e7%94%a8%e6%89%8b%e5%86%8a\/%e6%bc%94%e7%ae%97%e6%b3%95%e6%8e%a2%e5%8b%98\/%e6%96%b0%e5%bb%ba%e6%96%87%e6%9c%ac%e7%9b%b8%e4%bc%bc%e5%ba%a6%e4%bb%bb%e5%8b%99","href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5629"}],"prev":[{"title":"\u65b0\u5efa\u4e3b\u984c\u6a21\u578b\uff08LDA\uff09\u4efb\u52d9","link":"https:\/\/support.divominer.com\/zh-tw\/articles\/docs\/divominer-%e4%bd%bf%e7%94%a8%e6%89%8b%e5%86%8a\/%e6%bc%94%e7%ae%97%e6%b3%95%e6%8e%a2%e5%8b%98\/%e6%96%b0%e5%bb%ba%e4%b8%bb%e9%a1%8c%e6%a8%a1%e5%9e%8b%ef%bc%88lda%ef%bc%89%e4%bb%bb%e5%8b%99","href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/docs\/5627"}],"wp:attachment":[{"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/media?parent=5592"}],"wp:term":[{"taxonomy":"doc_tag","embeddable":true,"href":"https:\/\/support.divominer.com\/zh-tw\/wp-json\/wp\/v2\/doc_tag?post=5592"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}