From 944f4f9fd65f8598873a21dd6c7f0024aea0c39f Mon Sep 17 00:00:00 2001
From: tianjixiaoying <136106230@qq.com>
Date: Fri, 18 Dec 2015 16:31:36 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=96=87=E6=9C=AC=E5=90=91?=
 =?UTF-8?q?=E9=87=8F=E4=B8=AD=E7=9A=84term=E6=8C=89=E7=85=A7=E4=BB=8E?=
 =?UTF-8?q?=E5=B0=8F=E5=88=B0=E5=A4=A7=E6=8E=92=E5=BA=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

程序导出的文本向量中,termid没有按照从小到大的次序排列,但是一般来说支持向量机需要的数据就是要求排序的。增加了排序代码
---
 .../AbstractOutputtingQuantizedData.java      | 42 ++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/libsvm-dp-core/src/main/java/org/shirdrn/document/preprocessing/component/AbstractOutputtingQuantizedData.java b/libsvm-dp-core/src/main/java/org/shirdrn/document/preprocessing/component/AbstractOutputtingQuantizedData.java
index 9086bd2..2438cc6 100644
--- a/libsvm-dp-core/src/main/java/org/shirdrn/document/preprocessing/component/AbstractOutputtingQuantizedData.java
+++ b/libsvm-dp-core/src/main/java/org/shirdrn/document/preprocessing/component/AbstractOutputtingQuantizedData.java
@@ -57,7 +57,7 @@ public void fire() {
 					StringBuffer line = new StringBuffer();
 					line.append(labelId).append(" ");
 					Entry> docsEntry = docsIter.next();
-					Map terms = docsEntry.getValue();
+					Map terms = sortMapByKey(docsEntry.getValue());
 					for(Entry termEntry : terms.entrySet()) {
 						String word = termEntry.getKey();
 						Integer wordId = getWordId(word);
@@ -83,6 +83,46 @@ public void fire() {
 		LOG.info("Finished: outputVectorFile=" + context.getFDMetadata().getOutputVectorFile());
 	}
 
+	/**
+	 * Returns a copy of {@code oriMap} ordered by the numeric id of each
+	 * word (ascending), so callers iterate terms in ascending id order.
+	 * <p>
+	 * Words for which {@link #getWordId(String)} returns {@code null} are
+	 * kept and sorted after every word that has an id. Ties fall back to
+	 * comparing the words themselves, so distinct keys are never merged.
+	 *
+	 * @param oriMap original map keyed by word; may be {@code null}
+	 * @return a new sorted map; empty, never {@code null}, when
+	 *         {@code oriMap} is {@code null} or empty
+	 */
+	private <V> Map<String, V> sortMapByKey(Map<String, V> oriMap) {
+		Map<String, V> sortedMap = new TreeMap<String, V>(
+				new Comparator<String>() {
+					@Override
+					public int compare(String key1, String key2) {
+						Integer id1 = getWordId(key1);
+						Integer id2 = getWordId(key2);
+						if (id1 == null || id2 == null) {
+							// Unknown words go last instead of failing on unboxing.
+							if (id1 != null) {
+								return -1;
+							}
+							if (id2 != null) {
+								return 1;
+							}
+							return key1.compareTo(key2);
+						}
+						// compareTo, not subtraction, so large ids cannot overflow.
+						int byId = id1.compareTo(id2);
+						return byId != 0 ? byId : key1.compareTo(key2);
+					}
+				});
+		if (oriMap != null) {
+			sortedMap.putAll(oriMap);
+		}
+		return sortedMap;
+	}
+
private Integer getWordId(String word) { TermFeatureable term = featuredTermsMap.get(word); return term == null ? null : term.getId();