使用TextMining和Apache POI获得Word文件内容，无须MS-Office ActiveX
/*
 * Created on 2005/07/18
 * Uses tm-extractors-0.4.jar
 */
package com.nova.colimas.common.doc;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.textmining.text.extraction.WordExtractor;

/**
 * Deal with ms-word 2000/xp files.
 *
 * <p>Extracts the text of a Word document through the TextMining
 * {@code WordExtractor}. When run from the command line, the extracted
 * text is written to {@code result.txt} in the current working directory.
 *
 * @author tyrone
 */
public class WordProcess extends DocProcess {

    /**
     * Extracts the text content of the Word file at the given path.
     *
     * @param filename path of the Word file to read
     * @return the extracted text, or {@code null} if the file could not be
     *         opened or the extraction failed
     */
    public static String run(String filename) {
        if (filename == null) {
            return null;
        }
        FileInputStream in = null;
        try {
            in = new FileInputStream(filename);
            WordExtractor extractor = new WordExtractor();
            return extractor.extractText(in);
        } catch (Exception ex) {
            // Callers rely on the null-on-failure contract, so report the
            // cause instead of propagating it.
            System.err.println("WordProcess: failed to extract text from "
                    + filename + ": " + ex);
            return null;
        } finally {
            // Always release the file handle, even when extraction fails.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close() itself fails.
                }
            }
        }
    }

    /**
     * Command-line entry point: extracts the text of the Word file named by
     * {@code args[0]} and writes it to {@code result.txt}.
     *
     * @param args command-line arguments; the first one is the Word file path
     */
    public static void main(String[] args) {
        if (args == null || args.length < 1) {
            System.out.println("Usage: WordProcess <word-file>");
            return;
        }

        // Extract first so that a failed extraction does not leave an empty
        // result.txt behind.
        String text = run(args[0]);
        if (text == null) {
            System.out.println("Could not extract text from " + args[0]);
            return;
        }

        FileOutputStream out = null;
        try {
            out = new FileOutputStream("result.txt");
            // Encoded with the platform default charset.
            out.write(text.getBytes());
            out.flush();
        } catch (Exception ex) {
            System.out.println(ex.toString());
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close() itself fails.
                }
            }
        }
    }
}