/* * Created on 2005/07/18 * 使用tm-extractors-0.4.jar */ package com.nova.colimas.common.doc; import java.io.FileInputStream; import java.io.FileOutputStream; import org.textmining.text.extraction.WordExtractor; /** * Deal with ms-word 2000/xp files. * @author tyrone * */ public class WordProcess extends DocProcess { public static String run(String 058817141456174.htm){ WordExtractor extractor=null; String text=null; try{ FileInputStream in = new FileInputStream (058817141456174.htm); extractor = new WordExtractor(); text=extractor.extractText(in); }catch(Exception ex){ //log return null; } return text; } public static void main(String[] args){ try{ FileOutputStream out=new FileOutputStream("result.txt"); out.write(WordProcess.run(args[0]).getBytes()); out.flush(); out.close(); }catch(Exception ex){ System.out.println(ex.toString()); } } }
|