001/** 002 * Logback: the reliable, generic, fast and flexible logging framework. 003 * Copyright (C) 1999-2015, QOS.ch. All rights reserved. 004 * 005 * This program and the accompanying materials are dual-licensed under 006 * either the terms of the Eclipse Public License v1.0 as published by 007 * the Eclipse Foundation 008 * 009 * or (per the licensee's choosing) 010 * 011 * under the terms of the GNU Lesser General Public License version 2.1 012 * as published by the Free Software Foundation. 013 */ 014package ch.qos.logback.classic.corpus; 015 016import java.io.BufferedReader; 017import java.io.FileReader; 018import java.io.IOException; 019import java.io.InputStream; 020import java.io.InputStreamReader; 021import java.net.URL; 022import java.util.ArrayList; 023import java.util.List; 024 025public class TextFileUtil { 026 027 public static List<String> toWords(URL url) throws IOException { 028 InputStream is = url.openStream(); 029 InputStreamReader reader = new InputStreamReader(is); 030 BufferedReader br = new BufferedReader(reader); 031 return toWords(br); 032 } 033 034 public static List<String> toWords(String filename) throws IOException { 035 FileReader fr = new FileReader(filename); 036 BufferedReader br = new BufferedReader(fr); 037 return toWords(br); 038 } 039 040 public static List<String> toWords(BufferedReader br) throws IOException { 041 042 // (\\d+)$ 043 // String regExp = "^(\\d+) "+ msg + " ([\\dabcdef-]+)$"; 044 // Pattern p = Pattern.compile(regExp); 045 String line; 046 047 List<String> wordList = new ArrayList<String>(); 048 049 while ((line = br.readLine()) != null) { 050 // line = line.replaceAll("\\p{Punct}+", " "); 051 String[] words = line.split("\\s"); 052 for (String word : words) { 053 wordList.add(word); 054 } 055 } 056 br.close(); 057 058 return wordList; 059 } 060}