/*
 * html2txt - Converts HTML documents to plain text
 *
 * Copyright (c) 2015, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.html2txt;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.apache.tools.ant.types.Resource;
import org.apache.tools.ant.types.ResourceCollection;
import org.apache.tools.ant.types.resources.FileResource;

import de.unkrig.commons.io.IoUtil;
import de.unkrig.commons.lang.protocol.ConsumerWhichThrows;
import de.unkrig.commons.nullanalysis.Nullable;

/**
 * Converts one or more HTML files into plain text files.
 * <p>
 *   The following attributes are mutually exclusive:
 * </p>
 * <dl>
 *   <dd>{@link #setTofile(File)}</dd>
 *   <dd>{@link #setTodir(File)}</dd>
 * </dl>
 */
public
class AntTask extends Task {

    private final Html2Txt html2txt = new Html2Txt();

    @Nullable private File                 file;
    @Nullable private File                 tofile;
    @Nullable private File                 todir;
    private final List<ResourceCollection> resourceCollections = new ArrayList<ResourceCollection>();

    // BEGIN CONFIGURATION SETTERS

    /**
     * The file that contains the HTML document to convert.
     */
    public void
    setFile(File value) { this.file = value; }

    /**
     * The file that contains the generated plain text. Only allowed if exactly <i>one</i> HTML document is
     * converted.
     */
    public void
    setTofile(File value) { this.tofile = value; }

    /**
     * The directory where the output file(s) will be created. The name of each output file will be that of the
     * input file, less the "{@code .html}" suffix (if any), plus a "{@code .txt}" extension.
     * <p>
     *   The default is the source resource's base directory (if any), otherwise the project's base directory.
     * </p>
     */
    public void
    setTodir(File value) { this.todir = value; }

    /**
     * The number of spaces that precede each line of output.
     *
     * @ant.defaultValue 0
     */
    public void
    setPageLeftMargin(int value) { this.html2txt.setPageLeftMarginWidth(value); }

    /**
     * The maximum length of output lines is "<var>pageWidth</var> - <var>rightMarginWidth</var>".
     * <p>
     *   The default value is "1" in order to avoid extra line wraps on certain terminals.
     * </p>
     *
     * @ant.defaultValue {@code 1}
     */
    public void
    setPageRightMargin(int value) { this.html2txt.setPageRightMarginWidth(value); }

    /**
     * The maximum length of output lines is "<var>pageWidth</var> - <var>rightMarginWidth</var>".
     * <p>
     *   Defaults to the value of the environment variable "$COLUMNS", or, if that is not set, to 80.
     * </p>
     *
     * @ant.defaultValue {@code $COLUMNS|80}
     */
    public void
    setPageWidth(int value) { this.html2txt.setPageWidth(value); }

    /**
     * Resources to convert.
     */
    public void
    addConfigured(ResourceCollection value) { this.resourceCollections.add(value); }

    // END CONFIGURATION SETTERS

    /**
     * The ANT task "execute" method.
     *
     * @throws BuildException The configuration is invalid, or the conversion failed
     * @see Task#execute
     */
    @Override public void
    execute() throws BuildException {
        try {
            this.execute2();
        } catch (BuildException be) {

            // Already a build failure (e.g. the "invalid combination" check); rethrow it as-is instead of nesting
            // it inside a second BuildException.
            throw be;
        } catch (Exception e) {
            throw new BuildException(e);
        }
    }

    /**
     * Collects the configured input resources (the "file" attribute first, then the elements of all resource
     * collections, in order) and converts each of them.
     * <ul>
     *   <li>No resources: Does nothing.</li>
     *   <li>Exactly one resource, "tofile" set, "todir" not set: Converts that resource into "tofile".</li>
     *   <li>"tofile" not set: Converts each resource into a file in the effective output directory.</li>
     *   <li>Otherwise: Throws a {@link BuildException}.</li>
     * </ul>
     */
    private void
    execute2() throws Exception {

        final File file   = this.file;
        final File tofile = this.tofile;

        List<Resource> resources = new ArrayList<Resource>();

        if (file != null) resources.add(new FileResource(file));

        for (ResourceCollection resourceCollection : this.resourceCollections) {

            // "Iterator<?>" plus an explicit cast compiles without "unchecked" warnings, regardless of whether
            // "ResourceCollection.iterator()" is declared raw or generic.
            for (Iterator<?> it = resourceCollection.iterator(); it.hasNext();) {
                resources.add((Resource) it.next());
            }
        }

        if (resources.isEmpty()) return;

        if (resources.size() == 1 && tofile != null && this.todir == null) {
            this.convertResource(resources.get(0), tofile);
        } else
        if (tofile == null) {
            for (Resource resource : resources) {

                // Output directory precedence: explicit "todir", then the file resource's base directory, then
                // the project's base directory.
                File todir = this.todir;

                // Check the actual type rather than "isFilesystemOnly()": That method does not guarantee that the
                // resource is a "FileResource", so an unconditional cast could fail.
                if (todir == null && resource instanceof FileResource) {
                    todir = ((FileResource) resource).getBaseDir();
                }
                if (todir == null) todir = this.getProject().getBaseDir();

                // "foo.html" => "foo.txt"; any other name just gets ".txt" appended.
                String outputFileName = resource.getName();
                if (outputFileName.endsWith(".html")) {
                    outputFileName = outputFileName.substring(0, outputFileName.length() - 5);
                }
                outputFileName += ".txt";

                this.convertResource(resource, new File(todir, outputFileName));
            }
        } else
        {
            throw new BuildException("Invalid combination of attributes and subelements");
        }
    }

    /**
     * Converts the HTML document provided by <var>in</var> into the plain text file <var>out</var>.
     * <p>
     *   A {@link FileResource} is converted directly from its file. The content of any other resource is handed to
     *   {@link IoUtil#asFile} (prefix "h2t", suffix ".html"), and the file passed to the delegate is converted.
     * </p>
     *
     * @param in  The resource that contains the HTML document
     * @param out The plain text file to create
     */
    private void
    convertResource(Resource in, final File out) throws Exception {

        // Only "FileResource"s can safely be cast; all other resources (including other file-backed ones) are
        // read through their input stream.
        if (in instanceof FileResource) {
            this.html2txt.html2txt(((FileResource) in).getFile(), out);
        } else
        {
            IoUtil.asFile(
                in.getInputStream(),                        // inputStream
                true,                                       // closeInputStream
                "h2t",                                      // prefix
                ".html",                                    // suffix
                null,                                       // directory
                new ConsumerWhichThrows<File, Exception>() { // delegate

                    @Override public void
                    consume(File temporaryFile) throws Exception {
                        AntTask.this.html2txt.html2txt(temporaryFile, out);
                    }
                }
            );
        }
    }
}