001
002/*
003 * html2txt - Converts HTML documents to plain text
004 *
005 * Copyright (c) 2015, Arno Unkrig
006 * All rights reserved.
007 *
008 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
009 * following conditions are met:
010 *
011 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
012 *       following disclaimer.
013 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
014 *       following disclaimer in the documentation and/or other materials provided with the distribution.
015 *    3. The name of the author may not be used to endorse or promote products derived from this software without
016 *       specific prior written permission.
017 *
018 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
020 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
021 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
022 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
023 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
024 * POSSIBILITY OF SUCH DAMAGE.
025 */
026
027package de.unkrig.html2txt;
028
029import java.io.File;
030import java.util.ArrayList;
031import java.util.Iterator;
032import java.util.List;
033
034import org.apache.tools.ant.BuildException;
035import org.apache.tools.ant.Task;
036import org.apache.tools.ant.types.Resource;
037import org.apache.tools.ant.types.ResourceCollection;
038import org.apache.tools.ant.types.resources.FileResource;
039
040import de.unkrig.commons.io.IoUtil;
041import de.unkrig.commons.lang.protocol.ConsumerWhichThrows;
042import de.unkrig.commons.nullanalysis.Nullable;
043
044/**
045 * Converts one or more HTML files into plain text files.
046 * <p>
047 *   The following attributes are mutually exclusive:
048 * </p>
049 * <dl>
050 *   <dd>{@link #setTofile(File)}</dd>
051 *   <dd>{@link #setTodir(File)}</dd>
052 * </dl>
053 */
054public
055class AntTask extends Task {
056
057    private final Html2Txt html2txt = new Html2Txt();
058
059    @Nullable private File                 file;
060    @Nullable private File                 tofile;
061    @Nullable private File                 todir;
062    private final List<ResourceCollection> resourceCollections = new ArrayList<ResourceCollection>();
063
064    // BEGIN CONFIGURATION SETTERS
065
066    /**
067     * The file that contains the HTML document to convert.
068     */
069    public void
070    setFile(File value) { this.file = value; }
071
072    /**
073     * The file that contains generated plain text. Only allowed if exactly <i>one</i> HTML is converted.
074     */
075    public void
076    setTofile(File value) { this.tofile = value; }
077
078    /**
079     * The directory where the output file(s) will be created. The name of each output file(s) will be that of the
080     * input file, less the "{@code .html}" suffix (if any), plus an "{@code .txt}" extension.
081     * <p>
082     *   The default is the source resource's base directory (if any), otherwise the project's base directory.
083     * </p>
084     */
085    public void
086    setTodir(File value) { this.todir = value; }
087
088    /**
089     * The number of spaces that preceeds each line of output.
090     *
091     * @ant.defaultValue 0
092     */
093    public void
094    setPageLeftMargin(int value) { this.html2txt.setPageLeftMarginWidth(value); }
095
096    /**
097     * The maximum length of output lines is "<var>pageWidth</var> - <var>rightMarginWidth</var>".
098     * <p>
099     *   The default value is @"1" in order to avoid extra line wraps on certain terminals.
100     * </p>
101     *
102     * @ant.defaultValue {@code 1}
103     */
104    public void
105    setPageRightMargin(int value) { this.html2txt.setPageRightMarginWidth(value); }
106
107    /**
108     * The maximum length of output lines is "<var>pageWidth</var> - <var>rightMarginWidth</var>".
109     * <p>
110     *   Defaults to the value of the environment variable "$COLUMNS", or, if that is not set, to 80.
111     *  </p>
112     *
113     * @ant.defaultValue {@code $COLUMNS|80}
114     */
115    public void
116    setPageWidth(int value) { this.html2txt.setPageWidth(value); }
117
118    /**
119     * Resources to convert.
120     */
121    public void
122    addConfigured(ResourceCollection value) { this.resourceCollections.add(value); }
123
124    // END CONFIGURATION SETTERS
125
126    /**
127     * The ANT task "execute" method.
128     *
129     * @see Task#execute
130     */
131    @Override public void
132    execute() throws BuildException {
133        try {
134            this.execute2();
135        } catch (Exception e) {
136            throw new BuildException(e);
137        }
138    }
139
140    private void
141    execute2() throws Exception {
142
143        final File                     file                = this.file;
144        final File                     tofile              = this.tofile;
145        final List<ResourceCollection> resourceCollections = this.resourceCollections;
146
147        List<Resource> resources = new ArrayList<Resource>();
148
149        if (file != null) resources.add(new FileResource(file));
150
151        for (ResourceCollection resourceCollection : resourceCollections) {
152
153            // Process each resource of each collection.
154            for (
155                @SuppressWarnings("unchecked") Iterator<Resource> it = resourceCollection.iterator();
156                it.hasNext();
157            ) resources.add(it.next());
158        }
159
160        if (resources.isEmpty()) return;
161
162        if (resources.size() == 1 && tofile != null && this.todir == null) {
163            this.convertResource(resources.get(0), tofile);
164        } else
165        if (tofile == null) {
166            for (Resource resource : resources) {
167                File todir = this.todir;
168                if (todir == null && resource.isFilesystemOnly()) todir = ((FileResource) resource).getBaseDir();
169                if (todir == null) todir = this.getProject().getBaseDir();
170                String outputFileName = resource.getName();
171                if (outputFileName.endsWith(".html")) {
172                    outputFileName = outputFileName.substring(0, outputFileName.length() - 5);
173                }
174                outputFileName += ".txt";
175                this.convertResource(resource, new File(todir, outputFileName));
176            }
177        } else
178        {
179            throw new BuildException("Invalid combination of attributes and subelements");
180        }
181    }
182
183    private void
184    convertResource(Resource in, final File out) throws Exception {
185
186        if (in.isFilesystemOnly()) {
187            this.html2txt.html2txt(((FileResource) in).getFile(), out);
188        } else
189        {
190            IoUtil.asFile(
191                in.getInputStream(),                         // inputStream
192                true,                                        // closeInputStream
193                "h2t",                                       // prefix
194                ".html",                                     // suffix
195                null,                                        // directory
196                new ConsumerWhichThrows<File, Exception>() { // delegate
197
198                    @Override public void
199                    consume(File temporaryFile) throws Exception {
200                        AntTask.this.html2txt.html2txt(temporaryFile, out);
201                    }
202                }
203            );
204        }
205    }
206}
207