// Origin: Wikipedia user page "Utente:Iron Bishop/Java html2wiki-tables".
/******************************************************************************  
 *
 *               Java HTML to Wiki Converter - tables
 *               ------------------------------------
 *
 *  $ v0.6,  last modified: 2004/08/15 $
 *  converts the HTML table tags into their wiki equivalents
 *
 *  Copyright (C) 2004 Flavio "Iron Bishop" Pastore
 *                     http://ironbishop.altervista.org
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it  under the terms of the GNU  General Public License
 *  as published by the Free Software Foundation;  either version 
 *  2 of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY  or  FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program;  if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 ******************************************************************************
 *
 *  Based on:
 *
 *               HTML to Wiki Converter - tables
 *               -------------------------------
 *
 *  $Id: html2wiki-tables.inc,v 1.3 2004/06/18 21:23:05 bmanolov Exp $
 *
 *  converts the HTML table tags into their wiki equivalents,
 *  developed by Magnus Manske and used in Wikipedia
 *
 *  Copyright (C) 2004 Borislav Manolov
 *
 *  Author: Borislav Manolov <b.manolov at web.de>
 *          http://purl.oclc.org/NET/manolov/
 *
 *****************************************************************************/

import java.io.*;
import java.util.*;
import java.util.regex.*;

/**
 * Command-line tool and helper functions that rewrite HTML table markup
 * ({@code <table>}, {@code <caption>}, {@code <tr>}, {@code <th>},
 * {@code <td>}) as wiki table markup.
 *
 * <p>Usage: {@code java html2wiki_tables HTML_input_file wikicode_destination_file}
 */
class html2wiki_tables {

	/**
	 * When {@code true}, wiki table markup already present in the input is
	 * wrapped in {@code <nowiki>} before the conversion runs.
	 */
	static final boolean ESCAPE_EXT_WIKICODE = false;

	/** Matches one HTML tag, from its {@code <} up to the first {@code >}. */
	private static final Pattern HTML_TAG = Pattern.compile("</?[A-Za-z][^>]*>");

	private static final String TABLE_OPEN = "<table";
	private static final String TABLE_CLOSE = "</table>";
	private static final String CELL_OPEN = "<td";

	/**
	 * Entry point: converts the file named by the first argument into the
	 * file named by the second one, or prints the usage text when the number
	 * of arguments is not exactly two.
	 *
	 * @param args command-line arguments (input path, output path)
	 */
	public static void main(String[] args) {
		if (args.length == 2) {
			io(args[0], args[1]);
		} else {
			blablah();
		}
	}

	/**
	 * Converts the HTML tables contained in {@code str} into wiki tables.
	 * Text that is not table markup is kept, apart from the whitespace
	 * clean-up applied to the whole string at the end.
	 *
	 * @param str HTML text, possibly containing several tables
	 * @return the text with table tags replaced by their wiki equivalents
	 */
	public static String convert(String str) {
		str = str.replace("\r", "");

		// escape existent wiki markup
		if (ESCAPE_EXT_WIKICODE) {
			str = str.replaceAll("\\{\\||\\|\\}|\\|\\||!\\+|\\|\\-|\\|\\+|\\|",
								 "<nowiki>$0</nowiki>");
		}

		// A newline inside a tag would end up inside the captured attributes
		// below and split the generated wiki line, so remove those first.
		str = strip_newlines(str);

		// closing tags: only the table end has a wiki equivalent
		str = str.replace("</table>", "\n|}");
		str = str.replace("</caption>", "");
		str = str.replace("</tr>", "");
		str = str.replace("</th>", "");
		str = str.replace("</td>", "");

		// opening tags; $1 carries the HTML attributes over to the wiki line
		str = str.replaceAll("<table([^>]*)>", "\n{| $1 |");
		str = str.replace("<caption>", "\n|+");
		str = str.replaceAll("<tr([^>]*)>", "\n|- $1");
		str = str.replaceAll("<th([^>]*)>", "\n! $1 |");
		str = str.replaceAll("<td([^>]*)>", "\n| $1 |");

		str = str.replaceAll(" *\n *", "\n");	// spaces around line breaks
		str = str.replaceAll("\n+", "\n");		// runs of line breaks

		// clean-up: the templates above leave doubled spaces and, for tags
		// without attributes, an empty attribute section ("! |", "| |")
		str = str.replace("  ", " ");
		str = str.replace("! |", "!");
		str = str.replace("| |", "|");

		return str;
	}

	/**
	 * Reads the HTML file {@code in}, converts its tables and writes the
	 * result, preceded by two HTML comment lines, to the file {@code out}.
	 * The input is read completely before the output file is opened.
	 *
	 * <p>I/O failures are not propagated: the exception is printed to
	 * standard error and the method returns. Both files are closed in every
	 * case. Files are read and written with the platform default charset.
	 *
	 * @param in  path of the HTML input file
	 * @param out path of the wikicode destination file
	 */
	public static void io(String in, String out) {
		try {
			StringBuilder html = new StringBuilder();
			BufferedReader reader = new BufferedReader(new FileReader(in));
			try {
				String line;
				// readLine() drops the line terminator; re-add a plain "\n"
				while ((line = reader.readLine()) != null) {
					html.append(line).append('\n');
				}
			} finally {
				reader.close();
			}

			String table = convert(html.toString());

			FileWriter writer = new FileWriter(out);
			try {
				writer.write("<!-- Made with Java HTML to Wiki Converter - tables  -->\n");
				writer.write("<!-- Copyright (C) 2004 Flavio \"Iron Bishop\" Pastore -->\n");
				writer.write(table);
			} finally {
				writer.close();
			}
		} catch (IOException e) {
			System.err.println(e.toString());
		}
	}

	/**
	 * Replaces, inside every HTML table of {@code s}, the newlines that
	 * occur within tags by a single space. Text outside tables is returned
	 * untouched.
	 *
	 * <p>A table starts at {@code <table} and ends after the next
	 * {@code </table>}; when the closing tag is missing, the table extends to
	 * the end of the input. Tables without any {@code <td} cell are handled
	 * like any other table.
	 *
	 * @param s HTML text
	 * @return {@code s} with the newlines inside table tags replaced
	 */
	public static String strip_newlines(String s) {
		StringBuilder out = new StringBuilder(s.length());
		int pos = 0;

		while (true) {
			int start = s.indexOf(TABLE_OPEN, pos);
			if (start == -1) {
				break;
			}
			// Look for the end only after the start, so that a stray
			// "</table>" earlier in the text cannot yield a negative range.
			int close = s.indexOf(TABLE_CLOSE, start);
			int end = (close == -1) ? s.length() : close + TABLE_CLOSE.length();

			out.append(s, pos, start);
			out.append(flattenTable(s.substring(start, end)));
			pos = end;
		}

		out.append(s, pos, s.length());
		return out.toString();
	}

	/** Replaces the newlines inside the tags of a single table by spaces. */
	private static String flattenTable(String table) {
		StringBuilder out = new StringBuilder(table.length());
		int pos = 0;

		// Kept from the previous implementation: everything in front of the
		// first cell (tags and the text between them) is put on one line.
		int firstCell = table.indexOf(CELL_OPEN);
		if (firstCell != -1) {
			out.append(table.substring(0, firstCell).replace('\n', ' '));
			pos = firstCell;
		}

		// From there on only the tags themselves are changed, so cell
		// content keeps its line breaks.
		Matcher tag = HTML_TAG.matcher(table);
		tag.region(pos, table.length());
		while (tag.find()) {
			out.append(table, pos, tag.start());
			out.append(tag.group().replace('\n', ' '));
			pos = tag.end();
		}

		out.append(table, pos, table.length());
		return out.toString();
	}

	/** Prints the program banner and the usage line to standard output. */
	public static void blablah() {
		System.out.println("Java HTML to Wiki Converter - tables");
		System.out.println("Author: Flavio \"Iron Bishop\" Pastore");
		System.out.println("        http://ironbishop.altervista.org\n");
		System.out.println("This program is free software, read LICENSE.txt for further informations.\n");
		System.out.println("USAGE: java html2wiki_tables  HTML_input_file  wikicode_destination_file");
	}

}