Академический Документы
Профессиональный Документы
Культура Документы
--------------------------------------------------------------------------------
Description: This project extracts the plain text of any web page, for example
pages from Amazon, Google, or Yahoo.
Code :
import java.io.File;
import java.io.FileInputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.IOException;
/**
* Convert text/html into text/plain
*
* Author: Omindra Kumar Rana
* Email: rana_omindra@yahoo.co.in
*
* @version 1.0 $Date: May 10, 2005 $
*/
try
{
String text = null;
int c = input.read();
c = input.read();
}
}
catch (Exception e)
{
input.close();
throw e;
}
result.append('<');
while (level > 0)
{
int c = r.read();
if (c == -1) break; // EOF
result.append((char)c);
if (c == '<') level++; else if (c == '>') level--;
}
return result.toString();
}
while (Character.isLetter((char)c))
{
result.append((char)c);
r.mark(1);
c = r.read();
}
if (c == ';') result.append(';');
else r.reset();
return result.toString();
}
if (isTag(t,"body"))
{ in_body = true; body_found = true; }
else if (isTag(t,"/body"))
{ in_body = false; result = "<BR>; }
else if (isTag(t,"center"))
{ result = "<BR>; center = true; }
else if (isTag(t,"/center"))
{ result = "<BR>; center = false; }
else if (isTag(t,"pre"))
{ result = "<BR>; pre = true; }
else if (isTag(t,"/pre"))
{ result = "<BR>; pre = false; }
else if (isTag(t,"p"))
result = "
<BR>;
else if (isTag(t,"br"))
result = "<BR>;
else if (isTag(t,"h1") || isTag(t,"h2") ||
isTag(t,"h3") ||isTag(t,"h4") || isTag(t,"h5") || isTag(t,"h6") ||
isTag(t,"h7"))
result = "<BR>;
else if (isTag(t,"/h1") || isTag(t,"/h2") ||
isTag(t,"/h3") ||isTag(t,"/h4") || isTag(t,"/h5") || isTag(t,"/h6") ||
isTag(t,"/h7"))
result = "<BR>;
else if (isTag(t,"/dl"))
result = "<BR>;
else if (isTag(t,"dd"))
result = "
* ";
else if (isTag(t,"dt"))
result = " ";
else if (isTag(t,"li"))
result = "
* ";
else if (isTag(t,"/ul"))
result = "<BR>;
else if (isTag(t,"/ol"))
result = "<BR>;
else if (isTag(t,"hr"))
result = "_________________________________________
<BR>;
else if (isTag(t,"table"))
result = "<BR>;
else if (isTag(t,"/table"))
result = "<BR>;
else if (isTag(t,"form"))
result = "<BR>;
else if (isTag(t,"/form"))
result = "<BR>;
else if (isTag(t,"b"))
result = "*";
else if (isTag(t,"/b"))
result = "*";
else if (isTag(t,"i"))
result = """;
else if (isTag(t,"/i"))
result = """;
else if (isTag(t,"img"))
{
int idx = t.indexOf("alt="");
if (idx != -1)
{
idx += 5;
int idx2 = t.indexOf(""",idx);
result = t.substring(idx,idx2);
}
}
else if (isTag(t,"a"))
{
int idx = t.indexOf("href="");
if (idx != -1)
{
idx += 6;
int idx2 = t.indexOf(""",idx);
href = t.substring(idx,idx2);
}
else
{
href = "";
}
}
else if (isTag(t,"/a"))
{
if (href.length() > 0)
{
result = " [ " + href + " ]";
href = "";
}
}
return result;
}
try
{
File file;
if (argv[0] != null) file = new File(argv[0]);
else file = new File("html_test_file.html");
fis = new FileInputStream(file);
byte buf[] = new byte[fis.available()];
// fis.available() above returns only an estimate of the bytes that can be read without blocking
fis.read(buf);
fis.close();
fis = null;
s = new String(buf);
HTML2Text h = new HTML2Text();
System.out.println(h.convert(s));
}
catch (Exception e)
{
if (fis != null) fis.close();
throw e;
}
}
}