Parse HTML in Android

How can you parse HTML in android?

You can use XmlPullParser for parsing XML.

For example, see http://developer.android.com/reference/org/xmlpull/v1/XmlPullParser.html

How to Parse Html tags From Website in Android

You can do this using jsoup (http://jsoup.org/).

Download the jsoup.jar file and add it to your project's libs folder. Then right-click Android Dependencies >> Build Path >> Configure Build Path >> Add JARs >> libs, and choose the jsoup.jar file you downloaded.

 try {
     // Query-string values are kept exactly as in the original link (single quotes included).
     String website = "http://www.mcpss.com/?PN='News2'&SubP='DNewsStory'&gn=&NewsID=47318&ShowNav=&StoryGroup=Current";

     // Fetches the page over the network and parses it into a DOM.
     // NOTE(review): this is blocking network I/O - on Android it presumably has to run
     // off the main thread (e.g. inside an AsyncTask); confirm where this snippet is called.
     Document doc = Jsoup.connect(website).get();

     // Collect every element carrying the CSS class "header" and take their combined text.
     Elements el = doc.getElementsByClass("header");
     String text = el.text();
 } catch (Exception e) {
     // A failed download or parse is an expected runtime condition, so log it as an error.
     // Log.wtf is reserved for "should never happen" states and may terminate the process
     // depending on system configuration.
     Log.e("name of activity", "error message to show in log", e);
 }

Android: How do I parse an HTML page?

Look at the code below and let me know if you have any questions. This link may also help:

http://wptrafficanalyzer.in/blog/android-lazy-loading-images-and-text-in-listview-from-http-json-data/

public class MainActivity extends Activity {

ListView mListView;

/**
 * Inflates the main layout, starts the background download of the countries
 * JSON, and caches the ListView that will later display the result.
 */
@Override
public void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);

    // Placeholder endpoint that serves the JSON data; replace with a real URL.
    final String countriesUrl = "ur url/countries";

    // Hand the URL to a non-UI-thread task so the download does not block this method.
    new DownloadTask().execute(countriesUrl);

    // Keep a reference to the list view declared in activity_main.
    mListView = (ListView) findViewById(R.id.lv_countries);
}

/**
 * Downloads the body of the given URL and returns it as a single string.
 *
 * Lines are concatenated without their line terminators. Any failure while
 * connecting or reading is logged and results in an empty string (best-effort,
 * as in the original snippet); only a failure while closing the stream
 * propagates to the caller.
 *
 * @param strUrl the URL to fetch
 * @return the downloaded text, or "" when the download failed
 * @throws IOException if closing the input stream fails
 */
private String downloadUrl(String strUrl) throws IOException {
    String data = "";
    HttpURLConnection urlConnection = null;
    InputStream iStream = null;
    try {
        URL url = new URL(strUrl);

        // Creating an http connection to communicate with url
        urlConnection = (HttpURLConnection) url.openConnection();
        urlConnection.connect();

        // Reading data from url; the charset is stated explicitly instead of
        // relying on the platform default.
        iStream = urlConnection.getInputStream();
        BufferedReader br = new BufferedReader(new InputStreamReader(iStream, "UTF-8"));

        // Local, single-threaded accumulation: StringBuilder avoids StringBuffer's locking.
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }

        data = sb.toString();
    } catch (Exception e) {
        Log.d("Exception while downloading url", e.toString());
    } finally {
        try {
            // iStream is still null when the failure happened before the stream was
            // opened (bad URL, refused connection); closing it unguarded would raise
            // a NullPointerException from this finally block.
            if (iStream != null) {
                iStream.close();
            }
        } finally {
            // Release the connection even if closing the stream failed.
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
        }
    }

    return data;
}

/**
 * AsyncTask that downloads the JSON text for a URL in the background and then
 * hands it to a ListViewLoaderTask for parsing.
 */
private class DownloadTask extends AsyncTask<String, Integer, String> {

    /** Last downloaded text; stays null when the download threw. */
    String data = null;

    /**
     * Downloads the text at {@code url[0]}.
     *
     * @return the downloaded text, or null if an exception was raised
     */
    @Override
    protected String doInBackground(String... url) {
        try {
            data = downloadUrl(url[0]);
        } catch (Exception e) {
            Log.d("Background Task", e.toString());
        }
        return data;
    }

    /** Passes the downloaded JSON text on to a second task that parses it. */
    @Override
    protected void onPostExecute(String result) {
        // The JSON parsing itself also runs in a non-UI thread.
        new ListViewLoaderTask().execute(result);
    }
}

/** AsyncTask to parse json data and load ListView */
private class ListViewLoaderTask extends AsyncTask<String, Void, SimpleAdapter>{

JSONObject jObject;
// Doing the parsing of xml data in a non-ui thread
@Override
protected SimpleAdapter doInBackground(String... strJson) {
try{
jObject = new JSONObject(strJson[0]);
CountryJSONParser countryJsonParser = new CountryJSONParser();
countryJsonParser.parse(jObject);
}catch(Exception e){
Log.d("JSON Exception1",e.toString());
}

// Instantiating json parser class
CountryJSONParser countryJsonParser = new CountryJSONParser();

// A list object to store the parsed countries list
List<HashMap<String, Object>> countries = null;

try{
// Getting the parsed data as a List construct
countries = countryJsonParser.parse(jObject);
}catch(Exception e){
Log.d("Exception",e.toString());
}

// Keys used in Hashmap
String[] from = { "country"

// Ids of views in listview_layout
int[] to = { R.id.tv_country};

// Instantiating an adapter to store each items
// R.layout.listview_layout defines the layout of each item
SimpleAdapter adapter = new SimpleAdapter(getBaseContext(), countries, R.layout.lv_layout, from, to);

return adapter;
}

/** Invoked by the Android on "doInBackground" is executed */
@Override
protected void onPostExecute(SimpleAdapter adapter) {

// Setting adapter for the listview
mListView.setAdapter(adapter);

for(int i=0;i<adapter.getCount();i++){
HashMap<String, Object> hm = (HashMap<String, Object>) adapter.getItem(i);
HashMap<String, Object> hmDownload = new HashMap<String, Object>();
hm.put("flag_path",imgUrl);
hm.put("position", i);


}
}
}

/**
 * Writes a downloaded image path into the matching ListView row and refreshes the list.
 *
 * NOTE(review): no enclosing task class declaration is visible for this override
 * in the snippet.
 *
 * @param result map read for the keys "flag" (image path) and "position" (row index)
 */
@Override
protected void onPostExecute(HashMap<String, Object> result) {
    // Where the image was stored, and which row it belongs to.
    String flagPath = (String) result.get("flag");
    int rowIndex = (Integer) result.get("position");

    // The adapter currently attached to the list view.
    SimpleAdapter listAdapter = (SimpleAdapter) mListView.getAdapter();

    // Replace the row's "flag" entry with the downloaded path.
    HashMap<String, Object> row = (HashMap<String, Object>) listAdapter.getItem(rowIndex);
    row.put("flag", flagPath);

    // Tell the list view that its data changed so the row is redrawn.
    listAdapter.notifyDataSetChanged();
}
}

/**
 * Inflates the R.menu.activity_main menu resource into the given menu.
 *
 * @param menu the menu to populate
 * @return always true
 */
@Override
public boolean onCreateOptionsMenu(Menu menu) {
getMenuInflater().inflate(R.menu.activity_main, menu);
return true;
}
}

Android use JSoup parse HTML convert to String

It is useful to first check, if JSoup can parse the content: http://try.jsoup.org/~8W0oCmiiYnFL01nUM6HDbQ9wwTA

You are using Jsoup.parse which expects html stored in a string. If you want to use parse to retrieve the html source you have to pass a URL and a timeout:

// Jsoup.parse(URL, int) downloads the page itself before parsing it; the second
// argument is the timeout (3000 here - milliseconds per the jsoup API docs).
URL pageUrl = new URL("http://servertrj.com/news/index/208");
Document doc = Jsoup.parse(pageUrl, 3000);

Most examples use the connect(...).get() syntax to pull the HTML source; compare your code to this simple example:

// Fetch the page through the connection API, sending a desktop-browser User-Agent string.
String url = "http://servertrj.com/news/index/208";
String userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36";
Document doc = Jsoup.connect(url).userAgent(userAgent).get();

// Select every element with CSS class "margin-box", then print the match count
// followed by the matched markup.
Elements marginBoxes = doc.select(".margin-box");
System.out.println(marginBoxes.size() + "\n" + marginBoxes);

Output:

1
<div class="margin-box">
<p style="margin: 0px 0px 15px; padding: 0px; border: 0px; line-height: 30px; font-family: &quot;Microsoft YaHei&quot;, SimSun, Verdana, Arial; color: rgb(0, 0, 0); font-size: 15px;">[... truncated because of spam detection, but same as try.jsoup]</p>
</div>


Related Topics



Leave a reply



Submit