Convert HTML table to DataSet while downloading data from another site
Imagine a situation where you are downloading data from another site using:
System.Net.WebClient client = new System.Net.WebClient();
string st = client.DownloadString(strURL);
or obtaining it some other way, and you find an HTML table in it that you want to add to a DataSet for further processing. The following function will help you do that using regular expressions.
/// <summary>
/// Converts every &lt;table&gt; element found in an HTML string into a
/// <see cref="DataTable"/> and returns them all in one <see cref="DataSet"/>.
/// </summary>
/// <param name="HTML">The HTML markup to scan. May be null or empty.</param>
/// <returns>
/// A DataSet holding one DataTable per matched table, in document order.
/// The DataSet is empty when <paramref name="HTML"/> is null/empty or contains no tables.
/// </returns>
/// <remarks>
/// Parsing is regex based, so nested tables are not supported: the lazy table
/// pattern stops at the first closing &lt;/table&gt; tag it meets.
/// Cell and header values are the raw inner markup; they are not HTML-decoded.
/// </remarks>
private DataSet ConvertHTMLTablesToDataSet(String HTML)
{
    DataSet ds = new DataSet();

    // Nothing to parse; hand back an empty DataSet instead of failing inside Regex.
    if (string.IsNullOrEmpty(HTML))
    {
        return ds;
    }

    // Singleline lets '.' span line breaks, which real-world tables almost always contain.
    const RegexOptions Options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    const string TableExpression = "<table[^>]*>(.*?)</table>";
    // \b keeps <thead> from being mistaken for a <th> header cell.
    const string HeaderExpression = @"<th\b[^>]*>(.*?)</th>";
    const string RowExpression = "<tr[^>]*>(.*?)</tr>";
    const string ColumnExpression = "<td[^>]*>(.*?)</td>";

    foreach (Match Table in Regex.Matches(HTML, TableExpression, Options))
    {
        DataTable dt = new DataTable();

        MatchCollection Rows = Regex.Matches(Table.Value, RowExpression, Options);
        MatchCollection Headers = Regex.Matches(Table.Value, HeaderExpression, Options);

        // Headers exist only if at least one real <th> cell was matched.
        bool HeadersExist = Headers.Count > 0;

        if (HeadersExist)
        {
            // Name the columns after the header cells.
            foreach (Match Header in Headers)
            {
                AddUniqueColumn(dt, Header.Groups[1].Value);
            }
        }
        else if (Rows.Count > 0)
        {
            // No headers: size the table from the first row and use default names.
            int firstRowCellCount = Regex.Matches(Rows[0].Value, ColumnExpression, Options).Count;
            for (int iColumns = 1; iColumns <= firstRowCellCount; iColumns++)
            {
                AddUniqueColumn(dt, "Column " + iColumns);
            }
        }

        int iCurrentRow = 0;
        foreach (Match Row in Rows)
        {
            // When headers exist, the first row is treated as the header row and skipped.
            bool isHeaderRow = HeadersExist && iCurrentRow == 0;
            iCurrentRow += 1;
            if (isHeaderRow)
            {
                continue;
            }

            MatchCollection Cells = Regex.Matches(Row.Value, ColumnExpression, Options);

            // A ragged row may carry more cells than the table has columns;
            // widen the table rather than losing the row (or the whole table).
            while (dt.Columns.Count < Cells.Count)
            {
                AddUniqueColumn(dt, "Column " + (dt.Columns.Count + 1));
            }

            DataRow dr = dt.NewRow();
            for (int iCurrentColumn = 0; iCurrentColumn < Cells.Count; iCurrentColumn++)
            {
                dr[iCurrentColumn] = Cells[iCurrentColumn].Groups[1].Value;
            }
            dt.Rows.Add(dr);
        }

        ds.Tables.Add(dt);
    }

    return ds;
}

/// <summary>
/// Adds a column to <paramref name="table"/>, appending " (2)", " (3)", ... to
/// <paramref name="name"/> when a column with that name is already present, so that
/// repeated header text does not abort the conversion.
/// </summary>
private static void AddUniqueColumn(DataTable table, string name)
{
    string candidate = name;
    int suffix = 2;

    // An empty name is passed through unchanged and left to DataTable's own naming.
    while (candidate.Length > 0 && table.Columns.Contains(candidate))
    {
        candidate = name + " (" + suffix + ")";
        suffix += 1;
    }

    table.Columns.Add(candidate);
}
Great example. Thanks. Was able to lift it with no changes :)
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteExcellent. Working perfect...
ReplyDelete