Friday, September 24, 2010

Convert HTML table to DataSet while downloading data from another site

Convert HTML table to DataSet while downloading data from another site

Imagine you are downloading the contents of a page from another site using:

// Download the raw HTML of the page at strURL.
// WebClient implements IDisposable, so it is wrapped in a using block to
// release its resources as soon as the download has finished.
string st;

using (System.Net.WebClient client = new System.Net.WebClient())
{
    st = client.DownloadString(strURL);
}

If the downloaded page contains an HTML table and you want to load that table into a DataSet for further processing, the following function will help. It extracts the tables using regular expressions.

// Options shared by every pattern below:
//   IgnoreCase - HTML tag names are case-insensitive (<TABLE> == <table>).
//   Singleline - lets '.' match newlines, so elements that span several
//                lines (the normal case for downloaded pages) are matched.
private const RegexOptions HtmlRegexOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

// The \b after each tag name stops "<th" from also matching "<thead", etc.
private static readonly Regex TablePattern = new Regex(@"<table\b[^>]*>(.*?)</table\s*>", HtmlRegexOptions);
private static readonly Regex HeaderPattern = new Regex(@"<th\b[^>]*>(.*?)</th\s*>", HtmlRegexOptions);
private static readonly Regex RowPattern = new Regex(@"<tr\b[^>]*>(.*?)</tr\s*>", HtmlRegexOptions);
private static readonly Regex CellPattern = new Regex(@"<td\b[^>]*>(.*?)</td\s*>", HtmlRegexOptions);

/// <summary>
/// Extracts every &lt;table&gt; element found in an HTML string and returns
/// them as the tables of a new <see cref="DataSet"/>, in document order.
/// </summary>
/// <param name="HTML">The HTML markup to scan. May be null or empty.</param>
/// <returns>
/// A DataSet holding one DataTable per matched table element. Column names
/// come from the table's &lt;th&gt; cells when present, otherwise they are
/// "Column 1", "Column 2", ... Cell values are the raw inner markup of each
/// &lt;td&gt; element, stored as strings. The DataSet is empty when
/// <paramref name="HTML"/> is null, empty, or contains no tables.
/// </returns>
/// <remarks>
/// This is a regular-expression scan, not an HTML parser: a table nested
/// inside another table ends the outer match at the first closing tag.
/// </remarks>
private DataSet ConvertHTMLTablesToDataSet(String HTML)
{
    DataSet ds = new DataSet();

    // Nothing to parse: hand back an empty DataSet instead of throwing.
    if (string.IsNullOrEmpty(HTML))
    {
        return ds;
    }

    foreach (Match tableMatch in TablePattern.Matches(HTML))
    {
        ds.Tables.Add(BuildTableFromHtml(tableMatch.Groups[1].Value));
    }

    return ds;
}

// Builds one DataTable from the inner markup of a single <table> element.
private static DataTable BuildTableFromHtml(string tableHtml)
{
    DataTable dt = new DataTable();

    // Use the header cells (if any) as column names.
    foreach (Match header in HeaderPattern.Matches(tableHtml))
    {
        AddUniqueColumn(dt, header.Groups[1].Value);
    }

    foreach (Match row in RowPattern.Matches(tableHtml))
    {
        MatchCollection cells = CellPattern.Matches(row.Value);

        // A row made up only of <th> cells is a header row, not data.
        if (cells.Count == 0 && HeaderPattern.IsMatch(row.Value))
        {
            continue;
        }

        // Header-less tables get default column names here; rows wider than
        // the current column set extend it rather than overflowing the index.
        while (dt.Columns.Count < cells.Count)
        {
            AddUniqueColumn(dt, "Column " + (dt.Columns.Count + 1));
        }

        DataRow dr = dt.NewRow();

        for (int column = 0; column < cells.Count; column++)
        {
            dr[column] = cells[column].Groups[1].Value;
        }

        dt.Rows.Add(dr);
    }

    return dt;
}

// Adds a column to the table, substituting a default name for an empty one and
// appending " (2)", " (3)", ... when the name is already taken, so repeated
// header text cannot raise a DuplicateNameException.
private static void AddUniqueColumn(DataTable dt, string proposedName)
{
    string baseName = proposedName;

    if (string.IsNullOrEmpty(baseName))
    {
        baseName = "Column " + (dt.Columns.Count + 1);
    }

    string name = baseName;
    int suffix = 2;

    // DataColumnCollection.Contains compares names case-insensitively, which
    // matches the rule DataColumnCollection.Add uses to detect duplicates.
    while (dt.Columns.Contains(name))
    {
        name = baseName + " (" + suffix + ")";
        suffix++;
    }

    dt.Columns.Add(name);
}



  1. Great example. Thanks. Was able to lift it with no changes :)

  2. This comment has been removed by the author.

  3. Excellent. Working perfect...