From Excel to Datatable in C# with Open Xml

From Excel to DataTable in C# with Open XML

I think this should do what you're asking. The other function is there just to deal with if you have shared strings, which I assume you do in your column headers. Not sure this is perfect, but I hope it helps.

static void Main(string[] args)
{
DataTable dt = new DataTable();

using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"..\..\example.xlsx", false))
{

WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();

foreach (Cell cell in rows.ElementAt(0))
{
dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}

foreach (Row row in rows) //this will also include your header row...
{
DataRow tempRow = dt.NewRow();

for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
}

dt.Rows.Add(tempRow);
}

}
dt.Rows.RemoveAt(0); //...so i'm taking it out here.

}

public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;

if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}

Importing Excel to DataTable getting data incorrectly using OpenXML

I rewrote the part of the code with the help of the link provided by PaulF. Thanks!!!

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace OpenXMLDemo
{
class Program
{
static void Main(string[] args)
{
Program p = new Program();

System.Data.DataTable data = p.ExtractExcel(@"C:\TempData\");
}

public System.Data.DataTable ExtractExcel(string fullPath)
{
var excelFileToImport = Directory.GetFiles(fullPath, "Data_Import.xlsx", SearchOption.AllDirectories);

//Create a new DataTable.
System.Data.DataTable dt = new System.Data.DataTable();

//Open the Excel file in Read Mode using OpenXML
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(excelFileToImport[0], false))
{
WorksheetPart titlesWorksheetPart = GetWorksheetPart(doc.WorkbookPart, "Titles");

Worksheet titlesWorksheet = titlesWorksheetPart.Worksheet;

//Fetch all the rows present in the worksheet
IEnumerable<Row> rows = titlesWorksheet.GetFirstChild<SheetData>().Descendants<Row>();

foreach (Cell cell in rows.ElementAt(1))
{
dt.Columns.Add(GetCellValue(doc, cell)); // this will include 2nd row a header row
}

//Loop through the Worksheet rows
foreach (Row row in rows)
{
if (row.RowIndex.Value > 2) //this will exclude first two rows
{
System.Data.DataRow tempRow = dt.NewRow();
int columnIndex = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
// Gets the column index of the cell with data
int cellColumnIndex = (int)GetColumnIndexFromName(GetColumnName(cell.CellReference));
cellColumnIndex--; //zero based index
if (columnIndex < cellColumnIndex)
{
do
{
tempRow[columnIndex] = ""; //Insert blank data here;
columnIndex++;
}
while (columnIndex < cellColumnIndex);
}
tempRow[columnIndex] = GetCellValue(doc, cell);

columnIndex++;
}
dt.Rows.Add(tempRow);
}
}
}
return dt;
}
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
if (cell.CellValue == null)
{
return "";
}
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}
/// <summary>
/// Given a cell name, parses the specified cell to get the column name.
/// </summary>
/// <param name="cellReference">Address of the cell (ie. B2)</param>
/// <returns>Column Name (ie. B)</returns>
public static string GetColumnName(string cellReference)
{
// Create a regular expression to match the column name portion of the cell name.
Regex regex = new Regex("[A-Za-z]+");
Match match = regex.Match(cellReference);
return match.Value;
}
/// <summary>
/// Given just the column name (no row index), it will return the zero based column index.
/// Note: This method will only handle columns with a length of up to two (ie. A to Z and AA to ZZ).
/// A length of three can be implemented when needed.
/// </summary>
/// <param name="columnName">Column Name (ie. A or AB)</param>
/// <returns>Zero based index if the conversion was successful; otherwise null</returns>
public static int? GetColumnIndexFromName(string columnName)
{
//return columnIndex;
string name = columnName;
int number = 0;
int pow = 1;
for (int i = name.Length - 1; i >= 0; i--)
{
number += (name[i] - 'A' + 1) * pow;
pow *= 26;
}
return number;
}
public WorksheetPart GetWorksheetPart(WorkbookPart workbookPart, string sheetName)
{
string relId = workbookPart.Workbook.Descendants<Sheet>().First(s => sheetName.Equals(s.Name)).Id;
return (WorksheetPart)workbookPart.GetPartById(relId);
}
}
}

Open XML Spreadsheet read table

I finally figured out how to do this. See below if you are having the same issue, note that I do plan to clean this up quite a bit on my own project, but this should get you going.


public static DataTable GetNamedDataTable(SpreadsheetDocument spreadsheetDocument, string DataTableName)
{
var dataTable = new DataTable();
Workbook woorkbook = spreadsheetDocument.WorkbookPart.Workbook;
Sheet sheet = woorkbook.Descendants<Sheet>().Where(s => s.Name == "SheetName").FirstOrDefault();
SharedStringTable sharedStringTable = woorkbook.WorkbookPart.SharedStringTablePart.SharedStringTable;
List<SharedStringItem> allSharedStringItems= sharedStringTable.Descendants<SharedStringItem>().ToList();
WorksheetPart worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(sheet.Id);
TableDefinitionPart tableDefinitionPart = worksheetPart.TableDefinitionParts.FirstOrDefault(r => r.Table.Name == DataTableName);
QueryTablePart queryTablePart = tableDefinitionPart.QueryTableParts.FirstOrDefault();
Table excelTable = tableDefinitionPart.Table;
int columnCounter = 0;
foreach(TableColumn column in excelTable.TableColumns)
{
DataColumn dataColumn = dataTable.Columns.Add(column.Name);
dataColumn.SetOrdinal(columnCounter);
columnCounter++;
}

var newCellRange = excelTable.Reference;
var startCell = newCellRange.Value.Split(':')[0];
var endCell = newCellRange.Value.Split(':')[1];

uint firstRowNum = GetRowIndex(startCell);
uint lastRowNum = GetRowIndex(endCell);
string firstColumn = GetColumnName(startCell);
string lastColumn = GetColumnName(endCell);

var columnIndex = 0;
foreach (Row row in worksheetPart.Worksheet.Descendants<Row>().Where(r => r.RowIndex.Value > firstRowNum && r.RowIndex.Value <= lastRowNum))
{
var dataRow = dataTable.NewRow();
foreach (Cell cell in row)
{
string columnName = GetColumnName(cell.CellReference.Value);
if (CompareColumn(columnName, firstColumn) >= 0 && CompareColumn(columnName, lastColumn) <= 0)
{
if (cell.CellValue != null)
{
dataRow[columnIndex] = cell.CellValue.Text;
}
columnIndex++;
}
}
dataTable.Rows.Add(dataRow);
columnIndex = 0;
Console.WriteLine("");
}

return dataTable;
}

private static uint GetRowIndex(string cellName)
{
// Create a regular expression to match the row index portion the cell name.
Regex regex = new Regex(@"\d+");
Match match = regex.Match(cellName);
return uint.Parse(match.Value);
}

// Given a cell name, parses the specified cell to get the column name.
private static string GetColumnName(string cellName)
{
// Create a regular expression to match the column name portion of the cell name.
Regex regex = new Regex("[A-Za-z]+");
Match match = regex.Match(cellName);
return match.Value;
}

// Given two columns, compares the columns.
private static int CompareColumn(string column1, string column2)
{
if (column1.Length > column2.Length)
{
return 1;
}
else if (column1.Length < column2.Length)
{
return -1;
}
else
{
return string.Compare(column1, column2, true);
}
}

CREDIT for this answer must go to the following site as it is what lead me to be able to figure this out. https://learn.microsoft.com/en-us/office/open-xml/how-to-calculate-the-sum-of-a-range-of-cells-in-a-spreadsheet-document

Get Excel data in DataTable

While the JET/Access driver can read Excel files as if they were databases, it has several problems, especially in the .NET Core era:

  1. It's Windows-only
  2. It needs to be installed. It can't be packaged with your application
  3. The installed version must match the bitness (32/64-bit) of any Office components. This in turn means that your application must match Office's bitness.

There are libraries that can read Excel files directly. One such option is ExcelDataReader, which opens a DbDataReader over an Excel sheet. It can handle both the obsolete xls format and the 16 year old xlsx format (yes, the "new" xlsx format was introduce in 2006, 16 years ago).

The generated data reader can be used to read the data or load a DataTable the same as any other data reader.

using (var stream = File.Open(filePath, FileMode.Open, FileAccess.Read))
{
using (var reader = ExcelReaderFactory.CreateReader(stream))
{
var table=new DataTable();
table.Load(reader);
...
}
}

ExcelDataReader has an extension that allows reading all sheets in a workbook into a DataSet, with one DataTable for each sheet.

using (var stream = File.Open(filePath, FileMode.Open, FileAccess.Read))
{
using (var reader = ExcelReaderFactory.CreateReader(stream))
{
var dataset= reader.AsDataSet();

// The result of each spreadsheet is in dataset.Tables
}
}


Related Topics



Leave a reply



Submit