Execute query on SQL Server Analysis Services with IronPython
its better use Microsoft.AnalysisServices.AdomdClient.dll and mdx query.
and set query result in Datasets in Ststem.Data assembly
something like this:
clr.AddReference ("Microsoft.AnalysisServices.AdomdClient.dll")
clr.AddReference ("System.Data")
from Microsoft.AnalysisServices.AdomdClient import AdomdConnection , AdomdDataAdapter
from System.Data import DataSet
conn = AdomdConnection("Data Source=0.0.0.0;Catalog=MyCatalog;")
conn.Open()
cmd = conn.CreateCommand()
cmd.CommandText = "your mdx query" # in your case 'select from finance'
adp = AdomdDataAdapter(cmd)
datasetParam = DataSet()
adp.Fill(datasetParam)
conn.Close();
# datasetParam hold your result as collection a\of tables
# each tables has rows
# and each row has columns
print datasetParam.Tables[0].Rows[0][0]
Python Connect to Microsoft Analysis Service
Well just Googling around it seems that the library IronPython will be useful
Execute query on SQL Server Analysis Services with IronPython
Extracting raw data from a PowerPivot model using Python
Lo and behold, I finally managed to crack the problem - turns out that accessing Power Pivot data using Python is indeed possible! Below's a short recap of what I did - you can find a more detailed description here: Analysis Services (SSAS) on a shoestring. Note: the code has been optimized neither for efficiency nor elegance.
- Install Microsoft Power BI Desktop (comes with free Analysis Services server, so no need for a costly SQL Server license - however, the same approach obviously also works if you have a proper license).
- Fire up the AS engine by first creating the msmdsrv.ini settings file, then restore the database from the ABF file (using AMO.NET), then extract data using ADOMD.NET.
Here's the Python code that illustrates the AS engine + AMO.NET parts:
import psutil, subprocess, random, os, zipfile, shutil, clr, sys, pandas
def initialSetup(pathPowerBI):
sys.path.append(pathPowerBI)
#required Analysis Services assemblies
clr.AddReference("Microsoft.PowerBI.Amo.Core")
clr.AddReference("Microsoft.PowerBI.Amo")
clr.AddReference("Microsoft.PowerBI.AdomdClient")
global AMO, ADOMD
import Microsoft.AnalysisServices as AMO
import Microsoft.AnalysisServices.AdomdClient as ADOMD
def restorePowerPivot(excelName, pathTarget, port, pathPowerBI):
#create random folder
os.chdir(pathTarget)
folder = os.getcwd()+str(random.randrange(10**6, 10**7))
os.mkdir(folder)
#extract PowerPivot model (abf backup)
archive = zipfile.ZipFile(excelName)
for member in archive.namelist():
if ".data" in member:
filename = os.path.basename(member)
abfname = os.path.join(folder, filename) + ".abf"
source = archive.open(member)
target = file(os.path.join(folder, abfname), 'wb')
shutil.copyfileobj(source, target)
del target
archive.close()
#start the cmd.exe process to get its PID
listPIDpre = [proc for proc in psutil.process_iter()]
process = subprocess.Popen('cmd.exe /k', stdin=subprocess.PIPE)
listPIDpost = [proc for proc in psutil.process_iter()]
pid = [proc for proc in listPIDpost if proc not in listPIDpre if "cmd.exe" in str(proc)][0]
pid = str(pid).split("=")[1].split(",")[0]
#msmdsrv.ini
msmdsrvText = '''<ConfigurationSettings>
<DataDir>{0}</DataDir>
<TempDir>{0}</TempDir>
<LogDir>{0}</LogDir>
<BackupDir>{0}</BackupDir>
<DeploymentMode>2</DeploymentMode>
<RecoveryModel>1</RecoveryModel>
<DisklessModeRequested>0</DisklessModeRequested>
<CleanDataFolderOnStartup>1</CleanDataFolderOnStartup>
<AutoSetDefaultInitialCatalog>1</AutoSetDefaultInitialCatalog>
<Network>
<Requests>
<EnableBinaryXML>1</EnableBinaryXML>
<EnableCompression>1</EnableCompression>
</Requests>
<Responses>
<EnableBinaryXML>1</EnableBinaryXML>
<EnableCompression>1</EnableCompression>
<CompressionLevel>9</CompressionLevel>
</Responses>
<ListenOnlyOnLocalConnections>1</ListenOnlyOnLocalConnections>
</Network>
<Port>{1}</Port>
<PrivateProcess>{2}</PrivateProcess>
<InstanceVisible>0</InstanceVisible>
<Language>1033</Language>
<Debug>
<CallStackInError>0</CallStackInError>
</Debug>
<Log>
<Exception>
<CrashReportsFolder>{0}</CrashReportsFolder>
</Exception>
<FlightRecorder>
<Enabled>0</Enabled>
</FlightRecorder>
</Log>
<AllowedBrowsingFolders>{0}</AllowedBrowsingFolders>
<ResourceGovernance>
<GovernIMBIScheduler>0</GovernIMBIScheduler>
</ResourceGovernance>
<Feature>
<ManagedCodeEnabled>1</ManagedCodeEnabled>
</Feature>
<VertiPaq>
<EnableDisklessTMImageSave>0</EnableDisklessTMImageSave>
<EnableProcessingSimplifiedLocks>1</EnableProcessingSimplifiedLocks>
</VertiPaq>
</ConfigurationSettings>'''
#save ini file to disk, fill it with required parameters
msmdsrvini = open(folder+"\\msmdsrv.ini", "w")
msmdsrvText = msmdsrvText.format(folder, port, pid) #{0},{1},{2}
msmdsrvini.write(msmdsrvText)
msmdsrvini.close()
#run AS engine inside the cmd.exe process
initString = "\"{0}\\msmdsrv.exe\" -c -s \"{1}\""
initString = initString.format(pathPowerBI.replace("/","\\"),folder)
process.stdin.write(initString + " \n")
#connect to the AS instance from Python
AMOServer = AMO.Server()
AMOServer.Connect("localhost:{0}".format(port))
#restore database from PowerPivot abf backup, disconnect
AMORestoreInfo = AMO.RestoreInfo(os.path.join(folder, abfname))
AMOServer.Restore(AMORestoreInfo)
AMOServer.Disconnect()
return process
And the data-extraction part:
def runQuery(query, port, flag):
#ADOMD assembly
ADOMDConn = ADOMD.AdomdConnection("Data Source=localhost:{0}".format(port))
ADOMDConn.Open()
ADOMDCommand = ADOMDConn.CreateCommand()
ADOMDCommand.CommandText = query
#read data in via AdomdDataReader object
DataReader = ADOMDCommand.ExecuteReader()
#get metadata, number of columns
SchemaTable = DataReader.GetSchemaTable()
numCol = SchemaTable.Rows.Count #same as DataReader.FieldCount
#get column names
columnNames = []
for i in range(numCol):
columnNames.append(str(SchemaTable.Rows[i][0]))
#fill with data
data = []
while DataReader.Read()==True:
row = []
for j in range(numCol):
try:
row.append(DataReader[j].ToString())
except:
row.append(DataReader[j])
data.append(row)
df = pandas.DataFrame(data)
df.columns = columnNames
if flag==0:
DataReader.Close()
ADOMDConn.Close()
return df
else:
#metadata table
metadataColumnNames = []
for j in range(SchemaTable.Columns.Count):
metadataColumnNames.append(SchemaTable.Columns[j].ToString())
metadata = []
for i in range(numCol):
row = []
for j in range(SchemaTable.Columns.Count):
try:
row.append(SchemaTable.Rows[i][j].ToString())
except:
row.append(SchemaTable.Rows[i][j])
metadata.append(row)
metadf = pandas.DataFrame(metadata)
metadf.columns = metadataColumnNames
DataReader.Close()
ADOMDConn.Close()
return df, metadf
The raw data are then extracted via something like this:
pathPowerBI = "C:/Program Files/Microsoft Power BI Desktop/bin"
initialSetup(pathPowerBI)
session = restorePowerPivot("D:/Downloads/PowerPivotTutorialSample.xlsx", "D:/", 60000, pathPowerBI)
df, metadf = runQuery("EVALUATE dbo_DimProduct", 60000, 1)
endSession(session)
Related Topics
Differencebetween Rowsbetween and Rangebetween
I Don't Understand Collation? (Mysql, Rdbms, Character Sets)
How to Insert Null Values into SQL Server
Remove Duplicates in a Django Query
Sqlite Format Number with 2 Decimal Places Always
How to Make a Table Read Only in SQL Server
How to Create a Closure Table Using Data from an Adjacency List
Key/Value Pairs in a Database Table
How to Write a Conditional in a MySQL Select Statement
Is There a Coalesce-Like Function in Excel
SQL Join on Nearest Less Than Date
A How to Escape %% When Building Like Queries in Rails 3/Activerecord
Why Even Use *Db.Exec() or Prepared Statements in Golang
How to Select Bottom Most Rows
Sql: Combine Select Count(*) from Multiple Tables
Many to Many Relation Design - Intersection Table Design
SQL 2005 Cte VS Temp Table Performance When Used in Joins of Other Tables