Execute Query on SQL Server Analysis Services with Ironpython

Execute query on SQL Server Analysis Services with IronPython

its better use Microsoft.AnalysisServices.AdomdClient.dll and mdx query.
and set query result in Datasets in Ststem.Data assembly
something like this:

clr.AddReference ("Microsoft.AnalysisServices.AdomdClient.dll")
clr.AddReference ("System.Data")

from Microsoft.AnalysisServices.AdomdClient import AdomdConnection , AdomdDataAdapter
from System.Data import DataSet

conn = AdomdConnection("Data Source=0.0.0.0;Catalog=MyCatalog;")
conn.Open()
cmd = conn.CreateCommand()
cmd.CommandText = "your mdx query" # in your case 'select from finance'
adp = AdomdDataAdapter(cmd)
datasetParam = DataSet()
adp.Fill(datasetParam)
conn.Close();

# datasetParam hold your result as collection a\of tables
# each tables has rows
# and each row has columns
print datasetParam.Tables[0].Rows[0][0]

Python Connect to Microsoft Analysis Service

Well just Googling around it seems that the library IronPython will be useful

Execute query on SQL Server Analysis Services with IronPython

Extracting raw data from a PowerPivot model using Python

Lo and behold, I finally managed to crack the problem - turns out that accessing Power Pivot data using Python is indeed possible! Below's a short recap of what I did - you can find a more detailed description here: Analysis Services (SSAS) on a shoestring. Note: the code has been optimized neither for efficiency nor elegance.

  • Install Microsoft Power BI Desktop (comes with free Analysis Services server, so no need for a costly SQL Server license - however, the same approach obviously also works if you have a proper license).
  • Fire up the AS engine by first creating the msmdsrv.ini settings file, then restore the database from the ABF file (using AMO.NET), then extract data using ADOMD.NET.

Here's the Python code that illustrates the AS engine + AMO.NET parts:

import psutil, subprocess, random, os, zipfile, shutil, clr, sys, pandas

def initialSetup(pathPowerBI):
sys.path.append(pathPowerBI)

#required Analysis Services assemblies
clr.AddReference("Microsoft.PowerBI.Amo.Core")
clr.AddReference("Microsoft.PowerBI.Amo")
clr.AddReference("Microsoft.PowerBI.AdomdClient")

global AMO, ADOMD
import Microsoft.AnalysisServices as AMO
import Microsoft.AnalysisServices.AdomdClient as ADOMD

def restorePowerPivot(excelName, pathTarget, port, pathPowerBI):
#create random folder
os.chdir(pathTarget)
folder = os.getcwd()+str(random.randrange(10**6, 10**7))
os.mkdir(folder)

#extract PowerPivot model (abf backup)
archive = zipfile.ZipFile(excelName)
for member in archive.namelist():
if ".data" in member:
filename = os.path.basename(member)
abfname = os.path.join(folder, filename) + ".abf"
source = archive.open(member)
target = file(os.path.join(folder, abfname), 'wb')
shutil.copyfileobj(source, target)
del target
archive.close()

#start the cmd.exe process to get its PID
listPIDpre = [proc for proc in psutil.process_iter()]
process = subprocess.Popen('cmd.exe /k', stdin=subprocess.PIPE)
listPIDpost = [proc for proc in psutil.process_iter()]
pid = [proc for proc in listPIDpost if proc not in listPIDpre if "cmd.exe" in str(proc)][0]
pid = str(pid).split("=")[1].split(",")[0]

#msmdsrv.ini
msmdsrvText = '''<ConfigurationSettings>
<DataDir>{0}</DataDir>
<TempDir>{0}</TempDir>
<LogDir>{0}</LogDir>
<BackupDir>{0}</BackupDir>
<DeploymentMode>2</DeploymentMode>
<RecoveryModel>1</RecoveryModel>
<DisklessModeRequested>0</DisklessModeRequested>
<CleanDataFolderOnStartup>1</CleanDataFolderOnStartup>
<AutoSetDefaultInitialCatalog>1</AutoSetDefaultInitialCatalog>
<Network>
<Requests>
<EnableBinaryXML>1</EnableBinaryXML>
<EnableCompression>1</EnableCompression>
</Requests>
<Responses>
<EnableBinaryXML>1</EnableBinaryXML>
<EnableCompression>1</EnableCompression>
<CompressionLevel>9</CompressionLevel>
</Responses>
<ListenOnlyOnLocalConnections>1</ListenOnlyOnLocalConnections>
</Network>
<Port>{1}</Port>
<PrivateProcess>{2}</PrivateProcess>
<InstanceVisible>0</InstanceVisible>
<Language>1033</Language>
<Debug>
<CallStackInError>0</CallStackInError>
</Debug>
<Log>
<Exception>
<CrashReportsFolder>{0}</CrashReportsFolder>
</Exception>
<FlightRecorder>
<Enabled>0</Enabled>
</FlightRecorder>
</Log>
<AllowedBrowsingFolders>{0}</AllowedBrowsingFolders>
<ResourceGovernance>
<GovernIMBIScheduler>0</GovernIMBIScheduler>
</ResourceGovernance>
<Feature>
<ManagedCodeEnabled>1</ManagedCodeEnabled>
</Feature>
<VertiPaq>
<EnableDisklessTMImageSave>0</EnableDisklessTMImageSave>
<EnableProcessingSimplifiedLocks>1</EnableProcessingSimplifiedLocks>
</VertiPaq>
</ConfigurationSettings>'''

#save ini file to disk, fill it with required parameters
msmdsrvini = open(folder+"\\msmdsrv.ini", "w")
msmdsrvText = msmdsrvText.format(folder, port, pid) #{0},{1},{2}
msmdsrvini.write(msmdsrvText)
msmdsrvini.close()

#run AS engine inside the cmd.exe process
initString = "\"{0}\\msmdsrv.exe\" -c -s \"{1}\""
initString = initString.format(pathPowerBI.replace("/","\\"),folder)
process.stdin.write(initString + " \n")

#connect to the AS instance from Python
AMOServer = AMO.Server()
AMOServer.Connect("localhost:{0}".format(port))

#restore database from PowerPivot abf backup, disconnect
AMORestoreInfo = AMO.RestoreInfo(os.path.join(folder, abfname))
AMOServer.Restore(AMORestoreInfo)
AMOServer.Disconnect()

return process

And the data-extraction part:

def runQuery(query, port, flag):
#ADOMD assembly
ADOMDConn = ADOMD.AdomdConnection("Data Source=localhost:{0}".format(port))
ADOMDConn.Open()
ADOMDCommand = ADOMDConn.CreateCommand()
ADOMDCommand.CommandText = query

#read data in via AdomdDataReader object
DataReader = ADOMDCommand.ExecuteReader()

#get metadata, number of columns
SchemaTable = DataReader.GetSchemaTable()
numCol = SchemaTable.Rows.Count #same as DataReader.FieldCount

#get column names
columnNames = []
for i in range(numCol):
columnNames.append(str(SchemaTable.Rows[i][0]))

#fill with data
data = []
while DataReader.Read()==True:
row = []
for j in range(numCol):
try:
row.append(DataReader[j].ToString())
except:
row.append(DataReader[j])
data.append(row)
df = pandas.DataFrame(data)
df.columns = columnNames

if flag==0:
DataReader.Close()
ADOMDConn.Close()

return df
else:
#metadata table
metadataColumnNames = []
for j in range(SchemaTable.Columns.Count):
metadataColumnNames.append(SchemaTable.Columns[j].ToString())
metadata = []
for i in range(numCol):
row = []
for j in range(SchemaTable.Columns.Count):
try:
row.append(SchemaTable.Rows[i][j].ToString())
except:
row.append(SchemaTable.Rows[i][j])
metadata.append(row)
metadf = pandas.DataFrame(metadata)
metadf.columns = metadataColumnNames

DataReader.Close()
ADOMDConn.Close()

return df, metadf

The raw data are then extracted via something like this:

pathPowerBI = "C:/Program Files/Microsoft Power BI Desktop/bin"
initialSetup(pathPowerBI)
session = restorePowerPivot("D:/Downloads/PowerPivotTutorialSample.xlsx", "D:/", 60000, pathPowerBI)
df, metadf = runQuery("EVALUATE dbo_DimProduct", 60000, 1)
endSession(session)


Related Topics



Leave a reply



Submit