Convert Pandas Dataframe to Nested JSON

UPDATE:

j = (df.groupby(['ID','Location','Country','Latitude','Longitude'])
       .apply(lambda x: x[['timestamp','tide']].to_dict('records'))
       .reset_index()
       .rename(columns={0:'Tide-Data'})
       .to_json(orient='records'))

Result (formatted):

In [103]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))
[
  {
    "Country": "FRA",
    "ID": 1,
    "Latitude": 48.383,
    "Location": "BREST",
    "Longitude": -4.495,
    "Tide-Data": [
      {
        "tide": 6905.0,
        "timestamp": "1807-01-01"
      },
      {
        "tide": 6931.0,
        "timestamp": "1807-02-01"
      },
      {
        "tide": 6896.0,
        "timestamp": "1807-03-01"
      },
      {
        "tide": 6953.0,
        "timestamp": "1807-04-01"
      },
      {
        "tide": 7043.0,
        "timestamp": "1807-05-01"
      }
    ]
  },
  {
    "Country": "DEU",
    "ID": 7,
    "Latitude": 53.867,
    "Location": "CUXHAVEN 2",
    "Longitude": 8.717,
    "Tide-Data": [
      {
        "tide": 7093.0,
        "timestamp": "1843-01-01"
      },
      {
        "tide": 6688.0,
        "timestamp": "1843-02-01"
      },
      {
        "tide": 6493.0,
        "timestamp": "1843-03-01"
      },
      {
        "tide": 6723.0,
        "timestamp": "1843-04-01"
      },
      {
        "tide": 6533.0,
        "timestamp": "1843-05-01"
      }
    ]
  },
  {
    "Country": "DEU",
    "ID": 8,
    "Latitude": 53.899,
    "Location": "WISMAR 2",
    "Longitude": 11.458,
    "Tide-Data": [
      {
        "tide": 6957.0,
        "timestamp": "1848-07-01"
      },
      {
        "tide": 6944.0,
        "timestamp": "1848-08-01"
      },
      {
        "tide": 7084.0,
        "timestamp": "1848-09-01"
      },
      {
        "tide": 6898.0,
        "timestamp": "1848-10-01"
      },
      {
        "tide": 6859.0,
        "timestamp": "1848-11-01"
      }
    ]
  },
  {
    "Country": "NLD",
    "ID": 9,
    "Latitude": 51.918,
    "Location": "MAASSLUIS",
    "Longitude": 4.25,
    "Tide-Data": [
      {
        "tide": 6880.0,
        "timestamp": "1848-02-01"
      },
      {
        "tide": 6700.0,
        "timestamp": "1848-03-01"
      },
      {
        "tide": 6775.0,
        "timestamp": "1848-04-01"
      },
      {
        "tide": 6580.0,
        "timestamp": "1848-05-01"
      },
      {
        "tide": 6685.0,
        "timestamp": "1848-06-01"
      }
    ]
  },
  {
    "Country": "USA",
    "ID": 10,
    "Latitude": 37.807,
    "Location": "SAN FRANCISCO",
    "Longitude": -122.465,
    "Tide-Data": [
      {
        "tide": 6909.0,
        "timestamp": "1854-07-01"
      },
      {
        "tide": 6940.0,
        "timestamp": "1854-08-01"
      },
      {
        "tide": 6961.0,
        "timestamp": "1854-09-01"
      },
      {
        "tide": 6952.0,
        "timestamp": "1854-10-01"
      },
      {
        "tide": 6952.0,
        "timestamp": "1854-11-01"
      }
    ]
  }
]
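
For reference, here is a minimal sketch of an input frame that would reproduce the first two records above. The column names and sample values are taken from the output; the frame itself is an assumption about the original data:

import pandas as pd

# Hypothetical input: one row per (station, month) tide reading,
# with the station metadata repeated on every row.
df = pd.DataFrame({
    'ID':        [1, 1, 7, 7],
    'Location':  ['BREST', 'BREST', 'CUXHAVEN 2', 'CUXHAVEN 2'],
    'Country':   ['FRA', 'FRA', 'DEU', 'DEU'],
    'Latitude':  [48.383, 48.383, 53.867, 53.867],
    'Longitude': [-4.495, -4.495, 8.717, 8.717],
    'timestamp': ['1807-01-01', '1807-02-01', '1843-01-01', '1843-02-01'],
    'tide':      [6905.0, 6931.0, 7093.0, 6688.0],
})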

OLD answer:

You can do it using the groupby(), apply(), and to_json() methods:

j = (df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)
       .apply(lambda x: dict(zip(x.timestamp,x.tide)))
       .reset_index()
       .rename(columns={0:'Tide-Data'})
       .to_json(orient='records'))

Output:

In [112]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))
[
  {
    "Country": "FRA",
    "ID": 1,
    "Latitude": 48.383,
    "Location": "BREST",
    "Longitude": -4.495,
    "Tide-Data": {
      "1807-01-01": 6905.0,
      "1807-02-01": 6931.0,
      "1807-03-01": 6896.0,
      "1807-04-01": 6953.0,
      "1807-05-01": 7043.0
    }
  },
  {
    "Country": "DEU",
    "ID": 7,
    "Latitude": 53.867,
    "Location": "CUXHAVEN 2",
    "Longitude": 8.717,
    "Tide-Data": {
      "1843-01-01": 7093.0,
      "1843-02-01": 6688.0,
      "1843-03-01": 6493.0,
      "1843-04-01": 6723.0,
      "1843-05-01": 6533.0
    }
  },
  {
    "Country": "DEU",
    "ID": 8,
    "Latitude": 53.899,
    "Location": "WISMAR 2",
    "Longitude": 11.458,
    "Tide-Data": {
      "1848-07-01": 6957.0,
      "1848-08-01": 6944.0,
      "1848-09-01": 7084.0,
      "1848-10-01": 6898.0,
      "1848-11-01": 6859.0
    }
  },
  {
    "Country": "NLD",
    "ID": 9,
    "Latitude": 51.918,
    "Location": "MAASSLUIS",
    "Longitude": 4.25,
    "Tide-Data": {
      "1848-02-01": 6880.0,
      "1848-03-01": 6700.0,
      "1848-04-01": 6775.0,
      "1848-05-01": 6580.0,
      "1848-06-01": 6685.0
    }
  },
  {
    "Country": "USA",
    "ID": 10,
    "Latitude": 37.807,
    "Location": "SAN FRANCISCO",
    "Longitude": -122.465,
    "Tide-Data": {
      "1854-07-01": 6909.0,
      "1854-08-01": 6940.0,
      "1854-09-01": 6961.0,
      "1854-10-01": 6952.0,
      "1854-11-01": 6952.0
    }
  }
]

P.S. If you don't care about indentation, you can write directly to a JSON file:

(df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)
   .apply(lambda x: dict(zip(x.timestamp,x.tide)))
   .reset_index()
   .rename(columns={0:'Tide-Data'})
   .to_json('/path/to/file_name.json', orient='records'))
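
To sanity-check the exported file, you can read it straight back with the json module; a small sketch, reusing the placeholder path from the snippet above:

import json

# Load the file we just wrote and confirm it parses as a list of records.
with open('/path/to/file_name.json') as fh:
    records = json.load(fh)

print(len(records), records[0]['Tide-Data'])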

Convert Pandas dataframe to nested JSON (without nesting as lists)

Looking at the docs for to_dict, it seems we still have to use the 'records' option. If we assume each group always produces a list with exactly one element, we can just take its 0th element while keeping your original code:

>>> import numpy as np
>>> import pandas as pd
>>> array = np.array([['a1', 'b1', 'c1', 'd1'], ['a2', 'b2', 'c2', 'd2']])
>>> df = pd.DataFrame(data=array, columns=['a','b','c','d'])
>>> (df.groupby(['a','b'])
...    .apply(lambda x: x[['c','d']].to_dict('records')[0])
...    .reset_index()
...    .rename(columns={0:'nested_group'})
...    .to_json(orient='records'))
'[{"a":"a1","b":"b1","nested_group":{"c":"c1","d":"d1"}},{"a":"a2","b":"b2","nested_group":{"c":"c2","d":"d2"}}]'
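
One caveat: to_dict('records')[0] silently drops every row after the first if a group ever contains more than one row. If that would be a bug in your data, a variant that fails loudly is safer; a small sketch (the helper name one_record is mine):

def one_record(g):
    # Each (a, b) group is assumed to hold exactly one row;
    # raise instead of silently discarding data.
    records = g[['c', 'd']].to_dict('records')
    if len(records) != 1:
        raise ValueError(f'expected 1 row per group, got {len(records)}')
    return records[0]

j = (df.groupby(['a', 'b'])
       .apply(one_record)
       .reset_index()
       .rename(columns={0: 'nested_group'})
       .to_json(orient='records'))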

Pandas Dataframe to Nested JSON

You can first define a function that converts each sub-group to a dictionary, apply it to each group, and then merge the per-group results into a single JSON-style object:

def f(x):
    return dict({'date': x.date.iloc[0]}, **{k: v for k, v in zip(x.student, x.grade)})

(
    df.groupby(['course', 'date'])
      .apply(f)
      .groupby(level=0)
      .apply(lambda x: x.tolist())
      .to_dict()
)
Out[1006]:
{'ENGLISH': [{'Student_1': 93, 'Student_2': 83, 'date': '2017-06-25'},
             {'Student_1': 96, 'Student_2': 99, 'date': '2017-06-26'}],
 'MATH': [{'Student_1': 93, 'Student_2': 83, 'date': '2017-06-25'},
          {'Student_1': 90, 'Student_2': 85, 'date': '2017-06-26'}]}
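
The answer doesn't show the input frame, so here is a minimal sketch of one that reproduces the dictionary above (the values are inferred from the output):

import pandas as pd

# Inferred input: one row per (course, date, student) grade.
df = pd.DataFrame({
    'course':  ['ENGLISH'] * 4 + ['MATH'] * 4,
    'date':    ['2017-06-25', '2017-06-25', '2017-06-26', '2017-06-26'] * 2,
    'student': ['Student_1', 'Student_2'] * 4,
    'grade':   [93, 83, 96, 99, 93, 83, 90, 85],
})

Running the pipeline above on this frame yields exactly the dictionary shown.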

Convert nested JSON to Pandas dataframe of specific format

Edit: Using the updated data with multiple usageId values (there are 3 of them), I did the following processing steps and produced df4 with the relevant columns of interest; please have a look:

import pandas as pd

d = [{'usageId': 'e83f43f8-ec4a-402d-a64e-d74b6f1df4a7',
      'assessment_status_date': '2022-03-28',
      'assessment_date': '2020-12-07',
      'usage_assessment': <truncated> ]

df4 = pd.DataFrame()
df1 = pd.DataFrame(d)  # df1 = built from the list of dicts d
for usageId in set(df1['usageId']):
    df2 = df1[df1['usageId'] == usageId]  # df2 = rows for this usageId (more than 100 usageIds in total)
    lis = pd.DataFrame(df2['test_category'].values.tolist()[0]).T['test_execution'].values.tolist()
    test_category = list({k.split('+')[0].strip(): v for d in lis for k, v in d.items()}.keys())[0]
    df3 = pd.DataFrame({k.split('+')[-1].strip(): v for d in lis for k, v in d.items()}).T  # df3 = extracted 'test_execution' records
    df3['test_category'] = test_category
    df3['usageId'] = usageId
    df4 = pd.concat([df4, df3], axis=0)  # df4 = compiled test_execution rows for all usageIds
    df1.loc[df1['usageId'] == usageId, 'test_category'] = test_category  # tag df1 rows with the extracted category
print(df4)

Output:

test | evidence_capture | test_result_justification | latest_test_result_date | last_updated_by | test_execution_status | test_result | test_category | usageId
Usage Reconciliation | None | Test out of scope | 2019-10-02 | None | In Progress | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Data Agreements | None | xxx | 2019-10-02 | None | In Progress | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Data Elements | None | The rationale provided for... | 2019-10-02 | None | In Progress | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Computations | None | None | None | None | Not Started | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Lineage | None | None | None | None | Not Started | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Metadata | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Data Quality Monitoring | None | xxx | 2019-08-09 | None | Completed | Pass without Compensating ... | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
HPU Source Reliability | None | xxx | 2019-10-02 | None | In Progress | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
Change Notification | None | None | None | None | Not Started | None | Health and Welfare Plan | e83f43f8-ec4a-402d-a64e-d7...
New or Changed Usage Reconc... | None | xxx | 2020-10-23 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Data Agreement Reviewed and... | None | xxx | 2020-07-21 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
HPDEs Identified | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
HPDE Justification is Docum... | None | xxx | 2020-02-28 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
HPDE Identification Rationa... | None | xxx | 2020-02-28 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Output is Documented ... | None | xxx | 2020-08-07 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Data Element Metadata is in... | None | xxx | None | tat00000 | In Progress | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Secured Data Indicator Cons... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
HPDE Metadata is in Curated... | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Outcome is Accurate | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Lineage is Accurate Reflect... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Partial Lineage from Author... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Lineage from Usage to Autho... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Quality is Sufficient | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Description is Valid | None | xx | 2020-02-28 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage SME's EID Status is V... | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage AE | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Data Defect Tracking Proces... | None | xxx | 2020-02-28 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Data Movement is Reasonable | None | xxx | 2020-10-23 | None | Completed | Fail | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
DQ Rules and Thresholds are... | None | None | None | None | In Progress | Fail | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Data Defect Tracking Proces... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
DQ Monitoring Plan is Reaso... | None | xxx | 2020-10-22 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Consumes from Accepta... | None | xxx | 2020-10-23 | None | Completed | Fail | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Consumes from Approve... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Change Notification Process... | None | xxx | 2020-07-21 | None | Completed | Pass without Compensating ... | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Change Notification Process... | None | None | None | None | Not Started | None | New or Changed Usage Recon... | b3c9cbbd-fb72-46df-a4a3-6d...
Usage Reconciliation | None | Test out of scope | 2019-10-02 | None | In Progress | None | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Data Agreements | None | xxx | 2019-10-21 | None | Completed | Pass without Compensating ... | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Data Elements | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Computations | None | None | None | None | Not Started | None | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Lineage | None | None | None | None | Not Started | None | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Metadata | None | Valid | 2020-07-02 | None | Completed | Pass without Compensating ... | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Data Quality Monitoring | None | xxx | 2019-10-21 | None | Completed | Pass without Compensating ... | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
HPU Source Reliability | None | None | None | None | Not Started | None | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
Change Notification | None | None | None | None | Not Started | None | Provided Health Insur Offe... | c67a1567-2de3-4826-97bb-99...
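
As an aside, when a nested payload has a regular shape, pd.json_normalize can do much of this flattening declaratively. The truncated d above doesn't show its full structure, so the payload in this sketch is purely hypothetical:

import pandas as pd

# Hypothetical nested payload: one 'tests' list per record.
data = [
    {'usageId': 'u-1', 'status': 'In Progress',
     'tests': [{'name': 'Lineage', 'result': None},
               {'name': 'Metadata', 'result': 'Pass'}]},
    {'usageId': 'u-2', 'status': 'Completed',
     'tests': [{'name': 'Lineage', 'result': 'Pass'}]},
]

# One output row per nested test, with the parent keys carried along.
flat = pd.json_normalize(data, record_path='tests', meta=['usageId', 'status'])
print(flat)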

The original df1 is also updated; this table can be joined back to df4 on usageId and test_category (see the merge sketch after the table):

idx | usageId | assessment_status_date | assessment_date | usage_assessment | has_l3test | compensating_control | recommendations | test_category | assessment_status | recommendation_indicator | assessment_justification | revalidation_justification
0 | e83f43f8-ec4a-402d-a64e-d7... | 2022-03-28 | 2020-12-07 | Level 1 | None | None | None | Health and Welfare Plan | In Progress | None | None | None
1 | b3c9cbbd-fb72-46df-a4a3-6d... | 2022-03-28 | 2020-12-07 | Level 1 | None | None | None | New or Changed Usage Recon... | In Progress | None | None | None
2 | c67a1567-2de3-4826-97bb-99... | 2022-03-28 | 2020-12-07 | Level 1 | None | None | None | Provided Health Insur Offe... | In Progress | None | None | None
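
Since df4 and df1 share usageId and test_category, attaching the assessment columns to every test row is a single merge; a small sketch (naming the moved index 'test' is my choice):

# Move df4's test-name index into a column, then left-join df1's columns.
combined = (df4.rename_axis('test').reset_index()
               .merge(df1, on=['usageId', 'test_category'], how='left'))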

