Creating a Dictionary from a CSV File

Creating a dictionary from a csv file?

I believe the syntax you were looking for is as follows:

import csv

# Build a {first column: second column} mapping from coors.csv.
# newline='' is what the csv module expects for files it reads or writes.
with open('coors.csv', mode='r', newline='') as infile:
    reader = csv.reader(infile)
    # Mode 'w' creates/truncates coors_new.csv; the writer is not used
    # any further within this snippet.
    with open('coors_new.csv', mode='w', newline='') as outfile:
        writer = csv.writer(outfile)
        # If the same key appears in several rows, the last row wins.
        mydict = {rows[0]: rows[1] for rows in reader}

Alternatively, for Python versions older than 2.7 (which do not support dict comprehensions), you want:

# Same mapping built with dict() from a generator of (key, value) tuples.
# `reader` is expected to be an iterable of rows, e.g. a csv.reader.
mydict = dict((rows[0],rows[1]) for rows in reader)

Creating a dictionary from a CSV file in Python

Leverage the csv module; then you can simply index the data. Check whether the key is already in the dictionary: if it is, update its existing entry; otherwise, add a new list for it.

import csv

def space_statistics(csvFile):
    """Tally mission outcomes per company from a CSV file.

    The first row is treated as a header and skipped. For every other
    non-empty row, column index 2 is used as the company key and the
    last column as the mission status.

    Args:
        csvFile: Path of the CSV file to read.

    Returns:
        A dict mapping each company to ``[total, successes, failures]``.
        Any status other than the exact string ``'Success'`` is counted
        as a failure.

    Raises:
        IndexError: If a non-empty data row has fewer than three columns.
    """
    space = {}
    with open(csvFile, newline='') as inFile:
        reader = csv.reader(inFile)
        next(reader, None)  # skip the header row (no error on an empty file)
        for entry in reader:
            if not entry:
                # csv.reader yields [] for blank lines; nothing to count.
                continue
            # New companies start at [0, 0, 0]; existing ones are reused.
            counts = space.setdefault(entry[2], [0, 0, 0])
            counts[0] += 1  # total mission count
            if entry[-1] == 'Success':
                counts[1] += 1
            else:  # failed
                counts[2] += 1
    return space

Creating a dictionary from a CSV file

Create a dictionary, then iterate over the result and stuff the rows in the dictionary. Note that if you encounter a row with a duplicate date, you will have to decide what to do (raise an exception, replace the previous row, discard the later row, etc...)

Here's test.csv:

Date,Foo,Bar
123,456,789
abc,def,ghi

and the corresponding program:

import csv
# Map the first column of every row (header row included) to the rest of the row.
result = {}
# The with-block closes the file; newline='' is what the csv module expects.
with open('test.csv', newline='') as infile:
    reader = csv.reader(infile)
    for row in reader:
        key = row[0]
        if key in result:
            # implement your duplicate row handling here
            pass
        # As written, a later row with the same key replaces the earlier one.
        result[key] = row[1:]
print(result)

yields:

{'Date': ['Foo', 'Bar'], '123': ['456', '789'], 'abc': ['def', 'ghi']}

or, with DictReader:

import csv
# Map each row's 'Date' value to a dict of the remaining columns.
result = {}
# The with-block closes the file; newline='' is what the csv module expects.
with open('test.csv', newline='') as infile:
    reader = csv.DictReader(infile)
    for row in reader:
        # pop() removes 'Date' from the row so it is not repeated in the value.
        key = row.pop('Date')
        if key in result:
            # implement your duplicate row handling here
            pass
        # As written, a later row with the same key replaces the earlier one.
        result[key] = row
print(result)

results in:

{'123': {'Foo': '456', 'Bar': '789'}, 'abc': {'Foo': 'def', 'Bar': 'ghi'}}

Or perhaps you want to map the column headings to a list of values for that column:

import csv
# Map every column heading to the list of values found in that column.
result = {}
# The with-block closes the file; newline='' is what the csv module expects.
with open('test.csv', newline='') as infile:
    reader = csv.DictReader(infile)
    for row in reader:
        for column, value in row.items():  # consider .iteritems() for Python 2
            # setdefault creates the list the first time a column is seen.
            result.setdefault(column, []).append(value)
print(result)

That yields:

{'Date': ['123', 'abc'], 'Foo': ['456', 'def'], 'Bar': ['789', 'ghi']}

Making a dictionary from a csv file with

You're nearly there. You simply need to add an entry to mountains for each iteration of the loop:

# Map each mountain name (column 0) to its height (column 1) as an int.
mountains = dict()
with open('mountains.csv', 'r', newline='') as handle:
    reader = csv.reader(handle, delimiter=',')

    for row in reader:
        name = row[0]
        height = row[1]
        # int() raises ValueError if the height column is not a whole number.
        mountains[name] = int(height)

How to create a dictionary with one key and multiple values from a CSV file?

csv.DictReader by default takes the first row as the keys of the dictionary, so that won't work here since you want the first column as the keys.

So you can read the csv file using csv.reader and then iterate over the rows and create your dictionary using dictionary comprehension

import csv

mydict = {}

#Open the file in read mode
with open('input_experiment.csv', mode='r', newline='') as infile:
    #Open a reader to the csv, the delimiter is a single space
    reader = csv.reader(infile, delimiter=' ', skipinitialspace=True)

    #Read into the dictionary using dictionary comprehension, key is the first column and row are rest of the columns
    #This has to run inside the with-block: the reader pulls lines from the file lazily
    mydict = { key: row for key, *row in reader }

print(mydict)

So if the input file is

EOLB-98 2 4 3 1 4 4 CCPZ-81 CTCB-18 VBOX-39
LKHN-41 3 3 1 1 4 3 BYAP-21 QENC-92 JSZQ-42
NWVF-51 5 3 2 4 3 5 YWVL-18 KPCC-99 FIMD-24
XVGP-15 1 4 1 1 4 1 DZCP-35 WMBB-45 XTCH-99

The output will be

{'EOLB-98': ['2', '4', '3', '1', '4', '4', 'CCPZ-81', 'CTCB-18', 'VBOX-39'], 
'LKHN-41': ['3', '3', '1', '1', '4', '3', 'BYAP-21', 'QENC-92', 'JSZQ-42'],
'NWVF-51': ['5', '3', '2', '4', '3', '5', 'YWVL-18', 'KPCC-99', 'FIMD-24'],
'XVGP-15': ['1', '4', '1', '1', '4', '1', 'DZCP-35', 'WMBB-45', 'XTCH-99']}

Creating a dictionary from specific rows of a .csv file

These zeros are NOT values from empty row(s) but indexes (number of row).

You should select a single row using .iloc[] - but for your example output you should use index 0 rather than index 2, because it seems the empty rows were skipped.

df.iloc[0].to_dict()

Minimal working example:

import pandas as pd

# Three named columns with three values each.
data = {
    'Column X': ['A', 'B', 'C'],
    'Column Y': ['D', 'E', 'F'],
    'Column Z': ['G', 'H', 'I'],
}
df = pd.DataFrame(data)

# Whole frame as {column: {index: value}}.
all_rows = df.to_dict()
# Only the row at position 2, as {column: value}.
one_row = df.iloc[2].to_dict()

print('--- df ---')
print(df)

print('--- all rows ---')
print(all_rows)

print('--- one row ---')
print(one_row)

Result:

--- df ---
Column X Column Y Column Z
0 A D G
1 B E H
2 C F I
--- all rows ---
{'Column X': {0: 'A', 1: 'B', 2: 'C'}, 'Column Y': {0: 'D', 1: 'E', 2: 'F'}, 'Column Z': {0: 'G', 1: 'H', 2: 'I'}}
--- one row ---
{'Column X': 'C', 'Column Y': 'F', 'Column Z': 'I'}

Create a dictionary from CSV file

Alternative without Pandas (nothing wrong with Pandas):

import csv

# newline='' is what the csv module expects for files it reads.
with open("file.csv", "r", newline="") as file:
    reader = csv.reader(file)
    # Header row without its first cell: the names of the value columns.
    keys = next(reader)[1:]
    # First cell of each remaining row is the key; the rest is zipped with the header names.
    devices_dict = {key: dict(zip(keys, values)) for key, *values in reader}

Result:

{'co-agg-r1': {'device_platform': 'cisco_ios', 'device_role': 'co-agg-r'},
'co-edg-fw': {'device_platform': 'cisco_asa', 'device_role': 'co-edg-fw'},
'co-acc-sw': {'device_platform': 'cisco_ios', 'device_role': 'co-acc-sw'},
'co-acc-rsw': {'device_platform': 'broadcom_icos', 'device_role': 'co-acc-rsw'}}

Create a dictionary from a csv file with same value

Use normal for-loop for this

for row in spamreader:
    # First time this key (column 5) is seen: start an empty list for it.
    if row[5] not in self.data:
        self.data[row[5]] = []

    # Collect column 3 of every row under its column-5 key.
    self.data[row[5]].append(row[3])
    #self.data[row[5]].append(row)

Alternatively, you could use defaultdict with list as the default factory - it will automatically create an empty list whenever one is needed.

 import collections

# Missing keys get a fresh empty list automatically on first access.
self.data = collections.defaultdict(list)

for row in spamreader:
    self.data[row[5]].append(row[3])

BTW:

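The standard module itertools has a function groupby() which could probably also be useful (note that it only groups consecutive rows with the same key, so the data has to be sorted by that key first), but I have never used it.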

pandas.DataFrame has also groupby() which can be useful. And it can read csv.

I use io.StringIO only to simulate file in memory. You should use filename

text = '''19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
'''

import pandas as pd

import io

#df = pd.read_csv("filename.csv", header=None, sep=';')
df = pd.read_csv(io.StringIO(text), header=None, sep=';')

#df.iloc[:,5]

# header=None gives integer column labels, so 5 selects the sixth column.
for value, group in df.groupby(5):
    print('--- value:', value, '---')
    print(group)

Result:

--- value: price1 ---
0 1 ... 7 8
0 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
1 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
2 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
3 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
4 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
5 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
6 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
7 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
8 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
9 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
10 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
11 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
12 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
13 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code
14 19/06/2021 18:18:17 people-mail@hotmail.fr ... NaN private-code

[15 rows x 9 columns]

--- value: price2 ---
0 1 ... 7 8
15 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code
16 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code
17 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code

[3 rows x 9 columns]

--- value: price3 ---
0 1 ... 7 8
18 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code
19 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code
20 15/06/2021 12:43:59 people-mail@gmail.com ... NaN private-code

[3 rows x 9 columns]

EDIT:

Example with other mentioned methods

text = '''19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
19/06/2021 18:18:17;people-mail@hotmail.fr;lastname;firstname;Validé;price1;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price2;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
15/06/2021 12:43:59;people-mail@gmail.com;lastname;firstname;Validé;price3;2,00;;private-code
'''

import csv
import io

# ---

# Group column 3 of every row under its column-5 value, using a plain dict.
data = {}

#with open('filename.csv') as fh:
with io.StringIO(text) as fh:
    csv_reader = csv.reader(fh, delimiter=';')

    for row in csv_reader:
        # First time this key is seen: start an empty list for it.
        if row[5] not in data:
            data[row[5]] = []

        data[row[5]].append(row[3])

for key, value in data.items():
    print(key, value)
    print('---')  # separator after each group

# ---

import collections

# Same grouping, but defaultdict creates the empty list for a new key itself.
data = collections.defaultdict(list)

#with open('filename.csv') as fh:
with io.StringIO(text) as fh:
    csv_reader = csv.reader(fh, delimiter=';')

    for row in csv_reader:
        data[row[5]].append(row[3])

for key, value in data.items():
    print(key, value)
    print('---')  # separator after each group

# ---

import collections
import itertools

# Same grouping with itertools.groupby, which yields (key, rows) pairs lazily.

#with open('filename.csv') as fh:
with io.StringIO(text) as fh:
    csv_reader = csv.reader(fh, delimiter=';')

    # groupby() only merges *consecutive* rows with the same key, so sort by
    # that key first; sorted() is stable, so rows keep their order within a group.
    all_rows = sorted(csv_reader, key=lambda row:row[5])

data = itertools.groupby(all_rows, key=lambda row:row[5])

for key, value in data:
    print(key)
    for item in value:
        print(item)
    print('---')  # separator after each group


Related Topics



Leave a reply



Submit