Splitting one csv into multiple files
I suggest you don't reinvent the wheel; there is an existing solution. Source here
import os
def split(filehandler, delimiter=',', row_limit=1000,
          output_name_template='output_%s.csv', output_path='.', keep_headers=True):
    """Split a CSV stream into numbered files of at most ``row_limit`` data rows.

    Args:
        filehandler: Iterable of CSV text lines, e.g. an open text file.
        delimiter: Field delimiter used for both reading and writing.
        row_limit: Maximum number of data rows written to each output file.
        output_name_template: ``%``-style template that receives the piece
            number (starting at 1) and yields the output file name.
        output_path: Directory in which the output files are created.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every output file.

    The first output file is always created, even when the input is empty.
    """
    import csv

    reader = csv.reader(filehandler, delimiter=delimiter)

    def open_piece(piece):
        # newline='' lets the csv module control line endings itself, which
        # avoids doubled line breaks on platforms that translate '\n'.
        piece_path = os.path.join(output_path, output_name_template % piece)
        return open(piece_path, 'w', newline='')

    headers = None
    if keep_headers:
        # Built-in next() works on Python 3; the default keeps an empty
        # input from raising StopIteration.
        headers = next(reader, None)

    current_piece = 1
    current_out = open_piece(current_piece)
    try:
        current_out_writer = csv.writer(current_out, delimiter=delimiter)
        if headers is not None:
            current_out_writer.writerow(headers)

        rows_in_piece = 0
        for row in reader:
            if rows_in_piece >= row_limit:
                # Current piece is full: close it before starting the next
                # one so only a single output handle is open at a time.
                current_out.close()
                current_piece += 1
                current_out = open_piece(current_piece)
                current_out_writer = csv.writer(current_out, delimiter=delimiter)
                if headers is not None:
                    current_out_writer.writerow(headers)
                rows_in_piece = 0
            current_out_writer.writerow(row)
            rows_in_piece += 1
    finally:
        # Closing an already-closed file is a no-op, so this is safe even if
        # opening the next piece failed right after the previous close.
        current_out.close()
Use it like:
# Open the input with newline='' (as the csv module documentation recommends)
# and let the with-block close the handle once the split is finished.
with open('/your/pat/input.csv', 'r', newline='') as source:
    split(source)
How to split one csv into multiple files in python
If you can't use pandas, you can use the built-in csv module and the itertools.groupby() function to group the rows by country.
from itertools import groupby
import csv
# Split world.csv into one CSV file per country (the fifth column, index 4).
with open('world.csv') as csv_file:
    rows = csv.reader(csv_file)
    next(rows)  # discard the header row of the input

    def country_of(row):
        """Return the country field (index 4) of a row."""
        return row[4]

    # groupby only merges adjacent items, so order the rows by country first.
    by_country = groupby(sorted(rows, key=country_of), key=country_of)

    # One output file per country, each starting with the fixed header row.
    for country, country_rows in by_country:
        with open(country + '.csv', 'w', newline='') as fout:
            out = csv.writer(fout)
            out.writerow(["city","city_alt","lat","lng","country"])
            out.writerows(country_rows)
Split csv file into two separate files python 3.7
Read the whole CSV file and store its lines in a list. Then collect consecutive rows that share the same first column in a temporary list. For each such group, write about 70% of the rows to one file and the remaining rows to another file.
# Split file.csv into file1.csv (roughly 70 % of each group) and file2.csv
# (the remainder). A group is a run of consecutive rows whose first
# comma-separated field is identical.
with open('file.csv') as file:
    csv_data = file.read().split("\n")
header = csv_data[0]
# Drop blank lines (including the empty string left behind by a trailing
# newline) so they are never treated as a group of their own.
csv_data = [line for line in csv_data[1:] if line]


def _write_group(rows, leading_lines):
    """Append the first part of ``rows`` to file1.csv and the rest to file2.csv.

    ``leading_lines`` is written ahead of the data in both files; it holds the
    header for the first group and is empty for every later group.
    """
    # int(0.7 * n + 1) rows go to file1.csv, so a group of up to 3 rows
    # ends up entirely in file1.csv.
    line_count = int((0.7 * len(rows)) + 1)
    seventy_perc_lines = leading_lines + rows[:line_count]
    thirty_perc_lines = leading_lines + rows[line_count:]
    with open("file1.csv", "a+") as file1:
        file1.writelines(line + '\n' for line in seventy_perc_lines)
    if thirty_perc_lines:
        with open("file2.csv", "a+") as file2:
            file2.writelines(line + '\n' for line in thirty_perc_lines)


temp_list = []
add_header = True
for i in csv_data:
    # A change in the first field closes the group collected so far.
    if temp_list and i.split(',')[0] != temp_list[0].split(',')[0]:
        _write_group(temp_list, [header] if add_header else [])
        add_header = False
        temp_list = []
    temp_list.append(i)
# Write the final group as well; it has no following row to close it.
if temp_list:
    _write_group(temp_list, [header] if add_header else [])
file1.csv
file2.csv
Note: If there are only 3 lines, this code writes all 3 lines to file1 and adds nothing to file2. You need to edit the code if you wish to change this behaviour.
Splitting a csv files into multiple file with overlapping rows using python
I don't have the data to test this thoroughly, but it should work:
# Maximum number of data lines written to each output file.
CHUNK = 4_000
# Number of data lines that consecutive output files have in common.
OVERLAP = 1_000
def write_csv(lines, filename, header):
    """Write ``header`` followed by every string in ``lines`` to ``filename``.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    The strings are written exactly as given; no line endings are added.
    """
    with open(filename, 'w') as out_file:
        out_file.write(header)
        for line in lines:
            out_file.write(line)
def get_csv_gen():
    """Yield 'data_part_1.csv', 'data_part_2.csv', ... without end."""
    part = 0
    while True:
        part += 1
        yield f'data_part_{part}.csv'
# Split 8-0new2.csv into files of up to CHUNK data lines, where each file
# starts CHUNK - OVERLAP lines after the previous one, and repeat the header
# line at the top of every file.
get_csv_name = get_csv_gen()
with open('8-0new2.csv') as source:
    header = source.readline()
    lines = source.readlines()
for offset in range(0, len(lines), CHUNK - OVERLAP):
    write_csv(lines[offset:offset + CHUNK], next(get_csv_name), header)
    # Once a chunk reaches the end of the data, any further chunk would hold
    # only lines already present in this one, so stop here.
    if offset + CHUNK >= len(lines):
        break
Related Topics
Remove White Space from Entire Dataframe
Python Pandas Valueerror Arrays Must Be All Same Length
Taking Data from Drop-Down Menu Using Flask
How to Check List Containing Nan
Reduce Multi-Index/Multi-Level Dataframe to Single Index, Single Level
Check Json Data Is None in Python
How to Remove Words in a Column in Pandas
Python - How to Make User Input Not Case Sensitive
How to Remove Any Url Within a String in Python
How to Save All the Variables in the Current Python Session
Remove Partial String from Dataframe With Pandas
How to Call a Django Function on Button Click
Clicking Links With Python Beautifulsoup
Regex Check If Specific Multiple Words Present in a Sentence
How to Clear/Delete the Contents of a Tkinter Text Widget
Invalidargumenterror: Logits and Labels Must Have the Same First Dimension Seq2Seq Tensorflow