Read and write to the same csv file - delete row if condition met (stop writerow to erase everything in file)
import csv


def filter_dates(csv_filepath):
    """Rewrite *csv_filepath* in place, keeping the header row plus every
    row whose year (column index 2) is 2011 or later.

    The whole file is read into memory first, so any parsing error
    (e.g. a non-numeric year) happens BEFORE the file is truncated
    for writing.
    """
    with open(csv_filepath, 'r') as in_file:
        csv_in = list(csv.reader(in_file))

    # accumulator for the new list — only valid rows are added
    filtered_list = []
    filtered_list.append(csv_in[0])  # append header to new list
    for row in csv_in[1:]:  # skip header (first row)
        if int(row[2]) >= 2011:  # keep entries from 2011 or newer
            filtered_list.append(row)

    # now filtered_list contains only entries whose index 2 holds a valid year;
    # overwrite the original file with the filtered rows
    # (newline='' avoids the Windows blank-line issue with the csv module)
    with open(csv_filepath, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerows(filtered_list)
Here is a fully independent example:
# Standalone example: filter a list of CSV rows, keeping the header plus
# every row whose year (index 2) is 2011 or later.
csv_in = [
    ['name', 'sbin', 'year'],
    ['moby_dick', 'sbin', '1851'],
    ['new_book', 'sbin', '2011'],
    ['newest_book', 'sbin', '2018'],
]
filtered_list = []
filtered_list.append(csv_in[0])  # this is where the header is added
for row in csv_in[1:]:  # skip header (first row)
    if int(row[2]) >= 2011:
        filtered_list.append(row)
print(filtered_list)
A couple of notes:
- it's generally good to store this kind of stuff in memory before you open the file to write (or overwrite in this case) so that any error while reading and filtering the file happens before we try to modify the output
- easiest way to overwrite a file is to first read it, commit the contents to memory (`csv_in`, the list I've defined in the first `with` block in this case), and then *finally*, once the data is ready for 'shipping' (`filtered_list`), commit it to the file - never ever use the `global` declaration in python, it's never worth it and causes a lot of headaches down the line
test
Read and write to the same csv file - delete row if condition met (stop writerow to erase everything in file)
import csv


def filter_dates(csv_filepath):
    """Rewrite *csv_filepath* in place, keeping the header row plus every
    row whose year (column index 2) is 2011 or later.

    The whole file is read into memory first, so any parsing error
    (e.g. a non-numeric year) happens BEFORE the file is truncated
    for writing.
    """
    with open(csv_filepath, 'r') as in_file:
        csv_in = list(csv.reader(in_file))

    # accumulator for the new list — only valid rows are added
    filtered_list = []
    filtered_list.append(csv_in[0])  # append header to new list
    for row in csv_in[1:]:  # skip header (first row)
        if int(row[2]) >= 2011:  # keep entries from 2011 or newer
            filtered_list.append(row)

    # now filtered_list contains only entries whose index 2 holds a valid year;
    # overwrite the original file with the filtered rows
    # (newline='' avoids the Windows blank-line issue with the csv module)
    with open(csv_filepath, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerows(filtered_list)
Here is a fully independent example:
# Standalone example: filter a list of CSV rows, keeping the header plus
# every row whose year (index 2) is 2011 or later.
csv_in = [
    ['name', 'sbin', 'year'],
    ['moby_dick', 'sbin', '1851'],
    ['new_book', 'sbin', '2011'],
    ['newest_book', 'sbin', '2018'],
]
filtered_list = []
filtered_list.append(csv_in[0])  # this is where the header is added
for row in csv_in[1:]:  # skip header (first row)
    if int(row[2]) >= 2011:
        filtered_list.append(row)
print(filtered_list)
A couple of notes:
- it's generally good to store this kind of stuff in memory before you open the file to write (or overwrite in this case) so that any error while reading and filtering the file happens before we try to modify the output
- easiest way to overwrite a file is to first read it, commit the contents to memory (
csv_in
the array I've defined in the firstwith
block this case), and then *finally8 once the data is ready (filtered_list
) for 'shipping' commit it to a file - never ever use the
global
declaration in python, it's never worth it and causes a lot of headaches down the line
test
Remove row from the CSV file if condition met
Your current script is opening your auction CSV file for appending, and then whilst it is still open, attempting to open it again for reading. This is probably why it is not updating as expected.
A better approach would be to first read the entire contents of your existing saved auction file into a dictionary. The key could be the link
which would then make it easy to determine if you have already seen an existing auction.
Next scrape the current auctions and update the saved_auctions
dictionary as needed.
Finally at the end, write the contents of saved_auctions
back to the CSV file.
For example:
import requests
from scrapy.selector import Selector
import csv
import re

auction_filename = 'pickels_dataset.csv'

# Load existing auctions into a dictionary with link as key.
# Column layout per row (indices used below): 8 = sale_price, 9 = link.
saved_auctions = {}
with open(auction_filename, newline='', encoding='utf-8') as f_auction_file:
    for row in csv.reader(f_auction_file):
        saved_auctions[row[9]] = row  # dictionary key is link

live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
    auction_link = auctions.get('viewSaleListingLink')
    if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
        auction_request = requests.get(url=auction_link)
        response = Selector(text=auction_request.text)
        # The sale id lives inside an inline <script>; try both known variable names.
        sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
        sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
        if sales_id == []:
            continue
        auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
        auction_sale_link_requests = requests.get(url=auction_sale_link)
        auctions_data = auction_sale_link_requests.json().get('SearchResults')
        if auctions_data == []:
            print("NO RESULTS")
        for auction_data in auctions_data:
            if int(auction_data.get('MinimumBid')) > 0:
                ids = auction_data.get('TargetId')
                main_title = auction_data.get('Title')
                short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
                make = auction_data.get('Make')
                model = auction_data.get('Model')
                variant = auction_data.get('Series')
                transmission = auction_data.get('Transmission')
                odometer = auction_data.get('Odometer')
                state = auction_data.get('Location').get('State')
                minimum_bid = auction_data.get('MinimumBid')
                sale_price = "${:,.2f}".format(minimum_bid).strip()
                link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
                link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
                sale_date = auction_data.get('SaleEndString')
                # Keep this order in sync with the indices used above
                # (sale_price at 8, link at 9).
                auction_values = [
                    main_title, short_title, make,
                    model, variant, transmission, odometer,
                    state, sale_price,
                    link, sale_date
                ]
                if link in saved_auctions:
                    if saved_auctions[link][8] == sale_price:
                        print('Same result already exists in the file')
                    else:
                        print('New value updated')
                        saved_auctions[link] = auction_values  # Updated the entry
                else:
                    print('New auction added')
                    saved_auctions[link] = auction_values

# Update the saved auction file in one go, overwriting the old contents.
with open(auction_filename, 'w', newline='', encoding='utf-8') as f_auction_file:
    csv_auction_file = csv.writer(f_auction_file)
    csv_auction_file.writerows(saved_auctions.values())
If you want to also remove auctions that are no longer active, then it would probably be best to simply ignore the saved file and just write all current entries as is.
Python replace cells of CSV file
You were right about where your problem was.
The following fixes it:
# Drive updateprices over the first 20 rows
# (replace 20 with the length of the csv file if needed).
x = 0
while x < 20:
    updateprices("10,00", "TITELS", x)
    x += 1
def updateprices(prijstitel, titelprijs, var):
    """Copy row *var* of the input CSV to out.csv, replacing the price
    before the 'END' marker with *prijstitel* when the row contains
    *titelprijs* and the price changed.

    NOTE(review): reads the module-level name ``csvfile`` (input path) —
    confirm it is defined before calling.
    """
    with open(csvfile, 'r') as csvfileadjust:  # open the file
        filereader = csv.reader(csvfileadjust)
        row = list(islice(filereader, var + 1))[-1]  # read up to row var+1, keep the last
    if titelprijs in row:  # the title is in this row
        ind = row.index('END')  # locate the 'END' marker in the row
        lastprijs = row[ind - 1]  # current price sits just before 'END'
        print(lastprijs)
        if lastprijs != prijstitel:  # price changed (e.g. 9,99 != 10,00)
            row.pop(ind)  # drop the 'END' marker
            row.append(prijstitel)  # add the new price (e.g. 10,00)
            row.append("END")
            with open('out.csv', 'a') as out:
                tester = csv.writer(out)
                tester.writerow(row)  # write the updated row
    else:  # the title is not in the row
        with open('out.csv', 'a') as out:
            tester = csv.writer(out)
            tester.writerow(row)  # copy the line unchanged
output and input dictionary
on windows (python 3), if you don't use open("D_FINAL.csv", "w",newline="")
, the resulting csv file has extra blank lines (because of the extra carriage return char inserted making end of lines like \r\r\n
).
Unfortunately, those double carriage return chars are read back not the same way: it generates a blank row after each valid data row. So unpacking fails because there aren't any elements in the row.
More on this nasty side-effect: CSV file written with Python has blank lines between each row
So when reading, you get a blank line.
Just do (and upgrade your python skills a notch):
# newline="" prevents the extra carriage return on Windows that would
# otherwise produce blank rows between entries.
with open("D_FINAL.csv", "w", newline="") as f:  # python 2: use mode "wb"
    writer = csv.writer(f)
    # write all (key, value) pairs in one go
    writer.writerows(D_FINAL.items())
(write in one go with writerows
, use a with
block so you don't miss the call to close
and the file isn't locked anymore)
Note that json
module is best suited to read & write dictionaries (if your need is serialization)
Aside: it's better to read back using a better technique too (it seems that the newline
thingy isn't really necessary when reading):
with open("D_FINAL.csv", "r", newline="") as f:  # python 2: use mode "rb"
    reader = csv.reader(f)  # renamed: this is a reader, not a writer
    output = {k: v for k, v in reader}  # unpack + dictionary comprehension
csv.writerows() puts newline after each row
This problem occurs only with Python on Windows.
In Python v3, you need to add newline=''
in the open call per:
Python 3.3 CSV.Writer writes extra blank rows
On Python v2, you need to open the file as binary with "b" in your open() call before passing to csv
Changing the line
with open('stocks2.csv','w') as f:
to:
with open('stocks2.csv','wb') as f:
will fix the problem
More info about the issue here:
CSV in Python adding an extra carriage return, on Windows
How to iterate over the rows from 2 files, compare the values and update the value in a file when the condition is met?
Finally, here is the solution you were searching for.
import pandas as pd

# Density.topo columns A..D; C and D are rounded so they can be compared
# against exact integers below.
df2 = pd.read_csv('Density.topo', header=0, names=list('ABCD'), delimiter=r'\s+', skiprows=1)
df2[['C', 'D']] = df2[['C', 'D']].round()

new_file_content = ''
with open('tryout.hmo', 'r') as f:
    for line in f:
        if line[11:13] == '10':  # only rewrite lines flagged with material code 10
            if line[3].isspace():
                ID_to_search_for = line[4:8]  # number with 4 digits
            else:
                ID_to_search_for = line[3:8]  # number with 5 digits
            # NOTE(review): assumes column A compares equal to the sliced
            # string ID — confirm dtypes match against the real files.
            search_idx = df2[df2['A'] == ID_to_search_for].index[0]
            if df2['C'][search_idx] == 1 and df2['D'][search_idx] == 0:
                change = '18'  # magnet
            elif df2['C'][search_idx] == 1 and df2['D'][search_idx] == 1:
                change = '19'  # iron
            else:
                change = '20'  # air
            new_file_content += line[:11] + change + line[13:]
        else:
            new_file_content += line

with open('tryout_changed.hmo', 'w') as f:
    f.write(new_file_content)
if you don't want to use dataframes, you can do it like this:
# Variant without pandas: the whole density file is split into a
# list of string lists.
with open('density.topo') as f:
    lists_of_list = [line.rstrip().split() for line in f]

new_file_content = ''
with open('tryout_test.hmo', 'r') as f:
    for line in f:
        if line[11:13] == '10':  # only rewrite lines flagged with material code 10
            if line[3].isspace():
                ID_to_search_for = line[4:8]  # number with 4 digits
            else:
                ID_to_search_for = line[3:8]  # number with 5 digits
            for idx, sublist in enumerate(lists_of_list):
                if sublist[0] == ID_to_search_for:
                    # split() yields strings (e.g. '1.0', '1e-05'), so convert
                    # and round before comparing — the original compared the
                    # strings directly to ints 1/0, which is always False.
                    c = round(float(sublist[2]))
                    d = round(float(sublist[3]))
                    if c == 1 and d == 0:
                        change = '18'  # magnet
                    elif c == 1 and d == 1:
                        change = '19'  # iron
                    else:
                        change = '20'  # air
                    new_file_content += line[:11] + change + line[13:]
        else:
            new_file_content += line

with open('tryout_changed.hmo', 'w') as f:
    f.write(new_file_content)
ok, here is my final answer. It does (again) all things you were searching for. Please debug your code in your IDE if there is a problem. You should start using context manager instead of open and closing files step by step.
I wrote the new code around your code in the question and added some comments to it.
oldFileName = 'tryout.hmo'
newFileName = 'tryout_NEW.hmo'
topoFileName = 'Density.topo'

# Read the whole old file into memory (context manager so the handle
# is closed again — the original leaked it).
with open(oldFileName, "r") as readme:
    oldLines = readme.readlines()

m = int(oldLines[3])  # line 4 of the file holds the component count
print(m)
new_m = m + 3  # three new components get inserted below
m1 = str(m)
new_m1 = str(new_m)

# Find the row where the new component lines must be inserted.
Phrase = "END_COMP_DATA"
with open(oldFileName, "r") as oldFile:
    for number, lin in enumerate(oldFile):
        if Phrase in lin:
            n = number
# NOTE(review): n stays undefined if the phrase never occurs — the
# script then fails on the insert below; confirm the file always
# contains END_COMP_DATA.

# insert 3 lines into tryout_new at the right position (row n)
magnet = f' {m+1} " topo_magnet"\n'
iron = f' {m+2} " topo_iron"\n'
air = f' {m+3} " topo_air"\n'
oldLines[n:n] = [magnet, iron, air]

with open('density.topo') as f:
    data_density = [line.rstrip().split() for line in f]

newFile = open(newFileName, "w")
flag = 0  # becomes 1 once the element-data section starts
for idx, row in enumerate(oldLines):
    # avoid shadowing the builtin name 'list' for the stripped row
    lst = row.rstrip()
    tmp_tryout = lst.split()
    if row.find("BEG_ELEM_DATA") > -1:
        flag = 1
    if flag == 1 and len(tmp_tryout) > 1 and tmp_tryout[1] == '10':
        # search density.topo for a line whose id matches tmp_tryout[0]
        # (e.g. 3749) and keep the whole matching line
        density_idx_line = list(filter(lambda x: x[0] == tmp_tryout[0], data_density))
        if len(density_idx_line) > 0:
            # ' 10 ' (with surrounding whitespace) so only the standalone
            # 10 is replaced and not e.g. 3104 -> 3184
            if density_idx_line[0][2] == '1.0' and density_idx_line[0][3] == '1e-05':
                newFile.write(row.replace(' 10 ', ' 18 '))
            elif density_idx_line[0][2] == '1.0' and density_idx_line[0][3] == '1.0':
                newFile.write(row.replace(' 10 ', ' 19 '))
            else:
                newFile.write(row.replace(' 10 ', ' 20 '))
        else:
            newFile.write(row)
    else:
        if idx == 3:
            # patch the component count on line 4 to account for the
            # three inserted components
            newFile.write(row.replace(m1, new_m1))
        else:
            newFile.write(row)
newFile.close()
print("script terminated successfully!")
Related Topics
How to Divide Each Column of Pandas Dataframe by a Series
How to Get Max Output from a While Loop
Python Pandas Dataframe Get All Combinations of Column Values
Printing the Number of Days in a Given Month and Year [Python]
Redirect Command Line Results to a Tkinter Gui
Calculating the Area Under a Curve Given a Set of Coordinates, Without Knowing the Function
What Does Sqlite3.Operationalerror: Near "-": Syntax Error Mean
How to Remove the Double Quote When the Value Is Empty in Spark
How to Merge Columns from Multiple CSV Files Using Python
Key Error When Selecting Columns in Pandas Dataframe After Read_Csv
How to Do a Conditional Count After Groupby on a Pandas Dataframe
How to Find Rows of One Dataframe in Another Dataframe
Get Only Unique Words from a Sentence in Python
How to Overwrite the Previous Print to Stdout