-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfinal_processing_tuned.py
More file actions
27 lines (21 loc) · 976 Bytes
/
final_processing_tuned.py
File metadata and controls
27 lines (21 loc) · 976 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd
import os
# Drop every metadata row whose filter column matches one of the unwanted
# values, write the surviving rows to a new CSV, and delete the .txt file
# that belongs to each dropped row.
#
# Run this script from inside the folder that holds both metadata.csv and the
# .txt files: every path below is relative to the current working directory.
dataf = pd.read_csv("metadata.csv")

# Values that mark a row for removal. Numbers and strings both work,
# e.g. [1916, 1917] or ['nehru', 'gandhi'].
year_to_be_removed = ['independencerepublicday']

# Zero-based position of the column compared against the values above;
# position 12 is the 13th column of the CSV.
filter_column = 12

# Build the row mask in one vectorized pass instead of probing the frame
# cell by cell with iloc inside a Python loop.
rows_to_remove = dataf.iloc[:, filter_column].isin(year_to_be_removed)

# Column 0 supplies the base name (without the ".txt" suffix) of the text
# file associated with each row.
txt_files_to_be_removed = dataf.loc[rows_to_remove].iloc[:, 0].tolist()
index_to_be_removed = dataf.index[rows_to_remove].tolist()
print(txt_files_to_be_removed)
print(index_to_be_removed)

dataf.drop(index=index_to_be_removed, inplace=True)
dataf.reset_index(drop=True, inplace=True)
# The renumbered index is written out as the first, unnamed column.
dataf.to_csv("new_combined.csv", encoding="utf-8")

# str() keeps the concatenation working when pandas parsed the first column
# as numbers rather than text.
fin = [str(name) + ".txt" for name in txt_files_to_be_removed]
for txt_path in fin:
    try:
        os.remove(txt_path)
    except FileNotFoundError:
        # The CSV has already been rewritten at this point, so stopping here
        # would leave the remaining files behind; report and keep going.
        print("File not found, skipped: " + txt_path)