-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathread_data.py
More file actions
84 lines (64 loc) · 2.98 KB
/
read_data.py
File metadata and controls
84 lines (64 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
CSC111 Winter 2023 Course Project
By: Gerald Wang, Mark Estiller, Calvin Ji, Dharma Ong
This file contains functions that will read the dataset and output it in a format
that can be used for the computations.
Copyright and Usage Information
===============================
This file is Copyright (c) 2023 by Gerald Wang, Mark Estiller, Calvin Ji, Dharma Ong.
This module is expected to use data from:
https://www.kaggle.com/datasets/zusmani/uberdrives
"My Uber Drives" by user Zeeshan-Ul-Hassan Usmani. The data encompassed his Uber drives
in 2016 (1,175 drives total), and was provided as a CSV file whose columns are, from left to right:
start date, end date, category, start, stop, number of miles, and purpose.
"""
from __future__ import annotations

import csv
import datetime
def _parse_timestamp(timestamp: str) -> datetime.datetime:
    """Return the datetime described by timestamp, formatted like 'M/D/YYYY H:MM'.

    Only the month, day, year, hour and minute are read; any further time fields
    (for example seconds) are ignored.

    Raises an IndexError or ValueError if timestamp does not follow this format.
    """
    fields = timestamp.split()
    date_params = fields[0].split('/')
    time_params = fields[1].split(':')
    return datetime.datetime(
        int(date_params[2]), int(date_params[0]), int(date_params[1]),
        int(time_params[0]), int(time_params[1]))


def read_csv(file_name: str) -> list[tuple[float, str, str, float]]:
    """Read the csv file and return a list of tuples of (time, start_loc, stop_loc, distance).

    time is a float representing the amount of time taken in seconds
    start_loc is a string representing the starting neighborhood
    stop_loc is a string representing the stopping neighborhood
    distance is a float representing the distance travelled in miles

    The first line of the file is treated as a header and skipped. A row is left out
    of the result when:
        - it has fewer than six columns (for example, a blank line),
        - its start and stop locations are the same, or
        - either location is 'Unknown Location'.

    Rows are parsed with the csv module, so a quoted field may contain commas.

    Raises an IndexError or ValueError if a row that is kept has a malformed
    timestamp, or a ValueError if its distance is not a number.

    Precondition:
        - file_name != ''
    """
    trip_data = []

    # newline='' lets the csv module handle line endings itself.
    with open(file_name, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; an empty file just yields no trips

        for row in reader:
            # Columns: start date, end date, category, start, stop, miles, purpose.
            if len(row) < 6:
                continue

            start_loc = row[3]
            stop_loc = row[4]
            if start_loc == stop_loc or 'Unknown Location' in (start_loc, stop_loc):
                continue

            start_datetime = _parse_timestamp(row[0])
            stop_datetime = _parse_timestamp(row[1])
            time_delta = stop_datetime - start_datetime

            trip_data.append(
                (time_delta.total_seconds(), start_loc, stop_loc, float(row[5])))

    return trip_data
if __name__ == '__main__':
    # Run every doctest found in this module, printing each example as it is checked.
    import doctest

    doctest.testmod(verbose=True)

    # Check this module with PythonTA. Lines may be up to 120 characters long,
    # and the error codes listed under 'disable' are not reported.
    import python_ta

    PYTA_CONFIG = {
        'max-line-length': 120,
        'disable': ['E9992', 'E9997', 'E9999', 'E9998', 'R0914'],
    }
    python_ta.check_all(config=PYTA_CONFIG)