Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions doc/data_dictionary.csv
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
Column Name,Data Type,Nullable,Example,Description
userid,Integer,FALSE,2021010138044459,"A 16-digit unique user identifier number; constructed from registration date, registration ISO numeric country code, and uid."
firstname,String,FALSE,kellen,The registered user firstname.
lastname,String,FALSE,mcgregor,The registered user lastname.
first_name,String,FALSE,kellen,The registered user first name.
last_name,String,FALSE,mcgregor,The registered user last name.
registration_date,Date,FALSE,2021-01-13,The date the user registered on.
registration_country_code,String,FALSE,FR,The country code of the user's registered country of residence.
uid,Integer,FALSE,2127333684657263,A unique incremental id for the user.
email_domain,String,FALSE,gmail.com,The domain of the registered user email address.
device_hash,String,FALSE,5b386290c91e553e,The hashed device id the user used in the transaction.
device_type,String,FALSE,Samsung Galaxy A32,The device type of the hashed device id the user used in the transaction.
card_hash,String,TRUE,e00643bdd845feba,The hashed card id the user used in the transaction.
card_type,String,TRUE,visa,The card type of the hashed card id the user used in the transaction.
card_type,String,TRUE,Visa,"The card type of the hashed card id the user used in the transaction; one of ['Visa', 'Mastercard']."
card_country_code,String,TRUE,ES,The card country code of the hashed card id the user used in the transaction.
ip_hash,String,FALSE,1775d41b3788a941,The hashed ip address the user used in the transaction.
ip_country_code,String,FALSE,PL,The ip address country code of the hashed ip address the user used in the transaction.
application_hash,String,FALSE,a726fea0a21cfb47,The hashed application id the transaction relates to.
transaction_hash,String,FALSE,ced72c91695a15c6,The hashed id of the transaction.
transaction_date,Date,FALSE,2021-01-20,The date of the transaction.
transaction_amount,Float,FALSE,1.44,The transaction amount.
transaction_payment_method,String,TRUE,card,"The payment method used to complete the transaction; one of ['card', 'wallet', 'points']"
card_payment_channel,String,TRUE,paypal,"The payment channel the user used to complete the transaction; one of ['paypal', 'adyen', 'appstore', 'worldpay', 'docomo']."
transaction_status,String,FALSE,successful,"The status of the transaction; one of ['successful', 'pending', 'rejected']."
transaction_payment_method,String,TRUE,Card,"The payment method used to complete the transaction; one of ['Card', 'Wallet', 'Points']."
card_payment_channel,String,TRUE,PayPal,"The payment channel the user used to complete the transaction; one of ['PayPal', 'Adyen', 'AppStore', 'Worldpay', 'Docomo']."
transaction_status,String,FALSE,Successful,"The status of the transaction; one of ['Successful', 'Pending', 'Rejected']."
transaction_error_code,String,TRUE,E902:AuthenicationFailure,"The associated error code if the transaction was rejected; one of ['E900:ConnectionTimeout', 'E901:SuspectedFraud', 'E902:AuthenicationFailure', 'E903:UserCancelled', 'E904:InsufficientFunds']."
8 changes: 4 additions & 4 deletions doc/entity_relationship_diagram.drawio
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36" version="24.7.14">
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36" version="27.1.5">
<diagram id="wwjiHfKDo_TagWvFxxHf" name="Page-1">
<mxGraphModel dx="1050" dy="530" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
<mxGraphModel dx="1042" dy="527" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
Expand Down Expand Up @@ -379,7 +379,7 @@
<mxRectangle width="30" height="30" as="alternateBounds" />
</mxGeometry>
</mxCell>
<mxCell id="1PWWiRxdMhmqFgYuV-uh-162" value="firstname" style="shape=partialRectangle;connectable=0;fillColor=none;top=0;left=0;bottom=0;right=0;align=left;spacingLeft=6;overflow=hidden;whiteSpace=wrap;html=1;" parent="1PWWiRxdMhmqFgYuV-uh-160" vertex="1">
<mxCell id="1PWWiRxdMhmqFgYuV-uh-162" value="first_name" style="shape=partialRectangle;connectable=0;fillColor=none;top=0;left=0;bottom=0;right=0;align=left;spacingLeft=6;overflow=hidden;whiteSpace=wrap;html=1;" parent="1PWWiRxdMhmqFgYuV-uh-160" vertex="1">
<mxGeometry x="30" width="150" height="30" as="geometry">
<mxRectangle width="150" height="30" as="alternateBounds" />
</mxGeometry>
Expand All @@ -392,7 +392,7 @@
<mxRectangle width="30" height="30" as="alternateBounds" />
</mxGeometry>
</mxCell>
<mxCell id="1PWWiRxdMhmqFgYuV-uh-165" value="lastname" style="shape=partialRectangle;connectable=0;fillColor=none;top=0;left=0;bottom=0;right=0;align=left;spacingLeft=6;overflow=hidden;whiteSpace=wrap;html=1;" parent="1PWWiRxdMhmqFgYuV-uh-163" vertex="1">
<mxCell id="1PWWiRxdMhmqFgYuV-uh-165" value="last_name" style="shape=partialRectangle;connectable=0;fillColor=none;top=0;left=0;bottom=0;right=0;align=left;spacingLeft=6;overflow=hidden;whiteSpace=wrap;html=1;" parent="1PWWiRxdMhmqFgYuV-uh-163" vertex="1">
<mxGeometry x="30" width="150" height="30" as="geometry">
<mxRectangle width="150" height="30" as="alternateBounds" />
</mxGeometry>
Expand Down
Binary file modified doc/entity_relationship_diagram.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions generator/cons.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
default_registration_end_date = (date_today - datetime.timedelta(days=366)).strftime(date_date_strftime)
default_transaction_start_date = (date_today - datetime.timedelta(days=365)).strftime(date_date_strftime)
default_transaction_end_date = date_today.strftime(date_date_strftime)
default_is_release = False
# define default input parameters dictionary
default_input_params_dict = {
"n_users": default_n_users,
Expand Down
1 change: 1 addition & 0 deletions generator/exeKaggle.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
call uv run main.py --n_users 13000 --use_random_seed 1 --n_itr 2 --is_release
3 changes: 1 addition & 2 deletions generator/exeMain.cmd
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
call uv run main.py --n_users 100 --use_random_seed 1 --n_itr 1
:: call uv run main.py --n_users 13000 --use_random_seed 1 --n_itr 2
call uv run main.py --n_users 100 --use_random_seed 1 --n_itr 1
4 changes: 4 additions & 0 deletions generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ def main(input_params_dict: dict):
# order user data by uid and transaction data by transaction date, both ascending
user_data = user_data.sort_values(by = 'uid').reset_index(drop = True)
trans_data = trans_data.sort_values(by = 'transaction_date').reset_index(drop = True)
# if data is for release drop itr_hash column
if input_params_dict['is_release']:
user_data = user_data.drop(columns=['itr_hash'])
trans_data = trans_data.drop(columns=['itr_hash'])
# print out head and shape of data
logging.info(f'RandomTeleComUsersData.shape: {user_data.shape}')
logging.info(f'RandomTeleComTransData.shape: {trans_data.shape}')
Expand Down
2 changes: 1 addition & 1 deletion generator/qa/Uids.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def unique_cards(self):
nunique_cards_per_uid = self.data.groupby(by='uid', dropna=False, as_index=False).agg({'card_hash':'nunique'}).sort_values(by=['card_hash'])
# test assertions
assert nunique_cards_per_uid['card_hash'].max() <= 20
assert nunique_cards_per_uid['card_hash'].min() == 0
assert nunique_cards_per_uid['card_hash'].min() >= 0
assert self.data['card_hash'].isnull().any()
if self.show_plots:
# plot distribution
Expand Down
4 changes: 4 additions & 0 deletions generator/utilities/commandline_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def commandline_interface() -> Dict[str, object]:
The start date for transactions.
transaction_end_date : str
The end date for transactions.
is_release : bool
Whether the data being generated is for release.

Returns
-------
Expand All @@ -48,6 +50,7 @@ def commandline_interface() -> Dict[str, object]:
parser.add_argument("--registration_end_date", action="store", dest="registration_end_date", type=str, default=cons.default_registration_end_date, help="String, the end date for registrations",)
parser.add_argument("--transaction_start_date", action="store", dest="transaction_start_date", type=str, default=cons.default_transaction_start_date, help="String, the start date for transactions",)
parser.add_argument("--transaction_end_date", action="store", dest="transaction_end_date", type=str, default=cons.default_transaction_end_date, help="String, the end date for transactions",)
parser.add_argument("--is_release", action="store_true", dest="is_release", default=cons.default_is_release, help="Bool, whether the data being generated is for release",)
# create an output dictionary to hold the results
input_params_dict = cons.default_input_params_dict.copy()
# extract input arguments
Expand All @@ -61,4 +64,5 @@ def commandline_interface() -> Dict[str, object]:
input_params_dict["registration_end_date"] = args.registration_end_date
input_params_dict["transaction_start_date"] = args.transaction_start_date
input_params_dict["transaction_end_date"] = args.transaction_end_date
input_params_dict["is_release"] = args.is_release
return input_params_dict