Glider
Loading...
Searching...
No Matches
SecretlyCanadianTemplate.py
Go to the documentation of this file.
1import re
2import awswrangler as wr
3
def preprocessing(self, filename, features, session, rel_col):
    """Read a delimited file from S3 and normalise its placeholder values.

    Args:
        filename (str): full S3 path of the file to read
        features (dict): must provide the "delimiter", "skip_rows" and
            "encoding" entries used to parse the current file
        session (boto3 obj): AWS session handed to the S3 reader
        rel_col (str): name of the upc/release_id column; it is read as
            ``str``
    Returns: frame (pandas dataframe)
    """
    read_options = {
        "sep": features["delimiter"],
        "skiprows": features["skip_rows"],
        "encoding": features["encoding"],
        # Force the identifier column to str instead of letting it be inferred.
        "dtype": {rel_col: str},
        "low_memory": False,
        "boto3_session": session,
    }
    frame = wr.s3.read_csv(filename, **read_options)
    # Cells that are exactly "?" are replaced by 0.
    frame.replace("?", 0, inplace=True)
    return frame
22
def type(self, transaction_type):
    """Translate the sale type found in the file into the client's short code.

    Args:
        transaction_type (str): sale type shown in the original file
    Returns: short code (str); "S" when the sale type is not listed below
    """
    # Checked in order; both broadcast radio labels share the same code.
    known_codes = (
        ("Radio (Direct)", "R"),
        ("Cloud", "C"),
        ("Track", "T"),
        ("Album", "A"),
        ("Not a Sale", "O"),
        ("Radio (Broadcast)", "R_B"),
        ("Radio (broadcasting statutory)", "R_B"),
    )
    for label, code in known_codes:
        if transaction_type == label:
            return code
    return "S"
42
def assetType(self, df):
    """Add a sale-type code column to the dataframe and return that column.

    The codes come from ``self.type``. When the dataframe has a
    'transaction type' column, it is used and the codes are stored in
    'gettypesecretly2'; otherwise 'digalbumtrackorstream' is used and the
    codes are stored in 'gettypesecretly'.

    Args:
        df (pandas dataframe): dataframe to annotate; it is modified in
            place (the code column is added to it)
    Returns: the newly added code column (pandas series)
    Raises:
        KeyError: if neither source column is present
    """
    # Pick the source column explicitly instead of relying on a bare except,
    # which would also hide unrelated failures.
    if "transaction type" in df.columns:
        source_col, code_col = "transaction type", "gettypesecretly2"
    else:
        source_col, code_col = "digalbumtrackorstream", "gettypesecretly"
    # Only one column is needed, so map over it rather than over whole rows.
    df[code_col] = df[source_col].map(self.type)
    return df[code_col]
56
def date_dig(self, filename):
    """Build the period string from the date embedded in a digital filename.

    The filename must contain six digits (read as MMDDYY) immediately
    followed by 'dig' or, failing that, by 'NonNR'.
    For example ADM083122digpd.csv gives "202208".

    Args:
        filename (str): current filename
    Returns: date_str (str), the year prefixed with "20" followed by the
        two-digit month
    Raises:
        IndexError: if the filename contains neither marker
    """
    # 'dig' takes priority; 'NonNR' is only tried when 'dig' is absent.
    for pattern in (r'\d{6}dig', r'\d{6}NonNR'):
        found = re.search(pattern, filename)
        if found is not None:
            break
    else:
        # IndexError is kept so existing callers see the same exception type.
        raise IndexError(
            f"no MMDDYY date followed by 'dig' or 'NonNR' in filename: {filename!r}"
        )
    stamp = found.group()
    year = "20" + stamp[4:6]
    month = stamp[:2]
    return year + month
72
def date_phy(self, filename):
    """Build the period string from the date embedded in a physical filename.

    The filename must contain six digits (read as MMDDYY) immediately
    followed by 'phy'. For example ABT083122physpd.csv gives "202208".
    It's used for physical formats.

    Args:
        filename (str): current filename
    Returns: date_str (str), the year prefixed with "20" followed by the
        two-digit month
    """
    matches = re.findall(r'\d{6}phy', filename)
    # Indexing raises IndexError when the filename has no such date.
    stamp = matches[0]
    return "".join(("20", stamp[4:6], stamp[:2]))