Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
SecretlyCanadianTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
4
class SecretlyCanadianTemplate:
    """Importer template with the file-specific fixes for Secretly Canadian reports.

    Provides CSV loading from S3, mapping of the client's sale-type labels to
    short codes, and extraction of the reporting period from the filename.
    """

    # Client sale-type label -> internal short code. Labels that are not
    # listed here fall back to _DEFAULT_TYPE_CODE.
    _TYPE_CODES = {
        "Radio (Direct)": "R",
        "Cloud": "C",
        "Track": "T",
        "Album": "A",
        "Not a Sale": "O",
        "Radio (Broadcast)": "R_B",
        "Radio (broadcasting statutory)": "R_B",
    }
    _DEFAULT_TYPE_CODE = "S"

    def preprocessing(self, filename, features, session, rel_col):
        """Load the report from S3 and normalise it for further processing.

        Args:
            filename (str): full S3 path of the file.
            features (dict): must contain the "delimiter", "skip_rows" and
                "encoding" entries used to parse the current file.
            session (boto3 obj): AWS session handed to awswrangler.
            rel_col (str): column name used to identify upc/release_id; it is
                read as ``str`` (presumably to keep identifiers such as
                leading-zero UPCs intact -- confirm with the data owner).

        Returns:
            pandas dataframe with every "?" cell replaced by 0.

        Raises:
            KeyError: if ``features`` lacks one of the required entries.
        """
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},
            low_memory=False,
            boto3_session=session,
        )
        # "?" cells are turned into 0 before the frame is handed back.
        df.replace("?", 0, inplace=True)
        return df

    def type(self, transaction_type):
        """Map the sale type shown in the file to the client's short code.

        Args:
            transaction_type (str): sale type shown in the original file.

        Returns:
            str: the mapped code ("R", "C", "T", "A", "O" or "R_B"), or "S"
            for any value that is not a known label.
        """
        try:
            return self._TYPE_CODES.get(transaction_type, self._DEFAULT_TYPE_CODE)
        except TypeError:
            # An unhashable value can never equal one of the labels, so it
            # gets the default code exactly like any other unknown value.
            return self._DEFAULT_TYPE_CODE

    def assetType(self, df):
        """Add the mapped sale-type column to ``df`` and return that column.

        The codes are derived from the 'transaction type' column and stored in
        "gettypesecretly2"; when that column is absent they are derived from
        'digalbumtrackorstream' and stored in "gettypesecretly".

        Args:
            df (pandas dataframe): dataframe to modify in place.

        Returns:
            pandas Series: the newly added column of sale-type codes.

        Raises:
            KeyError: if neither source column exists in ``df``.
        """
        try:
            labels = df["transaction type"]
            target = "gettypesecretly2"
        except KeyError:
            # Only a missing column triggers the fallback; any other error
            # is a real problem and must not be hidden.
            labels = df["digalbumtrackorstream"]
            target = "gettypesecretly"
        # Column-wise mapping: avoids building a Series per row and works on
        # empty frames as well.
        df[target] = labels.apply(self.type)
        return df[target]

    @staticmethod
    def _period_from_filename(filename, markers):
        """Return "YYYYMM" from the first ``MMDDYY<marker>`` stamp in ``filename``.

        Markers are tried in order; the first one that matches wins.
        """
        for marker in markers:
            found = re.search(r"\d{6}" + marker, filename)
            if found:
                stamp = found.group(0)
                # The stamp is MMDDYY: two-digit year expanded with "20",
                # followed by the month. The day is not used.
                return "20" + stamp[4:6] + stamp[:2]
        # IndexError is what the former ``findall(...)[0]`` raised on no match.
        raise IndexError(
            "no 6-digit date followed by one of %r found in %r" % (markers, filename)
        )

    def date_dig(self, filename):
        """Build the period string from a digital-format filename.

        The filename contains the date, for example ADM083122digpd.csv gives
        "202208". A "dig" stamp is preferred; a "NonNR" stamp is the fallback.

        Args:
            filename (str): current filename.

        Returns:
            str: period as "YYYYMM".

        Raises:
            IndexError: if the filename contains neither stamp.
        """
        return self._period_from_filename(filename, ("dig", "NonNR"))

    def date_phy(self, filename):
        """Build the period string from a physical-format filename.

        The filename contains the date, for example ABT083122physpd.csv gives
        "202208".

        Args:
            filename (str): current filename.

        Returns:
            str: period as "YYYYMM".

        Raises:
            IndexError: if the filename contains no "phy" stamp.
        """
        return self._period_from_filename(filename, ("phy",))
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate
Definition
SecretlyCanadianTemplate.py:4
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate.type
type(self, transaction_type)
Definition
SecretlyCanadianTemplate.py:23
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate.date_phy
date_phy(self, filename)
Definition
SecretlyCanadianTemplate.py:73
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate.assetType
assetType(self, df)
Definition
SecretlyCanadianTemplate.py:43
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate.date_dig
date_dig(self, filename)
Definition
SecretlyCanadianTemplate.py:57
src.importer.templates.SecretlyCanadianTemplate.SecretlyCanadianTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
SecretlyCanadianTemplate.py:5
Generated by
1.12.0