Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
PlaygroundTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
from
.TerritoryCode
import
TerritoryCode
4
5
class PlaygroundTemplate:
    """Import template for Playground report files.

    Provides the loading step (``preprocessing``), the report date taken from
    the filename (``date``) and the sale-type mapping (``type`` /
    ``assetType``).
    """

    def preprocessing(self, filename, features, session, rel_col):
        """Loads the file from S3 and passes it through the territory-code step

        Args:
            filename (str): s3 full path of filename
            features (dict): must contain the keys "delimiter", "skip_rows" and
                "encoding" required to parse the current file
            session (boto3 obj): AWS session handed to awswrangler
            rel_col (str): column name used to identify upc/release_id; it is
                read as str
        Returns: df (pandas dataframe)
        """
        # keep_default_na=False keeps empty cells as "" instead of NaN, which
        # is what the length check in `type` relies on.
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},
            keep_default_na=False,
            boto3_session=session,
        )
        # NOTE(review): the return value of territory_code is ignored, so this
        # presumably updates df in place - confirm against TerritoryCode.
        TerritoryCode().territory_code(df)
        return df

    def date(self, filename):
        """Builds the report date from the year-month contained in the filename

        For example Fonal_2020-08.xlsx gives "2020-08-01"
        Args:
            filename (str): current filename
        Returns: date (str), the first YYYY-MM found with "-01" appended
        Raises:
            IndexError: when the filename contains no YYYY-MM date (same
                exception type as the previous findall(...)[0] lookup)
        """
        # NOTE(review): the pattern also accepts a literal "+" after the year
        # ("2020+-08"); kept unchanged - confirm whether such names exist.
        found = re.search(r'\d{4}\+?-\d{2}', filename)
        if found is None:
            raise IndexError(
                f"no YYYY-MM date found in filename {filename!r}"
            )
        return found.group(0) + "-01"

    def type(self, Channel, ISRC):
        """Maps the sale type with the name specified by the client

        Args:
            Channel (str): distribution medium shown in the original file
            ISRC (str): track code shown in the original file; "", None or NaN
                mean that no ISRC is present
        Returns: single letter (str): "A" for a Download without ISRC, "T" for
            a Download whose ISRC is 12 characters long, "S" otherwise
        """
        if Channel != "Download":
            return "S"
        # None / NaN would otherwise be stringified to "None" / "nan" and a
        # Download without ISRC would wrongly fall through to "S".
        no_isrc = ISRC is None or (isinstance(ISRC, float) and ISRC != ISRC)
        isrc_length = 0 if no_isrc else len(str(ISRC))
        if isrc_length == 0:
            return "A"
        if isrc_length == 12:
            return "T"
        return "S"

    def assetType(self, df):
        """Adds the "gettypeplayground_dig" column by applying `type` per row

        Args:
            df (pandas dataframe): dataframe with "channel" and "isrc" columns;
                it is modified in place
        Returns: df (pandas dataframe), the same object that was passed in
        """
        # A positional list is used instead of df.apply(axis=1): on an empty
        # frame apply returns a DataFrame, which cannot be assigned to a
        # single column.
        df["gettypeplayground_dig"] = [
            self.type(channel, isrc)
            for channel, isrc in zip(df["channel"], df["isrc"])
        ]
        return df
src.importer.templates.PlaygroundTemplate.PlaygroundTemplate
Definition
PlaygroundTemplate.py:5
src.importer.templates.PlaygroundTemplate.PlaygroundTemplate.type
type(self, Channel, ISRC)
Definition
PlaygroundTemplate.py:37
src.importer.templates.PlaygroundTemplate.PlaygroundTemplate.date
date(self, filename)
Definition
PlaygroundTemplate.py:25
src.importer.templates.PlaygroundTemplate.PlaygroundTemplate.assetType
assetType(self, df)
Definition
PlaygroundTemplate.py:50
src.importer.templates.PlaygroundTemplate.PlaygroundTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
PlaygroundTemplate.py:6
src.importer.templates.TerritoryCode.TerritoryCode
Definition
TerritoryCode.py:44
Generated by
1.12.0