Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
AudioSaladTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
4
class AudioSaladTemplate:
    """Importer template for AudioSalad sales export files.

    Bundles the steps applied to an AudioSalad CSV export: loading it from
    S3, deriving the reporting date, and adding the normalized service and
    sale-type columns.
    """

    # Ordered (substring, mapped name) pairs used by ``serviceFinder``.
    # The FIRST substring found in the service name wins, so the order of
    # this table is part of the behavior (e.g. "Red" is tested before
    # "Google") and must not be sorted or deduplicated casually.
    _SERVICE_RULES = (
        ("Red", "YouTube Music"),
        ("UMA", "UMA"),
        ("Google", "Google Play"),
        ("Youtube Ads", "YouTube Content ID"),
        ("Amazon", "Amazon"),
        ("Spotify", "Spotify"),
        ("Yandex", "Yandex"),
        ("Soundtrack Your Brand", "Soundtrack Your Brand"),
        ("Apple", "iTunes"),
        ("iTunes", "iTunes"),
        ("NetEase", "Netease"),
        ("Napster", "Napster"),
        ("Tidal", "Tidal"),
        ("MFS", "MFS"),
        ("Facebook", "facebook"),
        ("Audio tier", "Youtube Audio Tier"),
        ("Tik Tok", "TikTok"),
        ("Slacker", "Slacker"),
        ("Soundcloud", "Soundcloud"),
    )

    def preprocessing(self, filename, features, session, rel_col):
        """Load the CSV file from S3 with the parsing options of this file.

        Args:
            filename (str): s3 full path of filename
            features (dict): must contain the keys "delimiter", "skip_rows"
                and "encoding" required for the current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id; it
                is read as ``str`` so its values are not parsed as numbers
        Returns: df (pandas dataframe)
        Raises:
            KeyError: if one of the required keys is missing in ``features``
        """
        return wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},
            low_memory=False,
            boto3_session=session,
        )

    def date(self, filename, df=None):
        """Provide the date information for the current file.

        When ``df`` is given, its "sale date" column is copied into a new
        "as_iso_date" column (to fit the otto mapping) and that column is
        returned. Otherwise the date is built from the filename, which must
        contain a four-digit year and a month written as ``M<number>``.
        For example AudioSalad_2023_-_M7_GYROstream_Sales_Export.csv
        gives "202307".

        Args:
            filename (str): current filename
            df (pandas dataframe): dataframe where changes will be applied
        Returns:
            The "as_iso_date" column of ``df`` when ``df`` is given,
            otherwise date_str (str) formatted as year followed by a
            two-digit month.
        Raises:
            IndexError: if ``df`` is None and the filename has no
                four-digit year or no ``M<number>`` month marker
            KeyError: if ``df`` is given but has no "sale date" column
        """
        if df is not None:
            df["as_iso_date"] = df["sale date"]
            return df["as_iso_date"]

        # The first run of four digits is taken as the year and the first
        # "M" followed by one or two digits as the month.
        year_match = re.search(r'\d{4}', filename)
        month_match = re.search(r'M\d{1,2}', filename)
        if year_match is None or month_match is None:
            # IndexError (not ValueError) is raised on purpose: it is the
            # exception this method has always produced for such filenames.
            raise IndexError(
                "filename must contain a 4-digit year and an 'M<month>' "
                f"marker: {filename!r}"
            )

        month = month_match.group()[1:].zfill(2)
        return year_match.group() + month

    def serviceFinder(self, service):
        """Maps the service name with the service specified by the client according their requirements

        The value is converted with ``str()`` and checked against an
        ordered list of substrings (case-sensitive); the first one found
        decides the result.

        Args:
            service (str): service name
        Returns:
            (str) the mapped service name, or ``str(service)`` unchanged
            when no known substring is found
        """
        service = str(service)
        for needle, mapped_name in self._SERVICE_RULES:
            if needle in service:
                return mapped_name
        return service

    def type(self, Type, configuration):
        """Maps the sale type with the name specified by the client according their requirements

        Note: the method name shadows the builtin ``type`` inside the class
        body; it is kept because callers use it by this name.

        Args:
            Type (str): sale type shown in the original file
            configuration (str): configuration value of the same row; only
                "Download" is treated specially
        Returns:
            (str) "A" for a "Release" download, "T" for a "Track" download
            and "S" for everything else (presumably album / track / stream
            -- confirm with the client mapping)
        """
        if Type == "Release" and configuration == "Download":
            return "A"
        if Type == "Track" and configuration == "Download":
            return "T"
        return "S"

    def assetType(self, df):
        """Applies serviceFinder and type functions to current dataframe

        Adds the column "getservice" (from "sub source") and the column
        "gettypeaudiosalad" (from "type" and "configuration"). The
        dataframe is modified in place and also returned.

        Args:
            df (pandas dataframe): dataframe where changes will be applied
        Returns: df (pandas dataframe)
        Raises:
            KeyError: if "sub source", "type" or "configuration" is missing
        """
        # Iterate over the needed columns directly instead of a row-wise
        # DataFrame.apply(axis=1), which builds a Series object per row.
        df["getservice"] = [
            self.serviceFinder(sub_source) for sub_source in df["sub source"]
        ]
        df["gettypeaudiosalad"] = [
            self.type(sale_type, configuration)
            for sale_type, configuration in zip(df["type"], df["configuration"])
        ]
        return df
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate
Definition
AudioSaladTemplate.py:4
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
AudioSaladTemplate.py:5
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate.date
date(self, filename, df=None)
Definition
AudioSaladTemplate.py:21
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate.type
type(self, Type, configuration)
Definition
AudioSaladTemplate.py:89
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate.serviceFinder
serviceFinder(self, service)
Definition
AudioSaladTemplate.py:42
src.importer.templates.AudioSaladTemplate.AudioSaladTemplate.assetType
assetType(self, df)
Definition
AudioSaladTemplate.py:102
Generated by
1.12.0