Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
SpotifyTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
4
class SpotifyTemplate:
    """Helpers that load Spotify report files and derive values from their filenames."""

    # Column names under which the discovery-mode fee may appear, in lookup order.
    _DISCOVERY_FEE_COLUMNS = ("discovery_mode_fee", "Discovery Mode Fee")

    def preprocessing(self, filename, features, session, rel_col):
        """Load a delimited file from S3 and flip the sign of the discovery-mode fee.

        Args:
            filename (str): s3 full path of filename
            features (dict): contains "delimiter", "skip_rows" and "encoding"
                required for the current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id; it is
                read as ``str``
        Returns: df (pandas dataframe)
        Raises:
            KeyError: if ``features`` lacks a required key, or the file has
                neither discovery-mode fee column.
        """
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},
            boto3_session=session,
        )
        return self._negate_discovery_mode_fee(df)

    @staticmethod
    def _negate_discovery_mode_fee(df):
        """Multiply the discovery-mode fee column by -1, whichever spelling is present."""
        # Look the column up explicitly instead of relying on a bare ``except``,
        # so unrelated errors (e.g. a non-numeric column) are not masked.
        for column in SpotifyTemplate._DISCOVERY_FEE_COLUMNS:
            if column in df.columns:
                df[column] = df[column] * (-1)
                return df
        raise KeyError(
            "none of the expected columns found: "
            + ", ".join(SpotifyTemplate._DISCOVERY_FEE_COLUMNS)
        )

    def date(self, filename: str) -> str:
        """Build a first-of-month date string from the ``-YYYYMM`` token in the filename.

        For example SPOTIFY_gyrostream-track-for-breakage-202307.txt gives
        "2023-07-01".

        Args:
            filename (str): current filename
        Returns: date_str (str)
        Raises:
            IndexError: if the filename contains no ``-20YYMM`` token (the
                exception type is unchanged from the previous implementation).
        """
        match = re.search(r'-20\d{4}', filename)
        if match is None:
            raise IndexError(f"no -YYYYMM date token found in filename: {filename!r}")
        # Drop the leading "-" that anchors the pattern; six digits remain.
        token = match.group(0)[1:]
        year, month = token[:4], token[4:]
        return f"{year}-{month}-01"

    def dateTrends(self, filename: str) -> str:
        """Return the first ``YYYY-MM-DD`` token found in the filename.

        Args:
            filename (str): current filename
        Returns: date (str)
        Raises:
            IndexError: if the filename contains no ``YYYY-MM-DD`` token (the
                exception type is unchanged from the previous implementation).
        """
        match = re.search(r"\d{4}-\d{2}-\d{2}", filename)
        if match is None:
            raise IndexError(f"no YYYY-MM-DD date token found in filename: {filename!r}")
        return match.group(0)

    def territoryTrends(self, df, filename):
        """Set the territory column from the filename (it contains the territory).

        The territory is the first run of two or three word characters that is
        immediately followed by a dot.

        Args:
            df (pandas dataframe): dataframe where changes will be applied
            filename (str): current filename
        Returns: df (pandas dataframe)
        Raises:
            IndexError: if no such token exists in the filename.
        """
        # NOTE(review): ``\w`` also matches digits and "_", so a two-letter code
        # preceded by such a character is captured with it (e.g. "_us") - confirm
        # against the real filename convention before tightening the pattern.
        territory = re.findall(r"\w{2,3}\.", filename)[0]
        df["territory_from_file"] = territory.replace(".", "")
        return df
src.importer.templates.SpotifyTemplate.SpotifyTemplate
Definition
SpotifyTemplate.py:4
src.importer.templates.SpotifyTemplate.SpotifyTemplate.dateTrends
dateTrends(self, filename)
Definition
SpotifyTemplate.py:40
src.importer.templates.SpotifyTemplate.SpotifyTemplate.territoryTrends
territoryTrends(self, df, filename)
Definition
SpotifyTemplate.py:52
src.importer.templates.SpotifyTemplate.SpotifyTemplate.date
date(self, filename)
Definition
SpotifyTemplate.py:25
src.importer.templates.SpotifyTemplate.SpotifyTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
SpotifyTemplate.py:5
Generated by
1.12.0