Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
SoundtrackTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
4
class SoundtrackTemplate:
    """Importer template for Soundtrack sales report files.

    Provides the file-loading step (``preprocessing``) and the extraction of
    the report date from the filename (``date``).
    """

    def preprocessing(self, filename, features, session, rel_col):
        """Loads the file from S3 and applies fixes so it fits our standard and can be processed

        Args:
            filename (str): s3 full path of filename
            features (dict): contains "delimiter", "skip_rows" and "encoding" required for current file
            session (boto3 obj): AWS session handed to awswrangler as ``boto3_session``
            rel_col (str): column name used to identify upc/release_id; it is read as ``str``
        Returns: df (pandas dataframe)
        Raises:
            KeyError: if ``features`` lacks one of the required keys, or the file
                has no "Country_Of_Sale" column
        """
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},
            # Keep empty cells as empty strings instead of converting them to NaN.
            keep_default_na=False,
            low_memory=False,
            boto3_session=session,
        )
        # Strip every space character from the country values (cast to str first
        # so non-string cells do not break ``replace``).
        df["Country_Of_Sale"] = df["Country_Of_Sale"].apply(
            lambda x: str(x).replace(" ", "")
        )
        return df

    def date(self, filename):
        """Builds the report date string from the filename (it contains the date)

        The first six-digit token enclosed in underscores is read as ``YYYYMM``
        and the day is fixed to ``01``.
        For example gyrostream-pty-ltd_soundtrack-your-brand_202203_Monthly-Sales.csv
        yields "2022-03-01".

        Args:
            filename (str): current filename
        Returns: date_str (str), formatted as "YYYY-MM-01"
        Raises:
            IndexError: if the filename contains no ``_YYYYMM_`` token. The
                exception type is kept for backward compatibility; the message
                now names the offending filename.
        """
        # re.search stops at the first match; the capture group drops the
        # surrounding underscores, so no extra replace() is needed.
        match = re.search(r'_(\d{6})_', filename)
        if match is None:
            raise IndexError(
                "no '_YYYYMM_' date token found in filename: {!r}".format(filename)
            )
        year_month = match.group(1)
        return year_month[:4] + "-" + year_month[4:] + "-01"
src.importer.templates.SoundtrackTemplate.SoundtrackTemplate
Definition
SoundtrackTemplate.py:4
src.importer.templates.SoundtrackTemplate.SoundtrackTemplate.date
date(self, filename)
Definition
SoundtrackTemplate.py:23
src.importer.templates.SoundtrackTemplate.SoundtrackTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
SoundtrackTemplate.py:5
Generated by
1.12.0