Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
MixcloudTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
from
pandas
import
to_datetime
4
5
class MixcloudTemplate:
    """Template exposing the preprocessing step for Mixcloud report files."""

    def preprocessing(self, filename, features, session, rel_col):
        """Read a delimited report from S3 and parse its ``Start_Date`` column.

        The file is read with ``awswrangler`` using the parsing options
        supplied in ``features``. The ``rel_col`` column is forced to ``str``
        and pandas' default NA-marker handling is switched off
        (``keep_default_na=False``). ``Start_Date`` is then converted from
        ``day/month/year`` text to datetimes.

        Args:
            filename (str): full S3 path of the file to read.
            features (dict): must provide the keys ``"delimiter"``,
                ``"skip_rows"`` and ``"encoding"`` for the current file.
            session (boto3 obj): AWS session forwarded to the S3 reader.
            rel_col (str): name of the column used to identify
                upc/release_id; it is read with dtype ``str``.

        Returns:
            pandas.DataFrame: the loaded data with ``Start_Date`` parsed
            using the ``"%d/%m/%Y"`` format.

        Raises:
            KeyError: if ``features`` lacks one of the required keys.
        """
        # Look the options up in a fixed order (delimiter, skip_rows,
        # encoding) so a missing key is reported before any S3 access.
        reader_options = {
            "sep": features["delimiter"],
            "skiprows": features["skip_rows"],
            "encoding": features["encoding"],
            "dtype": {rel_col: str},
            "keep_default_na": False,
            "low_memory": False,
            "boto3_session": session,
        }
        frame = wr.s3.read_csv(filename, **reader_options)

        # Dates in the source file are written day-first, e.g. 31/12/2023.
        frame["Start_Date"] = to_datetime(frame["Start_Date"], format="%d/%m/%Y")
        return frame
src.importer.templates.MixcloudTemplate.MixcloudTemplate
Definition
MixcloudTemplate.py:5
src.importer.templates.MixcloudTemplate.MixcloudTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
MixcloudTemplate.py:6
Generated by Doxygen 1.12.0