Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
TencentTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
4
class TencentTemplate:
    """Importer template with the clean-up steps applied to Tencent report files."""

    def preprocessing(self, filename, features, session, rel_col):
        """Load the file and strip the 'UPC-' prefix from the release column.

        Args:
            filename (str): s3 full path of filename
            features (dict): contains delimiter, skip_rows and encoding required for current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id
        Returns: df (pandas dataframe)
        """
        delimiter = features["delimiter"]
        skip_rows = features["skip_rows"]
        encoding = features["encoding"]
        df = wr.s3.read_csv(
            filename,
            sep=delimiter,
            skiprows=skip_rows,
            encoding=encoding,
            dtype={rel_col: str},
            low_memory=False,
            boto3_session=session,
        )
        # Map over the single column instead of a row-wise DataFrame.apply:
        # the result is always a Series (also for a file with no data rows)
        # and no per-row Series objects have to be built.
        df[rel_col] = df[rel_col].map(self.fix_upc)
        return df

    def fix_upc(self, upc):
        """Delete 'UPC-' from a upc value of the original file.

        Args:
            upc (str): upc shown in the original file
        Returns: upc (str); values that are not strings (e.g. NaN/None for
            empty cells) are returned unchanged
        """
        # Empty cells stay NaN even when the column is read with dtype=str,
        # and NaN has no .replace method.
        if not isinstance(upc, str):
            return upc
        return upc.replace("UPC-", "")

    def fix_date(self, start_date):
        """Keep the part of the sale date that precedes the first '-'.

        NOTE(review): this was documented as fitting the date to YYYY-MM-DD,
        but the code only cuts the value at the first hyphen (presumably the
        source holds ranges such as 'YYYYMMDD-YYYYMMDD' -- TODO confirm).

        Args:
            start_date (str): sale date shown in the original file
        Returns: start_date_fix (str)
        """
        # str() so that numeric cells can be split as well.
        start_date_fix = str(start_date).split("-")[0]
        return start_date_fix

    def date(self, df):
        """Add an 'iso_date' column built from 'start_date' with fix_date.

        Args:
            df (pandas dataframe): dataframe where changes will be applied
                (modified in place)
        Returns: df[iso_date] (pandas series)
        """
        # Column-wise map: a row-wise apply would upcast an integer date to
        # float when other columns are float, and does not reliably yield a
        # Series for an empty dataframe.
        df["iso_date"] = df["start_date"].map(self.fix_date)
        return df["iso_date"]
src.importer.templates.TencentTemplate.TencentTemplate
Definition
TencentTemplate.py:4
src.importer.templates.TencentTemplate.TencentTemplate.date
date(self, df)
Definition
TencentTemplate.py:43
src.importer.templates.TencentTemplate.TencentTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
TencentTemplate.py:6
src.importer.templates.TencentTemplate.TencentTemplate.fix_upc
fix_upc(self, upc)
Definition
TencentTemplate.py:23
src.importer.templates.TencentTemplate.TencentTemplate.fix_date
fix_date(self, start_date)
Definition
TencentTemplate.py:32
Generated by
1.12.0