Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
FacebookTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
from
pandas
import
to_datetime
4
5
class FacebookTemplate:
    """Importer template for Facebook usage-report CSV files."""

    def preprocessing(self, filename, features, session, rel_col):
        """Load a report from S3 and adjust its columns to our standard.

        Args:
            filename (str): s3 full path of filename
            features (dict): must contain "delimiter", "skip_rows" and
                "encoding" for the current file
            session (boto3 obj): AWS session handed to awswrangler
            rel_col (str): column name used to identify upc/release_id;
                not read by this template, kept so the signature stays
                unchanged for callers
        Returns: df (pandas dataframe)
        Raises:
            KeyError: if a required key is missing from ``features`` or the
                file has no "event_count" / "start_date" column.
        """
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            low_memory=False,
            boto3_session=session,
        )
        return self._normalize_columns(df)

    @staticmethod
    def _normalize_columns(df):
        """Fill and cast "event_count", parse "start_date"; return the same df."""
        # Assign the result back instead of calling fillna(inplace=True) on
        # df["event_count"]: an in-place call on a column selection is a
        # chained assignment and does not update df under pandas
        # Copy-on-Write, which would leave NaNs behind and make the integer
        # cast below fail.
        df["event_count"] = df["event_count"].fillna(0).astype("int")
        # NOTE(review): '%Y/%d/%m' parses year/day/month. Kept exactly as it
        # was; confirm against a real report that this is not meant to be
        # year/month/day.
        df["start_date"] = to_datetime(df["start_date"], format='%Y/%d/%m')
        return df

    def date(self, filename):
        """Build the report date from the ``Report_YYYYMM`` token in the filename.

        For example GYROstream_Pty_Ltd_Facebook-AL-Production_Usage-Report_202203.csv
        gives "2022-03-01".

        Args:
            filename (str): current filename
        Returns: date_str (str) formatted as "YYYY-MM-01"
        Raises:
            IndexError: if the filename has no ``Report_`` token followed by
                six digits. The exception type matches what the previous
                ``re.findall(...)[0]`` lookup raised.
        """
        match = re.search(r'Report_(\d{4})(\d{2})', filename)
        if match is None:
            raise IndexError(
                f"no 'Report_YYYYMM' token found in filename: {filename!r}"
            )
        year, month = match.groups()
        return f"{year}-{month}-01"
src.importer.templates.FacebookTemplate.FacebookTemplate
Definition
FacebookTemplate.py:5
src.importer.templates.FacebookTemplate.FacebookTemplate.date
date(self, filename)
Definition
FacebookTemplate.py:26
src.importer.templates.FacebookTemplate.FacebookTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
FacebookTemplate.py:6
Generated by
1.12.0