Glider
Loading...
Searching...
No Matches
glider
src
importer
templates
DeezerTemplate.py
Go to the documentation of this file.
1
import
re
2
import
awswrangler
as
wr
3
from
pandas
import
to_datetime
4
5
class DeezerTemplate:
    """Import template for Deezer report files: loading and date extraction."""

    def preprocessing(self, filename, features, session, rel_col):
        """Read a report from S3 into a DataFrame and parse "Start Report".

        Args:
            filename: -str- s3 full path of the file to read.
            features: -dict- must contain the keys "delimiter", "skip_rows"
                and "encoding"; they are forwarded to the CSV reader as
                ``sep``, ``skiprows`` and ``encoding``.
            session: s3 connexion, passed to the reader as ``boto3_session``.
            rel_col: name of the column that is forced to ``str`` dtype when
                reading (presumably to keep identifiers from being parsed
                as numbers -- confirm with the caller).

        Returns:
            The loaded DataFrame, with the "Start Report" column converted
            to datetimes using the day-month-year format "%d-%m-%Y".

        Raises:
            KeyError: if ``features`` lacks one of the required keys, or the
                file has no "Start Report" column.
        """
        delimiter = features["delimiter"]
        skip_rows = features["skip_rows"]
        encoding = features["encoding"]

        df = wr.s3.read_csv(
            filename,
            sep=delimiter,
            skiprows=skip_rows,
            encoding=encoding,
            dtype={rel_col: str},
            low_memory=False,
            boto3_session=session,
        )
        df["Start Report"] = to_datetime(df["Start Report"], format="%d-%m-%Y")
        return df

    def date(self, filename: str) -> str:
        """Return the report date embedded in the filename as "YYYY-MM-DD".

        The date is the first run of exactly eight digits enclosed by
        underscores. For example
        "Deezer_GYROstreamMERLIN_20220201_20220228.txt" gives "2022-02-01".

        Args:
            filename (str): current filename.

        Returns:
            date_str (str): the eight digits reformatted as "YYYY-MM-DD".
            The digits are not validated as a real calendar date.

        Raises:
            IndexError: if the filename contains no "_<8 digits>_" token.
        """
        match = re.search(r"_(\d{8})_", filename)
        if match is None:
            # Keep IndexError (what indexing an empty findall() result used
            # to raise) so existing handlers still work, but name the file.
            raise IndexError(
                f"no '_YYYYMMDD_' date token found in filename: {filename!r}"
            )
        digits = match.group(1)
        date_str = f"{digits[:4]}-{digits[4:6]}-{digits[6:]}"
        return date_str
src.importer.templates.DeezerTemplate.DeezerTemplate
Definition
DeezerTemplate.py:5
src.importer.templates.DeezerTemplate.DeezerTemplate.date
date(self, filename)
Definition
DeezerTemplate.py:21
src.importer.templates.DeezerTemplate.DeezerTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition
DeezerTemplate.py:6
Generated by
1.12.0