Glider
Loading...
Searching...
No Matches
PlaygroundTemplate.py
Go to the documentation of this file.
1import re
2import awswrangler as wr
3from .TerritoryCode import TerritoryCode
4
def preprocessing(self, filename, features, session, rel_col):
    """Reads the raw CSV report from S3 and hands it to TerritoryCode before returning it

    Args:
        filename (str): s3 full path of the file to read
        features (dict): parsing options; the keys "delimiter", "skip_rows" and "encoding" are read
        session (boto3 obj): AWS session passed to awswrangler as boto3_session
        rel_col (str): column (upc/release_id identifier) that is forced to str dtype
    Returns: df (pandas dataframe)
    """
    read_options = {
        "sep": features["delimiter"],
        "skiprows": features["skip_rows"],
        "encoding": features["encoding"],
        # keep the identifier column as text so it is not parsed as a number
        "dtype": {rel_col: str},
        # empty cells stay as empty strings instead of becoming NaN
        "keep_default_na": False,
        "boto3_session": session,
    }
    df = wr.s3.read_csv(filename, **read_options)
    # NOTE(review): the return value is ignored, so territory_code presumably
    # edits df in place -- confirm against TerritoryCode
    TerritoryCode().territory_code(df)
    return df
24
def date(self, filename):
    """Builds a first-of-month date string from the year-month found in the filename
    For example Fonal_2020-08.xlsx gives 2020-08-01

    Args:
        filename (str): current filename, expected to contain a YYYY-MM token
    Returns: date (str), the first YYYY-MM match with "-01" appended
    Raises:
        IndexError: when the filename contains no YYYY-MM token
    """
    # The pattern also tolerates a literal "+" right after the year; when it is
    # present it is kept verbatim in the returned string.
    matches = re.findall(r'\d{4}\+?-\d{2}', filename)
    if not matches:
        # Same exception type as the bare [0] lookup used to raise, but with a
        # message that says which file is at fault.
        raise IndexError(f"no YYYY-MM date found in filename: {filename!r}")
    return matches[0] + "-01"
36
def type(self, Channel, ISRC):
    """Translates channel and ISRC into the one-letter sale type requested by the client

    Args:
        Channel (str): distribution medium shown in the original file
        ISRC (str): track code shown in the original file; only its length as text is inspected
    Returns: "A" for a Download with an empty ISRC, "T" for a Download with a
        12 character ISRC, "S" in every other case
    """
    # NOTE(review): the letters presumably stand for album / track / stream -- confirm
    if Channel == "Download":
        code_by_isrc_length = {0: "A", 12: "T"}
        return code_by_isrc_length.get(len(str(ISRC)), "S")
    return "S"
49
def assetType(self, df):
    """Fills the sale type column by running self.type over every row

    Args:
        df (pandas dataframe): dataframe to update; its "channel" and "isrc" columns are read
    Returns: df (pandas dataframe), the same object with "gettypeplayground_dig" added
    """
    def classify(row):
        # one row in, one sale-type letter out
        return self.type(row["channel"], row["isrc"])

    df["gettypeplayground_dig"] = df.apply(classify, axis=1)
    return df
preprocessing(self, filename, features, session, rel_col)