Glider
Loading...
Searching...
No Matches
FugaTemplate.py
Go to the documentation of this file.
1import re
2import awswrangler as wr
3from datetime import datetime
4
def QuantityPreprocessing(self, product_quantity, asset_quantity):
    """Adds the product quantity and the asset quantity of a row into a single total

    Args:
        product_quantity (int): quantity of products shown in the original file
        asset_quantity (int): quantity of tracks shown in the original file
    Returns: total (int), the sum of both values after each one is converted with int()
    """
    # Product quantity is converted first, then asset quantity.
    return sum(int(quantity) for quantity in (product_quantity, asset_quantity))
16
def ArtistPreprocessing(self, asset_product, asset_artist):
    """Falls back to the product artist when the track artist is an empty string

    Args:
        asset_product (str): product artist shown in the original file
        asset_artist (str): track artist shown in the original file
    Returns: the product artist converted to str when str(asset_artist) is empty,
        otherwise asset_artist exactly as it was received
    """
    track_artist_is_blank = str(asset_artist) == ""
    return str(asset_product) if track_artist_is_blank else asset_artist
27
def preprocessing(self, filename, features, session, rel_col):
    """Reads the CSV from S3 and normalizes the quantity and artist columns

    Args:
        filename (str): s3 full path of filename
        features (dict): must provide the "delimiter", "skip_rows" and "encoding" keys
        session (boto3 obj): AWS client connection
        rel_col (str): column name used to identify upc/release_id; it is read as str
    Returns: df (pandas dataframe)
    """
    df = wr.s3.read_csv(
        filename,
        sep=features["delimiter"],
        skiprows=features["skip_rows"],
        encoding=features["encoding"],
        dtype={rel_col: str},
        boto3_session=session,
    )

    # Missing values get a per-column default so the row-wise helpers below
    # always receive something they can convert.
    missing_defaults = {"Asset Quantity": 0, "Product Quantity": 0, "Asset Artist": "", "Asset ISRC": "undefined"}
    df.fillna(value=missing_defaults, inplace=True)

    def total_quantity(row):
        return self.QuantityPreprocessing(row['Product Quantity'], row['Asset Quantity'])

    def resolved_artist(row):
        return self.ArtistPreprocessing(row['Product Artist'], row['Asset Artist'])

    # Both columns are overwritten in place with the row-wise results.
    df["Asset Quantity"] = df.apply(total_quantity, axis=1)
    df["Asset Artist"] = df.apply(resolved_artist, axis=1)
    return df
48
def type(self, asset_product, sale_type):
    """Translates the asset/product flag and the sale type into a one-letter code

    Args:
        asset_product (str): "Asset" or "Product" as shown in the original file
        sale_type (str): sale type shown in the original file
    Returns: 'T' for a downloaded asset, 'A' for a downloaded product and
        'S' for every other combination
    """
    if sale_type == "Download":
        if asset_product == "Asset":
            return 'T'
        if asset_product == "Product":
            return 'A'
    # Anything that is not a download of an asset or a product
    return 'S'
61
def assetType(self, df):
    """Adds the "gettypefuga" column by applying self.type to every row

    Args:
        df (pandas dataframe): dataframe to update; it must contain the
            'asset/product' and 'sale type' columns
    Returns: df (pandas dataframe), the same object that was received
    """
    def row_type(row):
        return self.type(row['asset/product'], row['sale type'])

    # The code of each row is stored in a new column
    df["gettypefuga"] = df.apply(row_type, axis=1)
    return df
71
def date(self, filename):
    """Builds the date string (year digits + two-digit month) from the filename
    For example December2021StatementRun_KepachMusictasCAMJazz-royalty_product_and_asset.csv
    gives "202112"

    Args:
        filename (str): current filename; month names are matched case-insensitively
    Returns: date_str (str)
    Raises:
        ValueError: when the filename has no English month name, or no digits after it
    """
    possible_months = ("january", "february", "march", "april", "may", "june",
                       "july", "august", "september", "october", "november", "december")
    lowered = filename.lower()

    # Prefer a month name directly followed by the year (only separators in
    # between). This stops an accidental substring such as "may" in "mayfair"
    # from winning over the real "December2021" token.
    adjacent = re.search(r"(" + "|".join(possible_months) + r")[\s_.\-]*(\d+)", lowered)
    if adjacent is not None:
        month, year = adjacent.groups()
    else:
        # Legacy behaviour: first month (in calendar order) found anywhere,
        # then the first run of digits after its last occurrence.
        month = next((m for m in possible_months if m in lowered), None)
        if month is None:
            raise ValueError("No month name found in filename: {!r}".format(filename))
        tail = lowered.split(month)[-1]
        # Digits only: a neighbouring "-" or "." must not leak into the year.
        year_match = re.search(r"\d+", tail)
        if year_match is None:
            raise ValueError("No year found after the month name in filename: {!r}".format(filename))
        year = year_match.group()

    # The tuple position gives the month number without relying on the
    # locale-dependent strptime("%B") parsing.
    date_month = str(possible_months.index(month) + 1).zfill(2)
    return year + date_month
preprocessing(self, filename, features, session, rel_col)
QuantityPreprocessing(self, product_quantity, asset_quantity)
ArtistPreprocessing(self, asset_product, asset_artist)