Glider
Loading...
Searching...
No Matches
AudioSaladTemplate.py
Go to the documentation of this file.
1import re
2import awswrangler as wr
3
def preprocessing(self, filename, features, session, rel_col):
    """Read the raw sales CSV from S3 into a dataframe.

    Args:
        filename (str): s3 full path of the file to read
        features (dict): must contain the keys "delimiter", "skip_rows" and
            "encoding", which are forwarded to the CSV reader
        session (boto3 obj): AWS session handed to awswrangler
        rel_col (str): column name used to identify upc/release_id; it is
            read with dtype ``str``
    Returns: df (pandas dataframe)
    """
    read_options = {
        "sep": features["delimiter"],
        "skiprows": features["skip_rows"],
        "encoding": features["encoding"],
        # NOTE(review): presumably forced to str so identifiers are not
        # parsed as numbers (e.g. leading zeros) -- confirm with the caller.
        "dtype": {rel_col: str},
        "low_memory": False,
        "boto3_session": session,
    }
    return wr.s3.read_csv(filename, **read_options)
20
def date(self, filename, df=None):
    """Provide the report date, either from the dataframe or from the filename.

    When ``df`` is given, its "sale date" column is copied into a new
    "as_iso_date" column (``df`` is modified in place) and that column is
    returned; ``filename`` is not inspected in this case.

    Otherwise the date is parsed from ``filename``: the first four consecutive
    digits are taken as the year and the first ``M<one or two digits>`` token
    as the month. For example
    ``AudioSalad_2023_-_M7_GYROstream_Sales_Export.csv`` gives ``"202307"``.

    Args:
        filename (str): current filename
        df (pandas dataframe): dataframe where changes will be applied
    Returns:
        df["as_iso_date"] when ``df`` is given, otherwise date_str (str) in
        ``YYYYMM`` form
    Raises:
        IndexError: if ``df`` is None and the filename has no four-digit year
            or no ``M<month>`` token
    """
    if df is not None:
        df["as_iso_date"] = df["sale date"]
        return df["as_iso_date"]

    # IndexError is kept (rather than ValueError) because that is what a
    # failed lookup raised before; only the message is new.
    year_match = re.search(r'\d{4}', filename)
    if year_match is None:
        raise IndexError(f"no four-digit year found in filename {filename!r}")
    month_match = re.search(r'M(\d{1,2})', filename)
    if month_match is None:
        raise IndexError(f"no M<month> token found in filename {filename!r}")

    # Single-digit months are zero-padded so the result is always YYYYMM.
    return year_match.group() + month_match.group(1).zfill(2)
41
def serviceFinder(self, service):
    """Map a raw service name to the service name required by the client.

    The value is converted with ``str()`` and checked against an ordered list
    of case-sensitive substrings; the first substring found decides the
    result. Order matters (e.g. "Red" is tested before "Google"), so new
    entries must be inserted with care.

    Args:
        service (str): service name
    Returns: (service renamed), or the stringified input when nothing matches
    """
    # (substring to look for, canonical service name), in priority order.
    rules = (
        ("Red", "YouTube Music"),
        ("UMA", "UMA"),
        ("Google", "Google Play"),
        ("Youtube Ads", "YouTube Content ID"),
        ("Amazon", "Amazon"),
        ("Spotify", "Spotify"),
        ("Yandex", "Yandex"),
        ("Soundtrack Your Brand", "Soundtrack Your Brand"),
        ("Apple", "iTunes"),
        ("iTunes", "iTunes"),
        ("NetEase", "Netease"),
        ("Napster", "Napster"),
        ("Tidal", "Tidal"),
        ("MFS", "MFS"),
        ("Facebook", "facebook"),
        ("Audio tier", "Youtube Audio Tier"),
        ("Tik Tok", "TikTok"),
        ("Slacker", "Slacker"),
        ("Soundcloud", "Soundcloud"),
    )
    service = str(service)
    for needle, canonical in rules:
        if needle in service:
            return canonical
    return service
88
def type(self, Type, configuration):
    """Translate the sale type of a row into the client's one-letter code.

    Args:
        Type (str): sale type shown in the original file
        configuration (str): configuration value of the same row; only
            "Download" is treated specially
    Returns: "A" for a downloaded "Release", "T" for a downloaded "Track",
        "S" for everything else
    """
    download_codes = (("Release", "A"), ("Track", "T"))
    for sale_type, letter in download_codes:
        if Type == sale_type and configuration == "Download":
            return letter
    return "S"
101
def assetType(self, df):
    """Add the normalised service and sale-type columns to the dataframe.

    Writes two new columns into ``df`` (in place):
      * "getservice": ``self.serviceFinder`` applied to "sub source"
      * "gettypeaudiosalad": ``self.type`` applied to "type" and
        "configuration" of the same row

    Args:
        df (pandas dataframe): dataframe where changes will be applied; must
            contain the columns "sub source", "type" and "configuration"
    Returns: df (pandas dataframe)
    """
    # Iterate the needed columns directly instead of df.apply(..., axis=1):
    # a row-wise apply on an empty dataframe returns a DataFrame rather than a
    # Series, which makes the column assignment fail, and it is also much
    # slower because it builds a Series object for every row.
    df["getservice"] = [self.serviceFinder(source) for source in df["sub source"]]
    df["gettypeaudiosalad"] = [
        self.type(sale_type, configuration)
        for sale_type, configuration in zip(df["type"], df["configuration"])
    ]
    return df
preprocessing(self, filename, features, session, rel_col)