glider-documentation/html/_fuga_template_8py_source.html

import re

import awswrangler as wr

from datetime import datetime


class FugaTemplate:
    """Helpers to load and normalise statement files that follow the Fuga template.

    The class reads a CSV from S3, fixes its quantity and artist columns, maps
    sale types to single-letter codes and derives the statement period
    (``YYYYMM``) from the file name.
    """

    # English month names in calendar order: position + 1 is the month number.
    # Kept explicit (instead of ``strptime("%B")``) so parsing does not depend
    # on the process locale.
    _MONTH_NAMES = (
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
    )

    # A month name followed (optionally through separators such as "_", "-"
    # or spaces) by a run of digits, e.g. "december2021" or "may_2022".
    _MONTH_YEAR_RE = re.compile(r"(%s)[\W_]*(\d+)" % "|".join(_MONTH_NAMES))

    def QuantityPreprocessing(self, product_quantity, asset_quantity):
        """Defines the total quantity for tracks and albums.

        Args:
            product_quantity (int): quantity of products shown in the original file
            asset_quantity (int): quantity of tracks shown in the original file

        Returns:
            int: sum of both quantities, each converted with ``int()``.
        """
        return int(product_quantity) + int(asset_quantity)

    def ArtistPreprocessing(self, asset_product, asset_artist):
        """Replace the track artist by the product artist when the former is empty.

        Args:
            asset_product (str): product artist shown in the original file
            asset_artist (str): track artist shown in the original file

        Returns:
            str: ``asset_artist`` unchanged when its string form is non-empty,
            otherwise ``str(asset_product)``.
        """
        if str(asset_artist) == "":
            return str(asset_product)
        return asset_artist

    def preprocessing(self, filename, features, session, rel_col):
        """Loads the file and fixes it so it fits our standard and can be processed.

        Args:
            filename (str): s3 full path of filename
            features (dict): contains delimiter, skip_rows and encoding required for current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id

        Returns:
            pandas dataframe: the loaded data with missing values filled,
            "Asset Quantity" replaced by product + asset quantity and empty
            "Asset Artist" values replaced by "Product Artist".
        """
        df = wr.s3.read_csv(
            filename,
            sep=features["delimiter"],
            skiprows=features["skip_rows"],
            encoding=features["encoding"],
            dtype={rel_col: str},  # keep upc/release ids as text
            boto3_session=session,
        )

        # Defaults for missing cells so the per-row helpers never see NaN.
        defaults = {"Asset Quantity": 0, "Product Quantity": 0, "Asset Artist": "", "Asset ISRC": "undefined"}
        df = df.fillna(value=defaults)

        # Build the columns from plain iteration instead of df.apply(axis=1):
        # a row-wise apply on an empty frame hands back a DataFrame instead of
        # a Series, which cannot be assigned to a single column.
        df["Asset Quantity"] = [
            self.QuantityPreprocessing(product_qty, asset_qty)
            for product_qty, asset_qty in zip(df["Product Quantity"], df["Asset Quantity"])
        ]
        df["Asset Artist"] = [
            self.ArtistPreprocessing(product_artist, track_artist)
            for product_artist, track_artist in zip(df["Product Artist"], df["Asset Artist"])
        ]
        return df

    def type(self, asset_product, sale_type):
        """Maps the sale type to the single-letter name required by the client.

        Args:
            asset_product (str): sale type (for products) shown in the original file
            sale_type (str): sale type (for tracks) shown in the original file

        Returns:
            str: 'T' for a downloaded asset, 'A' for a downloaded product and
            'S' for everything else.
        """
        if sale_type == "Download":
            if asset_product == "Asset":
                return 'T'
            if asset_product == "Product":
                return 'A'
        return 'S'

    def assetType(self, df):
        """Applies the ``type`` mapping to the current dataframe.

        Args:
            df (pandas dataframe): dataframe where changes will be applied; it
                must provide the 'asset/product' and 'sale type' columns.

        Returns:
            pandas dataframe: the same dataframe with a new "gettypefuga" column.
        """
        # Plain iteration keeps this working for an empty dataframe, where a
        # row-wise df.apply would not produce an assignable Series.
        df["gettypefuga"] = [
            self.type(asset_product, sale_type)
            for asset_product, sale_type in zip(df['asset/product'], df['sale type'])
        ]
        return df

    def date(self, filename):
        """Builds the ``YYYYMM`` period from the month name and year in the filename.

           For example December2021StatementRun_KepachMusictasCAMJazz-royalty_product_and_asset.csv
           gives "202112".

        Args:
            filename (str): current filename

        Returns:
            str: the digits found for the year followed by the two-digit month number.

        Raises:
            ValueError: when no month name followed by digits can be found.
        """
        lowered = filename.lower()

        # Preferred reading: a month name directly followed by the year. This
        # avoids picking up month names embedded in other words (for instance
        # "may" inside an artist or label name).
        adjacent = self._MONTH_YEAR_RE.search(lowered)
        if adjacent:
            month, year = adjacent.group(1), adjacent.group(2)
        else:
            # Fallback: first month (calendar order) that has any digits after
            # its last occurrence, e.g. "MarchStatement_2020.csv".
            for candidate in self._MONTH_NAMES:
                if candidate not in lowered:
                    continue
                digits = re.search(r"\d+", lowered.split(candidate)[-1])
                if digits:
                    month, year = candidate, digits.group()
                    break
            else:
                raise ValueError(
                    "Cannot find a month name followed by a year in filename: {!r}".format(filename)
                )

        month_number = self._MONTH_NAMES.index(month) + 1
        return "{}{:02d}".format(year, month_number)


src.importer.templates.FugaTemplate.FugaTemplate
Definition FugaTemplate.py:5

src.importer.templates.FugaTemplate.FugaTemplate.type
type(self, asset_product, sale_type)
Definition FugaTemplate.py:49

src.importer.templates.FugaTemplate.FugaTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition FugaTemplate.py:28

src.importer.templates.FugaTemplate.FugaTemplate.QuantityPreprocessing
QuantityPreprocessing(self, product_quantity, asset_quantity)
Definition FugaTemplate.py:6

src.importer.templates.FugaTemplate.FugaTemplate.assetType
assetType(self, df)
Definition FugaTemplate.py:62

src.importer.templates.FugaTemplate.FugaTemplate.ArtistPreprocessing
ArtistPreprocessing(self, asset_product, asset_artist)
Definition FugaTemplate.py:17

src.importer.templates.FugaTemplate.FugaTemplate.date
date(self, filename)
Definition FugaTemplate.py:72