glider-documentation/html/itunes_template_8py_source.html

import re

import awswrangler as wr


class itunesTemplate:
    """Helpers for loading iTunes report files and deriving columns from them.

    The ``preprocessing*`` methods read a delimited file from S3 and drop the
    trailing rows that follow the declared data-row count; the remaining
    methods compute an asset-type column and a date string from the filename.
    The class keeps no instance state.
    """

    def preprocessing(self, filename, features, session, rel_col):
        """Load a report whose row count is tagged with ``Total_Rows``.

        The row whose "Start Date" cell equals ``"Total_Rows"`` holds the
        number of data rows in its "End Date" cell. Every row whose index
        label is greater than or equal to that number is dropped
        (presumably the summary footer of the report).

        Args:
            filename (str): s3 full path of filename
            features (dict): contains delimiter, skip_rows and encoding
                required for current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id;
                it is always read as ``str``

        Returns:
            df (pandas dataframe): the loaded rows without the trailing rows

        Raises:
            KeyError: if ``features`` or the file lacks an expected key/column.
            TypeError: if the file does not contain exactly one
                ``Total_Rows`` row.
        """
        return self._load_without_trailing_rows(
            filename,
            features,
            session,
            rel_col,
            marker_col="Start Date",
            marker_value="Total_Rows",
            count_col="End Date",
        )

    def preprocessing_music(self, filename, features, session, rel_col):
        """Load a report whose row count is tagged with ``Row Count``.

        Same as :meth:`preprocessing`, but for the iTunes file version where
        the row whose "Storefront Name" cell equals ``"Row Count"`` holds the
        number of data rows in its "Apple Identifier" cell.

        Args:
            filename (str): s3 full path of filename
            features (dict): contains delimiter, skip_rows and encoding
                required for current file
            session (boto3 obj): AWS client connection
            rel_col (str): column name used to identify upc/release_id;
                it is always read as ``str``

        Returns:
            df (pandas dataframe): the loaded rows without the trailing rows

        Raises:
            KeyError: if ``features`` or the file lacks an expected key/column.
            TypeError: if the file does not contain exactly one
                ``Row Count`` row.
        """
        return self._load_without_trailing_rows(
            filename,
            features,
            session,
            rel_col,
            marker_col="Storefront Name",
            marker_value="Row Count",
            count_col="Apple Identifier",
        )

    def _load_without_trailing_rows(self, filename, features, session, rel_col,
                                    marker_col, marker_value, count_col):
        """Read the file from S3 and drop every row from the declared count on."""
        delimiter = features["delimiter"]
        skip_rows = features["skip_rows"]
        encoding = features["encoding"]

        df = wr.s3.read_csv(
            filename,
            sep=delimiter,
            skiprows=skip_rows,
            encoding=encoding,
            dtype={rel_col: str},
            low_memory=False,
            boto3_session=session,
        )

        total_rows = self._declared_row_count(
            df, marker_col, marker_value, count_col
        )
        # Index labels total_rows .. len(df) - 1 are removed in place.
        df.drop(range(total_rows, len(df)), axis=0, inplace=True)
        return df

    @staticmethod
    def _declared_row_count(df, marker_col, marker_value, count_col):
        """Return the integer in ``count_col`` on the single marker row."""
        matches = df.loc[df[marker_col] == marker_value, count_col]
        if len(matches) != 1:
            # TypeError is what int(<Series>) used to raise here; the type is
            # kept so existing handlers keep working, only the message is new.
            raise TypeError(
                f"expected exactly one row with {marker_col!r} == "
                f"{marker_value!r}, found {len(matches)}"
            )
        # int(Series) is deprecated in pandas >= 2.1; convert the scalar.
        return int(matches.iloc[0])

    def type(self, upc):
        """Return the one-letter sale type derived from a UPC value.

        Args:
            upc: UPC cell value; may be a string, NaN, ``None`` or ``pd.NA``

        Returns:
            str: ``"T"`` when the UPC is missing (``None``, NaN, ``pd.NA``,
            empty or whitespace-only), otherwise ``"A"``.
            NOTE(review): presumably "T" = track and "A" = album - confirm.
        """
        if upc is None:
            return "T"
        text = str(upc).strip()
        # "nan" and "<NA>" are the string forms of float NaN and pandas.NA.
        if text in ("", "nan", "<NA>"):
            return "T"
        return "A"

    def assetType(self, df):
        """Add a ``gettypeitunes`` column computed from the ``upc`` column.

        Args:
            df (pandas dataframe): dataframe where changes will be applied;
                must contain an ``upc`` column. It is modified in place.

        Returns:
            df (pandas dataframe): the same object, with the new column
        """
        # Element-wise map avoids building a Series per row and, unlike a
        # row-wise apply, still yields a Series when the frame is empty.
        df["gettypeitunes"] = df["upc"].map(self.type)
        return df

    def date(self, filename):
        """Build a ``YYYYMM01`` date string from the ``_MMYY_`` filename token.

        For example ``S1_89680172_0723_ZZ.txt`` gives ``"20230701"``. The
        first four-digit token enclosed in underscores is used, and the
        century is assumed to be 20xx.

        Args:
            filename (str): current filename

        Returns:
            date_str (str): year, month and the fixed day ``01``

        Raises:
            IndexError: if the filename contains no ``_dddd_`` token.
        """
        match = re.search(r'_(\d{4})_', filename)
        if match is None:
            # IndexError matches what the previous findall(...)[0] raised.
            raise IndexError(
                f"no '_MMYY_' date token found in filename {filename!r}"
            )
        token = match.group(1)
        month = token[:2]
        year = "20" + token[2:]
        return year + month + "01"


src.importer.templates.itunesTemplate.itunesTemplate
Definition itunesTemplate.py:4

src.importer.templates.itunesTemplate.itunesTemplate.assetType
assetType(self, df)
Definition itunesTemplate.py:56

src.importer.templates.itunesTemplate.itunesTemplate.date
date(self, filename)
Definition itunesTemplate.py:65

src.importer.templates.itunesTemplate.itunesTemplate.type
type(self, upc)
Definition itunesTemplate.py:46

src.importer.templates.itunesTemplate.itunesTemplate.preprocessing
preprocessing(self, filename, features, session, rel_col)
Definition itunesTemplate.py:5

src.importer.templates.itunesTemplate.itunesTemplate.preprocessing_music
preprocessing_music(self, filename, features, session, rel_col)
Definition itunesTemplate.py:25