Module panama.utils.nbutils

Classes

class Cell (lines: List[str])

Class for a cell object. It is basically a wrapper for a list of commands, with some helper methods.

Expand source code
class Cell:
    """A single notebook cell: a list of source lines plus a few helpers.

    Attributes set by ``__init__``:
        lines: the input lines with newline characters removed and empty lines dropped.
        raw_lines: ``lines`` with the ``# MAGIC`` prefix (and any ``%magic`` token) stripped.
            It is a plain copy of ``lines`` when the cell holds no magic command.
        magic: name of the first magic command found (without ``%``), or None.
    """

    _sep = "# COMMAND ----------"
    # Patterns are compiled once here instead of on every loop iteration.
    _magic_name_re = re.compile(r"(?<=# MAGIC )(%+\w+)")
    _magic_prefix_re = re.compile(r"#\s+MAGIC\s{0,1}(%+\w+[ ]*)*")

    def __init__(self, lines: List[str]):
        self.lines = Cell._clear_lines(lines)
        self.raw_lines: List[str] = self.lines.copy()
        self.magic: Union[str, None] = None
        self._get_magic()
        self._get_raw_lines()

    @staticmethod
    def _clear_lines(lines: List[str]) -> List[str]:
        """Remove newline characters from every line and drop the lines left empty.

        Args:
            lines (List[str]): lines to clean

        Returns:
            List[str]: cleaned lines
        """
        without_newlines = (line.replace("\n", "") for line in lines)
        return [line for line in without_newlines if line]

    def _get_magic(self):
        """Look for a magic command in the cell and store its name in ``self.magic``.

        Only lines starting with ``# MAGIC`` are inspected; the first ``%name`` token
        found wins and is stored without the leading ``%`` characters.
        """
        for line in self.lines:
            if not line.startswith("# MAGIC"):
                continue
            match = self._magic_name_re.search(line)
            if match:
                self.magic = match.group(0).replace("%", "")
                break

    def _get_raw_lines(self):
        """Strip the ``# MAGIC`` prefix and magic tokens from ``raw_lines``.

        Nothing is done when no magic command was detected. Lines that become
        empty after stripping (e.g. a bare ``# MAGIC %md``) are dropped.
        """
        if self.magic is None:
            return
        stripped = [self._magic_prefix_re.sub("", line) for line in self.raw_lines]
        self.raw_lines = Cell._clear_lines(stripped)

    def __repr__(self):
        return repr(self.lines)

    def __len__(self):
        return len(self.lines)

    def __str__(self):
        return "\n".join(self.lines)

    def _write_to(self, handle) -> None:
        """Write the cell lines, one per row, followed by the cell separator."""
        # The stored lines had their newline stripped, so it must be restored here;
        # otherwise all the lines (and the separator) end up on a single row.
        handle.writelines(line + "\n" for line in self.lines)
        handle.write(self._sep + "\n")

    def save(self, file_handler: Union[str, TextIOWrapper]):
        """Function used to save cell content to a file.

        Every line of the cell is written on its own row, followed by the cell
        separator and a trailing newline.

        Args:
            file_handler (Union[str, TextIOWrapper]): the file where to write the cell.
                If a string is passed, the file is opened in append mode, and closed at the end of the function.
                If a TextIOWrapper is passed, data is just written, and the mode depends on the TextIOWrapper.
                The TextIOWrapper does not get closed.
        """
        if isinstance(file_handler, str):
            # The context manager closes the file even if a write fails.
            with open(file_handler, "a+") as handle:
                self._write_to(handle)
        else:
            # Caller-owned handle: write only, never close it.
            self._write_to(file_handler)

Methods

def save(self, file_handler: Union[str, TextIOWrapper])

Function used to save cell content to a file.

Args

file_handler : Union[str, TextIOWrapper]
the file where to write the cell. If a string is passed, the file is opened in append mode, and closed at the end of the function. If a TextIOWrapper is passed, data is just written, and the mode depends on the TextIOWrapper. The TextIOWrapper does not get closed.
class Notebook (lines: List[str])

Class for a Notebook object. It is basically a wrapper for a list of commands separated in single cells, with some helper methods.

Expand source code
class Notebook:
    """Class for a Notebook object. It is basically a wrapper for a list of commands separated in single cells, with some helper methods.

    The raw lines are split into ``Cell`` objects on every line that starts with
    the cell separator (``# COMMAND ----------``).
    """

    _sep = "# COMMAND ----------"
    # NOTE(review): the client is created when the class body is executed, i.e. when
    # the module is imported. Presumably this needs a working Databricks
    # configuration at import time -- confirm before relying on it.
    _w = WorkspaceClient()

    def __init__(self, lines: List[str]):
        self._lines: List[str] = lines
        self.cells: List[Cell] = []
        self._get_cells()

    def __repr__(self):
        return repr(self.cells)

    def __len__(self):
        return len(self.cells)

    def __str__(self):
        sep = "\n" + self._sep + "\n"
        return sep.join(str(cell) for cell in self.cells)

    def encode(self, encoding: str = "utf-8") -> bytes:
        """Encode the notebook representation into a certain encoding.

        Args:
            encoding (str, optional): target encoding. Defaults to "utf-8".

        Returns:
            bytes: encoded notebook.
        """
        return str(self).encode(encoding)

    def _get_cells(self):
        """Private method to generate cells from lines. See help(panama.utils.nbutils.Cell) for more information."""
        sep_positions = [pos for pos, line in enumerate(self._lines) if line.startswith(self._sep)]
        # A cell spans from the line after one separator up to (excluding) the next
        # separator; -1 and len(lines) act as virtual separators at both ends.
        starts = [-1] + sep_positions
        ends = sep_positions + [len(self._lines)]
        self.cells = [Cell(self._lines[start + 1 : end]) for start, end in zip(starts, ends)]

    def add_cell(self, code: Union[List[str], str], pos: Optional[int] = None):
        r"""Add a cell in the required position.

        Args:
            code (Union[List[str], str]): content of the cell. Can be either a list of commands or a string
                with commands inside separated by \n.
            pos (int, optional): position where to insert the new cell. If None, code is added at the end of the cells. Defaults to None.
        """
        # Both accepted input shapes end up as a list of lines; a list used to be
        # silently ignored because the insertion only happened for strings.
        lines = code.split("\n") if isinstance(code, str) else list(code)
        cell = Cell(lines=lines)
        if pos is None:
            pos = len(self.cells)
        self.cells.insert(pos, cell)

    def remove_magic_cells(self, magic: str) -> Notebook:
        """Method used to remove all the cells that have a certain magic from a notebook.
        Returns a new notebook.

        Args:
            magic (str): magic value to remove.

        Returns:
            Notebook: notebook without the cells that have the target magic.
        """
        kept_cells = [cell for cell in self.cells if cell.magic != magic]
        out_lines: List[str] = []
        for idx, cell in enumerate(kept_cells):
            # Separators go *between* cells only: a trailing separator would be
            # parsed back as an extra empty cell at the end of the new notebook.
            if idx:
                out_lines.append(self._sep)
            out_lines.extend(cell.lines)

        return Notebook(out_lines)

    @staticmethod
    def read_notebook(input_path: str) -> Notebook:
        """Static method used to load a notebook from a databricks path and return it as a Notebook.

        Args:
            input_path (str): path to the notebook file (e.g. "/Shared/<project>/notebooks/<notebook_name>").

        Raises:
            ValueError: If the response is empty.

        Returns:
            Notebook: notebook object holding the code.
        """
        response = Notebook._w.workspace.export(path=input_path, format=workspace.ExportFormat.SOURCE)
        if response.content is None:
            raise ValueError("Response content is empty. Check input_path")
        # The exported content is base64-encoded; decode it and split it into lines.
        source = base64.b64decode(response.content).decode("utf-8")
        return Notebook(source.split("\n"))

    def write_notebook(self, output_path: str, overwrite: bool = True) -> None:
        """Function used to write the current notebook to a path.

        Args:
            output_path (str): destination where the notebook will be written.
            overwrite (bool, optional): overwrite option should a notebook with the same name already exist. Defaults to True.
        """
        self._w.workspace.upload(
            path=output_path,
            content=io.BytesIO(self.encode("utf-8")),
            language=workspace.Language.PYTHON,
            overwrite=overwrite,
        )

Static methods

def read_notebook(input_path: str) ‑> Notebook

Static method used to load a notebook from a databricks path and return it as a Notebook.

Args

input_path : str
path to the notebook file (e.g. "/Shared/<project>/notebooks/<notebook_name>").

Raises

ValueError
If the response is empty.

Returns

Notebook
notebook object holding the code.

Methods

def add_cell(self, code: Union[List[str], str], pos: Optional[int] = None)

Add a cell in the required position.

    Args:
        code (Union[List[str], str]): content of the cell. Can be either a list of commands or a string
            with commands inside separated by \n.
        pos (int, optional): position where to insert the new cell. If None, code is added at the end of the cells. Defaults to None.

def encode(self, encoding: str = 'utf-8') ‑> bytes

Encode the notebook representation into a certain encoding.

Args

encoding : str, optional
target encoding. Defaults to "utf-8".

Returns

bytes
encoded notebook.
def remove_magic_cells(self, magic: str) ‑> Notebook

Method used to remove all the cells that have a certain magic from a notebook. Returns a new notebook.

Args

magic : str
magic value to remove.

Returns

Notebook
notebook without the cells that have the target magic.
def write_notebook(self, output_path: str, overwrite: bool = True) ‑> None

Function used to write the current notebook to a path.

Args

output_path : str
destination where the notebook will be written.
overwrite : bool, optional
overwrite option should a notebook with the same name already exist. Defaults to True.