Module panama.utils.nbutils
Classes
class Cell (lines: List[str])-
Class for a cell object. It is basically a wrapper for a list of commands, with some helper methods.
Expand source code
class Cell:
    """Wrapper around the list of command lines that make up one notebook cell.

    Attributes:
        lines (List[str]): input lines with newline characters removed and
            empty lines dropped.
        raw_lines (List[str]): copy of ``lines``; when the cell has a magic,
            the ``# MAGIC`` prefix and any ``%magic`` token are stripped.
        magic (Union[str, None]): name of the magic command found in the cell
            (without the leading ``%``), or None when there is none.
    """

    _sep = "# COMMAND ----------"
    # Compiled once at class creation instead of once per processed line.
    _magic_name_re = re.compile(r"(?<=# MAGIC )(%+\w+)")
    _magic_prefix_re = re.compile(r"#\s+MAGIC\s{0,1}(%+\w+[ ]*)*")

    def __init__(self, lines: List[str]):
        self.lines = Cell._clear_lines(lines)
        self.raw_lines: List[str] = self.lines.copy()
        self.magic: Union[str, None] = None
        self._get_magic()
        self._get_raw_lines()

    @staticmethod
    def _clear_lines(lines: List[str]) -> List[str]:
        """Remove newline characters from each line and drop empty lines.

        Args:
            lines (List[str]): lines to clean

        Returns:
            List[str]: cleaned lines
        """
        stripped = (line.replace("\n", "") for line in lines)
        return [line for line in stripped if line]

    def _get_magic(self):
        """Populate ``magic`` with the first magic command found in the cell, if any."""
        for line in self.lines:
            if not line.startswith("# MAGIC"):
                continue
            match = self._magic_name_re.search(line)
            if match:
                # Keep only the magic name, e.g. "%sql" -> "sql".
                self.magic = match.group(0).replace("%", "")
                break

    def _get_raw_lines(self):
        """Strip the magic prefix from ``raw_lines`` when the cell has a magic."""
        if self.magic is None:
            return
        stripped = [self._magic_prefix_re.sub("", line) for line in self.raw_lines]
        # Lines that held only the magic marker are now empty: drop them.
        self.raw_lines = Cell._clear_lines(stripped)

    def __repr__(self):
        return repr(self.lines)

    def __len__(self):
        return len(self.lines)

    def __str__(self):
        return "\n".join(self.lines)

    def save(self, file_handler: Union[str, TextIOWrapper]):
        """Write the cell content, followed by the cell separator, to a file.

        Every line is written on its own row; the separator line is written last.

        Args:
            file_handler (Union[str, TextIOWrapper]): the file where to write the cell.
                If a string is passed, the file is opened in append mode and closed
                at the end of the function, even if writing fails.
                If a TextIOWrapper is passed, data is just written, and the mode
                depends on the TextIOWrapper. The TextIOWrapper does not get closed.
        """
        # Only close handles this method opened itself.
        opened_here = isinstance(file_handler, str)
        handle = open(file_handler, "a+") if opened_here else file_handler
        try:
            # Stored lines carry no newline, so one is added back per line.
            handle.writelines(line + "\n" for line in self.lines)
            handle.write(self._sep + "\n")
        finally:
            if opened_here:
                handle.close()


# Methods
def save(self, file_handler: Union[str, TextIOWrapper])-
Function used to save cell content to a file.
Args
file_handler:Union[str, TextIOWrapper]- the file where to write the cell. If a string is passed, the file is opened in append mode, and closed at the end of the function. If a TextIOWrapper is passed, data is just written, and the mode depends on the TextIOWrapper. The TextIOWrapper does not get closed.
class Notebook (lines: List[str])-
Class for a Notebook object. It is basically a wrapper for a list of commands separated in single cells, with some helper methods.
Expand source code
class Notebook:
    """Wrapper around a notebook source, split into single cells.

    The source lines are cut into ``Cell`` objects at every line that starts
    with the cell separator; helper methods allow editing the cells and
    reading/writing the notebook through the workspace client.
    """

    _sep = "# COMMAND ----------"
    # NOTE(review): the client is instantiated when the class body is executed,
    # i.e. when the module is imported.
    _w = WorkspaceClient()

    def __init__(self, lines: List[str]):
        self._lines: List[str] = lines
        self.cells: List[Cell] = []
        self._get_cells()

    def __repr__(self):
        return repr(self.cells)

    def __len__(self):
        return len(self.cells)

    def __str__(self):
        # Cells are joined by the separator placed on its own line.
        joiner = "\n" + self._sep + "\n"
        return joiner.join(str(cell) for cell in self.cells)

    def encode(self, encoding: str = "utf-8") -> bytes:
        """Encode the notebook representation into a certain encoding.

        Args:
            encoding (str, optional): target encoding. Defaults to "utf-8".

        Returns:
            bytes: encoded notebook.
        """
        return str(self).encode(encoding)

    def _get_cells(self):
        """Private method to generate cells from lines.

        See help(panama.utils.nbutils.Cell) for more information.
        """
        sep_positions = [pos for pos, line in enumerate(self._lines) if line.startswith(self._sep)]
        # A cell starts right after a separator (or at the first line) and ends
        # right before the next separator (or at the last line).
        starts = [0] + [pos + 1 for pos in sep_positions]
        ends = sep_positions + [len(self._lines)]
        self.cells = [Cell(self._lines[start:end]) for start, end in zip(starts, ends)]

    def add_cell(self, code: Union[List[str], str], pos: Optional[int] = None):
        """Add a cell in the required position.

        Args:
            code (Union[List[str], str]): content of the cell. Can be either a list of
                commands or a string with commands separated by newline characters.
            pos (int, optional): position where to insert the new cell. If None, code
                is added at the end of the cells. Defaults to None.
        """
        # A string is split into lines; a list of commands is used as given.
        lines = code.split("\n") if isinstance(code, str) else list(code)
        cell = Cell(lines=lines)
        if pos is None:
            pos = len(self.cells)
        self.cells.insert(pos, cell)

    def remove_magic_cells(self, magic: str) -> "Notebook":
        """Return a new notebook without the cells that have a certain magic.

        Args:
            magic (str): magic value to remove.

        Returns:
            Notebook: notebook without the cells that have the target magic.
        """
        kept = [cell for cell in self.cells if cell.magic != magic]
        out_lines: List[str] = []
        for index, cell in enumerate(kept):
            # The separator goes between cells only: a trailing separator would
            # be parsed back as an extra empty cell.
            if index > 0:
                out_lines.append(self._sep)
            out_lines.extend(cell.lines)
        return Notebook(out_lines)

    @staticmethod
    def read_notebook(input_path: str) -> "Notebook":
        """Load a notebook from a databricks path and return it as a Notebook.

        Args:
            input_path (str): path to the notebook file
                (e.g. "/Shared/<project>/notebooks/<notebook_name>").

        Raises:
            ValueError: If the response is empty.

        Returns:
            Notebook: notebook object holding the code.
        """
        response = Notebook._w.workspace.export(path=input_path, format=workspace.ExportFormat.SOURCE)
        if response.content is None:
            raise ValueError("Response content is empty. Check input_path")
        # The exported content is base64-decoded, then read as UTF-8 text.
        source = base64.b64decode(response.content).decode("utf-8")
        return Notebook(source.split("\n"))

    def write_notebook(self, output_path: str, overwrite: bool = True) -> None:
        """Write the current notebook to a path.

        Args:
            output_path (str): destination where the notebook will be written.
            overwrite (bool, optional): overwrite option should a notebook with the
                same name already exist. Defaults to True.
        """
        self._w.workspace.upload(
            path=output_path,
            content=io.BytesIO(self.encode("utf-8")),
            language=workspace.Language.PYTHON,
            overwrite=overwrite,
        )


# Static methods
def read_notebook(input_path: str) ‑> Notebook-
Static method used to load a notebook from a databricks path and return it as a Notebook.
Args
input_path:str- path to the notebook file (e.g. "/Shared/<project>/notebooks/<notebook_name>").
Raises
ValueError- If the response is empty.
Returns
Notebook- notebook object holding the code.
Methods
def add_cell(self, code: Union[List[str], str], pos: Optional[int] = None)-
Add a cell in the required position.
Args: code (Union[List[str], str]): content of the cell. Can be either a list of commands or a string with commands inside separated by newline characters (\n). pos (int, optional): position where to insert the new cell. If None, code is added at the end of the cells. Defaults to None.
def encode(self, encoding: str = 'utf-8') ‑> bytes-
Encode the notebook representation into a certain encoding.
Args
encoding:str, optional- target encoding. Defaults to "utf-8".
Returns
bytes- encoded notebook.
def remove_magic_cells(self, magic: str) ‑> Notebook-
Method used to remove all the cells that have a certain magic from a notebook. Returns a new notebook.
Args
magic:str- magic value to remove.
Returns
Notebook- notebook without the cells that have the target magic.
def write_notebook(self, output_path: str, overwrite: bool = True) ‑> None-
Function used to write the current notebook to a path.
Args
output_path:str- destination where the notebook will be written.
overwrite:bool, optional- overwrite option should a notebook with the same name already exist. Defaults to True.