# Module libnova.common.api.File
#!/usr/bin/env python
# coding: utf-8
import datetime
import time
from enum import Enum
from typing import Iterator, List

import libnova.common.api
from libnova.common import api
# Define Enum structs
from libnova.common.api.Serializable import Serializable
class FileType(Enum):
    """File Type

    Integer-coded kind of a `File` entry.
    """
    FILE = 1     # a regular file
    FOLDER = 2   # a directory-like node
    JOURNAL = 3  # a journal entry
class FileStructure(Enum):
    """File Structure Type

    Integer-coded structure classification of a `File` entry.
    """
    UNSTRUCTURED = 1
    OBJECTS = 2
    COMMON = 3
# Define main object structure
# Define main object structure
class File(Serializable):
    """File

    The main methods allows the interaction with the main structures related to files hosted in the platform
    """
    id: int = 0              # platform file id (0 until created)
    container_id: int        # id of the owning container
    storage_class_id: int    # id of the storage class backing the file
    parent: int              # file id of the parent folder
    filename: str            # bare file name (no path)
    fullpath: str            # full relative path inside the container
    deleted: bool            # soft-delete flag
    size: int                # size in bytes
    # NOTE(review): the defaults below are the raw int 1, not FileType.FILE /
    # FileStructure.UNSTRUCTURED — presumably relied on by Serializable;
    # confirm before changing to the enum members.
    type: FileType = 1
    structure: FileStructure = 1
    format: str              # file format identifier
    mime: str                # MIME type
    date_update: datetime.datetime
    date_create: datetime.datetime
    date_audit: datetime.datetime
    rand_audit: int
def get_container_file(container_id, filename):
    """Retrieve a file inside a container using a given file name (not path)

    Args:
        container_id (int): The Container id
        filename (str): The file name

    Returns:
        File: A `File` or `list[File]` with the results
    """
    # Elasticsearch bool clauses: exact container, exact filename, not deleted.
    term_filters = [
        {"term": {"container_id": container_id}},
        {"term": {"filename.keyword": filename}},
        {"term": {"deleted": False}},
    ]
    driver = api.Driver.get_instance()
    return driver.post_json(
        url_segment='file/elastic',
        data={"must": term_filters}
    )
def get_container_files_by_metadata_value(container_id, iecode, value):
    """Retrieve a file inside a container using a given `container_id` and a metadata `value`

    Sample response:
        {
            "success": true,
            "total": 1,
            "result": [
                {
                    "id": "1234",
                    "metadata_schema_descriptor_id": "1234",
                    "container_id": "1234",
                    "file_id": "1234",
                    "value": "The story of Augusto Perez",
                    "creator": "1",
                    "iecode": "description",
                    "linked": null
                }
            ]
        }

    Args:
        container_id (int): The Container id
        iecode (int): The iecode of the metadata descriptor
        value (str): The value of the metadata descriptor

    Returns:
        File: A `File` or `list[File]` with the results
    """
    # Match against the nested metadata field named after the descriptor iecode.
    nested_clause = {
        "nested": {
            "path": "metadata",
            "query": {
                "match": {"metadata." + iecode: value}
            },
        }
    }
    query = {
        "must": [
            {"term": {"container_id": container_id}},
            nested_clause,
        ]
    }
    driver = api.Driver.get_instance()
    return driver.post_json(
        url_segment='file/elastic',
        data=query
    )
def clone_file_meta(source_file_id, target_file_id):
    """Clone an existing `File` data into another `File`

    Args:
        source_file_id (int): The id of the `File` to take data from
        target_file_id (int): The id of the `File` to put data to

    Returns:
        dict: The API response
    """
    # NOTE(review): this endpoint uses an absolute '/api/...' segment while
    # the other helpers in this module use relative segments like 'file/...'
    # — confirm this difference is intentional.
    endpoint = '/api/file/{}/copy/extras/{}'.format(source_file_id, target_file_id)
    driver = api.Driver.get_instance()
    return driver.post_json(url_segment=endpoint)
def create(file: File) -> File:
    """Create a "dry" `File` in the platform, not associated to any file data but only in database and search engine

    Args:
        file (File): The `File` to create

    Returns:
        File: A newly created `File` in the platform
    """
    driver = api.Driver.get_instance()
    response = driver.post_json(url_segment='file', data=file)
    return driver.serialize(response, File)
def get(file_id) -> File:
    """Retrieve a file by its `file_id`

    Args:
        file_id (int): The `File` id

    Returns:
        File: A `File` if exists
    """
    driver = api.Driver.get_instance()
    response = driver.get(url_segment='file/' + str(file_id))
    return driver.serialize(response, File)
def get_file_by_condition(conditions) -> File:
    """Retrieve a single file using a given set of `conditions`

    Args:
        conditions (dict): The conditions of the query

    Returns:
        File: A `File` if found
    """
    # The API expects each condition as its own single-key dict.
    query = {
        "conditions": [{key: value} for key, value in conditions.items()]
    }
    driver = api.Driver.get_instance()
    raw = driver.get(url_segment='file', data=query)
    return driver.serialize(driver.get_single(raw), File)
def get_files_by_condition(conditions) -> List[File]:
    """Retrieve a group of files using a given set of `conditions`

    Args:
        conditions (dict): The conditions of the query

    Returns:
        list[File]: A list of `File` if found
    """
    # Fix: the return annotation previously said `-> File` while the function
    # (and its docstring) actually yields a list of File objects.
    # The API expects each condition as its own single-key dict.
    query = {
        "conditions": [{key: value} for key, value in conditions.items()]
    }
    api_driver = api.Driver.get_instance()
    return api_driver.serialize(
        api_driver.get(
            url_segment='file',
            data=query
        ),
        File
    )
def get_by_path(container_id, file_path) -> File:
    """Retrieve a file inside a container using a given `file_path`

    Args:
        container_id (int): The Container id
        file_path (str): The complete relative path of the file inside the container (with leading slash '/')

    Returns:
        File: A `File` if exists
    """
    conditions = {
        "container_id": container_id,
        "fullpath": file_path,
    }
    return get_file_by_condition(conditions)
def get_container_files_by_name(container_id, filename) -> List[File]:
    """Retrieve a group of files inside a container using a given `filename`

    Args:
        container_id (int): The Container id
        filename (str): The name of the files

    Returns:
        list[File]: A list of `File` if found
    """
    # Fix: the return annotation previously said `-> File` while the function
    # (and its docstring) actually returns a list of File objects.
    return get_files_by_condition(
        {
            "container_id": container_id,
            "filename": filename,
            "deleted": 0
        }
    )
def get_files(folder_file_id):
    """Retrieve files from a folder using a given `file_id`

    Args:
        folder_file_id (int): The folder File id

    Returns:
        File: A list of `File` within a folder
    """
    # Non-deleted direct children of the given folder.
    conditions = {"parent": folder_file_id, "deleted": 0}
    return get_files_by_condition(conditions)
def iterate_files(conditions) -> Iterator[File]:
    """Yielded iterator over the files that match the given `conditions`

    Pages through the 'file' endpoint in batches of 1000.

    Args:
        conditions (dict): Conditions to use in the request

    Returns:
        iterable: An iterable of `File`
    """
    offset = 0
    limit = 1000
    conditions_array = [{key: value} for key, value in conditions.items()]
    api_driver = api.Driver.get_instance()
    while True:
        query = {
            "conditions": conditions_array,
            "limit": limit,
            "offset": offset
        }
        result = api_driver.serialize(
            api_driver.get(
                url_segment='file',
                data=query
            ),
            File
        )
        # Bug fix: the original iterated `result` BEFORE checking for None,
        # so an empty API response raised TypeError instead of ending cleanly.
        if not result:
            break
        for item in result:
            yield item
        offset += len(result)
def iterate_container_files(container_id):
    """Yielded iterator over the files of a given container

    Args:
        container_id (int): The container id to iterate files from

    Returns:
        iterable: An iterable of `File`
    """
    # Only non-deleted entries of type FILE.
    conditions = {
        'container_id': container_id,
        'type': 'FILE',
        'deleted': '0',
    }
    return iterate_files(conditions)
def iterate_container_folder_files(container_id, folder_path):
    """Yielded iterator over the files of a given container and a `folder_path`

    Args:
        container_id (int): The container id to iterate files from
        folder_path (str): The path of the folder inside the container to iterate files from

    Returns:
        iterable: An iterable of `File`
    """
    # Normalize the folder path to exactly one trailing slash so the
    # prefix match covers the folder's contents only.
    prefix = folder_path.rstrip('/') + '/'
    conditions = {
        'container_id': container_id,
        'type': 'FILE',
        'fullpath': {"operator": "starts_with", "value": prefix},
        'deleted': '0',
    }
    return iterate_files(conditions)
def iterate_container_file_hashes(container_id, hash_algorithm, conditions=None):
    """Yielded iterator over the file hashes of a given container

    Pages through the 'file/hash/<algorithm>' endpoint in batches of 1000.

    Args:
        container_id (int): The container id to iterate files from
        hash_algorithm (str): The hashing algorithm
        conditions (dict): Conditions to use in the request

    Returns:
        iterable: An iterable of `File`
    """
    # Bug fix: the original used a mutable default argument (`conditions={}`),
    # which is shared across calls; use a None sentinel instead.
    if conditions is None:
        conditions = {}
    offset = 0
    limit = 1000
    conditions_array = [{
        "container_id": container_id
    }]
    for condition_key, condition_value in conditions.items():
        conditions_array.append({
            condition_key: condition_value
        })
    api_driver = api.Driver.get_instance()
    while True:
        query = {
            "conditions": conditions_array,
            "limit": limit,
            "offset": offset
        }
        result = api_driver.get(
            url_segment='file/hash/' + hash_algorithm,
            data=query
        )
        # Bug fix: the original iterated `result` BEFORE checking for None,
        # so an empty API response raised TypeError instead of ending cleanly.
        if not result:
            break
        for item in result:
            yield item
        offset += len(result)
def iterate_container_folder_file_hashes(container_id, hash_algorithm, folder_path):
    """Yielded iterator over the file hashes of a given container

    Args:
        container_id (int): The container id to iterate files from
        hash_algorithm (str): The hashing algorithm
        folder_path (str): The path of the folder inside the container to iterate files from

    Returns:
        iterable: An iterable of `File`
    """
    # Restrict the hash iteration to paths under the given folder.
    prefix = folder_path.rstrip('/') + '/'
    extra_conditions = {
        'fullpath': {"operator": "starts_with", "value": prefix}
    }
    return iterate_container_file_hashes(container_id, hash_algorithm, extra_conditions)
def get_multiple_files(container_id, limit, offset):
    """Get all files from a container in batches

    Args:
        container_id (int): The container id
        limit (int): Total length of files to retrieve
        offset (int): Number of first item

    Returns:
        dict: A dictionary with limit value files
    """
    # NOTE(review): this queries the 'file/hash/md5' endpoint even though it is
    # described as a generic batched file listing — confirm this is intended.
    query = {
        "conditions": [{"container_id": container_id}],
        "limit": limit,
        "offset": offset,
    }
    driver = api.Driver.get_instance()
    response = driver.get(url_segment='file/hash/md5', data=query)
    return driver.serialize(response, File)
def get_hashes(file_id):
    """Retrieve the hashes of a file by its `file_id`

    Args:
        file_id (int): The File id

    Returns:
        dict: A hash dictionary if the file exists
    """
    driver = api.Driver.get_instance()
    found_file = driver.get(url_segment='file/' + str(file_id))
    if found_file is None:
        return {}
    # Map each algorithm name to its hash value.
    return {entry['algo']: entry['hash'] for entry in found_file['file_hash']}
def get_metadata(file_id, flatten=False):
    """Retrieve the metadata of a file by its `file_id`

    If `flatten` is enabled, it will convert the output to a dict with a better iterable structure

    Args:
        file_id (int): The File id
        flatten (bool): Flatten the structure from `list[item(iecode, value)]` to `dict[iecode, list[value]]`

    Returns:
        dict: A metadata dictionary if the file exists
    """
    driver = api.Driver.get_instance()
    metadata = driver.get(url_segment='file/' + str(file_id) + '/metadata')
    if not flatten:
        return metadata
    # Group all values under their descriptor iecode.
    flattened = {}
    if metadata is not None:
        for item in metadata:
            flattened.setdefault(item['iecode'], []).append(item['value'])
    return flattened
def get_elastic(file_id):
    """Retrieve a file by its `file_id` using the search engine (Elasticsearch)

    Args:
        file_id (int): The File id

    Returns:
        dict: An elasticsearch document if the file exists
    """
    # Single exact-match term on the document id.
    query = {"must": [{"term": {"id": file_id}}]}
    driver = api.Driver.get_instance()
    return driver.post_json(
        url_segment='file/elastic',
        data=query
    )
def add_metadata_value(file_id, iecode, value):
    """Add a metadata value for the given `file_id` and `iecode`

    Args:
        file_id (int): The File id
        iecode (str): The Metadata Descriptor iecode to add the value to
        value (str): The Metadata Descriptor to add to the file

    Returns:
        dict: An API result (see documentation)
    """
    # 'ADD' appends a value without replacing the descriptor's existing ones.
    payload = {
        'metadata': [
            {'iecode': iecode, 'value': value, 'action': 'ADD'}
        ]
    }
    driver = api.Driver.get_instance()
    return driver.put_json(
        url_segment='file/' + str(file_id) + '/metadata',
        data=payload
    )
def set_hash(file_id, algo, hash):
    """Set a file hash for the given `file_id` and `algorithm`

    Args:
        file_id (int): The File id
        algo (str): The hashing algorithm
        hash (str): The hash value

    Returns:
        dict: An API result (see documentation)
    """
    # Parameter name `hash` shadows the builtin but is part of the public
    # keyword interface, so it is kept as-is.
    payload = {"algo": algo, "hash": hash}
    driver = api.Driver.get_instance()
    return driver.post_json(
        url_segment='file/' + str(file_id) + '/hash',
        data=payload
    )
def set_metadata_values(file_id, values):
    """Set the metadata values for the given `file_id` using a dictionary of `values`

    This will override the existing values of every metadata descriptors of the file, use with caution

    Args:
        file_id (int): The File id
        values (dict): The Metadata values

    Returns:
        dict: An API result (see documentation)
    """
    # Normalize scalars to one-element lists, then emit one entry per value.
    entries = []
    for iecode, value in values.items():
        value_list = value if isinstance(value, list) else [value]
        for single_value in value_list:
            entries.append({'iecode': iecode, 'value': single_value})
    driver = api.Driver.get_instance()
    return driver.put_json(
        url_segment='file/' + str(file_id) + '/metadata',
        data={'metadata': entries}
    )
def set_metadata(file_id, json_data):
    """Set the metadata values for the given `file_id` using a raw structure `json_data`

    This will override the existing values of every metadata descriptors of the file, use with caution

    The structure is the following:
        {
            "metadata": [
                {
                    "iecode": "the metadata descriptor iecode",
                    "value": "the metadata value"
                },
                [...]
                {
                    "iecode": "another metadata descriptor iecode",
                    "value": "another metadata value"
                }
            ]
        }

    Args:
        file_id (int): The File id
        json_data (dict): The Metadata structure

    Returns:
        dict: An API result (see documentation)
    """
    driver = api.Driver.get_instance()
    endpoint = 'file/' + str(file_id) + '/metadata'
    return driver.put_json(url_segment=endpoint, data={'metadata': json_data})
def render_fullpath(file: File):
    """Render a relative fullpath given a `File` object

    Walks the parent chain recursively, prefixing each ancestor's path.

    Args:
        file (File): The `File` object to render the fullpath from

    Returns:
        str: The fullpath of the file, relative to a container inside the bucket
    """
    # Bug fix: the original tested `File is None` (the class object, which is
    # always truthy) instead of the `file` argument, so a None argument was
    # never caught and crashed on attribute access below.
    if file is None:
        return ""
    fullpath = file.filename
    parent = get(file.parent)
    if parent is not None:
        fullpath = render_fullpath(parent) + fullpath
    # NOTE(review): `file.type` is compared against the enum *name* string
    # ('FOLDER'), which suggests deserialized files carry the name, not the
    # FileType member — confirm against the Serializable implementation.
    if file.type == FileType.FOLDER.name:
        fullpath = fullpath.rstrip('/') + '/'
    return fullpath
if __name__ == "__main__":
    # This is a library module; there is no standalone entry point.
    print('This file cannot be executed directly!')
Functions
def add_metadata_value(file_id, iecode, value)
-
Add a metadata value for the given
file_id
and `iecode`
Args
file_id
:int
- The File id
iecode
:str
- The Metadata Descriptor iecode to add the value to
value
:str
- The Metadata Descriptor to add to the file
Returns
dict
- An API result (see documentation)
Expand source code
def add_metadata_value(file_id, iecode, value): """Add a metadata value for the given `file_id` and `iecode` Args: file_id (int): The File id iecode (str): The Metadata Descriptor iecode to add the value to value (str): The Metadata Descriptor to add to the file Returns: dict: An API result (see documentation) """ api_driver = api.Driver.get_instance() return api_driver.put_json( url_segment='file/' + str(file_id) + '/metadata', data={ 'metadata': [ { 'iecode': iecode, 'value': value, 'action': 'ADD' } ] } )
def clone_file_meta(source_file_id, target_file_id)
-
Clone an existing
File
data into another `File`
Args
source_file_id
:int
- The id of the
File
to take data from target_file_id
:int
- The id of the
File
to put data to
Returns
dict
- The API response
Expand source code
def clone_file_meta(source_file_id, target_file_id): """Clone an existing `File` data into another `File` Args: source_file_id (int): The id of the `File` to take data from target_file_id (int): The id of the `File` to put data to Returns: dict: The API response """ api_driver = api.Driver.get_instance() return api_driver.post_json( url_segment='/api/file/' + str(source_file_id) + '/copy/extras/' + str(target_file_id) )
def create(file: File) ‑> File
-
Create a "dry"
File
in the platform, not associated to any file data but only in database and search engineArgs
Returns
Expand source code
def create(file : File) -> File: """Create a "dry" `File` in the platform, not associated to any file data but only in database and search engine Args: file (File): The `File` to create Returns: File: A newly created `File` in the platform """ api_driver = api.Driver.get_instance() return api_driver.serialize( api_driver.post_json( url_segment='file', data=file ), File )
def get(file_id) ‑> File
-
Expand source code
def get(file_id) -> File: """Retrieve a file by its `file_id` Args: file_id (int): The `File` id Returns: File: A `File` if exists """ api_driver = api.Driver.get_instance() return api_driver.serialize( api_driver.get( url_segment='file/' + str(file_id) ), File )
def get_by_path(container_id, file_path) ‑> File
-
Retrieve a file inside a container using a given
file_path
Args
container_id
:int
- The Container id
file_path
:str
- The complete relative path of the file inside the container (with leading slash '/')
Returns
Expand source code
def get_by_path(container_id, file_path) -> File: """Retrieve a file inside a container using a given `file_path` Args: container_id (int): The Container id file_path (str): The complete relative path of the file inside the container (with leading slash '/') Returns: File: A `File` if exists """ return get_file_by_condition( { "container_id": container_id, "fullpath": file_path } )
def get_container_file(container_id, filename)
-
Retrieve a file inside a container using a given file name (not path)
Args
container_id
:int
- The Container id
filename
:str
- The file name
Returns
Expand source code
def get_container_file(container_id, filename): """Retrieve a file inside a container using a given file name (not path) Args: container_id (int): The Container id filename (str): The file name Returns: File: A `File` or `list[File]` with the results """ query = { "must": [ { "term": { "container_id": container_id } }, { "term": { "filename.keyword": filename } }, { "term": { "deleted": False } } ] } api_driver = api.Driver.get_instance() return api_driver.post_json( url_segment='file/elastic', data=query )
def get_container_files_by_metadata_value(container_id, iecode, value)
-
Retrieve a file inside a container using a given
container_id
and a metadatavalue
Sample response: { "success": true, "total": 1, "result": [ { "id": "1234", "metadata_schema_descriptor_id": "1234", "container_id": "1234", "file_id": "1234", "value": "The story of Augusto Perez", "creator": "1", "iecode": "description", "linked": null } ] }
Args
container_id
:int
- The Container id
iecode
:int
- The iecode of the metadata descriptor
value
:str
- The value of the metadata descriptor
Returns
Expand source code
def get_container_files_by_metadata_value(container_id, iecode, value): """Retrieve a file inside a container using a given `container_id` and a metadata `value` Sample response: { "success": true, "total": 1, "result": [ { "id": "1234", "metadata_schema_descriptor_id": "1234", "container_id": "1234", "file_id": "1234", "value": "The story of Augusto Perez", "creator": "1", "iecode": "description", "linked": null } ] } Args: container_id (int): The Container id iecode (int): The iecode of the metadata descriptor value (str): The value of the metadata descriptor Returns: File: A `File` or `list[File]` with the results """ query = { "must": [ { "term": { "container_id": container_id } }, { "nested": { "path": "metadata", "query": { "match": { "metadata."+iecode: value } } } } ] } api_driver = api.Driver.get_instance() return api_driver.post_json( url_segment='file/elastic', data=query )
def get_container_files_by_name(container_id, filename) ‑> File
-
Retrieve a group of file inside a container using a given
filename
Args
container_id
:int
- The Container id
filename
:str
- The name of the files
Returns
Expand source code
def get_container_files_by_name(container_id, filename) -> File: """Retrieve a group of file inside a container using a given `filename` Args: container_id (int): The Container id filename (str): The name of the files Returns: list[File]: A list of `File` if found """ return get_files_by_condition( { "container_id": container_id, "filename": filename, "deleted": 0 } )
def get_elastic(file_id)
-
Retrieve a file by its
file_id
using the search engine (Elasticsearch)Args
file_id
:int
- The File id
Returns
dict
- An elasticsearch document if the file exists
Expand source code
def get_elastic(file_id): """Retrieve a file by its `file_id` using the search engine (Elasticsearch) Args: file_id (int): The File id Returns: dict: An elasticsearch document if the file exists """ query = { "must": [ { "term": { "id": file_id } } ] } api_driver = api.Driver.get_instance() return api_driver.post_json( url_segment='file/elastic', data=query )
def get_file_by_condition(conditions) ‑> File
-
Retrieve a single file using a given set of
conditions
Args
conditions
:dict
- The conditions of the query
Returns
Expand source code
def get_file_by_condition(conditions) -> File: """Retrieve a single file using a given set of `conditions` Args: conditions (dict): The conditions of the query Returns: File: A `File` if found """ conditions_array = [] for condition_key, condition_value in conditions.items(): conditions_array.append({ condition_key: condition_value }) query = { "conditions": conditions_array } api_driver = api.Driver.get_instance() return api_driver.serialize( api_driver.get_single( api_driver.get( url_segment='file', data=query ) ), File )
def get_files(folder_file_id)
-
Retrieve files from a folder using a given
file_id
Args
folder_file_id
:int
- The folder File id
Returns
Expand source code
def get_files(folder_file_id): """Retrieve files from a folder using a given `file_id` Args: folder_file_id (int): The folder File id Returns: File: A list of `File` within a folder """ return get_files_by_condition( { "parent": folder_file_id, "deleted": 0 } )
def get_files_by_condition(conditions) ‑> File
-
Retrieve a group of file using a given set of
conditions
Args
conditions
:dict
- The conditions of the query
Returns
Expand source code
def get_files_by_condition(conditions) -> File: """Retrieve a group of file using a given set of `conditions` Args: conditions (dict): The conditions of the query Returns: list[File]: A list of `File` if found """ conditions_array = [] for condition_key, condition_value in conditions.items(): conditions_array.append({ condition_key: condition_value }) query = { "conditions": conditions_array } api_driver = api.Driver.get_instance() return api_driver.serialize( api_driver.get( url_segment='file', data=query ), File )
def get_hashes(file_id)
-
Retrieve the hashes of a file by its
file_id
Args
file_id
:int
- The File id
Returns
dict
- A hash dictionary if the file exists
Expand source code
def get_hashes(file_id): """Retrieve the hashes of a file by its `file_id` Args: file_id (int): The File id Returns: dict: A hash dictionary if the file exists """ api_driver = api.Driver.get_instance() found_file = api_driver.get( url_segment='file/' + str(file_id) ) hashes = {} if found_file is not None: hash_items = found_file['file_hash'] for hash_item in hash_items: hashes[hash_item['algo']] = hash_item['hash'] return hashes
def get_metadata(file_id, flatten=False)
-
Retrieve the metadata of a file by its
file_id
If
flatten
is enabled, it will convert the output to a dict with a better iterable structureExample
Args
file_id
:int
- The File id
flatten
:bool
- Flatten the structure from
list[item(iecode, value)]
todict[iecode, list[value]]
Returns
dict
- A metadata dictionary if the file exists
Expand source code
def get_metadata(file_id, flatten=False): """Retrieve the metadata of a file by its `file_id` If `flatten` is enabled, it will convert the output to a dict with a better iterable structure Example: Args: file_id (int): The File id flatten (bool): Flatten the structure from `list[item(iecode, value)]` to `dict[iecode, list[value]]` Returns: dict: An metadata dictionary if the file exists """ api_driver = api.Driver.get_instance() metadata = api_driver.get( url_segment='file/' + str(file_id) + '/metadata' ) if not flatten: return metadata flattened_metadata = {} if metadata is not None: for metadata_item in metadata: if not metadata_item['iecode'] in flattened_metadata: flattened_metadata[metadata_item['iecode']] = [] flattened_metadata[metadata_item['iecode']].append(metadata_item['value']) return flattened_metadata
def get_multiple_files(container_id, limit, offset)
-
Get all files from a container in batches
Args
container_id
:int
- The container id
limit
:int
- Total length of files to retrieve
offset
:int
- Number of first item
Returns
dict
- A dictionary with limit value files
Expand source code
def get_multiple_files(container_id, limit, offset): """ Get all files from a container in batches Args: container_id (int): The container id limit (int): Total lenght of files to retrieve offset (int): Number of first item Returns: dict: A dictionary with limit value files """ query = { "conditions": [ { "container_id": container_id } ], "limit": limit, "offset": offset } api_driver = api.Driver.get_instance() return api_driver.serialize( api_driver.get( url_segment='file/hash/md5', data=query ), File )
def iterate_container_file_hashes(container_id, hash_algorithm, conditions={})
-
Yielded iterator over the file hashes of a given container
Args
container_id
:int
- The container id to iterate files from
hash_algorithm
:str
- The hashing algorithm
conditions
:dict
- Conditions to use in the request
Returns
iterable
- An iterable of
File
Expand source code
def iterate_container_file_hashes(container_id, hash_algorithm, conditions = {}): """Yielded iterator over the file hashes of a given container Args: container_id (int): The container id to iterate files from hash_algorithm (str): The hashing algorithm conditions (dict): Conditions to use in the request Returns: iterable: An iterable of `File` """ offset = 0 limit = 1000 conditions_array = [{ "container_id": container_id }] for condition_key, condition_value in conditions.items(): conditions_array.append({ condition_key: condition_value }) api_driver = api.Driver.get_instance() while True: query = { "conditions": conditions_array, "limit": limit, "offset": offset } result = api_driver.get( url_segment='file/hash/' + hash_algorithm, data=query ) for item in result: yield item try: if result is None: break if len(result) == 0: break offset = offset + len(result) except Exception as e: print(e) break
def iterate_container_files(container_id)
-
Yielded iterator over the files of a given container
Args
container_id
:int
- The container id to iterate files from
Returns
iterable
- An iterable of
File
Expand source code
def iterate_container_files(container_id): """Yielded iterator over the files of a given container Args: container_id (int): The container id to iterate files from Returns: iterable: An iterable of `File` """ return iterate_files({ 'container_id': container_id, 'type': 'FILE', 'deleted': '0' })
def iterate_container_folder_file_hashes(container_id, hash_algorithm, folder_path)
-
Yielded iterator over the file hashes of a given container
Args
container_id
:int
- The container id to iterate files from
hash_algorithm
:str
- The hashing algorithm
folder_path
:str
- The path of the folder inside the container to iterate files from
Returns
iterable
- An iterable of
File
Expand source code
def iterate_container_folder_file_hashes(container_id, hash_algorithm, folder_path): """Yielded iterator over the file hashes of a given container Args: container_id (int): The container id to iterate files from hash_algorithm (str): The hashing algorithm folder_path (str): The path of the folder inside the container to iterate files from Returns: iterable: An iterable of `File` """ return iterate_container_file_hashes(container_id, hash_algorithm, { 'fullpath': { "operator": "starts_with", "value": folder_path.rstrip('/') + '/' } })
def iterate_container_folder_files(container_id, folder_path)
-
Yielded iterator over the files of a given container and a
folder_path
Args
container_id
:int
- The container id to iterate files from
folder_path
:str
- The path of the folder inside the container to iterate files from
Returns
iterable
- An iterable of
File
Expand source code
def iterate_container_folder_files(container_id, folder_path): """Yielded iterator over the files of a given container and a `folder_path` Args: container_id (int): The container id to iterate files from folder_path (str): The path of the folder inside the container to iterate files from Returns: iterable: An iterable of `File` """ return iterate_files({ 'container_id': container_id, 'type': 'FILE', 'fullpath': { "operator": "starts_with", "value": folder_path.rstrip('/') + '/' }, 'deleted': '0' })
def iterate_files(conditions) ‑> Iterator[File]
-
Yielded iterator over the files that match the given
conditions
Args
conditions
:dict
- Conditions to use in the request
Returns
iterable
- An iterable of
File
Expand source code
def iterate_files(conditions) -> Iterator[File]: """Yielded iterator over the files that match the given `conditions` Args: conditions (dict): Conditions to use in the request Returns: iterable: An iterable of `File` """ offset = 0 limit = 1000 conditions_array = [] for condition_key, condition_value in conditions.items(): conditions_array.append({ condition_key: condition_value }) api_driver = api.Driver.get_instance() while True: query = { "conditions": conditions_array, "limit": limit, "offset": offset } result = api_driver.serialize( api_driver.get( url_segment='file', data=query ), File ) for item in result: yield item try: if result is None: break if len(result) == 0: break offset = offset + len(result) except Exception as e: print(e) break
def render_fullpath(file: File)
-
Render a relative fullpath given a
File
objectArgs
Returns
str
- The fullpath of the file, relative to a container inside the bucket
Expand source code
def render_fullpath(file: File): """Render a relative fullpath given a `File` object Args: file (File): The `File` object to render the fullpath from Returns: str: The fullpath of the file, relative to a container inside the bucket """ if File is None: return "" fullpath = file.filename parent = get(file.parent) if parent is not None: fullpath = render_fullpath(parent) + fullpath if file.type == FileType.FOLDER.name: fullpath = fullpath.rstrip('/') + '/' return fullpath
def set_hash(file_id, algo, hash)
-
Set a file hash for the given
file_id
and `algorithm`
Args
file_id
:int
- The File id
algo
:str
- The hashing algorithm
hash
:str
- The hash value
Returns
dict
- An API result (see documentation)
Expand source code
def set_hash(file_id, algo, hash): """Set a file hash for the given `file_id` and `algorithm` Args: file_id (int): The File id algo (str): The hashing algorithm hash (str): The hash value Returns: dict: An API result (see documentation) """ api_driver = api.Driver.get_instance() return api_driver.post_json( url_segment='file/' + str(file_id) + '/hash', data={ "algo": algo, "hash": hash } )
def set_metadata(file_id, json_data)
-
Set the metadata values for the given
file_id
using a raw structurejson_data
This will override the existing values of every metadata descriptors of the file, use with caution
The structure is the following: { "metadata": [ { "iecode": "the metadata descriptor iecode", "value": "the metadata value" }, […] { "iecode": "another metadata descriptor iecode", "value": "another metadata value" } ] }
Args
file_id
:int
- The File id
json_data
:dict
- The Metadata structure
Returns
dict
- An API result (see documentation)
Expand source code
def set_metadata(file_id, json_data): """Set the metadata values for the given `file_id` using a raw structure `json_data` This will override the existing values of every metadata descriptors of the file, use with caution The structure is the following: { "metadata": [ { "iecode": "the metadata descriptor iecode", "value": "the metadata value" }, [...] { "iecode": "another metadata descriptor iecode", "value": "another metadata value" } ] } Args: file_id (int): The File id json_data (dict): The Metadata structure Returns: dict: An API result (see documentation) """ api_driver = api.Driver.get_instance() return api_driver.put_json( url_segment='file/' + str(file_id) + '/metadata', data={ 'metadata': json_data } )
def set_metadata_values(file_id, values)
-
Set the metadata values for the given
file_id
using a dictionary of `values`
This will override the existing values of every metadata descriptors of the file, use with caution
Args
file_id
:int
- The File id
values
:dict
- The Metadata values
Returns
dict
- An API result (see documentation)
Expand source code
def set_metadata_values(file_id, values): """Set the metadata values for the given `file_id` using a dictionary of `values` This will override the existing values of every metadata descriptors of the file, use with caution Args: file_id (int): The File id values (dict): The Metadata values Returns: dict: An API result (see documentation) """ api_driver = api.Driver.get_instance() data = { 'metadata': [] } for iecode, value in values.items(): if isinstance(value, list): for value_item in value: data['metadata'].append( { 'iecode': iecode, 'value': value_item } ) else: data['metadata'].append( { 'iecode': iecode, 'value': value } ) return api_driver.put_json( url_segment='file/' + str(file_id) + '/metadata', data=data )
Classes
class File (**entries: dict)
-
File
The main methods allows the interaction with the main structures related to files hosted in the platform
Expand source code
class File(Serializable): """File The main methods allows the interaction with the main structures related to files hosted in the platform """ id: int = 0 container_id: int storage_class_id: int parent: int filename: str fullpath: str deleted: bool size: int type: FileType = 1 structure: FileStructure = 1 format: str mime: str date_update: datetime.datetime date_create: datetime.datetime date_audit: datetime.datetime rand_audit: int
Ancestors
Class variables
var container_id : int
var date_audit : datetime.datetime
var date_create : datetime.datetime
var date_update : datetime.datetime
var deleted : bool
var filename : str
var format : str
var fullpath : str
var id : int
var mime : str
var parent : int
var rand_audit : int
var size : int
var storage_class_id : int
var structure : FileStructure
var type : FileType
class FileStructure (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
File Structure Type
Expand source code
class FileStructure(Enum): """File Structure Type """ UNSTRUCTURED = 1 OBJECTS = 2 COMMON = 3
Ancestors
- enum.Enum
Class variables
var COMMON
var OBJECTS
var UNSTRUCTURED
class FileType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
File Type
Expand source code
class FileType(Enum): """File Type """ FILE = 1 FOLDER = 2 JOURNAL = 3
Ancestors
- enum.Enum
Class variables
var FILE
var FOLDER
var JOURNAL