Elementary Data Structures in C++

During my job I have to analyse and correct mathematical models that aim to evaluate risk in C++ and automise processes in Python. As I come from a finance background, I thought it could be a good exercise to look at some elementary data structure and code them from scratch in these two programming languages. Here comes the version C++.

1. Stacks

Stacks are dynamic sets. An element that is removed from a stack is always the last one that was added to it, so a stack has a LIFO (last-in, first out) policy. We can refer to the delete operation on a stack as pop– we drop the last element added. Similarly, adding an element to a stack is often referred as push. We can think of a stack as a stack of cards. Suppose we can remove cards only one by one, then the card we remove first is always the last added.

We can think of a stack as an array of elements. Let’s assume that this array can hold maximum elements. It is clear that we cannot add more than n element to this stack, and we cannot pop any element of it if it is empty. Then we need to keep track on the last index at which we added an element, since this would be the first one to be removed. Let’s call this index top. 

Now we can first initialise an array.

#include <cstdint>
#include <memory>
#include <stdexcept>

#include <utils.h>

namespace structures {
/* Implement a list(data structure) using arrays */
    
template <typename T>
class Arraylist {
public:
    Arraylist() = default;

    Arraylist(const Arraylist<T>& other)
        : contents{copy_array(other.contents, other.size_, other.max_size_)}
        , size_{other.size_}
        , max_size_{other.max_size_} {}

    Arraylist(Arraylist<T>&& other)
        : contents{std::move(other.contents)}
        , size_{std::move(other.size_)}
        , ma    x_size_{std::move(other.max_size_)} {
        other.size_ = 0;
        other.max_size_ = 0;
    }

    Arraylist<T>& operator=(const Arraylist<T>& other) {
        Arraylist<T> copy{other};
        std::swap(contents, copy.contents);
        std::swap(size_, copy.size_);
        std::swap(max_size_, copy.max_size_);
        return *this;
    }

    Arraylist<T>& operator=(Arraylist<T>&& other) {
        Arraylist<T> copy{std::move(other)};
        std::swap(contents, copy.contents);
        std::swap(size_, copy.size_);
        std::swap(max_size_, copy.max_size_);
        return *this;
    }

    virtual ~Arraylist() = default;
    /* Construct given max_size = maximum # of elements */
    explicit Arraylist(std::size_t max_size)
        : contents{new T[max_size]}, max_size_{max_size} {}
    /* To clear all elements */
    void clear() { size_ = 0; }

    /* Add 'data' at the end of the list */
    void push_at_back(const T& data) { insert(data, size_); }

    /* Add 'data' at the beginning of the list */
    void push_at_front(const T& data) { insert(data, 0); }

    /* Insert an element ('data') at a given position ('index') of the list */
    void insert(const T& data, std::size_t index) {
        if (index > size_) {
            throw std::out_of_range("Index out of bounds");
        } else {
            for (std::size_t i = size_; i > index; i--) {
                contents[i] = contents[i - 1];
            }
            contents[index] = data;
            size_++;

            if (max_size_ == size_)
                expand(2);
        }
    }

    /* Insert an element 'data' sorted to the list */
    void insert_sorted(const T& data) {
        std::size_t i = 0;
        while (i < size_ && data >= contents[i])
            i++;
        insert(data, i);
    }

    /* Remove an element from a given position ('index') */
    T erase(std::size_t index) {
        if (empty()) {
            throw std::out_of_range("List is empty");
        } else if (index >= size_) {
            throw std::out_of_range("Index out of bounds");
        } else {
            T deleted = contents[index];
            for (std::size_t i = index; i < size_ - 1; ++i) {
                contents[i] = contents[i + 1];
            }
            size_--;

            if (max_size_ / 4 > size_)
                expand(0.5);

            return deleted;
        }
    }

    /* Remove the element at the end of the list*/
    T pop_at_back() { return erase(size_ - 1); }

    /* Remove first element of a list */
    T pop_at_front() { return erase(0); }

    /* Remove an element 'data' from the list */
    void remove(const T& data) { erase(find(data)); }

    /* Return True if list is empty */
    bool empty() const { return size_ == 0; }

    /* return True if the list contains 'data' */
    bool contains(const T& data) const { return find(data) != size_; }

    /* Return the position of 'data' in the list */
    std::size_t find(const T& data) const {
        for (std::size_t i = 0; i < size_; ++i) {
            if (contents[i] == data)
                return i;
        }
        return size_;
    }

    /* Return the list's size */
    std::size_t size() const { return size_; }

    /* Check for the 'index' that it is valid in the 
    list and then return the reference to the element 
    at the given index position */
    T& at(std::size_t index) {
        return const_cast<T&>(static_cast<const Arraylist*>(this)->at(index));
    }

    const T& at(std::size_t index) const {
        if (index >= size_) {
            throw std::out_of_range("Index out of bounds");
        } else {
            return contents[index];
        }
    }

    /* Returns the reference to the element at 'index' position of the list*/
    T& operator[](std::size_t index) {
        return const_cast<T&>(
            static_cast<const Arraylist*>(this)->operator[](index));
    }

    const T& operator[](std::size_t index) const { return contents[index]; }

    T& front() { return contents[0]; }

    const T& front() const { return contents[0]; }

    T& back() { return contents[size_ - 1]; }

    const T& back() const { return contents[size_ - 1]; }

private:
    void expand(float ratio) {
        contents = copy_array(contents, size_, max_size_ * ratio);
        max_size_ *= ratio;
    }

    static std::unique_ptr<T[]> copy_array(
        const std::unique_ptr<T[]>& original, std::size_t size,
        std::size_t new_size) {
        std::unique_ptr<T[]> copy{new T[new_size]};
        for (std::size_t i = 0; i < size; i++) {
            copy[i] = original[i];
        }
        return copy;
    }

    const static std::size_t starting_size{8};

    std::unique_ptr<T[]> contents = make_unique<T[]>(starting_size);
    std::size_t size_{0u};
    std::size_t max_size_{starting_size};
};

}

Now that we have an array, let’s have at how the stack is coded. If we add an element (push), we add this element at the end of the array.  If we delete an element of the stack, we drop the last element of the array (pop).  We also implement top, back, clear and size_t to return the first and the last element, the size of the stack or to clear all data from it.

namespace structures {

template <typename T, typename Container>
class StackWrapper {
public:
    void push(const T& data) { return cont.push_at_back(data); }
    T pop() { return cont.pop_at_back(); }
    T& top() { return cont.back(); }
    const T& top() const { return cont.back(); }
    void clear() { return cont.clear(); }
    std::size_t size() const { return cont.size(); }

private:
    Container cont;
};

template <typename T>
class Stack : public StackWrapper<T, Arraylist<T>> {};

}  

2. Queues

Queues are also dynamic set sets, however, in queues the element that gets deleted is always the one that was longest in the set. Therefore, queues follow a FIFO (first in first out) policy. It operates like a line of customer queueing in front of a French bakery. When a new customer arrives (enqueue), he has to take the last place, at the tail of the queue. When a customer leaves the queue (dequeues it), he is always at the head of the queue, and the new queue head is now the originally second client in the queue. Therefore we need to keep two indexes in memory: the head_index and the tail_index. The following schema shows a simple queue and adding one element and removing one.

 

But we can also have queues as where the queue’s tail is at an index inferior to the head’s index:

That’s why we will not use a simple array for the code of the queue as we did with the stack. Finally, the algorithm is as follows:

def ENQUEUE(queue, new_element)
queue[queue.tail] = new_element
if queue.tail == queue.length-1:
     queue.tail  = 0
else: queue.tail += 1


def DEQUEUE(queue)
x = queue.head()
if queue.head == queue.length-1:
    queue.head = 0
else: queue.head += 1

And in C++, we can code the queue with the help of a cicular_list:

 

namespace structures {

/* Double linked circular list */
template <typename T>
class Circularlist {
public:
Circularlist() = default;

Circularlist(const Circularlist<T>& other)
: head{copy_list(other.head)}, size_{other.size_} {}

Circularlist(Circularlist<T>&& other)
: head{other.head}, size_{other.size_} {
other.head = nullptr;
other.size_ = 0;
}

Circularlist<T>& operator=(const Circularlist<T>& other) {
Circularlist<T> copy{other};
std::swap(head, copy.head);
std::swap(size_, copy.size_);
return *this;
}

Circularlist<T>& operator=(Circularlist<T>&& other) {
Circularlist<T> copy{std::move(other)};
std::swap(head, copy.head);
std::swap(size_, copy.size_);
return *this;
}

~Circularlist() { clear(); }

/* Clears all elements of the list */
void clear() {
while (!empty()) {
pop_back();
}
}

/* Insert the element 'x' at the end of the list */
void push_at_back(const T& x) {
if (empty()) {
head = new Node(x);
head->next = head;
head->prev = head;
} else {
auto newNode = new Node(x, head->prev, head);
newNode->prev->next = newNode;
head->prev = newNode;
}
++size_;
}

/* Insert the element 'x' at the beginning of the list*/
void push_at_front(const T& x) {
push_back(x);
head = head->prev;
}

/* Insert the element 'x' at the given position 'index' of the list */
void insert(const T& x, std::size_t index) {
if (index == 0) {
push_back(x);
head = head->prev;
} else if (index > size_) {
throw std::out_of_range("Invalid index (insert())");
} else {
auto it = head;
for (std::size_t i = 0; i < index - 1; ++i) {
it = it->next;
}
it->next = new Node(x, it, it->next);
it->next->next->prev = it->next;
++size_;
}
}

/* Insert the element 'x' sorted in the list */
void insert_sorted(const T& x) {
if (empty() || x <= head->x)
return push_front(x);
auto it = head;
while (it->next != head && x > it->next->x) {
it = it->next;
}
auto newNode = new Node(x, it, it->next);
it->next->prev = newNode;
it->next = newNode;
++size_;
}

/* Remove an element from the given index 'index'*/
T erase(std::size_t index) {
if (index >= size_)
throw std::out_of_range("Index out of bounds (pop())");
auto oldhead = head;
for (std::size_t i = 0; i < index + 1; ++i) {
head = head->next;
}
auto out = pop_back();
head = oldhead;
return out;
}

/* Remove an element at the end of the list */
T pop_at_back() {
if (empty())
throw std::out_of_range("List is empty (pop_back())");
auto toDelete = head->prev;
head->prev = toDelete->prev;
toDelete->prev->next = head;
T out = toDelete->x;
delete toDelete;
--size_;
return out;
}

/* Remove an element at the beginning of the list */
T pop_at_front() {
if (empty())
throw std::out_of_range("List is empty (pop_front())");
head = head->next;
return pop_back();
}

/* If 'x' is in list, then removes it */
void remove(const T& x) {
auto it = head->next;
for (; it->x != x; it = it->next) {
if (it == head)
return; // Reached end of the list
}
auto oldhead = head;
head = it->next;
pop_back();
head = oldhead;
}

/* Return true if list is empty*/
bool empty() const { return size_ == 0; }

/* Return true if the list contains the element 'x' */
bool contains(const T& x) const {
if (empty())
return false;

if (head->x == x)
return true;

for (auto it = head->next; it != head; it = it->next) {
if (it->x == x)
return true;
}
return false;
}

/* Return reference of element in given index 'index'*/
T& at(std::size_t index) {
return const_cast<T&>(
static_cast<const Circularlist*>(this)->at(index));
}

const T& at(std::size_t index) const {
if (index >= size_)
throw std::out_of_range("Index out of bounds");
auto it = head;
for (std::size_t i = 0; i < index; ++i) {
it = it->next;
}
return it->x;
}

/* Return the position of element 'x' in the list */
std::size_t find(const T& x) const {
if (size_ == 0)
return 0;

if (head->x == x)
return 0;

std::size_t index = 1;
for (auto it = head->next; it != head; it = it->next) {
if (it->x == x)
break;
++index;
}
return index;
}

/* Return size of list*/
std::size_t size() const { return size_; }

T& front() { return head->x; }

const T& front() const { return head->x; }

T& back() { return head->prev->x; }

const T& back() const { return head->prev->x; }

private:
struct Node {
explicit Node(const T& x) : x{x} {}
Node(const T& x, Node* next) : x{x}, next{next} {}
Node(const T& x, Node* prev, Node* next)
: x{x}, prev{prev}, next{next} {}

T x;
Node* prev{nullptr};
Node* next{nullptr};
};

static Node* copy_list(const Node* other_head) {
Circularlist<T> copy;
copy.push_back(other_head->x);

for (auto it = other_head->next; it != other_head; it = it->next) {
copy.push_back(it->x);
}

auto p = copy.head;
copy.head = nullptr;
copy.size_ = 0;

return p;
}

Node* head{nullptr};
std::size_t size_{0u};
};

}

And finally the queue:

namespace structures {

template <typename T, typename Container>
class QueueWrapper {
public:
void push(const T& x) { return cont.push_at_back(x); }
T pop() { return cont.pop_at_front(); }
T& front() { return cont.front(); }
const T& front() const { return cont.front(); }
T& back() { return cont.back(); }
const T& back() const { return cont.back(); }
void clear() { return cont.clear(); }
std::size_t size() const { return cont.size(); }

private:
Container cont;
};

3. Linked lists

Elements in a linked lists are arranged in a linear order. The linear order is determined by the order of items (their index) in an array, however, in the linked lists, this order is determined by a pointer in each object. For instance, a linked lists can have a key attribute for each value, and a next and a previous pointers that point to the next and previous elements respectively. This list is a double-linked list. If an object’s previous pointer is Null, the element is the first element of the list. If the object’s next pointer is Null, then the object is the last element of the linked list. The next schema shows a double-linked list.

A single linked list is a list in which each node has a pointer to the next node, so we only need a pointer to the first node (in the code it is ‘head‘):

And the last node Null (in the code ‘lastnodenull’)

And the C++ implementation:

#include <cstdint>
#include <stdexcept>

namespace structures {

/* Singly linked list, with first pointer: head, with last node points to 'lastnodenull'.*/
template <typename T>
class Singlelinkedlist {
public:
Singlelinkedlist() = default;

Singlelinkedlist(const Singlelinkedlist<T>& other)
: head{copy_list(other.head)}, size_{other.size_} {}

Singlelinkedlist(Singlelinkedlist<T>&& other) : head{other.head}, size_{other.size_} {
other.head = lastnodenull;
other.size_ = 0;
}

Singlelinkedlist<T>& operator=(const Singlelinkedlist<T>& other) {
Singlelinkedlist<T> copy{other};
std::swap(head, copy.head);
std::swap(size_, copy.size_);
return *this;
}

Singlelinkedlist<T>& operator=(Singlelinkedlist<T>&& other) {
Singlelinkedlist<T> copy{std::move(other)};
std::swap(head, copy.head);
std::swap(size_, copy.size_);
return *this;
}

virtual ~Singlelinkedlist() { clear(); }

/* Clears the list */
void clear() {
while (!empty())
pop_front();
}

/* Inserts the element 'x' at the end of the list */
void push_back(const T& x) { insert(x, size_); }

/* Inserts the element 'x' at the beginning of the list */
void push_front(const T& x) {
head = new Node(x, head);
++size_;
}

/* Inserts an element 'x' at a position 'index' of the list */
void insert(const T& x, std::size_t index) {
if (index == 0) {
return push_front(x);
} else if (index > size_) {
throw std::out_of_range("Invalid index");
} else {
Node* it = head;
for (std::size_t i = 0; i < index - 1; ++i) {
it = it->next;
}
it->next = new Node(x, it->next);

++size_;
}
}

/* Inserts an element into sorted list */
void insert_sorted(const T& x) {
if (empty() || x <= head->x) {
return push_front(x);
} else {
Node* it = head;
while (it->next != lastnodenull && x > it->next->x) {
it = it->next;
}
it->next = new Node(x, it->next);

++size_;
}
}

/* Returns the reference of the element on the 'index' position of the list */
T& at(std::size_t index) {
return const_cast<T&>(static_cast<const Singlelinkedlist*>(this)->at(index));
}

const T& at(std::size_t index) const {
if (index >= size_) {
throw std::out_of_range("Index out of bounds");
} else {
Node* it = head;
for (std::size_t i = 0; i < index; i++) {
it = it->next;
}
return it->x;
}
}

/* Return an element removed at a given index */
T erase(std::size_t index) {
if (index >= size_) {
throw std::out_of_range("Index out of bounds");
} else if (index == 0) {
return pop_front();
} else {
Node* it = head;
for (std::size_t i = 0; i < index - 1; ++i) {
it = it->next;
}

T removed = it->next->x;
Node* p_removed = it->next;
it->next = it->next->next;

--size_;
delete p_removed;
return removed;
}
}

/* Removes an element at the end of the list and returns it */
T pop_back() { return erase(size_ - 1); }

/* Removes the first element of the list and returns the removed element */
T pop_front() {
if (empty()) {
throw std::out_of_range("List is empty");
} else {
T removed = head->x;
Node* old_head = head;
head = head->next;
delete old_head;
--size_;
return removed;
}
}

/* Returns an element 'x' from the list */
void remove(const T& x) {
if (head->x == x) {
pop_front();
return;
} else {
Node* it;
for (it = head; it->next->x != x; it = it->next) {
if (it->next == lastnodenull)
return;
}

Node* p_removed = it->next;
it->next = it->next->next;
delete p_removed;

--size_;
}
}

/* Return true if list is empty */
bool empty() const { return size_ == 0; }

/* Returns true if the list contains an element(x) */
bool contains(const T& x) const { return find(x) != size_; }

/* Returns the position of 'x' on the list if it is in the list */
std::size_t find(const T& x) const {
std::size_t index = 0;
for (Node* it = head; it != lastnodenull; it = it->next) {
if (it->x == x)
break;
++index;
}
return index;
}

/* Return the size of the list*/
std::size_t size() const { return size_; }

T& front() { return head->x; }

const T& front() const { return head->x; }

T& back() {
Node* it = head;
for (std::size_t i = 1; i < size_; ++i) {
it = it->next;
}
return it->x;
}

const T& back() const {
Node* it = head;
for (std::size_t i = 1; i < size_; ++i) {
it = it->next;
}
return it->x;
}

private:
struct Node {
explicit Node(const T& x) : x{x} {}
Node(const T& x, Node* next) : x{x}, next{next} {}

T x;
Node* next{lastnodenull};
};

static Node* copy_list(const Node* other_head) {
auto new_tail = new Node(other_head->x);
auto new_head = new_tail;
auto it = other_head->next;

while (it != lastnodenull) {
new_tail->next = new Node(it->x);
new_tail = new_tail->next;
it = it->next;
}

return new_head;
}

Node* head{lastnodenull};
std::size_t size_{0u};
};

}

4. Hash tables

Hash tables are an efficient ways to store data. When storing the data, it determines by a hash function in which bucket the data should go into and upon retrieval it can identify the respective bucket. In an optimal setting, the average time to search for an element is O(1). It can have no repeated entries. In the following code I implement the insert, remove, and search options in the hash table data structure.

#include <functional>
#include <array_list.h>
#include <linked_list.h>
#include <utils.h>

namespace structures {

/* HashTable implementation
params T: Data type of the elements
param Hash: Class that implements the hash function */
template <typename T, typename Hash = std::hash<T>>
class Hashtablewrapper {
public:
Hashtablewrapper() = default;

Hashtablewrapper(const Hashtablewrapper<T>& other)
: Hashtablewrapper(other.buckets_size) {
auto list = other.items();
for (std::size_t i = 0; i < list.size(); i++) {
insert(list[i]);
}
}

Hashtablewrapper(Hashtablewrapper<T>&& other)
: buckets{std::move(other.buckets)}
, buckets_size{std::move(other.buckets_size)}
, _size{std::move(other._size)} {}

Hashtablewrapper<T>& operator=(const Hashtablewrapper<T>& other) {
Hashtablewrapper<T> copy{other};
std::swap(buckets_size, copy.buckets_size);
std::swap(buckets, copy.buckets);
std::swap(_size, copy._size);
return *this;
}

Hashtablewrapper<T>& operator=(Hashtablewrapper<T>&& other) {
Hashtablewrapper<T> copy{std::move(other)};
std::swap(buckets_size, copy.buckets_size);
std::swap(buckets, copy.buckets);
std::swap(_size, copy._size);
return *this;
}

~Hashtablewrapper() = default;

/* Inserts the element 'x' into the respective bucket of the table. 
If needed, it grows the table. Returns false if element is already in the 
table (unique elements) */
bool insert(const T& x) {
auto& bucket = buckets[hash(x)];
if (bucket.contains(x)) {
return false;
} else {
bucket.push_front(x);
_size++;

if (_size == buckets_size) {
resize_table(buckets_size * 2);
}

return true;
}
}

/* Removes `x` from the table. Return true if x is found, else return false. */
bool remove(const T& x) {
try {
auto& bucket = buckets[hash(x)];
auto i = bucket.find(x);
bucket.erase(i);
_size--;

if (_size <= buckets_size / 4) {
std::size_t new_size = buckets_size / 2;
if (new_size >= starting_size)
resize_table(new_size);
}

return true;
} catch (const std::out_of_range& e) {
return false;
}
}

/* Returns true if the element 'x' is in the table */
bool contains(const T& x) const {
return buckets[hash(x)].contains(x);
}

void clear() {
Hashtablewrapper<T> ht;
*this = std::move(ht);
}

std::size_t size() const { return _size; }

/* Returns a list of items that are in the table */
Arraylist<T> items() const {
Arraylist<T> al{_size};

for (std::size_t i = 0; i < buckets_size; i++) {
for (std::size_t j = 0; j < buckets[i].size(); j++) {
al.push_back(buckets[i].at(j));
}
}

return std::move(al);
}

private:
explicit Hashtablewrapper(std::size_t buckets_size_)
: buckets{new LinkedList<T>[buckets_size_]}
, buckets_size{buckets_size_} {}

std::size_t hash(const T& x) const { return hashf(x) % buckets_size; }

void resize_table(std::size_t new_size) {
Hashtablewrapper new_ht{new_size};

auto list = items();
for (std::size_t i = 0; i < list.size(); i++) {
new_ht.insert(list.at(i));
}

*this = std::move(new_ht);
}

const static std::size_t starting_size{8};

std::unique_ptr<LinkedList<T>[]> buckets =
make_unique<LinkedList<T>[]>(starting_size);
std::size_t buckets_size{starting_size};
std::size_t _size{0};

Hash hashf{};
};

template <typename T>
class HashTable : public Hashtablewrapper<T> {};

}

One thought on “Elementary Data Structures in C++

Add yours

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.

Create a website or blog at WordPress.com

Up ↑

%d bloggers like this: