This might be a very broad question. I wanted to create a way to represent strings that would support O(1) append, O(1) append to the left, and O(1) comparison while maintaining O(N) slicing and O(1) indexing. My idea is that I would store unicode characters as their unicode number, and use mathematical operations to add and delete characters from either end. I called it NumString:
class Numstring:
def __init__(self, init_str=""):
self.num = 0
self.length = 0
for char in init_str:
self.append(char)
def toString(self, curr=None):
if curr is None:
curr = self.num
retlst = []
while curr:
char = chr(curr % 10000)
retlst.append(char)
curr = curr // 10000
return "".join(retlst)
def appendleft(self, char):
assert len(char) == 1
self.num *= 10000
self.num += ord(char)
self.length += 1
def append(self, char):
self.num += ord(char)*(10000**self.length)
self.length += 1
def pop(self):
# self.num = self.num % (10000**self.length-1)
self.num = self.num % 10000**(self.length-1)
self.length -= 1
def popleft(self):
self.num = self.num // 10000
self.length -= 1
def compare(self, numstring2):
return self.num == numstring2.num
def size(self):
return self.length
def get(self, start, end=None):
if start >= self.length:
raise IndexError("index out of bounds")
if end and end > self.length + 1:
raise IndexError("index out of bounds")
if end is not None:
if start == end:
return ""
curr = self.num
curr = curr // (10000 ** start)
curr = curr % 10000**(end)
return self.toString(curr)
else:
curr = self.num
curr = curr // (10000 ** start)
char = chr(curr % 10000)
return char
Here is the profiling code:
import string
import time
import random
from NumString import Numstring
import numpy as np
import matplotlib.pyplot as plt
if __name__ == '__main__':
numstring_times = []
regstring_times = []
numstring = Numstring("abc")
regstring = "abc"
for i in range(0, 10000):
char_to_add = random.choice(string.ascii_letters)
start_time = time.time()
numstring.append(char_to_add)
end_time = time.time()
numstring_times.append(end_time-start_time)
start_time = time.time()
regstring += char_to_add
end_time = time.time()
regstring_times.append(end_time-start_time)
plt.plot(numstring_times, label="Numstring")
plt.plot(regstring_times, label="Regular strings")
plt.legend()
plt.show()
It works the way I want it to, but when I tested the time it takes to append to the string and my Numstring class performed way worse. I understood that there would be overhead, but I didn't expect the overall trend to be way worse than concatenating strings in python.
Curiously, comparison times are actually better for Numstrings than regular strings:
I've realized I don't really understand how integer operations in Python are implemented and what the limitations of infinite integer precision are. Could somebody help me understand what I'm missing?

