# citing: https://www.reddit.com/r/Python/comments/jfx7wy/how_to_quickly_remove_duplicates_from_a_list/
#
# A tour of ways to remove duplicates from a sequence, each one compared with
# the closest shell pipeline. Run as a script; every method prints its result.

from collections import OrderedDict
from itertools import groupby
from random import randrange

# Let's make some duplicates (one million numbers, each in the range 0..99;
# randrange(100) never returns 100).
DUPLICATES = [randrange(100) for _ in range(1_000_000)]


### METHOD 1 ###
# Not very efficient - similar to "sort | uniq" but the original order is kept.
# Every membership test scans the `unique` list, so the cost grows with the
# number of distinct values seen so far.
unique = []
for element in DUPLICATES:
    if element not in unique:
        unique.append(element)
print(unique)
# Example output (the input is random, so the order differs on every run):
# [95, 62, 73, 27, 89, 88, 31, 50, ..., 38, 41, 43, 34, 49]


### METHOD 2 ###
# Very efficient - similar to "sort | uniq".
# This works because sets contain unique items by definition.
# NOTE: sets are unordered. The ascending result shown below is not something
# the language promises; use sorted(set(DUPLICATES)) when sorted output matters.
unique_unordered = list(set(DUPLICATES))
print(unique_unordered)
# Example output:
# [0, 1, 2, 3, 4, 5, ..., 95, 96, 97, 98, 99]


### METHOD 3 ###
# Very efficient and the original order is preserved.
# But sets are unordered! What if we need to preserve the order?
# Use this dict.fromkeys() trick: dict keys are unique and keep insertion order.
# Resembles "sort | uniq" but with the original order preserved
# (the shell equivalent is awk '!seen[$0]++').
# It only works on Python 3.6 and above (a language guarantee from 3.7 on).
unique_ordered = list(dict.fromkeys(DUPLICATES))
print(unique_ordered)
# Output: the same list, in the same order, as METHOD 1.


### METHOD 4 ###
# Older Python - same result as METHOD 3.
# For Python 2.7 and 3.0-3.5, where a plain dict does not keep insertion
# order, use OrderedDict instead:
unique_ordered_legacy = list(OrderedDict.fromkeys(DUPLICATES))
print(unique_ordered_legacy)
# Output: the same list, in the same order, as METHOD 1.


### METHOD 5 ###
# Crunch duplicates as they appear and show the count.
# Similar to bash's "uniq -c": only *adjacent* repeats are merged.
# Shown here on a string; change the input to a list and it works on a list.
# (Named `text` rather than `input` so the builtin input() is not shadowed.)
text = "Raaanndommmm Leetters"

# METHOD 5.1 #
for char, run in groupby(text):
    # `run` is a one-shot iterator over the repeated items; count it by
    # materialising it.
    print(len(list(run)), char)
# Output (the seventh line is the run of one space character):
# 1 R
# 3 a
# 2 n
# 1 d
# 1 o
# 4 m
# 1
# 1 L
# 2 e
# 2 t
# 1 e
# 1 r
# 1 s

# METHOD 5.2 #
# Or you can return a list for an output:
run_counts = []
for char, run in groupby(text):
    run_counts.append(f"{len(list(run))} {char}")
print(run_counts)
# Output:
# ['1 R', '3 a', '2 n', '1 d', '1 o', '4 m', '1  ', '1 L', '2 e', '2 t',
#  '1 e', '1 r', '1 s']

# METHOD 5.3 #
# Same solution as a one liner:
print([f"{len(list(run))} {char}" for char, run in groupby(text)])
# Output: identical to METHOD 5.2.


### METHOD 6 ###
# Crunch out duplicates but don't show the count (like plain "uniq").
# Reuses `text` and `groupby` from METHOD 5.

# METHOD 6.1 #
for char, _ in groupby(text):
    print(str(char))
# Output (one item per line; the seventh line is the space character):
# R
# a
# n
# d
# o
# m
#
# L
# e
# t
# e
# r
# s

# METHOD 6.2 #
collapsed = []
for char, _ in groupby(text):
    collapsed.append(str(char))
print(collapsed)
# Output:
# ['R', 'a', 'n', 'd', 'o', 'm', ' ', 'L', 'e', 't', 'e', 'r', 's']

# METHOD 6.3 #
print([str(char) for char, _ in groupby(text)])
# Output: identical to METHOD 6.2.