Delete dirty-db-cleaner.py

pull/4841/head
John McLear 2021-02-21 18:05:34 +00:00
parent 543e94fd4a
commit 2c763fb4d1
1 changed files with 0 additions and 48 deletions

View File

@ -1,48 +0,0 @@
#!/usr/bin/env PYTHONUNBUFFERED=1 python
#
# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
#
import json
import os
import sys
# Emit one progress dot per this many input lines.
PROGRESS_INTERVAL = 10000


def print_usage(program):
    """Print the command-line help text to stdout.

    program: the script name to show in the usage line.
    """
    print()
    print('Usage: %s /path/to/dirty.db' % program)
    print()
    print('Note: Will create a file named dirty.db.new in the same folder,')
    print(' please make sure permissions are OK and a file by that')
    print(' name does not exist already. This script works by omitting')
    print(' duplicate lines from the dirty.db file, keeping only the')
    print(' last (latest) instance. No revision data should be lost,')
    print(' but be careful, make backups. If it breaks you get to keep')
    print(' both pieces!')
    print()


def compact_lines(lines, progress=None):
    """Keep only the last line seen for each record key.

    Every line is parsed as JSON and indexed by its 'key' field; a later
    line with the same key replaces the earlier one. Lines that are not
    valid JSON, or that have no usable 'key', are reported on stdout and
    skipped.

    lines: iterable of text lines (for example an open file).
    progress: optional writable stream; a '.' is written to it every
        PROGRESS_INTERVAL lines. Pass None to disable progress output.

    Returns a (latest, total) tuple: 'latest' maps each key to its most
    recent line (always newline-terminated), 'total' is the number of
    lines read, including skipped ones.
    """
    latest = {}
    total = 0
    for total, line in enumerate(lines, start=1):
        try:
            key = json.loads(line)['key']
            # The final line of a file may lack a trailing newline. The dict
            # keeps a key at the position of its first appearance, so such a
            # line can be written mid-file; terminate it so it cannot fuse
            # with the record that follows it in the output.
            latest[key] = line if line.endswith('\n') else line + '\n'
        except (ValueError, KeyError, TypeError):
            # ValueError: malformed JSON; KeyError: no 'key' field;
            # TypeError: JSON value is not an object, or key is unhashable.
            print("Skipping invalid JSON!")
        if progress is not None and total % PROGRESS_INTERVAL == 0:
            progress.write('.')
    return latest, total


def main(argv=None):
    """Run the cleaner as a command-line program.

    argv: argument list including the program name; defaults to sys.argv.

    Reads the dirty.db file named by argv[1] and writes the de-duplicated
    lines to '<argv[1]>.new'. Returns the process exit status: 1 (after
    printing usage) when the argument is missing, the input does not exist,
    or the output file already exists; 0 on success.
    """
    if argv is None:
        argv = sys.argv
    program = argv[0] if argv else 'dirty-db-cleaner.py'

    if len(argv) < 2:
        print_usage(program)
        return 1

    dirtydb_input = argv[1]
    dirtydb_output = '%s.new' % dirtydb_input
    # Explicit checks rather than assert: asserts vanish under `python -O`,
    # which would allow an existing .new file to be overwritten.
    if not os.path.exists(dirtydb_input) or os.path.exists(dirtydb_output):
        print_usage(program)
        return 1

    # Explicit UTF-8 so the result does not depend on the process locale.
    with open(dirtydb_input, 'r', encoding='utf-8') as fd:
        print('Reading %s' % dirtydb_input)
        dirtydb, lines = compact_lines(fd, progress=sys.stderr)
    print()
    print('OK, found %d unique keys in %d lines' % (len(dirtydb), lines))

    with open(dirtydb_output, 'w', encoding='utf-8') as fd:
        fd.writelines(dirtydb.values())
    print('Wrote data to %s. All done!' % dirtydb_output)
    return 0


if __name__ == '__main__':
    sys.exit(main())