From e3c42973b2f6fb1ceb277b2681e8ebccd6b588df Mon Sep 17 00:00:00 2001 From: Paul Georgiou Date: Fri, 10 Jul 2015 19:21:21 +0300 Subject: Update Linearize tool to support Windows paths --- contrib/linearize/linearize-data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 7947c6bf7..0f6fde2a6 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -12,6 +12,7 @@ import json import struct import re import os +import os.path import base64 import httplib import sys @@ -115,19 +116,20 @@ class BlockDataCopier: self.setFileTime = True if settings['split_timestamp'] != 0: self.timestampSplit = True - # Extents and cache for out-of-order blocks + # Extents and cache for out-of-order blocks self.blockExtents = {} self.outOfOrderData = {} self.outOfOrderSize = 0 # running total size for items in outOfOrderData def writeBlock(self, inhdr, blk_hdr, rawblock): - if not self.fileOutput and ((self.outsz + self.inLen) > self.maxOutSz): + blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock) + if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz): self.outF.close() if self.setFileTime: os.utime(outFname, (int(time.time()), highTS)) self.outF = None self.outFname = None - self.outFn = outFn + 1 + self.outFn = self.outFn + 1 self.outsz = 0 (blkDate, blkTS) = get_blk_dt(blk_hdr) @@ -147,7 +149,7 @@ class BlockDataCopier: if self.fileOutput: outFname = self.settings['output_file'] else: - outFname = "%s/blk%05d.dat" % (self.settings['output'], outFn) + outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn) print("Output file " + outFname) self.outF = open(outFname, "wb") @@ -165,7 +167,7 @@ class BlockDataCopier: (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex))) def inFileName(self, fn): - return "%s/blk%05d.dat" % (self.settings['input'], fn) + return os.path.join(self.settings['input'], "blk%05d.dat" % fn) def fetchBlock(self, extent): '''Fetch block contents from disk given extents''' -- cgit v1.2.3 From 873e81f89b3197580aca3dd4a591d00e41696a89 Mon Sep 17 00:00:00 2001 From: Matthew King Date: Sun, 26 Jun 2016 16:47:03 +0300 Subject: Use portable #! in python scripts (/usr/bin/env) --- contrib/linearize/linearize-data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 0f6fde2a6..8badb4b31 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # # linearize-data.py: Construct a linear, no-fork version of the chain. # -- cgit v1.2.3 From 27765b6403cece54320374b37afb01a0cfe571c3 Mon Sep 17 00:00:00 2001 From: isle2983 Date: Sat, 31 Dec 2016 11:01:21 -0700 Subject: Increment MIT Licence copyright header year on files modified in 2016 Edited via: $ contrib/devtools/copyright_header.py update . --- contrib/linearize/linearize-data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 8badb4b31..d0417748f 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -2,7 +2,7 @@ # # linearize-data.py: Construct a linear, no-fork version of the chain. # -# Copyright (c) 2013-2014 The Bitcoin Core developers +# Copyright (c) 2013-2016 The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. # -- cgit v1.2.3 From d5aa19813c32eff02d4b65ac9a4131f3eed04825 Mon Sep 17 00:00:00 2001 From: Doug Date: Tue, 20 Dec 2016 19:25:24 -0800 Subject: Allow linearization scripts to support hash byte reversal Currently, the linearization scripts require input hashes to be in one endian form. Add support for byte reversal. --- contrib/linearize/linearize-data.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index d0417748f..adea35dbd 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -23,6 +23,12 @@ from collections import namedtuple settings = {} +##### Switch endian-ness ##### +def hex_switchEndian(s): + """ Switches the endianness of a hex string (in pairs of hex chars) """ + pairList = [s[i]+s[i+1] for i in range(0,len(s),2)] + return ''.join(pairList[::-1]) + def uint32(x): return x & 0xffffffffL @@ -69,17 +75,21 @@ def get_blk_dt(blk_hdr): dt_ym = datetime.datetime(dt.year, dt.month, 1) return (dt_ym, nTime) +# When getting the list of block hashes, undo any byte reversals. def get_block_hashes(settings): blkindex = [] f = open(settings['hashlist'], "r") for line in f: line = line.rstrip() + if settings['rev_hash_bytes'] == 'true': + line = hex_switchEndian(line) blkindex.append(line) print("Read " + str(len(blkindex)) + " hashes") return blkindex +# The block map shouldn't give or receive byte-reversed hashes. def mkblockmap(blkindex): blkmap = {} for height,hash in enumerate(blkindex): @@ -265,6 +275,12 @@ if __name__ == '__main__': settings[m.group(1)] = m.group(2) f.close() + # Force hash byte format setting to be lowercase to make comparisons easier. + # Also place upfront in case any settings need to know about it. + if 'rev_hash_bytes' not in settings: + settings['rev_hash_bytes'] = 'false' + settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower() + if 'netmagic' not in settings: settings['netmagic'] = 'f9beb4d9' if 'genesis' not in settings: @@ -295,9 +311,8 @@ if __name__ == '__main__': blkindex = get_block_hashes(settings) blkmap = mkblockmap(blkindex) + # Block hash map won't be byte-reversed. Neither should the genesis hash. if not settings['genesis'] in blkmap: print("Genesis block not found in hashlist") else: BlockDataCopier(settings, blkindex, blkmap).run() - - -- cgit v1.2.3 From 3c8f63ba7c7be62d462727f4d67633e1064f0f79 Mon Sep 17 00:00:00 2001 From: Doug Date: Tue, 20 Dec 2016 19:27:01 -0800 Subject: Make linearize scripts Python 3-compatible. --- contrib/linearize/linearize-data.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index adea35dbd..043bf2e81 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # linearize-data.py: Construct a linear, no-fork version of the chain. # @@ -8,29 +8,33 @@ # from __future__ import print_function, division +try: # Python 3 + import http.client as httplib +except ImportError: # Python 2 + import httplib import json import struct import re import os import os.path import base64 -import httplib import sys import hashlib import datetime import time from collections import namedtuple +from binascii import hexlify, unhexlify settings = {} ##### Switch endian-ness ##### def hex_switchEndian(s): """ Switches the endianness of a hex string (in pairs of hex chars) """ - pairList = [s[i]+s[i+1] for i in range(0,len(s),2)] - return ''.join(pairList[::-1]) + pairList = [s[i:i+2].encode() for i in range(0, len(s), 2)] + return b''.join(pairList[::-1]).decode() def uint32(x): - return x & 0xffffffffL + return x & 0xffffffff def bytereverse(x): return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) | @@ -41,14 +45,14 @@ def bufreverse(in_buf): for i in range(0, len(in_buf), 4): word = struct.unpack('@I', in_buf[i:i+4])[0] out_words.append(struct.pack('@I', bytereverse(word))) - return ''.join(out_words) + return b''.join(out_words) def wordreverse(in_buf): out_words = [] for i in range(0, len(in_buf), 4): out_words.append(in_buf[i:i+4]) out_words.reverse() - return ''.join(out_words) + return b''.join(out_words) def calc_hdr_hash(blk_hdr): hash1 = hashlib.sha256() @@ -65,7 +69,7 @@ def calc_hash_str(blk_hdr): hash = calc_hdr_hash(blk_hdr) hash = bufreverse(hash) hash = wordreverse(hash) - hash_str = hash.encode('hex') + hash_str = hexlify(hash).decode('utf-8') return hash_str def get_blk_dt(blk_hdr): @@ -217,7 +221,7 @@ class BlockDataCopier: inMagic = inhdr[:4] if (inMagic != self.settings['netmagic']): - print("Invalid magic: " + inMagic.encode('hex')) + print("Invalid magic: " + hexlify(inMagic).decode('utf-8')) return inLenLE = inhdr[4:] su = struct.unpack(" Date: Tue, 10 Jan 2017 23:30:14 +0100 Subject: Remove unused Python imports --- contrib/linearize/linearize-data.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 043bf2e81..3fdec134b 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -8,16 +8,10 @@ # from __future__ import print_function, division -try: # Python 3 - import http.client as httplib -except ImportError: # Python 2 - import httplib -import json import struct import re import os import os.path -import base64 import sys import hashlib import datetime -- cgit v1.2.3 From b9d95bd9a1b3e8613373ecd228805518f2852985 Mon Sep 17 00:00:00 2001 From: Douglas Roark Date: Wed, 18 Jan 2017 22:22:46 -0800 Subject: Fix various minor linearization script issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The last-timestamp-encountered variable wasn’t being used properly. Rewrite code to properly allow for new blockchain files to be written when split by month. - Properly set a blockchain file’s access and modify times. - Add a “debug output” option to quiet certain output that might not always be desirable. - Update the README. --- contrib/linearize/linearize-data.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'contrib/linearize/linearize-data.py') diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 3fdec134b..afcec2b60 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -134,7 +134,7 @@ class BlockDataCopier: if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz): self.outF.close() if self.setFileTime: - os.utime(outFname, (int(time.time()), highTS)) + os.utime(self.outFname, (int(time.time()), self.highTS)) self.outF = None self.outFname = None self.outFn = self.outFn + 1 @@ -142,12 +142,12 @@ class BlockDataCopier: (blkDate, blkTS) = get_blk_dt(blk_hdr) if self.timestampSplit and (blkDate > self.lastDate): - print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str) - lastDate = blkDate - if outF: - outF.close() - if setFileTime: - os.utime(outFname, (int(time.time()), highTS)) + print("New month " + blkDate.strftime("%Y-%m") + " @ " + self.hash_str) + self.lastDate = blkDate + if self.outF: + self.outF.close() + if self.setFileTime: + os.utime(self.outFname, (int(time.time()), self.highTS)) self.outF = None self.outFname = None self.outFn = self.outFn + 1 @@ -155,11 +155,11 @@ class BlockDataCopier: if not self.outF: if self.fileOutput: - outFname = self.settings['output_file'] + self.outFname = self.settings['output_file'] else: - outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn) - print("Output file " + outFname) - self.outF = open(outFname, "wb") + self.outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn) + print("Output file " + self.outFname) + self.outF = open(self.outFname, "wb") self.outF.write(inhdr) self.outF.write(blk_hdr) @@ -223,13 +223,16 @@ class BlockDataCopier: blk_hdr = self.inF.read(80) inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen) - hash_str = calc_hash_str(blk_hdr) - if not hash_str in blkmap: - print("Skipping unknown block " + hash_str) + self.hash_str = calc_hash_str(blk_hdr) + if not self.hash_str in blkmap: + # Because blocks can be written to files out-of-order as of 0.10, the script + # may encounter blocks it doesn't know about. Treat as debug output. + if settings['debug_output'] == 'true': + print("Skipping unknown block " + self.hash_str) self.inF.seek(inLen, os.SEEK_CUR) continue - blkHeight = self.blkmap[hash_str] + blkHeight = self.blkmap[self.hash_str] self.blkCountIn += 1 if self.blkCountOut == blkHeight: @@ -295,12 +298,15 @@ if __name__ == '__main__': settings['max_out_sz'] = 1000 * 1000 * 1000 if 'out_of_order_cache_sz' not in settings: settings['out_of_order_cache_sz'] = 100 * 1000 * 1000 + if 'debug_output' not in settings: + settings['debug_output'] = 'false' settings['max_out_sz'] = int(settings['max_out_sz']) settings['split_timestamp'] = int(settings['split_timestamp']) settings['file_timestamp'] = int(settings['file_timestamp']) settings['netmagic'] = unhexlify(settings['netmagic'].encode('utf-8')) settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz']) + settings['debug_output'] = settings['debug_output'].lower() if 'output_file' not in settings and 'output' not in settings: print("Missing output file / directory") -- cgit v1.2.3