I.T. Spices The LINUX Way

Python In The Shell: The STEEMIT Ecosystem – Post #104

THE PYTHON SCRIPT

This post will just present the whole python script as discussed from Post #94 to Post #103.

You can view it here in one go:

#!/usr/bin/python3.6

###NOTES
#########FIND------> p1 = re.findall('(?<="body": ").*?(?=", "json_metadata")', lll)
#########REPLACE---> p2 = re.sub('"body": ".*?", "json_metadata"', ('"body": "' + p22 + '", "json_metadata"'), lll)
#########jq -Rc '. as $raw | try fromjson catch $raw'

###LOAD MODULES
import sys
import os
import shutil
import time
import json
import pymysql
import datetime
import re
import subprocess
#import sh
from jq import jq

###GET TIME NOW
ddate = (datetime.datetime.now().strftime("%m%d%Y-%H%M%S"))

###INFOS
logfile = ('/dev/shm/steem.blockchain-incomplete_resume_' + ddate + '.logs')
print (' ')
print ('This python script will RESUME the insert of JSON queries into MYSQL')
print('This python script will RESUME the insert of JSON queries into MYSQL' + ddate, file=open(logfile, 'a'))
print (' ')
time.sleep(3)

###FOLDERS
tempdir = '/dev/shm/insertjson'
###CREATE THE FOLDERS
#TEMP
if not os.path.exists(tempdir):
    os.mkdir(tempdir)
insertfile = ('/root/STEEM/steem.blockchain.json')
destfile = ('/dev/shm/steem.blockchain-incomplete_resume.json')

###LOG THIS RUN
print('---------', file=open(logfile, 'a'))
print('Timestamp for this insert is: ' + ddate, file=open(logfile, 'a'))

###WHAT IS THE LAST RECORD ON THE DB
###LAST LINE ID
c = ('mysql -h172.17.0.4 -u root -p1234567 -Dsteemit_blockchain -e ' + '"SELECT max(id) FROM data01"')
maxidnum = (os.popen(c).read()).splitlines()
if maxidnum[1] == "NULL":
    maxid = int(0)
    counter = int(maxid) + 1
elif int(maxidnum[1]) > 0:
    maxid = int(maxidnum[1])
    counter = int(maxid) + 1
else:
    print('MAXID is not found.....')

###BLOCK_ID OF LAST LINE
cc = ('mysql -h172.17.0.4 -u root -p1234567 -Dsteemit_blockchain -e ' + '"SELECT block_id FROM data01 WHERE id = ' + str(maxid) + '"')
blockid = (os.popen(cc).read()).splitlines()

###FIND THE LINE NUMBER OF THE BLOCK_ID IN THE INSERTFILE
if maxid != 0:
    findthis = ('"block_id": "' + blockid[1] + '"')
    ccc = ("grep -m1 -n '" + findthis + "' " + insertfile + " | cut --delimiter=':' -f1")   
    line = int((os.popen(ccc).read()).strip()) + 1
    lastline = int(line) + int(299999)
    endline = int(lastline) + 1
else:
    line = int(1)
    lastline = line + int(299999)
    endline = int(lastline) + 1

###PRINT SOME IMPORTANT INFOS
print('Line number ' + str(line) + ' will be processed until line number ' + str(lastline) + ' from the blockchain file ' + insertfile + '.......')
print('Line number ' + str(line) + ' will be processed until line number ' + str(lastline) + ' from the blockchain file ' + insertfile + '.......', file=open(logfile, 'a'))

###EXTRACT THE LINE STARTING FROM MAXID UP TO LAST AND SAVE TO DESTFILE
###DELETE DESTFILE FIRST IF EXIST
try:
    os.remove(destfile)
except OSError:
    pass
if int(line) != 0:
    eee = ("sed -n '" + str(line) + "," + str(lastline) + "p;" + str(endline) + "q' " + insertfile + " > " + destfile)
    os.system(eee)
else:
    print('LINE is equal of greater than COUNTLINE.......')

###CONNECT TO DB FIRST
###MARIADB CONNECTIONS
db = pymysql.connect(host='172.17.0.4', user='root', password='1234567', db='steemit_blockchain', charset='utf8')
cursor = db.cursor()

###CHECK THE DESTINATION FILE
dd = ('sed -n "$=" ' + destfile)
checkfile = (os.popen(dd).read()).strip()
if (str(checkfile) != str(0)) and os.path.isfile(destfile):
    
    ###BE SURE TO OPEN FILE IN UTF-8
    list = open(destfile, 'r', encoding='utf-8')
    print('There is/are new blocks, I will update the database with about ' + checkfile + ' new records; please wait.......')
    print('There is/are new blocks, I will update the database with about ' + checkfile + ' new records; please wait.......', file=open(logfile, 'a'))
    time.sleep(9)

    ###LOOP LINE PER LINE HERE USING WHILE
    for line in list:
        
        ###WHOLE BLOCKDATA HERE UNALTERED
        blockdata = str(line).rstrip()
    
        ###PYTHON
        jason = json.loads(line)
        if (jason["block_id"]) == []:
            block_id = "NA"
        else:
            try:
                block_id = (jason["block_id"])
            except (OSError, TypeError):
                block_id = "ERROR"
        
        if (jason["extensions"]) == []:
            extensions = "NA"
        else:
            try:
                extensions = (jason["extensions"])
            except (OSError, TypeError):
                extensions = "ERROR"
        
        if (jason["previous"]) == []:
            previous = "NA"
        else:
            try:
                previous = (jason["previous"])
            except (OSError, TypeError):
                previous = "ERROR"
        
        if (jason["signing_key"]) == []:
            signing_key = "NA"
        else:
            try:
                signing_key = (jason["signing_key"])
            except (OSError, TypeError):
                signing_key = "ERROR"
        
        if (jason["timestamp"]) == []:
            timestamp = "NA"
        else:
            try:
                timestamp = (jason["timestamp"])
            except (OSError, TypeError):
                timestamp = "ERROR"
        
        if (jason["transaction_merkle_root"]) == []:
            transaction_merkle_root = "NA"
        else:
            try:
                transaction_merkle_root = (jason["transaction_merkle_root"])
            except (OSError, TypeError):
                transaction_merkle_root = "ERROR"
        
        if (jason["witness"]) == []:
            witness = "NA"
        else:
            try:
                witness = (jason["witness"])
            except (OSError, TypeError):
                witness = "ERROR"
        
        if (jason["witness_signature"]) == []:
            witness_signature = "NA"
        else:
            try:
                witness_signature = (jason["witness_signature"])
            except (OSError, TypeError):
                witness_signature = "ERROR"
        
        if (jason["transactions"]) == []:
            transactions = "NA"
            operation_type = "NA"
            transaction_ids = "NA"
            print('\nI am processing block_id: ' + block_id)
            ###PRINT COUNTER
            print('COUNTER ' + str(counter))
            ###EXECUTE FIRST LOOP SQL HERE
            sql1 = ("INSERT INTO `data01`(counter, block_id, extensions, previous, signing_key, timestamp, transaction_ids, transaction_merkle_root, witness, witness_signature, operation_type, transactions, blockdata)VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
            data1 = (int(counter), block_id, str(extensions), previous, signing_key, timestamp, str(transaction_ids), transaction_merkle_root, witness, witness_signature, operation_type, transactions, blockdata)
            # Execute the SQL command
            cursor.execute(sql1, (data1))
            db.commit()
            counter = int(counter) + 1

    ###GET TIME NOW
    ddate = (datetime.datetime.now().strftime("%m%d%Y-%H%M%S"))
    print('Timestamp for FINISH insert is: ' + ddate + '\n\n', file=open(logfile, 'a'))

else:
    ddate = (datetime.datetime.now().strftime("%m%d%Y%H%S"))
    print('Timestamp for this insert is: ' + ddate, file=open(logfile, 'a'))
    print('No new records found.......')
    print('No new records found.......\n\n', file=open(logfile, 'a'))
    time.sleep(9)

###RELAY THE LOGS TO THE CENTRAL LOG SERVER
#ee = ('cat ' + logfile + ' | ssh [email protected] "cat >> /MARIO/BACKUPS/LOGS/insert-json-into-mysql_resume.logs"')
ee = ('cat ' + logfile + ' >> /MARIO/BACKUPS/LOGS/insert-json-into-mysql_resume.logs')
os.system(ee)

###CLOSE MARIADB CONNECTION AFTER
db.close()

###CLEAN UP FOLDERS
shutil.rmtree(tempdir, ignore_errors=True)



On our next topic, we will discuss how to crawl or scrape the STEEMIT website of any user.

Have fun in your python study and always remember, only you can limit yourself!!!


“The Less Components, The Less Problems….. At Least In Programming.”