"""Plot record index against enqueue and emission time for JSON-lines dumps.

The script walks a folder, reads every file as JSON lines (one JSON document
per line), and saves a PNG showing, for each record, its position in the
read order against its ``EnqueuedTimeUtc`` and ``Body.emissionDate``
timestamps.
"""

import json
import os
import pprint
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Timestamp layout accepted by both parsers below (second resolution, 'Z').
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def process_folder(folder_path, table):
    """Recursively read every file under *folder_path* into *table*.

    Directories and files are visited in sorted name order so that the
    position of each record in *table* (used as the plot's y-axis) is
    reproducible; ``os.walk`` on its own yields entries in arbitrary
    filesystem order.

    Args:
        folder_path: Root folder to walk. A missing or empty path yields
            no files and leaves *table* untouched.
        table: List that parsed JSON documents are appended to in place.
    """
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place steers the order in which os.walk descends.
        dirs.sort()
        for file_name in sorted(files):
            process_file(os.path.join(root, file_name), table)


def process_file(file_path, table):
    """Parse *file_path* as JSON lines and append each document to *table*.

    Blank or whitespace-only lines are skipped silently. Any other line
    that is not valid JSON is reported on stdout and skipped, so one bad
    line does not abort the whole file.

    Args:
        file_path: Path of the file to read; decoded as UTF-8.
        table: List that parsed JSON documents are appended to in place.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # A trailing newline or spacer line is not a decode error.
                continue
            try:
                json_data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file: {file_path}")
            else:
                table.append(json_data)


def _parse_enqueued_time(value):
    """Parse an ``EnqueuedTimeUtc`` string, discarding sub-second digits.

    Keeps the first 19 characters (``YYYY-MM-DDTHH:MM:SS``) plus the final
    character, which must be ``Z`` for the parse to succeed.
    """
    return datetime.strptime(value[:19] + value[-1], _TIMESTAMP_FORMAT)


def _parse_emission_time(value):
    """Parse an ``emissionDate`` string of the form ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.strptime(value, _TIMESTAMP_FORMAT)


def _build_figure_name(folder_path):
    """Return ``index_<last four path components joined by '_'>.png``.

    Trailing separators are dropped first so that ``a/b/`` and ``a/b``
    produce the same name, and the OS separator is treated like ``/``.
    """
    normalized = folder_path.replace(os.sep, '/').rstrip('/')
    parts = normalized.split('/')[-4:]
    return "index_" + '_'.join(parts) + ".png"


def main(folder_path):
    """Load all records under *folder_path* and save the index/time plot.

    Each record must be a mapping with an ``EnqueuedTimeUtc`` string and a
    ``Body`` mapping that contains an ``emissionDate`` string. Each record
    is echoed to stdout as ``<index> <enqueued> <emission>``. The figure is
    written to the current working directory under a name derived from the
    last four components of *folder_path*.

    Args:
        folder_path: Root folder containing the JSON-lines files.

    Raises:
        KeyError: If a record lacks one of the expected keys.
        ValueError: If a timestamp does not match the expected layout.
    """
    table = []
    process_folder(folder_path, table)

    index_table = []
    enqueued_table = []
    emission_table = []
    for index, record in enumerate(table):
        enqueued = record['EnqueuedTimeUtc']
        emission = record['Body']['emissionDate']
        print(index, end=' ')
        print(enqueued, end=' ')
        print(emission)
        index_table.append(index)
        enqueued_table.append(enqueued)
        emission_table.append(emission)

    emission_dates = [_parse_emission_time(date) for date in emission_table]
    enqueued_dates = [_parse_enqueued_time(date) for date in enqueued_table]

    figure = plt.figure(figsize=(10, 6))
    try:
        plt.plot(enqueued_dates, index_table, label='Enqueued')
        plt.plot(emission_dates, index_table, label='Emission')
        plt.xlabel('Time')
        plt.ylabel('Index')
        plt.title('Index vs Time')
        plt.legend()
        plt.grid(True)
        plt.xticks(rotation=45)
        plt.savefig(_build_figure_name(folder_path), bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(figure)


if __name__ == "__main__":
    folder_path = ''
    main(folder_path)