🏡 index : github.com/captn3m0/plugo.git

import pygit2
import csv
import concurrent.futures
import msgspec
from datetime import datetime

# Define the structure of the JSON data using msgspec
class PowerbankData(msgspec.Struct):
    totalAvailablePowerbanks: int

# Function to calculate totalAvailablePowerbanks from JSON data
def calculate_total_powerbanks(file_content):
    try:
        data = msgspec.json.decode(file_content, type=list[PowerbankData])
        total_powerbanks = sum(item.totalAvailablePowerbanks for item in data)
        return total_powerbanks
    except msgspec.DecodeError:
        return None

# Function to process a single commit and return results
def process_commit(repo, commit):
    try:
        tree = commit.tree
        if '_data/plugo.json' in tree:
            blob = repo[tree['_data/plugo.json'].id]
            try:
                file_content = blob.data.decode('utf-8')
                total_powerbanks = calculate_total_powerbanks(file_content)
                if total_powerbanks is not None:
                    commit_date = datetime.fromtimestamp(commit.commit_time).strftime('%Y-%m-%d')
                    return commit_date, total_powerbanks
            except Exception as e:
                return None
    except KeyError:
        pass
    return None

# Generator function to iterate through every 5th commit
def iterate_commits(repo):
    commit_count = sum(1 for _ in repo.walk(repo.head.target, pygit2.GIT_SORT_TOPOLOGICAL))
    processed_count = 0
    skip_count = 0
    
    for commit in repo.walk(repo.head.target, pygit2.GIT_SORT_TOPOLOGICAL):
        processed_count += 1
        if processed_count % 5 != 0:
            continue
        
        skip_count += 1
        progress = (skip_count / (commit_count // 5)) * 100
        print(f'Processing commit {skip_count}/{commit_count // 5} ({progress:.2f}%)', end='\r')
        
        yield commit

# Main function to process commits using concurrent.futures
def main():
    # Open the repository
    repo_path = '.'  # Assuming the script is run from the root of the repository
    repo = pygit2.Repository(repo_path)
    
    # Prepare CSV output
    output = [['date', 'count']]
    
    # Process every 5th commit in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for commit in iterate_commits(repo):
            future = executor.submit(process_commit, repo, commit)
            futures.append(future)
        
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result:
                output.append(result)
    
    # Sort output by date, excluding the header row
    output_sorted = sorted(output[1:], key=lambda x: datetime.strptime(x[0], '%Y-%m-%d'))
    output_sorted.insert(0, output[0])  # Insert the header row back
    
    # Write the sorted output to CSV
    with open('powerbank-count.csv', 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerows(output_sorted)
    
    print("\nProcessing complete.")

if __name__ == "__main__":
    main()