plugo/count.py
2024-07-11 14:24:30 +05:30

89 lines
3.0 KiB
Python

import pygit2
import csv
import concurrent.futures
import msgspec
from datetime import datetime
class PowerbankData(msgspec.Struct):
    """Schema for one record in _data/plugo.json, decoded via msgspec."""

    # Number of power banks currently available for this record.
    totalAvailablePowerbanks: int
def calculate_total_powerbanks(file_content):
    """Decode a JSON payload and sum every record's totalAvailablePowerbanks.

    Returns the integer total, or None when the payload cannot be decoded
    into the expected list-of-PowerbankData shape.
    """
    # Only the decode can raise DecodeError; keep the try body minimal.
    try:
        records = msgspec.json.decode(file_content, type=list[PowerbankData])
    except msgspec.DecodeError:
        return None
    total = 0
    for record in records:
        total += record.totalAvailablePowerbanks
    return total
# Function to process a single commit and return results
def process_commit(repo, commit):
    """Extract (date, total powerbank count) from one commit.

    Looks up '_data/plugo.json' in the commit's tree, decodes it as UTF-8,
    and sums the counts. Returns a ('YYYY-MM-DD', int) tuple, or None when
    the file is absent, not valid UTF-8, or not decodable JSON.
    """
    try:
        tree = commit.tree
        if '_data/plugo.json' in tree:
            blob = repo[tree['_data/plugo.json'].id]
            try:
                # Narrowed from a blanket `except Exception as e` (which
                # bound an unused name and masked programming errors):
                # the decode is the only raiser here, since
                # calculate_total_powerbanks signals failure by
                # returning None rather than raising.
                file_content = blob.data.decode('utf-8')
            except UnicodeDecodeError:
                return None
            total_powerbanks = calculate_total_powerbanks(file_content)
            if total_powerbanks is not None:
                # commit_time is a Unix epoch; naive local time is used
                # here, matching the rest of the script.
                commit_date = datetime.fromtimestamp(commit.commit_time).strftime('%Y-%m-%d')
                return commit_date, total_powerbanks
    except KeyError:
        # Defensive: a missing tree-entry lookup raises KeyError.
        pass
    return None
# Generator function to iterate through every 5th commit
def iterate_commits(repo):
    """Yield every 5th commit reachable from HEAD, printing progress.

    Walks the history ONCE and keeps the commits in a list; the original
    walked the repository twice (a full extra traversal just to obtain
    the count). Progress output is unchanged.
    """
    commits = list(repo.walk(repo.head.target, pygit2.GIT_SORT_TOPOLOGICAL))
    commit_count = len(commits)
    sampled_total = commit_count // 5  # how many commits will be yielded
    skip_count = 0
    for processed_count, commit in enumerate(commits, start=1):
        if processed_count % 5 != 0:
            continue
        skip_count += 1
        # sampled_total >= 1 whenever this line runs: the first eligible
        # commit is the 5th, so commit_count >= 5 here.
        progress = (skip_count / sampled_total) * 100
        print(f'Processing commit {skip_count}/{sampled_total} ({progress:.2f}%)', end='\r')
        yield commit
# Main function to process commits using concurrent.futures
def main():
    """Sample the repo's history, tally powerbank counts, and write a CSV."""
    # The script is expected to run from the repository root.
    repo = pygit2.Repository('.')

    header = ['date', 'count']
    rows = []

    # Fan the per-commit work out to a thread pool; iterate_commits
    # already restricts the walk to every 5th commit.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_commit, repo, commit)
            for commit in iterate_commits(repo)
        ]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result:
                rows.append(result)

    # Chronological order for the CSV body; the header stays on top.
    rows.sort(key=lambda row: datetime.strptime(row[0], '%Y-%m-%d'))

    with open('powerbank-count.csv', 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows([header] + rows)
    print("\nProcessing complete.")
# Entry point guard: run only when executed as a script, not on import.
if __name__ == "__main__":
    main()