mirror of
https://github.com/captn3m0/plugo.git
synced 2024-09-19 21:57:16 +00:00
86 lines
2.9 KiB
Python
86 lines
2.9 KiB
Python
|
import pygit2
|
||
|
import csv
|
||
|
import concurrent.futures
|
||
|
import msgspec
|
||
|
from datetime import datetime
|
||
|
|
||
|
# Define the structure of the JSON data using msgspec
class PowerbankData(msgspec.Struct):
    """One record of the JSON array stored at _data/plugo.json.

    Only the aggregated field is declared here; the field name must
    match the JSON key exactly, since msgspec maps keys by name.
    """

    # Powerbanks currently available for this record.
    totalAvailablePowerbanks: int
|
# Function to calculate totalAvailablePowerbanks from JSON data
def calculate_total_powerbanks(file_content):
    """Sum `totalAvailablePowerbanks` over every record in a JSON payload.

    Args:
        file_content: JSON text (str or bytes) holding a list of
            PowerbankData-shaped objects.

    Returns:
        The integer total, or None when the payload cannot be decoded
        as ``list[PowerbankData]``.
    """
    try:
        records = msgspec.json.decode(file_content, type=list[PowerbankData])
    except msgspec.DecodeError:
        # Malformed or schema-mismatched payloads are expected in old
        # commits; signal "no data" rather than raising.
        return None

    total = 0
    for record in records:
        total += record.totalAvailablePowerbanks
    return total
|
# Function to process a single commit and return results
def process_commit(repo, commit):
    """Extract a (date, total) pair for one commit, or None.

    Looks up ``_data/plugo.json`` in the commit's tree, decodes it, and
    pairs the powerbank total with the commit date formatted YYYY-MM-DD.

    Returns:
        (date_str, total) on success; None when the file is absent,
        undecodable, or a tree/object lookup raises KeyError.
    """
    path = '_data/plugo.json'
    try:
        tree = commit.tree
        if path not in tree:
            return None
        raw = repo[tree[path].id].data.decode('utf-8')
        total = calculate_total_powerbanks(raw)
        if total is None:
            return None
        when = datetime.fromtimestamp(commit.commit_time).strftime('%Y-%m-%d')
        return when, total
    except KeyError:
        # Tolerate missing objects in odd commits; treat as "no data".
        return None
# Generator function to iterate through every `step`-th commit
def iterate_commits(repo, step=5):
    """Yield every ``step``-th commit from HEAD, printing progress.

    The history is walked twice: once to count commits so a progress
    denominator can be shown, then once to yield the sampled commits.

    Args:
        repo: an open pygit2.Repository.
        step: sampling interval; defaults to 5 (the original behavior).

    Yields:
        pygit2 commit objects, one out of every ``step`` in
        topological order.
    """
    commit_count = sum(
        1 for _ in repo.walk(repo.head.target, pygit2.GIT_SORT_TOPOLOGICAL)
    )
    # Bug fix: for repos with fewer than `step` commits the integer
    # division is 0 and the progress calculation below would raise
    # ZeroDivisionError. Clamp the denominator to at least 1.
    total_to_process = max(commit_count // step, 1)

    processed_count = 0
    yielded_count = 0
    for commit in repo.walk(repo.head.target, pygit2.GIT_SORT_TOPOLOGICAL):
        processed_count += 1
        if processed_count % step != 0:
            continue

        yielded_count += 1
        progress = (yielded_count / total_to_process) * 100
        # \r keeps the progress on a single console line.
        print(f'Processing commit {yielded_count}/{total_to_process} ({progress:.2f}%)', end='\r')

        yield commit
# Main function to process commits using concurrent.futures
def main():
    """Sample every 5th commit of the repo and write output.csv.

    Each sampled commit contributes one (date, totalAvailablePowerbanks)
    row; rows are written in chronological order under a header row.
    """
    # Assuming the script is run from the root of the repository.
    repo = pygit2.Repository('.')

    header = ['date', 'totalAvailablePowerbanks']
    rows = []

    # Fan the per-commit work out to a thread pool; completion order is
    # irrelevant because rows are sorted by date afterwards.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_commit, repo, commit)
            for commit in iterate_commits(repo)
        ]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result:
                rows.append(result)

    # Chronological order, header excluded from the sort.
    rows.sort(key=lambda row: datetime.strptime(row[0], '%Y-%m-%d'))

    with open('output.csv', 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows([header] + rows)

    print("\nProcessing complete.")
# Entry point: run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()