Source code for classifications.disaggregate_bonsai

import argparse
import os

import classifications



[docs]
def main(argv=None):
    """Run the "Disaggregate Bonsai Codes" command-line tool.

    Parses the command line, selects the ``datapackage`` object of the
    requested ``classifications`` sub-module, calls its
    ``disaggregate_bonsai`` method with the YAML file path, and writes every
    returned table that differs from the corresponding attribute on the
    datapackage to ``<output_dir>/<name>_updated.csv``.

    Args:
        argv: Optional list of command-line argument strings (without the
            program name). When ``None`` (the default), argparse reads
            ``sys.argv[1:]``, which is the behavior of a plain ``main()``
            call.

    Raises:
        NotImplementedError: If the given category is not one of the
            supported Bonsai categories.
    """
    parser = argparse.ArgumentParser(description="Disaggregate Bonsai Codes")
    parser.add_argument(
        "category", type=str, help="Bonsai category (e.g. activitytype or flowobject)"
    )
    parser.add_argument(
        "yaml_file", type=str, help="Path to the YAML file with disaggregation data"
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output",
        help="Directory to save the updated DataFrames",
    )

    args = parser.parse_args(argv)

    # Each supported category name is also the name of the sub-module of
    # ``classifications`` that exposes the matching ``datapackage``.
    supported_categories = (
        "activitytype",
        "flowobject",
        "location",
        "uncertainty",
        "dataquality",
        "time",
    )
    # Validate before touching ``classifications`` so that an arbitrary
    # attribute name can never be looked up on the package.
    if args.category not in supported_categories:
        raise NotImplementedError(
            f"your choice '{args.category}' is not a Bonsai category"
        )
    processor = getattr(classifications, args.category).datapackage

    # Call the disaggregate_bonsai method with the path to the YAML file
    result = processor.disaggregate_bonsai(args.yaml_file)

    # Create the output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    # Save each DataFrame in the result as a CSV file only if it has changed
    for attr_name in result:
        original_df = getattr(processor, attr_name)
        updated_df = result[attr_name]

        # Unchanged tables are reported and skipped.
        if updated_df.equals(original_df):
            print(f"No changes detected for DataFrame '{attr_name}'. Skipping save.")
            continue

        output_path = os.path.join(args.output_dir, f"{attr_name}_updated.csv")
        updated_df.to_csv(output_path, index=False)
        print(f"Saved updated DataFrame '{attr_name}' to {output_path}")



# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()