#!/usr/bin/env python3
"""
Upload an icefall Conformer CTC model checkpoint to the Hugging Face Hub.
"""
import json
import logging
import shutil
import sys
from pathlib import Path
from typing import Any, Dict, Optional

import torch

# Hugging Face imports
try:
    from huggingface_hub import create_repo, upload_folder
except ImportError:
    print("Please install huggingface_hub: pip install huggingface_hub")
    sys.exit(1)


def create_model_card(model_info: Dict[str, Any]) -> str:
    """Create a model card for the Hugging Face model"""
    model_card = f"""---
language: en
license: apache-2.0
tags:
- speech
- audio
- automatic-speech-recognition
- icefall
- conformer
- ctc
library_name: icefall
datasets:
- librispeech_asr
metrics:
- wer
---
# {model_info['model_name']}
This is a Conformer CTC model trained with icefall on LibriSpeech dataset.
## Model Description
- **Architecture**: Conformer with CTC loss
- **Training Framework**: icefall
- **Dataset**: LibriSpeech ASR
- **Language**: English
- **Sample Rate**: 16kHz
## Model Details
- **Model Size**: {model_info.get('num_params', 'Unknown')} parameters
- **Feature Dimension**: {model_info.get('feature_dim', 80)}
- **Attention Dimension**: {model_info.get('attention_dim', 256)}
- **Number of Heads**: {model_info.get('nhead', 4)}
- **Subsampling Factor**: {model_info.get('subsampling_factor', 4)}
## Training Information
- **Best Valid Loss**: {model_info.get('best_valid_loss', 'Unknown')}
- **Training Epochs**: {model_info.get('epoch', 'Unknown')}
- **Optimizer**: Adam
- **Framework**: icefall + k2 + lhotse
## Usage
```python
# Load model with icefall
from icefall.checkpoint import load_checkpoint
from conformer import Conformer
import torch

# Model configuration
model = Conformer(
    num_features=80,
    nhead=4,
    d_model=256,
    num_classes=5000,  # Adjust based on your vocab size
    subsampling_factor=4,
    num_decoder_layers=0,
    vgg_frontend=False,
    use_feat_batchnorm=True,
)

# Load checkpoint
load_checkpoint("best-valid-loss.pt", model)
model.eval()
```
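
The model expects 80-dimensional log-Mel filterbank features computed from 16 kHz audio. One way to compute compatible features (a sketch using torchaudio; adjust the options to match the features used in training):

```python
import torchaudio

# Load a 16 kHz mono waveform and compute 80-dim log-Mel filterbank features
wave, sr = torchaudio.load("test.wav")
feats = torchaudio.compliance.kaldi.fbank(wave, num_mel_bins=80, sample_frequency=sr)
```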
## Citation
If you use this model, please cite:
```bibtex
@misc{{icefall2021,
title={{Icefall: A speech recognition toolkit with PyTorch}},
author={{The icefall development team}},
howpublished={{\\url{{https://github.com/k2-fsa/icefall}}}},
year={{2021}}
}}
```
## License
This model is released under the Apache 2.0 License.
"""
    return model_card


def extract_model_info(checkpoint_path: Path) -> Dict[str, Any]:
    """Extract model information from checkpoint"""
    try:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        model_info = {
            'model_name': 'icefall-conformer-ctc-librispeech',
            'checkpoint_path': str(checkpoint_path),
        }
        # Extract training metadata stored alongside the weights
        if 'epoch' in checkpoint:
            model_info['epoch'] = checkpoint['epoch']
        if 'best_valid_loss' in checkpoint:
            model_info['best_valid_loss'] = checkpoint['best_valid_loss']
        if 'model' in checkpoint:
            # Count parameters in the saved state_dict
            num_params = sum(p.numel() for p in checkpoint['model'].values())
            model_info['num_params'] = f"{num_params:,}"
        # Model architecture info (adjust these if your training config differs)
        model_info.update({
            'feature_dim': 80,
            'attention_dim': 256,
            'nhead': 4,
            'subsampling_factor': 4,
        })
        return model_info
    except Exception as e:
        logging.error(f"Error extracting model info: {e}")
        return {'model_name': 'icefall-conformer-ctc-librispeech'}
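
# For reference: an icefall-style checkpoint is a dict saved with torch.save,
# and the fields read above look roughly like (other keys may also be present):
#   {"model": <state_dict>, "epoch": 12, "best_valid_loss": 0.07, ...}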


def create_config_json(model_info: Dict[str, Any]) -> Dict[str, Any]:
    """Create a config.json file for the model"""
    config = {
        "architectures": ["Conformer"],
        "model_type": "conformer_ctc",
        "framework": "icefall",
        "feature_dim": model_info.get('feature_dim', 80),
        "attention_dim": model_info.get('attention_dim', 256),
        "nhead": model_info.get('nhead', 4),
        "subsampling_factor": model_info.get('subsampling_factor', 4),
        "num_decoder_layers": 0,
        "vgg_frontend": False,
        "use_feat_batchnorm": True,
        "sample_rate": 16000,
        "language": "en"
    }
    return config
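
# A downstream consumer could rebuild the model from the uploaded config.json.
# A minimal sketch (assumes icefall's recipe-local Conformer class is importable
# and that the vocabulary size is known separately, since it is not stored in
# config.json):
#
#   import json
#   from conformer import Conformer
#
#   with open("config.json") as f:
#       cfg = json.load(f)
#   model = Conformer(
#       num_features=cfg["feature_dim"],
#       d_model=cfg["attention_dim"],
#       nhead=cfg["nhead"],
#       subsampling_factor=cfg["subsampling_factor"],
#       num_classes=5000,  # adjust to your vocab size
#   )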


def upload_to_huggingface(
    checkpoint_path: Path,
    repo_name: str,
    token: Optional[str] = None,
    private: bool = False,
):
    """Upload icefall model to Hugging Face Hub"""
    # Create temporary directory for upload
    temp_dir = Path("./hf_upload_temp")
    temp_dir.mkdir(exist_ok=True)
    try:
        # Extract model information
        print("Extracting model information...")
        model_info = extract_model_info(checkpoint_path)

        # Copy model file
        print("Copying model file...")
        shutil.copy2(checkpoint_path, temp_dir / "best-valid-loss.pt")

        # Create model card
        print("Creating model card...")
        model_card = create_model_card(model_info)
        with open(temp_dir / "README.md", "w") as f:
            f.write(model_card)

        # Create config.json
        print("Creating config.json...")
        config = create_config_json(model_info)
        with open(temp_dir / "config.json", "w") as f:
            json.dump(config, f, indent=2)

        # Create additional files
        print("Creating additional files...")

        # Create an example inference script
        inference_example = '''#!/usr/bin/env python3
"""
Example inference script for the icefall Conformer CTC model
"""
import torch
from pathlib import Path


def load_model(model_path: str):
    """Load the icefall Conformer model"""
    # You'll need to have icefall installed; then uncomment the lines below.
    # from conformer import Conformer
    # from icefall.checkpoint import load_checkpoint
    # model = Conformer(
    #     num_features=80,
    #     nhead=4,
    #     d_model=256,
    #     num_classes=5000,  # Adjust based on vocab
    #     subsampling_factor=4,
    #     num_decoder_layers=0,
    #     vgg_frontend=False,
    #     use_feat_batchnorm=True,
    # )
    # load_checkpoint(model_path, model)
    # model.eval()
    # return model
    pass


if __name__ == "__main__":
    model = load_model("best-valid-loss.pt")
    print("Model loaded successfully!")
'''
        with open(temp_dir / "inference_example.py", "w") as f:
            f.write(inference_example)

        # Create requirements.txt
        requirements = """torch>=1.9.0
torchaudio>=0.9.0
k2
lhotse
icefall
"""
        with open(temp_dir / "requirements.txt", "w") as f:
            f.write(requirements)

        # Create the repository (create_repo and upload_folder accept the token
        # directly, so no HfApi client object is needed)
        print(f"Creating repository: {repo_name}")
        try:
            create_repo(
                repo_id=repo_name,
                token=token,
                private=private,
                repo_type="model",
            )
            print(f"✅ Repository {repo_name} created successfully!")
        except Exception as e:
            if "already exists" in str(e).lower():
                print(f"Repository {repo_name} already exists, continuing...")
            else:
                raise

        # Upload files
        print("Uploading files to Hugging Face Hub...")
        upload_folder(
            folder_path=temp_dir,
            repo_id=repo_name,
            token=token,
            commit_message="Upload icefall Conformer CTC model",
        )
        print(f"✅ Model uploaded successfully to: https://huggingface.co/{repo_name}")
    except Exception as e:
        print(f"❌ Error uploading model: {e}")
        raise
    finally:
        # Clean up
        print("Cleaning up temporary files...")
        if temp_dir.exists():
            shutil.rmtree(temp_dir)


def main():
    """Main function"""
    # Configuration: path to the checkpoint to upload
    checkpoint_path = Path(
        "/home/hdd2/jenny/ASRToolkit/icefall/egs/librispeech/ASR/conformer_ctc/exp-cleanASR/models/best-valid-loss.pt"
    )

    # Get user input
    repo_name = input("Enter repository name (e.g., username/model-name): ").strip()
    if not repo_name:
        print("Repository name is required!")
        return

    token = input("Enter your Hugging Face token (or press Enter to use saved token): ").strip()
    if not token:
        token = None  # Fall back to the token saved by `huggingface-cli login`

    private = input("Make repository private? (y/N): ").strip().lower() == 'y'

    # Check that the checkpoint exists
    if not checkpoint_path.exists():
        print(f"❌ Checkpoint not found: {checkpoint_path}")
        return

    print(f"📁 Checkpoint path: {checkpoint_path}")
    print(f"🔗 Repository: {repo_name}")
    print(f"🔒 Private: {private}")

    confirm = input("\nProceed with upload? (y/N): ").strip().lower()
    if confirm != 'y':
        print("Upload cancelled.")
        return

    # Upload model
    upload_to_huggingface(
        checkpoint_path=checkpoint_path,
        repo_name=repo_name,
        token=token,
        private=private,
    )


if __name__ == "__main__":
    main()
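
# Example interactive session (values are illustrative):
#   $ python upload_to_huggingface.py
#   Enter repository name (e.g., username/model-name): username/icefall-conformer-ctc-librispeech
#   Enter your Hugging Face token (or press Enter to use saved token):
#   Make repository private? (y/N): n
#   Proceed with upload? (y/N): y
#   ✅ Model uploaded successfully to: https://huggingface.co/username/icefall-conformer-ctc-librispeech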