| """ |
| Minimum Functionality Tests (MFT) for Skill Classification Model |
| |
| These tests verify that the model performs well on basic, straightforward examples |
| where the expected output is clear. The model should correctly predict skills for |
| simple, unambiguous cases. |
| |
| Based on Ribeiro et al. (2020) "Beyond Accuracy: Behavioral Testing of NLP models" |
| |
| Note: Expected labels will vary based on your actual label schema. |
| These tests use common programming/software engineering skill categories. |
| """ |
| import pytest |
| import numpy as np |
|
|
|
|
| @pytest.mark.mft |
| class TestMinimumFunctionality: |
| """Test suite for minimum functionality on basic examples.""" |
| |
| def test_simple_bug_fix(self, predict_with_labels): |
| """ |
| Test prediction on a simple bug fix description. |
| Should predict basic programming and error handling skills. |
| """ |
| text = "Fixed null pointer exception in user authentication" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| |
| assert len(predictions) > 0, "Should predict at least one skill for a bug fix" |
| |
| def test_database_work(self, predict_with_labels): |
| """ |
| Test prediction on database-related work. |
| Should predict database-related skills. |
| """ |
| text = "Implemented SQL query optimization for user table" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for database work" |
| |
| def test_api_development(self, predict_with_labels): |
| """ |
| Test prediction on API development work. |
| Should predict API/web service related skills. |
| """ |
| text = "Created REST API endpoint for retrieving user data" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for API development" |
| |
| def test_data_structure_implementation(self, predict_with_labels): |
| """ |
| Test prediction on data structure implementation. |
| Should predict data structure and algorithm skills. |
| """ |
| text = "Implemented binary search tree with insert and delete operations" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for data structure work" |
| |
| def test_testing_work(self, predict_with_labels): |
| """ |
| Test prediction on testing-related work. |
| Should predict testing skills. |
| """ |
| text = "Added unit tests for authentication module using JUnit" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for testing work" |
| |
| def test_frontend_work(self, predict_with_labels): |
| """ |
| Test prediction on frontend development work. |
| Should predict frontend/UI related skills. |
| """ |
| text = "Updated user interface with React components for login page" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for frontend work" |
| |
| def test_security_work(self, predict_with_labels): |
| """ |
| Test prediction on security-related work. |
| Should predict security skills. |
| """ |
| text = "Implemented OAuth2 authentication with password encryption" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for security work" |
| |
| def test_performance_optimization(self, predict_with_labels): |
| """ |
| Test prediction on performance optimization work. |
| Should predict performance/optimization skills. |
| """ |
| text = "Optimized algorithm to reduce time complexity from O(n²) to O(n log n)" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for performance work" |
| |
| def test_devops_deployment(self, predict_with_labels): |
| """ |
| Test prediction on DevOps/deployment work. |
| Should predict DevOps skills. |
| """ |
| text = "Configured Docker container and CI/CD pipeline for automated deployment" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for DevOps work" |
| |
| def test_error_handling(self, predict_with_labels): |
| """ |
| Test prediction on error handling work. |
| Should predict error handling skills. |
| """ |
| text = "Added try-catch blocks and proper exception handling for file operations" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for error handling work" |
| |
| def test_refactoring_work(self, predict_with_labels): |
| """ |
| Test prediction on code refactoring. |
| Should predict code quality/refactoring skills. |
| """ |
| text = "Refactored legacy code to improve maintainability and readability" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for refactoring work" |
| |
| def test_documentation_work(self, predict_with_labels): |
| """ |
| Test prediction on documentation work. |
| Should predict documentation skills. |
| """ |
| text = "Updated API documentation with examples and usage guidelines" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for '{text}':") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for documentation work" |
| |
| def test_empty_input(self, predict_with_labels): |
| """ |
| Test that model handles empty input gracefully. |
| """ |
| text = "" |
| predictions = predict_with_labels(text) |
| |
| |
| |
| assert isinstance(predictions, list), "Should return a list for empty input" |
| |
| def test_minimal_input(self, predict_with_labels): |
| """ |
| Test that model handles very short input. |
| """ |
| text = "bug" |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for minimal input '{text}':") |
| print(f" {predictions}") |
| |
| |
| assert isinstance(predictions, list), "Should return a list for minimal input" |
| |
| def test_multiple_skills_in_one_task(self, predict_with_labels): |
| """ |
| Test that model can predict multiple skills for complex tasks. |
| |
| A task involving multiple technologies should predict multiple relevant skills. |
| """ |
| text = ( |
| "Implemented user authentication API with JWT tokens, " |
| "PostgreSQL database integration, and Redis caching" |
| ) |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for multi-skill task:") |
| print(f" {predictions}") |
| |
| |
| assert len(predictions) >= 2, ( |
| f"Complex multi-technology task should predict multiple skills, " |
| f"got {len(predictions)}: {predictions}" |
| ) |
| |
| def test_common_github_issue_format(self, predict_with_labels): |
| """ |
| Test on realistic GitHub issue format. |
| """ |
| text = """ |
| ## Description |
| Fixed a bug where the login API was throwing 500 errors |
| |
| ## Changes |
| - Added null check in UserService |
| - Improved error handling |
| - Updated unit tests |
| """ |
| predictions = predict_with_labels(text) |
| |
| print(f"\nPredictions for GitHub-style issue:") |
| print(f" {predictions}") |
| |
| assert len(predictions) > 0, "Should predict skills for realistic issue format" |
| |
| def test_consistency_on_similar_inputs(self, predict_text): |
| """ |
| Test that similar inputs produce similar predictions. |
| """ |
| text1 = "Fixed authentication bug" |
| text2 = "Fixed authentication bug" |
| text3 = "Resolved authentication bug" |
| |
| pred1 = set(predict_text(text1)) |
| pred2 = set(predict_text(text2)) |
| pred3 = set(predict_text(text3)) |
| |
| |
| assert pred1 == pred2, "Identical inputs should produce identical predictions" |
| |
| |
| intersection = len(pred1 & pred3) |
| union = len(pred1 | pred3) |
| |
| if union > 0: |
| similarity = intersection / union |
| assert similarity >= 0.7, ( |
| f"Very similar inputs should produce similar predictions. " |
| f"Similarity: {similarity:.2f}" |
| ) |
|
|