From e436f17b0304ed66be2a68b17f7718d416926e8b Mon Sep 17 00:00:00 2001 From: Puppuccino <97849040+CrazyDubya@users.noreply.github.com> Date: Thu, 12 Jun 2025 00:28:29 -0400 Subject: [PATCH 1/4] Add dictionary comprehension support --- docs/implementation_gaps_report.md | 6 +-- src/analyzer/code_analyzer.py | 12 ++++- src/analyzer/code_analyzer_fixed.py | 11 ++++ src/converter/code_generator.py | 25 ++++++--- src/converter/code_generator_fixed.py | 20 +++++++- tests/test_code_analyzer_fixed.py | 37 ++++++++++++++ tests/test_conversion_fixed.py | 74 ++++++++++++++++++++++++++- 7 files changed, 171 insertions(+), 14 deletions(-) diff --git a/docs/implementation_gaps_report.md b/docs/implementation_gaps_report.md index 51cca3d..e0e5211 100644 --- a/docs/implementation_gaps_report.md +++ b/docs/implementation_gaps_report.md @@ -66,8 +66,8 @@ The current implementation can only handle a limited subset of Python features: | **Method overriding** | ✅ **Complete** | **Proper virtual methods and polymorphism** | | Error handling | ⚠️ Minimal | Simple try/except structure | | Context managers | ⚠️ Minimal | Basic structure without resource management | -| List comprehensions | ❌ Missing | Not implemented | -| Dictionary operations | ⚠️ Partial | Simple creation and access | +| List comprehensions | ✅ **Implemented** | Vector creation with push_back | +| Dictionary operations | ⚠️ Partial | Basic creation plus comprehension support | | String operations | ✅ **Improved** | Advanced f-string support, concatenation | | Regular expressions | ❌ Missing | Not implemented | | File I/O | ❌ Missing | Not implemented | @@ -143,7 +143,7 @@ The current implementation can only handle a limited subset of Python features: **Prioritized Based on Recent Progress:** 1. Support for Python standard library mapping to C++ equivalents -2. Add support for list comprehensions and dictionary comprehensions +2. ~~Add support for list comprehensions~~ ✓ **List comprehensions implemented**; add dictionary comprehensions 3. Implement regular expression pattern translation 4. Add code generation for file I/O operations 5. Develop optimized C++ code patterns for common Python idioms diff --git a/src/analyzer/code_analyzer.py b/src/analyzer/code_analyzer.py index 38d7b45..47478e9 100644 --- a/src/analyzer/code_analyzer.py +++ b/src/analyzer/code_analyzer.py @@ -141,9 +141,17 @@ def _infer_variable_type(self, node: ast.Assign) -> None: elt_types.append(f'std::tuple<{", ".join(nested_types)}>') else: elt_types.append(self._infer_expression_type(elt)) - self.type_info[node.targets[0].id] = f'std::tuple<{", ".join(elt_types)}>' + if isinstance(node.targets[0], ast.Name): + self.type_info[node.targets[0].id] = ( + f"std::tuple<{', '.join(elt_types)}>" + ) + elif isinstance(node.targets[0], ast.Tuple): + for tgt, typ in zip(node.targets[0].elts, elt_types): + if isinstance(tgt, ast.Name): + self.type_info[tgt.id] = typ else: - self.type_info[node.targets[0].id] = 'std::tuple<>' + if isinstance(node.targets[0], ast.Name): + self.type_info[node.targets[0].id] = 'std::tuple<>' elif isinstance(node.value, ast.Call): # Try to infer type from function call if isinstance(node.value.func, ast.Name): diff --git a/src/analyzer/code_analyzer_fixed.py b/src/analyzer/code_analyzer_fixed.py index 1a49fdf..2f2931c 100644 --- a/src/analyzer/code_analyzer_fixed.py +++ b/src/analyzer/code_analyzer_fixed.py @@ -301,6 +301,10 @@ def _infer_variable_type(self, node: ast.Assign) -> None: self._store_type_for_target(node.targets[0], f'std::map<{key_type}, {value_type}>') else: self._store_type_for_target(node.targets[0], 'std::map') # Default + elif isinstance(node.value, ast.DictComp): + key_type = self._infer_expression_type(node.value.key) + value_type = self._infer_expression_type(node.value.value) + self._store_type_for_target(node.targets[0], f'std::map<{key_type}, {value_type}>') elif isinstance(node.value, ast.Set): # Try to infer set element type if node.value.elts: @@ -474,6 +478,13 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt_type = self._infer_expression_type(node.elts[0]) return f'std::vector<{elt_type}>' return 'std::vector' + elif isinstance(node, ast.ListComp): + elt_type = self._infer_expression_type(node.elt) + return f'std::vector<{elt_type}>' + elif isinstance(node, ast.DictComp): + key_type = self._infer_expression_type(node.key) + value_type = self._infer_expression_type(node.value) + return f'std::map<{key_type}, {value_type}>' elif isinstance(node, ast.Dict): if node.keys and node.values: key_type = self._infer_expression_type(node.keys[0]) diff --git a/src/converter/code_generator.py b/src/converter/code_generator.py index d59c5f1..e11450d 100644 --- a/src/converter/code_generator.py +++ b/src/converter/code_generator.py @@ -86,7 +86,7 @@ def generate_code(self, analysis_result: AnalysisResult, output_dir: Path) -> No with open(output_dir / "setup.py", "w") as f: f.write('\n'.join(setup_content)) - def _generate_header(self, analysis_result: Dict) -> str: + def _generate_header(self, analysis_result: AnalysisResult) -> str: """Generate C++ header file.""" header = """#pragma once @@ -103,22 +103,29 @@ def _generate_header(self, analysis_result: Dict) -> str: namespace pytocpp { """ + type_info = analysis_result.type_info if hasattr( + analysis_result, "type_info" + ) else analysis_result.get("functions", {}) + # Add function declarations - for func_name, func_info in analysis_result.get('functions', {}).items(): + for func_name, func_info in type_info.items(): if func_name.startswith('calculate_'): # Get return type - return_type = func_info.get('return_type', 'int') + return_type = ( + func_info.get('return_type', 'int') if isinstance(func_info, dict) else 'int' + ) # Get parameter types params = [] - for param_name, param_type in func_info.get('params', {}).items(): - params.append(f"{param_type} {param_name}") + if isinstance(func_info, dict): + for param_name, param_type in func_info.get('params', {}).items(): + params.append(f"{param_type} {param_name}") # Add function declaration header += f" {return_type} {func_name}({', '.join(params)});\n\n" header += "} // namespace pytocpp\n" return header - def _generate_implementation(self, analysis_result: Dict) -> str: + def _generate_implementation(self, analysis_result: AnalysisResult) -> str: """Generate C++ implementation file.""" impl = """#include "generated.hpp" #include @@ -132,8 +139,12 @@ def _generate_implementation(self, analysis_result: Dict) -> str: namespace pytocpp { """ + type_info = analysis_result.type_info if hasattr( + analysis_result, "type_info" + ) else analysis_result.get("functions", {}) + # Add function implementations - for func_name, func_info in analysis_result.get('functions', {}).items(): + for func_name, func_info in type_info.items(): if func_name.startswith('calculate_'): impl += self._generate_function_impl(func_name, func_info) diff --git a/src/converter/code_generator_fixed.py b/src/converter/code_generator_fixed.py index a5de3be..09cd3da 100644 --- a/src/converter/code_generator_fixed.py +++ b/src/converter/code_generator_fixed.py @@ -1083,8 +1083,26 @@ def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> st # Try to infer element type from the first element if available if node.elts: element_type = self._infer_cpp_type(node.elts[0], local_vars) - + return f"std::vector<{element_type}>{{{', '.join(elements)}}}" + elif isinstance(node, ast.ListComp): + elt_type = self._infer_cpp_type(node.elt, local_vars) + target = self._translate_expression(node.generators[0].target, local_vars) + iterable = self._translate_expression(node.generators[0].iter, local_vars) + expr = self._translate_expression(node.elt, local_vars) + return ( + f"([&]() {{ std::vector<{elt_type}> result; for (const auto& {target} : {iterable}) {{ result.push_back({expr}); }} return result; }})()" + ) + elif isinstance(node, ast.DictComp): + key_type = self._infer_cpp_type(node.key, local_vars) + value_type = self._infer_cpp_type(node.value, local_vars) + target = self._translate_expression(node.generators[0].target, local_vars) + iterable = self._translate_expression(node.generators[0].iter, local_vars) + key_expr = self._translate_expression(node.key, local_vars) + value_expr = self._translate_expression(node.value, local_vars) + return ( + f"([&]() {{ std::map<{key_type}, {value_type}> result; for (const auto& {target} : {iterable}) {{ result[{key_expr}] = {value_expr}; }} return result; }})()" + ) elif isinstance(node, ast.Dict): # Handle dict literals if not node.keys: diff --git a/tests/test_code_analyzer_fixed.py b/tests/test_code_analyzer_fixed.py index 81a5a45..672e367 100644 --- a/tests/test_code_analyzer_fixed.py +++ b/tests/test_code_analyzer_fixed.py @@ -310,6 +310,43 @@ def test_inference_expressions(self): values=[ast.Constant(value=True), ast.Constant(value=False)] ) assert analyzer._infer_expression_type(bool_op) == 'bool' + + list_comp = ast.ListComp( + elt=ast.BinOp( + left=ast.Name(id='x', ctx=ast.Load()), + op=ast.Mult(), + right=ast.Constant(value=2), + ), + generators=[ + ast.comprehension( + target=ast.Name(id='x', ctx=ast.Store()), + iter=ast.Name(id='nums', ctx=ast.Load()), + ifs=[], + is_async=0, + ) + ], + ) + analyzer.type_info['nums'] = 'std::vector' + assert analyzer._infer_expression_type(list_comp) == 'std::vector' + + dict_comp = ast.DictComp( + key=ast.Name(id='x', ctx=ast.Load()), + value=ast.BinOp( + left=ast.Name(id='x', ctx=ast.Load()), + op=ast.Mult(), + right=ast.Constant(value=2), + ), + generators=[ + ast.comprehension( + target=ast.Name(id='x', ctx=ast.Store()), + iter=ast.Name(id='nums', ctx=ast.Load()), + ifs=[], + is_async=0, + ) + ], + ) + analyzer.type_info['nums'] = 'std::vector' + assert analyzer._infer_expression_type(dict_comp) == 'std::map' def test_type_annotation_handling(self): """Test handling of Python type annotations.""" diff --git a/tests/test_conversion_fixed.py b/tests/test_conversion_fixed.py index 14788da..4c8b0a3 100644 --- a/tests/test_conversion_fixed.py +++ b/tests/test_conversion_fixed.py @@ -56,4 +56,76 @@ def test_fibonacci_conversion(tmp_path): # Verify CMake content cmake_content = (output_dir / "CMakeLists.txt").read_text() assert "cmake_minimum_required" in cmake_content - assert "project(pytocpp_generated)" in cmake_content \ No newline at end of file + assert "project(pytocpp_generated)" in cmake_content + + +def test_list_comprehension_conversion(tmp_path): + analyzer = CodeAnalyzer() + rule_manager = RuleManager() + rule_manager.register_rule(VariableDeclarationRule()) + rule_manager.register_rule(FunctionDefinitionRule()) + rule_manager.register_rule(ClassDefinitionRule()) + generator = CodeGenerator(rule_manager) + + test_file = tmp_path / "list_comp.py" + test_file.write_text( + """ +from typing import List + +def double_nums(nums: List[int]) -> List[int]: + return [x * 2 for x in nums] +""" + ) + + analysis_result = analyzer.analyze_file(test_file) + rule_manager.set_context( + { + "type_info": analysis_result.type_info, + "performance_bottlenecks": analysis_result.performance_bottlenecks, + "memory_usage": analysis_result.memory_usage, + "hot_paths": analysis_result.hot_paths, + } + ) + + output_dir = tmp_path / "generated" + generator.generate_code(analysis_result, output_dir) + + impl_content = (output_dir / "generated.cpp").read_text() + assert "std::vector" in impl_content + assert "result.push_back" in impl_content + + +def test_dict_comprehension_conversion(tmp_path): + analyzer = CodeAnalyzer() + rule_manager = RuleManager() + rule_manager.register_rule(VariableDeclarationRule()) + rule_manager.register_rule(FunctionDefinitionRule()) + rule_manager.register_rule(ClassDefinitionRule()) + generator = CodeGenerator(rule_manager) + + test_file = tmp_path / "dict_comp.py" + test_file.write_text( + """ +from typing import List, Dict + +def map_double(nums: List[int]) -> Dict[int, int]: + return {x: x * 2 for x in nums} +""" + ) + + analysis_result = analyzer.analyze_file(test_file) + rule_manager.set_context( + { + "type_info": analysis_result.type_info, + "performance_bottlenecks": analysis_result.performance_bottlenecks, + "memory_usage": analysis_result.memory_usage, + "hot_paths": analysis_result.hot_paths, + } + ) + + output_dir = tmp_path / "generated" + generator.generate_code(analysis_result, output_dir) + + impl_content = (output_dir / "generated.cpp").read_text() + assert "std::map" in impl_content + assert "result[" in impl_content From 8d7369fe2de46b97a766d68c8bcccab886abce0b Mon Sep 17 00:00:00 2001 From: Puppuccino <97849040+CrazyDubya@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:12:40 -0400 Subject: [PATCH 2/4] Update src/converter/code_generator_fixed.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/converter/code_generator_fixed.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/converter/code_generator_fixed.py b/src/converter/code_generator_fixed.py index 09cd3da..53089c3 100644 --- a/src/converter/code_generator_fixed.py +++ b/src/converter/code_generator_fixed.py @@ -1090,9 +1090,7 @@ def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> st target = self._translate_expression(node.generators[0].target, local_vars) iterable = self._translate_expression(node.generators[0].iter, local_vars) expr = self._translate_expression(node.elt, local_vars) - return ( - f"([&]() {{ std::vector<{elt_type}> result; for (const auto& {target} : {iterable}) {{ result.push_back({expr}); }} return result; }})()" - ) + return self._generate_list_comprehension(elt_type, target, iterable, expr) elif isinstance(node, ast.DictComp): key_type = self._infer_cpp_type(node.key, local_vars) value_type = self._infer_cpp_type(node.value, local_vars) From 3124fb7f53dea761fbd7c8ca8cab8567bed05221 Mon Sep 17 00:00:00 2001 From: Puppuccino <97849040+CrazyDubya@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:12:54 -0400 Subject: [PATCH 3/4] Update src/converter/code_generator_fixed.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/converter/code_generator_fixed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/converter/code_generator_fixed.py b/src/converter/code_generator_fixed.py index 53089c3..5c0a533 100644 --- a/src/converter/code_generator_fixed.py +++ b/src/converter/code_generator_fixed.py @@ -1098,8 +1098,8 @@ def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> st iterable = self._translate_expression(node.generators[0].iter, local_vars) key_expr = self._translate_expression(node.key, local_vars) value_expr = self._translate_expression(node.value, local_vars) - return ( - f"([&]() {{ std::map<{key_type}, {value_type}> result; for (const auto& {target} : {iterable}) {{ result[{key_expr}] = {value_expr}; }} return result; }})()" + return self._generate_dict_comprehension( + key_type, value_type, target, iterable, key_expr, value_expr ) elif isinstance(node, ast.Dict): # Handle dict literals From 7b019f8965e2e271b27f81812d11efd0678bb3ed Mon Sep 17 00:00:00 2001 From: Puppuccino <97849040+CrazyDubya@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:13:06 -0400 Subject: [PATCH 4/4] Update src/converter/code_generator_fixed.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/converter/code_generator_fixed.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/converter/code_generator_fixed.py b/src/converter/code_generator_fixed.py index 5c0a533..dbd3c76 100644 --- a/src/converter/code_generator_fixed.py +++ b/src/converter/code_generator_fixed.py @@ -1087,6 +1087,8 @@ def _translate_expression(self, node: ast.AST, local_vars: Dict[str, str]) -> st return f"std::vector<{element_type}>{{{', '.join(elements)}}}" elif isinstance(node, ast.ListComp): elt_type = self._infer_cpp_type(node.elt, local_vars) + if not node.generators: + raise ValueError("List comprehension node has no generators. Malformed AST.") target = self._translate_expression(node.generators[0].target, local_vars) iterable = self._translate_expression(node.generators[0].iter, local_vars) expr = self._translate_expression(node.elt, local_vars)